1 files changed, 4541 insertions, 1835 deletions
diff --git a/src/third-party/rapidyaml/ryml_all.hpp b/src/third-party/rapidyaml/ryml_all.hpp
index 27ed7a6..03734a1 100644
--- a/src/third-party/rapidyaml/ryml_all.hpp
+++ b/src/third-party/rapidyaml/ryml_all.hpp
@@ -1,4 +1,6 @@
 #ifndef _RYML_SINGLE_HEADER_AMALGAMATED_HPP_
+#define _RYML_SINGLE_HEADER_AMALGAMATED_HPP_
+
 //
 // Rapid YAML - a library to parse and emit YAML, and do it fast.
 //
@@ -83,6 +85,8 @@
 //********************************************************************************
 
 #ifndef _C4CORE_SINGLE_HEADER_AMALGAMATED_HPP_
+#define _C4CORE_SINGLE_HEADER_AMALGAMATED_HPP_
+
 //
 // c4core - C++ utilities
 //
@@ -339,10 +343,10 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 #   else
 #       error "Unknown Apple platform"
 #   endif
-#elif defined(__linux)
+#elif defined(__linux__) || defined(__linux)
 #   define C4_UNIX
 #   define C4_LINUX
-#elif defined(__unix)
+#elif defined(__unix__) || defined(__unix)
 #   define C4_UNIX
 #elif defined(__arm__) || defined(__aarch64__)
 #   define C4_ARM
@@ -352,7 +356,7 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 #   error "unknown platform"
 #endif
 
-#if defined(__posix) || defined(__unix__) || defined(__linux)
+#if defined(__posix) || defined(C4_UNIX) || defined(C4_LINUX)
 #   define C4_POSIX
 #endif
 
@@ -363,7 +367,7 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 // (end https://github.com/biojppm/c4core/src/c4/platform.hpp)
 
 
-#if 0
+
 //********************************************************************************
 //--------------------------------------------------------------------------------
 // src/c4/cpu.hpp
@@ -416,21 +420,25 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
    #else
        #define C4_CPU_ARM
        #define C4_WORDSIZE 4
-       #if defined(__ARM_ARCH_8__) || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 8)
+       #if defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__)  \
+        || (defined(__ARCH_ARM) && __ARCH_ARM >= 8)
+        || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 8)  \
            #define C4_CPU_ARMV8
        #elif defined(__ARM_ARCH_7__) || defined(_ARM_ARCH_7)    \
         || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) \
         || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) \
+        || defined(__ARM_ARCH_7EM__) \
         || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 7) \
         || (defined(_M_ARM) && _M_ARM >= 7)
            #define C4_CPU_ARMV7
        #elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
         || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) \
         || defined(__ARM_ARCH_6K__)  || defined(__ARM_ARCH_6ZK__) \
-        || defined(__ARM_ARCH_6M__) \
+        || defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_6KZ__) \
         || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 6)
            #define C4_CPU_ARMV6
        #elif defined(__ARM_ARCH_5TEJ__) \
+        || defined(__ARM_ARCH_5TE__) \
         || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 5)
            #define C4_CPU_ARMV5
        #elif defined(__ARM_ARCH_4T__) \
@@ -441,7 +449,11 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
        #endif
    #endif
    #if defined(__ARMEL__) || defined(__LITTLE_ENDIAN__) || defined(__AARCH64EL__) \
-       || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+       || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) \
+       || defined(_MSC_VER) // winarm64 does not provide any of the above macros,
+                            // but advises little-endianess:
+                            // https://docs.microsoft.com/en-us/cpp/build/overview-of-arm-abi-conventions?view=msvc-170
+                            // So if it is visual studio compiling, we'll assume little endian.
        #define C4_BYTE_ORDER _C4EL
    #elif defined(__ARMEB__) || defined(__BIG_ENDIAN__) || defined(__AARCH64EB__) \
        || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
@@ -505,7 +517,7 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 
 
 // (end https://github.com/biojppm/c4core/src/c4/cpu.hpp)
-#endif
+
 
 
 //********************************************************************************
@@ -628,9 +640,9 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 // amalgamate: removed include of
 // https://github.com/biojppm/c4core/src/c4/gcc-4.8.hpp
 //#               include "c4/gcc-4.8.hpp"
-#if !defined(C4_GCC-4_8_HPP_) && !defined(_C4_GCC-4_8_HPP_)
+#if !defined(C4_GCC_4_8_HPP_) && !defined(_C4_GCC_4_8_HPP_)
 #error "amalgamate: file c4/gcc-4.8.hpp must have been included at this point"
-#endif /* C4_GCC-4_8_HPP_ */
+#endif /* C4_GCC_4_8_HPP_ */
 
 #           else
 // we do not support GCC < 4.8:
@@ -952,6 +964,8 @@ typedef long double max_align_t ;
 #   define C4_RESTRICT_FN __attribute__((restrict))
 #   define C4_NO_INLINE __attribute__((noinline))
 #   define C4_ALWAYS_INLINE inline __attribute__((always_inline))
+#   define C4_CONST __attribute__((const))
+#   define C4_PURE __attribute__((pure))
 /** force inlining of every callee function */
 #   define C4_FLATTEN __atribute__((flatten))
 /** mark a function as hot, ie as having a visible impact in CPU time
@@ -973,6 +987,8 @@ typedef long double max_align_t ;
 #   define C4_NO_INLINE __declspec(noinline)
 #   define C4_ALWAYS_INLINE inline __forceinline
 /** these are not available in VS AFAIK */
+#   define C4_CONST
+#   define C4_PURE
 #   define C4_FLATTEN
 #   define C4_HOT         /** @todo */
 #   define C4_COLD        /** @todo */
@@ -1618,9 +1634,9 @@ using index_sequence_for = make_index_sequence<sizeof...(_Tp)>;
 // amalgamate: removed include of
 // https://github.com/biojppm/c4core/src/c4/cpu.hpp
 //#include "c4/cpu.hpp"
-//#if !defined(C4_CPU_HPP_) && !defined(_C4_CPU_HPP_)
-//#error "amalgamate: file c4/cpu.hpp must have been included at this point"
-//#endif /* C4_CPU_HPP_ */
+#if !defined(C4_CPU_HPP_) && !defined(_C4_CPU_HPP_)
+#error "amalgamate: file c4/cpu.hpp must have been included at this point"
+#endif /* C4_CPU_HPP_ */
 
 // amalgamate: removed include of
 // https://github.com/biojppm/c4core/src/c4/compiler.hpp
@@ -1750,8 +1766,8 @@ __inline__ static void trap_instruction(void)
 	/* Known problem:
 	 * Same problem and workaround as Thumb mode */
 }
-#elif defined(__aarch64__) && defined(__APPLE__)
-	#define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_BULTIN_TRAP
+#elif defined(__aarch64__) && defined(__APPLE__) && defined(__clang__)
+	#define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_BULTIN_DEBUGTRAP
 #elif defined(__aarch64__)
 	#define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION
 __attribute__((always_inline))
@@ -1911,7 +1927,7 @@ struct fail_type__ {};
 #endif // _DOXYGEN_
 
 
-#ifdef NDEBUG
+#if defined(NDEBUG) || defined(C4_NO_DEBUG_BREAK)
 #   define C4_DEBUG_BREAK()
 #else
 #   ifdef __clang__
@@ -2312,10 +2328,46 @@ struct srcloc
 #error "amalgamate: file c4/config.hpp must have been included at this point"
 #endif /* C4_CONFIG_HPP_ */
 
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/error.hpp
+//#include "c4/error.hpp"
+#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_)
+#error "amalgamate: file c4/error.hpp must have been included at this point"
+#endif /* C4_ERROR_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/compiler.hpp
+//#include "c4/compiler.hpp"
+#if !defined(C4_COMPILER_HPP_) && !defined(_C4_COMPILER_HPP_)
+#error "amalgamate: file c4/compiler.hpp must have been included at this point"
+#endif /* C4_COMPILER_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/cpu.hpp
+//#include "c4/cpu.hpp"
+#if !defined(C4_CPU_HPP_) && !defined(_C4_CPU_HPP_)
+#error "amalgamate: file c4/cpu.hpp must have been included at this point"
+#endif /* C4_CPU_HPP_ */
 
+#ifdef C4_MSVC
+#include <intrin.h>
+#endif
 //included above:
 //#include <string.h>
 
+#if (defined(__GNUC__) && __GNUC__ >= 10) || defined(__has_builtin)
+#define _C4_USE_LSB_INTRINSIC(which) __has_builtin(which)
+#define _C4_USE_MSB_INTRINSIC(which) __has_builtin(which)
+#elif defined(C4_MSVC)
+#define _C4_USE_LSB_INTRINSIC(which) true
+#define _C4_USE_MSB_INTRINSIC(which) true
+#else
+// let's try our luck
+#define _C4_USE_LSB_INTRINSIC(which) true
+#define _C4_USE_MSB_INTRINSIC(which) true
+#endif
+
+
 /** @file memory_util.hpp Some memory utilities. */
 
 namespace c4 {
@@ -2338,7 +2390,11 @@ C4_ALWAYS_INLINE void mem_zero(T* mem)
     memset(mem, 0, sizeof(T));
 }
 
-bool mem_overlaps(void const* a, void const* b, size_t sza, size_t szb);
+C4_ALWAYS_INLINE C4_CONST bool mem_overlaps(void const* a, void const* b, size_t sza, size_t szb)
+{
+    // thanks @timwynants
+    return (((const char*)b + szb) > a && b < ((const char*)a+sza));
+}
 
 void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num_times);
 
@@ -2348,9 +2404,9 @@ void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num
 //-----------------------------------------------------------------------------
 
 template<class T>
-bool is_aligned(T *ptr, size_t alignment=alignof(T))
+C4_ALWAYS_INLINE C4_CONST bool is_aligned(T *ptr, uintptr_t alignment=alignof(T))
 {
-    return (uintptr_t(ptr) & (alignment - 1)) == 0u;
+    return (uintptr_t(ptr) & (alignment - uintptr_t(1))) == uintptr_t(0);
 }
 
 
@@ -2359,38 +2415,165 @@ bool is_aligned(T *ptr, size_t alignment=alignof(T))
 //-----------------------------------------------------------------------------
 // least significant bit
 
-/** least significant bit; this function is constexpr-14 because of the local
- * variable */
+/** @name msb Compute the least significant bit
+ * @note the input value must be nonzero
+ * @note the input type must be unsigned
+ */
+/** @{ */
+
+// https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightLinear
+#define _c4_lsb_fallback                                                \
+    unsigned c = 0;                                                     \
+    v = (v ^ (v - 1)) >> 1; /* Set v's trailing 0s to 1s and zero rest */ \
+    for(; v; ++c)                                                       \
+        v >>= 1;                                                        \
+    return (unsigned) c
+
+// u8
 template<class I>
-C4_CONSTEXPR14 I lsb(I v)
-{
-    if(!v) return 0;
-    I b = 0;
-    while((v & I(1)) == I(0))
-    {
-        v >>= 1;
-        ++b;
-    }
-    return b;
+C4_CONSTEXPR14
+auto lsb(I v) noexcept
+    -> typename std::enable_if<sizeof(I) == 1u, unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_unsigned<I>::value);
+    C4_ASSERT(v != 0);
+    #if _C4_USE_LSB_INTRINSIC(__builtin_ctz)
+        // upcast to use the intrinsic, it's cheaper.
+        #ifdef C4_MSVC
+            #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM)
+                unsigned long bit;
+                _BitScanForward(&bit, (unsigned long)v);
+                return bit;
+            #else
+                _c4_lsb_fallback;
+            #endif
+        #else
+            return (unsigned)__builtin_ctz((unsigned)v);
+        #endif
+    #else
+        _c4_lsb_fallback;
+    #endif
 }
 
-namespace detail {
+// u16
+template<class I>
+C4_CONSTEXPR14
+auto lsb(I v) noexcept
+    -> typename std::enable_if<sizeof(I) == 2u, unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_unsigned<I>::value);
+    C4_ASSERT(v != 0);
+    #if _C4_USE_LSB_INTRINSIC(__builtin_ctz)
+        // upcast to use the intrinsic, it's cheaper.
+        // Then remember that the upcast makes it to 31bits
+        #ifdef C4_MSVC
+            #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM)
+                unsigned long bit;
+                _BitScanForward(&bit, (unsigned long)v);
+                return bit;
+            #else
+                _c4_lsb_fallback;
+            #endif
+        #else
+            return (unsigned)__builtin_ctz((unsigned)v);
+        #endif
+    #else
+        _c4_lsb_fallback;
+    #endif
+}
+
+// u32
+template<class I>
+C4_CONSTEXPR14
+auto lsb(I v) noexcept
+    -> typename std::enable_if<sizeof(I) == 4u, unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_unsigned<I>::value);
+    C4_ASSERT(v != 0);
+    #if _C4_USE_LSB_INTRINSIC(__builtin_ctz)
+        #ifdef C4_MSVC
+            #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM)
+                unsigned long bit;
+                _BitScanForward(&bit, v);
+                return bit;
+            #else
+                _c4_lsb_fallback;
+            #endif
+        #else
+            return (unsigned)__builtin_ctz((unsigned)v);
+        #endif
+    #else
+        _c4_lsb_fallback;
+    #endif
+}
 
-template<class I, I val, I num_bits, bool finished>
-struct _lsb11;
+// u64 in 64bits
+template<class I>
+C4_CONSTEXPR14
+auto lsb(I v) noexcept
+    -> typename std::enable_if<sizeof(I) == 8u && sizeof(unsigned long) == 8u, unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_unsigned<I>::value);
+    C4_ASSERT(v != 0);
+    #if _C4_USE_LSB_INTRINSIC(__builtin_ctzl)
+        #if defined(C4_MSVC)
+            #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM)
+                unsigned long bit;
+                _BitScanForward64(&bit, v);
+                return bit;
+            #else
+                _c4_lsb_fallback;
+            #endif
+        #else
+            return (unsigned)__builtin_ctzl((unsigned long)v);
+        #endif
+    #else
+        _c4_lsb_fallback;
+    #endif
+}
 
-template<class I, I val, I num_bits>
-struct _lsb11< I, val, num_bits, false>
+// u64 in 32bits
+template<class I>
+C4_CONSTEXPR14
+auto lsb(I v) noexcept
+    -> typename std::enable_if<sizeof(I) == 8u && sizeof(unsigned long long) == 8u && sizeof(unsigned long) != sizeof(unsigned long long), unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_unsigned<I>::value);
+    C4_ASSERT(v != 0);
+    #if _C4_USE_LSB_INTRINSIC(__builtin_ctzll)
+        #if defined(C4_MSVC)
+            #if !defined(C4_CPU_X86) && !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM)
+                unsigned long bit;
+                _BitScanForward64(&bit, v);
+                return bit;
+            #else
+                _c4_lsb_fallback;
+            #endif
+        #else
+            return (unsigned)__builtin_ctzll((unsigned long long)v);
+        #endif
+    #else
+        _c4_lsb_fallback;
+    #endif
+}
+
+#undef _c4_lsb_fallback
+
+/** @} */
+
+
+namespace detail {
+template<class I, I val, unsigned num_bits, bool finished> struct _lsb11;
+template<class I, I val, unsigned num_bits>
+struct _lsb11<I, val, num_bits, false>
 {
-    enum : I { num = _lsb11<I, (val>>1), num_bits+I(1), (((val>>1)&I(1))!=I(0))>::num };
+    enum : unsigned { num = _lsb11<I, (val>>1), num_bits+I(1), (((val>>1)&I(1))!=I(0))>::num };
 };
-
-template<class I, I val, I num_bits>
+template<class I, I val, unsigned num_bits>
 struct _lsb11<I, val, num_bits, true>
 {
-    enum : I { num = num_bits };
+    enum : unsigned { num = num_bits };
 };
-
 } // namespace detail
 
 
@@ -2402,7 +2585,7 @@ template<class I, I number>
 struct lsb11
 {
     static_assert(number != 0, "lsb: number must be nonzero");
-    enum : I { value = detail::_lsb11<I, number, 0, ((number&I(1))!=I(0))>::num};
+    enum : unsigned { value = detail::_lsb11<I, number, 0, ((number&I(1))!=I(0))>::num};
 };
 
 
@@ -2411,51 +2594,199 @@ struct lsb11
 //-----------------------------------------------------------------------------
 // most significant bit
 
-/** most significant bit; this function is constexpr-14 because of the local
- * variable
- * @todo implement faster version
- * @see https://stackoverflow.com/questions/2589096/find-most-significant-bit-left-most-that-is-set-in-a-bit-array
+
+/** @name msb Compute the most significant bit
+ * @note the input value must be nonzero
+ * @note the input type must be unsigned
  */
+/** @{ */
+
+
+#define _c4_msb8_fallback                       \
+    unsigned n = 0;                             \
+    if(v & I(0xf0)) v >>= 4, n |= I(4);         \
+    if(v & I(0x0c)) v >>= 2, n |= I(2);         \
+    if(v & I(0x02)) v >>= 1, n |= I(1);         \
+    return n
+
+#define _c4_msb16_fallback                      \
+    unsigned n = 0;                             \
+    if(v & I(0xff00)) v >>= 8, n |= I(8);       \
+    if(v & I(0x00f0)) v >>= 4, n |= I(4);       \
+    if(v & I(0x000c)) v >>= 2, n |= I(2);       \
+    if(v & I(0x0002)) v >>= 1, n |= I(1);       \
+    return n
+
+#define _c4_msb32_fallback                      \
+    unsigned n = 0;                             \
+    if(v & I(0xffff0000)) v >>= 16, n |= 16;    \
+    if(v & I(0x0000ff00)) v >>= 8, n |= 8;      \
+    if(v & I(0x000000f0)) v >>= 4, n |= 4;      \
+    if(v & I(0x0000000c)) v >>= 2, n |= 2;      \
+    if(v & I(0x00000002)) v >>= 1, n |= 1;      \
+    return n
+
+#define _c4_msb64_fallback                              \
+    unsigned n = 0;                                     \
+    if(v & I(0xffffffff00000000)) v >>= 32, n |= I(32); \
+    if(v & I(0x00000000ffff0000)) v >>= 16, n |= I(16); \
+    if(v & I(0x000000000000ff00)) v >>= 8, n |= I(8);   \
+    if(v & I(0x00000000000000f0)) v >>= 4, n |= I(4);   \
+    if(v & I(0x000000000000000c)) v >>= 2, n |= I(2);   \
+    if(v & I(0x0000000000000002)) v >>= 1, n |= I(1);   \
+    return n
+
+
+// u8
 template<class I>
-C4_CONSTEXPR14 I msb(I v)
-{
-    // TODO:
-    //
-    //int n;
-    //if(input_num & uint64_t(0xffffffff00000000)) input_num >>= 32, n |= 32;
-    //if(input_num & uint64_t(        0xffff0000)) input_num >>= 16, n |= 16;
-    //if(input_num & uint64_t(            0xff00)) input_num >>=  8, n |=  8;
-    //if(input_num & uint64_t(              0xf0)) input_num >>=  4, n |=  4;
-    //if(input_num & uint64_t(               0xc)) input_num >>=  2, n |=  2;
-    //if(input_num & uint64_t(               0x2)) input_num >>=  1, n |=  1;
-    if(!v) return static_cast<I>(-1);
-    I b = 0;
-    while(v != 0)
-    {
-        v >>= 1;
-        ++b;
-    }
-    return b-1;
+C4_CONSTEXPR14
+auto msb(I v) noexcept
+    -> typename std::enable_if<sizeof(I) == 1u, unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_unsigned<I>::value);
+    C4_ASSERT(v != 0);
+    #if _C4_USE_MSB_INTRINSIC(__builtin_clz)
+        // upcast to use the intrinsic, it's cheaper.
+        // Then remember that the upcast makes it to 31bits
+        #ifdef C4_MSVC
+            #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM)
+                unsigned long bit;
+                _BitScanReverse(&bit, (unsigned long)v);
+                return bit;
+            #else
+                _c4_msb8_fallback;
+            #endif
+        #else
+            return 31u - (unsigned)__builtin_clz((unsigned)v);
+        #endif
+    #else
+        _c4_msb8_fallback;
+    #endif
 }
 
-namespace detail {
+// u16
+template<class I>
+C4_CONSTEXPR14
+auto msb(I v) noexcept
+    -> typename std::enable_if<sizeof(I) == 2u, unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_unsigned<I>::value);
+    C4_ASSERT(v != 0);
+    #if _C4_USE_MSB_INTRINSIC(__builtin_clz)
+        // upcast to use the intrinsic, it's cheaper.
+        // Then remember that the upcast makes it to 31bits
+        #ifdef C4_MSVC
+            #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM)
+                unsigned long bit;
+                _BitScanReverse(&bit, (unsigned long)v);
+                return bit;
+            #else
+                _c4_msb16_fallback;
+            #endif
+        #else
+            return 31u - (unsigned)__builtin_clz((unsigned)v);
+        #endif
+    #else
+        _c4_msb16_fallback;
+    #endif
+}
 
-template<class I, I val, I num_bits, bool finished>
-struct _msb11;
+// u32
+template<class I>
+C4_CONSTEXPR14
+auto msb(I v) noexcept
+    -> typename std::enable_if<sizeof(I) == 4u, unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_unsigned<I>::value);
+    C4_ASSERT(v != 0);
+    #if _C4_USE_MSB_INTRINSIC(__builtin_clz)
+        #ifdef C4_MSVC
+            #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM)
+                unsigned long bit;
+                _BitScanReverse(&bit, v);
+                return bit;
+            #else
+                _c4_msb32_fallback;
+            #endif
+        #else
+            return 31u - (unsigned)__builtin_clz((unsigned)v);
+        #endif
+    #else
+        _c4_msb32_fallback;
+    #endif
+}
+
+// u64 in 64bits
+template<class I>
+C4_CONSTEXPR14
+auto msb(I v) noexcept
+    -> typename std::enable_if<sizeof(I) == 8u && sizeof(unsigned long) == 8u, unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_unsigned<I>::value);
+    C4_ASSERT(v != 0);
+    #if _C4_USE_MSB_INTRINSIC(__builtin_clzl)
+        #ifdef C4_MSVC
+            #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM)
+                unsigned long bit;
+                _BitScanReverse64(&bit, v);
+                return bit;
+            #else
+                _c4_msb64_fallback;
+            #endif
+        #else
+            return 63u - (unsigned)__builtin_clzl((unsigned long)v);
+        #endif
+    #else
+        _c4_msb64_fallback;
+    #endif
+}
 
+// u64 in 32bits
+template<class I>
+C4_CONSTEXPR14
+auto msb(I v) noexcept
+    -> typename std::enable_if<sizeof(I) == 8u && sizeof(unsigned long long) == 8u && sizeof(unsigned long) != sizeof(unsigned long long), unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_unsigned<I>::value);
+    C4_ASSERT(v != 0);
+    #if _C4_USE_MSB_INTRINSIC(__builtin_clzll)
+        #ifdef C4_MSVC
+            #if !defined(C4_CPU_X86) && !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM)
+                unsigned long bit;
+                _BitScanReverse64(&bit, v);
+                return bit;
+            #else
+                _c4_msb64_fallback;
+            #endif
+        #else
+            return 63u - (unsigned)__builtin_clzll((unsigned long long)v);
+        #endif
+    #else
+        _c4_msb64_fallback;
+    #endif
+}
+
+#undef _c4_msb8_fallback
+#undef _c4_msb16_fallback
+#undef _c4_msb32_fallback
+#undef _c4_msb64_fallback
+
+/** @} */
+
+
+namespace detail {
+template<class I, I val, I num_bits, bool finished> struct _msb11;
 template<class I, I val, I num_bits>
 struct _msb11< I, val, num_bits, false>
 {
-    enum : I { num = _msb11<I, (val>>1), num_bits+I(1), ((val>>1)==I(0))>::num };
+    enum : unsigned { num = _msb11<I, (val>>1), num_bits+I(1), ((val>>1)==I(0))>::num };
 };
-
 template<class I, I val, I num_bits>
 struct _msb11<I, val, num_bits, true>
 {
     static_assert(val == 0, "bad implementation");
-    enum : I { num = num_bits-1 };
+    enum : unsigned { num = (unsigned)(num_bits-1) };
 };
-
 } // namespace detail
 
 
@@ -2466,10 +2797,126 @@ struct _msb11<I, val, num_bits, true>
 template<class I, I number>
 struct msb11
 {
-    enum : I { value = detail::_msb11<I, number, 0, (number==I(0))>::num };
+    enum : unsigned { value = detail::_msb11<I, number, 0, (number==I(0))>::num };
 };
 
 
+
+#undef _C4_USE_LSB_INTRINSIC
+#undef _C4_USE_MSB_INTRINSIC
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+// there is an implicit conversion below; it happens when E or B are
+// narrower than int, and thus any operation will upcast the result to
+// int, and then downcast to assign
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wconversion")
+
+/** integer power; this function is constexpr-14 because of the local
+ * variables */
+template<class B, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(B base, E exponent) noexcept -> typename std::enable_if<std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    if(exponent >= 0)
+    {
+        for(E e = 0; e < exponent; ++e)
+            r *= base;
+    }
+    else
+    {
+        exponent *= E(-1);
+        for(E e = 0; e < exponent; ++e)
+            r /= base;
+    }
+    return r;
+}
+
+/** integer power; this function is constexpr-14 because of the local
+ * variables */
+template<class B, B base, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if<std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    if(exponent >= 0)
+    {
+        for(E e = 0; e < exponent; ++e)
+            r *= base;
+    }
+    else
+    {
+        exponent *= E(-1);
+        for(E e = 0; e < exponent; ++e)
+            r /= base;
+    }
+    return r;
+}
+
+/** integer power; this function is constexpr-14 because of the local
+ * variables */
+template<class B, class Base, Base base, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if<std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    B bbase = B(base);
+    if(exponent >= 0)
+    {
+        for(E e = 0; e < exponent; ++e)
+            r *= bbase;
+    }
+    else
+    {
+        exponent *= E(-1);
+        for(E e = 0; e < exponent; ++e)
+            r /= bbase;
+    }
+    return r;
+}
+
+/** integer power; this function is constexpr-14 because of the local
+ * variables */
+template<class B, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(B base, E exponent) noexcept -> typename std::enable_if<!std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    for(E e = 0; e < exponent; ++e)
+        r *= base;
+    return r;
+}
+
+/** integer power; this function is constexpr-14 because of the local
+ * variables */
+template<class B, B base, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if<!std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    for(E e = 0; e < exponent; ++e)
+        r *= base;
+    return r;
+}
+/** integer power; this function is constexpr-14 because of the local
+ * variables */
+template<class B, class Base, Base base, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if<!std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    B bbase = B(base);
+    for(E e = 0; e < exponent; ++e)
+        r *= bbase;
+    return r;
+}
+
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+
+
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
@@ -2480,10 +2927,9 @@ template<class I>
 C4_CONSTEXPR14 I contiguous_mask(I first_bit, I last_bit)
 {
     I r = 0;
-    constexpr const I o = 1;
     for(I i = first_bit; i < last_bit; ++i)
     {
-        r |= (o << i);
+        r |= (I(1) << i);
     }
     return r;
 }
@@ -4852,28 +5298,28 @@ public:
     /** @name Standard accessor methods */
     /** @{ */
 
-    bool   has_str()   const { return ! empty() && str[0] != C(0); }
-    bool   empty()     const { return (len == 0 || str == nullptr); }
-    bool   not_empty() const { return (len != 0 && str != nullptr); }
-    size_t size()      const { return len; }
+    C4_ALWAYS_INLINE C4_PURE bool   has_str()   const noexcept { return ! empty() && str[0] != C(0); }
+    C4_ALWAYS_INLINE C4_PURE bool   empty()     const noexcept { return (len == 0 || str == nullptr); }
+    C4_ALWAYS_INLINE C4_PURE bool   not_empty() const noexcept { return (len != 0 && str != nullptr); }
+    C4_ALWAYS_INLINE C4_PURE size_t size()      const noexcept { return len; }
 
-    iterator begin() { return str; }
-    iterator end  () { return str + len; }
+    C4_ALWAYS_INLINE C4_PURE iterator begin() noexcept { return str; }
+    C4_ALWAYS_INLINE C4_PURE iterator end  () noexcept { return str + len; }
 
-    const_iterator begin() const { return str; }
-    const_iterator end  () const { return str + len; }
+    C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { return str; }
+    C4_ALWAYS_INLINE C4_PURE const_iterator end  () const noexcept { return str + len; }
 
-    C      * data()       { return str; }
-    C const* data() const { return str; }
+    C4_ALWAYS_INLINE C4_PURE C      * data()       noexcept { return str; }
+    C4_ALWAYS_INLINE C4_PURE C const* data() const noexcept { return str; }
 
-    inline C      & operator[] (size_t i)       { C4_ASSERT(i >= 0 && i < len); return str[i]; }
-    inline C const& operator[] (size_t i) const { C4_ASSERT(i >= 0 && i < len); return str[i]; }
+    C4_ALWAYS_INLINE C4_PURE C      & operator[] (size_t i)       noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
+    C4_ALWAYS_INLINE C4_PURE C const& operator[] (size_t i) const noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
 
-    inline C      & front()       { C4_ASSERT(len > 0 && str != nullptr); return *str; }
-    inline C const& front() const { C4_ASSERT(len > 0 && str != nullptr); return *str; }
+    C4_ALWAYS_INLINE C4_PURE C      & front()       noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
+    C4_ALWAYS_INLINE C4_PURE C const& front() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
 
-    inline C      & back()       { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
-    inline C const& back() const { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
+    C4_ALWAYS_INLINE C4_PURE C      & back()       noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
+    C4_ALWAYS_INLINE C4_PURE C const& back() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
 
     /** @} */
 
@@ -4882,28 +5328,35 @@ public:
     /** @name Comparison methods */
     /** @{ */
 
-    int compare(C const c) const
+    C4_PURE int compare(C const c) const noexcept
     {
         C4_XASSERT((str != nullptr) || len == 0);
-        if( ! len)
+        if(C4_LIKELY(str != nullptr && len > 0))
+            return (*str != c) ? *str - c : (static_cast<int>(len) - 1);
+        else
             return -1;
-        if(*str == c)
-            return static_cast<int>(len - 1);
-        return *str - c;
     }
 
-    int compare(const char *that, size_t sz) const
+    C4_PURE int compare(const char *C4_RESTRICT that, size_t sz) const noexcept
     {
         C4_XASSERT(that || sz  == 0);
         C4_XASSERT(str  || len == 0);
         if(C4_LIKELY(str && that))
         {
-            int ret = strncmp(str, that, len < sz ? len : sz);
-            if(ret == 0 && len != sz)
-                ret = len < sz ? -1 : 1;
-            return ret;
+            {
+                const size_t min = len < sz ? len : sz;
+                for(size_t i = 0; i < min; ++i)
+                    if(str[i] != that[i])
+                        return str[i] < that[i] ? -1 : 1;
+            }
+            if(len < sz)
+                return -1;
+            else if(len == sz)
+                return 0;
+            else
+                return 1;
         }
-        if((!str && !that) || (len == sz))
+        else if(len == sz)
         {
             C4_XASSERT(len == 0 && sz == 0);
             return 0;
@@ -4911,31 +5364,31 @@ public:
         return len < sz ? -1 : 1;
     }
 
-    C4_ALWAYS_INLINE int compare(ro_substr const that) const { return this->compare(that.str, that.len); }
+    C4_ALWAYS_INLINE C4_PURE int compare(ro_substr const that) const noexcept { return this->compare(that.str, that.len); }
 
-    C4_ALWAYS_INLINE bool operator== (std::nullptr_t) const { return str == nullptr || len == 0; }
-    C4_ALWAYS_INLINE bool operator!= (std::nullptr_t) const { return str != nullptr || len == 0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return str == nullptr; }
+    C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return str != nullptr; }
 
-    C4_ALWAYS_INLINE bool operator== (C const c) const { return this->compare(c) == 0; }
-    C4_ALWAYS_INLINE bool operator!= (C const c) const { return this->compare(c) != 0; }
-    C4_ALWAYS_INLINE bool operator<  (C const c) const { return this->compare(c) <  0; }
-    C4_ALWAYS_INLINE bool operator>  (C const c) const { return this->compare(c) >  0; }
-    C4_ALWAYS_INLINE bool operator<= (C const c) const { return this->compare(c) <= 0; }
-    C4_ALWAYS_INLINE bool operator>= (C const c) const { return this->compare(c) >= 0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator== (C const c) const noexcept { return this->compare(c) == 0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator!= (C const c) const noexcept { return this->compare(c) != 0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator<  (C const c) const noexcept { return this->compare(c) <  0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator>  (C const c) const noexcept { return this->compare(c) >  0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator<= (C const c) const noexcept { return this->compare(c) <= 0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator>= (C const c) const noexcept { return this->compare(c) >= 0; }
 
-    template<class U> C4_ALWAYS_INLINE bool operator== (basic_substring<U> const that) const { return this->compare(that) == 0; }
-    template<class U> C4_ALWAYS_INLINE bool operator!= (basic_substring<U> const that) const { return this->compare(that) != 0; }
-    template<class U> C4_ALWAYS_INLINE bool operator<  (basic_substring<U> const that) const { return this->compare(that) <  0; }
-    template<class U> C4_ALWAYS_INLINE bool operator>  (basic_substring<U> const that) const { return this->compare(that) >  0; }
-    template<class U> C4_ALWAYS_INLINE bool operator<= (basic_substring<U> const that) const { return this->compare(that) <= 0; }
-    template<class U> C4_ALWAYS_INLINE bool operator>= (basic_substring<U> const that) const { return this->compare(that) >= 0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator== (basic_substring<U> const that) const noexcept { return this->compare(that) == 0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator!= (basic_substring<U> const that) const noexcept { return this->compare(that) != 0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator<  (basic_substring<U> const that) const noexcept { return this->compare(that) <  0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator>  (basic_substring<U> const that) const noexcept { return this->compare(that) >  0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator<= (basic_substring<U> const that) const noexcept { return this->compare(that) <= 0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator>= (basic_substring<U> const that) const noexcept { return this->compare(that) >= 0; }
 
-    template<size_t N> C4_ALWAYS_INLINE bool operator== (const char (&that)[N]) const { return this->compare(that, N-1) == 0; }
-    template<size_t N> C4_ALWAYS_INLINE bool operator!= (const char (&that)[N]) const { return this->compare(that, N-1) != 0; }
-    template<size_t N> C4_ALWAYS_INLINE bool operator<  (const char (&that)[N]) const { return this->compare(that, N-1) <  0; }
-    template<size_t N> C4_ALWAYS_INLINE bool operator>  (const char (&that)[N]) const { return this->compare(that, N-1) >  0; }
-    template<size_t N> C4_ALWAYS_INLINE bool operator<= (const char (&that)[N]) const { return this->compare(that, N-1) <= 0; }
-    template<size_t N> C4_ALWAYS_INLINE bool operator>= (const char (&that)[N]) const { return this->compare(that, N-1) >= 0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator== (const char (&that)[N]) const noexcept { return this->compare(that, N-1) == 0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator!= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) != 0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator<  (const char (&that)[N]) const noexcept { return this->compare(that, N-1) <  0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator>  (const char (&that)[N]) const noexcept { return this->compare(that, N-1) >  0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator<= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) <= 0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator>= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) >= 0; }
 
     /** @} */
 
@@ -4945,39 +5398,38 @@ public:
     /** @{ */
 
     /** true if *this is a substring of that (ie, from the same buffer) */
-    inline bool is_sub(ro_substr const that) const
+    C4_ALWAYS_INLINE C4_PURE bool is_sub(ro_substr const that) const noexcept
     {
         return that.is_super(*this);
     }
 
     /** true if that is a substring of *this (ie, from the same buffer) */
-    inline bool is_super(ro_substr const that) const
+    C4_ALWAYS_INLINE C4_PURE bool is_super(ro_substr const that) const noexcept
     {
-        if(C4_UNLIKELY(len == 0))
-        {
+        if(C4_LIKELY(len > 0))
+            return that.str >= str && that.str+that.len <= str+len;
+        else
             return that.len == 0 && that.str == str && str != nullptr;
-        }
-        return that.begin() >= begin() && that.end() <= end();
     }
 
     /** true if there is overlap of at least one element between that and *this */
-    inline bool overlaps(ro_substr const that) const
+    C4_ALWAYS_INLINE C4_PURE bool overlaps(ro_substr const that) const noexcept
     {
         // thanks @timwynants
-        return (that.end() > begin() && that.begin() < end());
+        return that.str+that.len > str && that.str < str+len;
     }
 
 public:
 
     /** return [first,len[ */
-    basic_substring sub(size_t first) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first) const noexcept
     {
         C4_ASSERT(first >= 0 && first <= len);
         return basic_substring(str + first, len - first);
     }
 
     /** return [first,first+num[. If num==npos, return [first,len[ */
-    basic_substring sub(size_t first, size_t num) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first, size_t num) const noexcept
     {
         C4_ASSERT(first >= 0 && first <= len);
         C4_ASSERT((num >= 0 && num <= len) || (num == npos));
@@ -4987,7 +5439,7 @@ public:
     }
 
     /** return [first,last[. If last==npos, return [first,len[ */
-    basic_substring range(size_t first, size_t last=npos) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring range(size_t first, size_t last=npos) const noexcept
     {
         C4_ASSERT(first >= 0 && first <= len);
         last = last != npos ? last : len;
@@ -4996,24 +5448,26 @@ public:
         return basic_substring(str + first, last - first);
     }
 
-    /** return [0,num[*/
-    basic_substring first(size_t num) const
+    /** return the first @p num elements: [0,num[*/
+    C4_ALWAYS_INLINE C4_PURE basic_substring first(size_t num) const noexcept
     {
-        return sub(0, num);
+        C4_ASSERT(num <= len || num == npos);
+        return basic_substring(str, num != npos ? num : len);
     }
 
-    /** return [len-num,len[*/
-    basic_substring last(size_t num) const
+    /** return the last @num elements: [len-num,len[*/
+    C4_ALWAYS_INLINE C4_PURE basic_substring last(size_t num) const noexcept
     {
-        if(num == npos)
-            return *this;
-        return sub(len - num);
+        C4_ASSERT(num <= len || num == npos);
+        return num != npos ?
+            basic_substring(str + len - num, num) :
+            *this;
     }
 
     /** offset from the ends: return [left,len-right[ ; ie, trim a
         number of characters from the left and right. This is
         equivalent to python's negative list indices. */
-    basic_substring offs(size_t left, size_t right) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring offs(size_t left, size_t right) const noexcept
     {
         C4_ASSERT(left  >= 0 && left  <= len);
         C4_ASSERT(right >= 0 && right <= len);
@@ -5021,27 +5475,47 @@ public:
         return basic_substring(str + left, len - right - left);
     }
 
-    /** return [0, pos+include_pos[ */
-    basic_substring left_of(size_t pos, bool include_pos=false) const
+    /** return [0, pos[ . Same as .first(pos), but provided for compatibility with .right_of() */
+    C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos) const noexcept
     {
-        if(pos == npos)
-            return *this;
-        return first(pos + include_pos);
+        C4_ASSERT(pos <= len || pos == npos);
+        return (pos != npos) ?
+            basic_substring(str, pos) :
+            *this;
+    }
+
+    /** return [0, pos+include_pos[ . Same as .first(pos+1), but provided for compatibility with .right_of() */
+    C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos, bool include_pos) const noexcept
+    {
+        C4_ASSERT(pos <= len || pos == npos);
+        return (pos != npos) ?
+            basic_substring(str, pos+include_pos) :
+            *this;
+    }
+
+    /** return [pos+1, len[ */
+    C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos) const noexcept
+    {
+        C4_ASSERT(pos <= len || pos == npos);
+        return (pos != npos) ?
+            basic_substring(str + (pos + 1), len - (pos + 1)) :
+            basic_substring(str + len, size_t(0));
     }
 
     /** return [pos+!include_pos, len[ */
-    basic_substring right_of(size_t pos, bool include_pos=false) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos, bool include_pos) const noexcept
     {
-        if(pos == npos)
-            return sub(len, 0);
-        return sub(pos + !include_pos);
+        C4_ASSERT(pos <= len || pos == npos);
+        return (pos != npos) ?
+            basic_substring(str + (pos + !include_pos), len - (pos + !include_pos)) :
+            basic_substring(str + len, size_t(0));
     }
 
 public:
 
     /** given @p subs a substring of the current string, get the
      * portion of the current string to the left of it */
-    basic_substring left_of(ro_substr const subs) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring left_of(ro_substr const subs) const noexcept
     {
         C4_ASSERT(is_super(subs) || subs.empty());
         auto ssb = subs.begin();
@@ -5055,7 +5529,7 @@ public:
 
     /** given @p subs a substring of the current string, get the
      * portion of the current string to the right of it */
-    basic_substring right_of(ro_substr const subs) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring right_of(ro_substr const subs) const noexcept
     {
         C4_ASSERT(is_super(subs) || subs.empty());
         auto sse = subs.end();
@@ -5717,55 +6191,64 @@ public:
     basic_substring _first_integral_span(size_t skip_start) const
     {
         C4_ASSERT(!empty());
-        if(skip_start == len) {
+        if(skip_start == len)
             return first(0);
-        }
         C4_ASSERT(skip_start < len);
-        if(first_of_any("0x", "0X")) // hexadecimal
+        if(len >= skip_start + 3)
         {
-            skip_start += 2;
-            if(len == skip_start)
-                return first(0);
-            for(size_t i = skip_start; i < len; ++i)
+            if(str[skip_start] != '0')
             {
-                if( ! _is_hex_char(str[i]))
-                    return _is_delim_char(str[i]) ? first(i) : first(0);
-            }
-        }
-        else if(first_of_any("0o", "0O")) // octal
-        {
-            skip_start += 2;
-            if(len == skip_start)
-                return first(0);
-            for(size_t i = skip_start; i < len; ++i)
-            {
-                char c = str[i];
-                if(c < '0' || c > '7')
-                    return _is_delim_char(str[i]) ? first(i) : first(0);
+                for(size_t i = skip_start; i < len; ++i)
+                {
+                    char c = str[i];
+                    if(c < '0' || c > '9')
+                        return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
+                }
             }
-        }
-        else if(first_of_any("0b", "0B")) // binary
-        {
-            skip_start += 2;
-            if(len == skip_start)
-                return first(0);
-            for(size_t i = skip_start; i < len; ++i)
+            else
             {
-                char c = str[i];
-                if(c != '0' && c != '1')
-                    return _is_delim_char(c) ? first(i) : first(0);
+                char next = str[skip_start + 1];
+                if(next == 'x' || next == 'X')
+                {
+                    skip_start += 2;
+                    for(size_t i = skip_start; i < len; ++i)
+                    {
+                        const char c = str[i];
+                        if( ! _is_hex_char(c))
+                            return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
+                    }
+                    return *this;
+                }
+                else if(next == 'b' || next == 'B')
+                {
+                    skip_start += 2;
+                    for(size_t i = skip_start; i < len; ++i)
+                    {
+                        const char c = str[i];
+                        if(c != '0' && c != '1')
+                            return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
+                    }
+                    return *this;
+                }
+                else if(next == 'o' || next == 'O')
+                {
+                    skip_start += 2;
+                    for(size_t i = skip_start; i < len; ++i)
+                    {
+                        const char c = str[i];
+                        if(c < '0' || c > '7')
+                            return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
+                    }
+                    return *this;
+                }
             }
         }
-        else // otherwise, decimal
+        // must be a decimal, or it is not a an number
+        for(size_t i = skip_start; i < len; ++i)
         {
-            if(len == skip_start)
-                return first(0);
-            for(size_t i = skip_start; i < len; ++i)
-            {
-                char c = str[i];
-                if(c < '0' || c > '9')
-                    return _is_delim_char(c) ? first(i) : first(0);
-            }
+            const char c = str[i];
+            if(c < '0' || c > '9')
+                return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
         }
         return *this;
     }
@@ -5776,125 +6259,436 @@ public:
         basic_substring ne = first_non_empty_span();
         if(ne.empty())
             return ne;
-        size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-') ? 1 : 0;
-        if(ne.first_of_any("0x", "0X")) // hexadecimal
-        {
-            skip_start += 2;
-            if(ne.len == skip_start)
-                return ne.first(0);
-            for(size_t i = skip_start; i < ne.len; ++i)
+        size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-');
+        C4_ASSERT(skip_start == 0 || skip_start == 1);
+        // if we have at least three digits after the leading sign, it
+        // can be decimal, or hex, or bin or oct. Ex:
+        // non-decimal: 0x0, 0b0, 0o0
+        // decimal: 1.0, 10., 1e1, 100, inf, nan, infinity
+        if(ne.len >= skip_start+3)
+        {
+            // if it does not have leading 0, it must be decimal, or it is not a real
+            if(ne.str[skip_start] != '0')
             {
-                char c = ne.str[i];
-                if(( ! _is_hex_char(c)) && c != '.' && c != 'p' && c != 'P')
+                if(ne.str[skip_start] == 'i') // is it infinity or inf?
                 {
-                    if(c == '-' || c == '+')
-                    {
-                        // we can also have a sign for the exponent
-                        if(i > 1 && (ne[i-1] == 'p' || ne[i-1] == 'P'))
-                        {
-                            continue;
-                        }
-                    }
-                    return _is_delim_char(c) ? ne.first(i) : ne.first(0);
+                    basic_substring word = ne._word_follows(skip_start + 1, "nfinity");
+                    if(word.len)
+                        return word;
+                    return ne._word_follows(skip_start + 1, "nf");
                 }
-            }
-        }
-        else if(ne.first_of_any("0b", "0B")) // binary
-        {
-            skip_start += 2;
-            if(ne.len == skip_start)
-                return ne.first(0);
-            for(size_t i = skip_start; i < ne.len; ++i)
-            {
-                char c = ne.str[i];
-                if(c != '0' && c != '1' && c != '.')
+                else if(ne.str[skip_start] == 'n') // is it nan?
                 {
-                    return _is_delim_char(c) ? ne.first(i) : ne.first(0);
+                    return ne._word_follows(skip_start + 1, "an");
                 }
-            }
-        }
-        else if(ne.first_of_any("0o", "0O")) // octal
-        {
-            skip_start += 2;
-            if(ne.len == skip_start)
-                return ne.first(0);
-            for(size_t i = skip_start; i < ne.len; ++i)
-            {
-                char c = ne.str[i];
-                if((c < '0' || c > '7') && c != '.')
+                else // must be a decimal, or it is not a real
                 {
-                    return _is_delim_char(c) ? ne.first(i) : ne.first(0);
+                    return ne._first_real_span_dec(skip_start);
                 }
             }
-        }
-        else // assume decimal
-        {
-            if(ne.len == skip_start)
-                return ne.first(0);
-            for(size_t i = skip_start; i < ne.len; ++i)
+            else // starts with 0. is it 0x, 0b or 0o?
             {
-                char c = ne.str[i];
-                if((c < '0' || c > '9') && (c != '.' && c != 'e' && c != 'E'))
-                {
-                    if(c == '-' || c == '+')
-                    {
-                        // we can also have a sign for the exponent
-                        if(i > 1 && (ne[i-1] == 'e' || ne[i-1] == 'E'))
-                        {
-                            continue;
-                        }
-                    }
-                    else if(i == skip_start)
-                    {
-                        if(c == 'i')
-                        {
-                            if(ne.len >= skip_start + 8 && ne.sub(skip_start, 8) == "infinity")
-                                return _is_delim_char(ne.str[skip_start + 8]) ? ne.first(skip_start + 8) : ne.first(0);
-                            else if(ne.len >= skip_start + 3 && ne.sub(skip_start, 3) == "inf")
-                                return _is_delim_char(ne.str[skip_start + 3]) ? ne.first(skip_start + 3) : ne.first(0);
-                            else
-                                return ne.first(0);
-                        }
-                        else if(c == 'n')
-                        {
-                            if(ne.len >= skip_start + 3 && ne.sub(skip_start, 3) == "nan")
-                                return _is_delim_char(ne.str[skip_start + 3]) ? ne.first(skip_start + 3) : ne.first(0);
-                            else
-                                return ne.first(0);
-                        }
-                        else
-                        {
-                            return ne.first(0);
-                        }
-                    }
-                    else
-                    {
-                        return _is_delim_char(c) ? ne.first(i) : ne.first(0);
-                    }
-                }
+                const char next = ne.str[skip_start + 1];
+                // hexadecimal
+                if(next == 'x' || next == 'X')
+                    return ne._first_real_span_hex(skip_start + 2);
+                // binary
+                else if(next == 'b' || next == 'B')
+                    return ne._first_real_span_bin(skip_start + 2);
+                // octal
+                else if(next == 'o' || next == 'O')
+                    return ne._first_real_span_oct(skip_start + 2);
+                // none of the above. may still be a decimal.
+                else
+                    return ne._first_real_span_dec(skip_start); // do not skip the 0.
             }
         }
-        return ne;
+        // less than 3 chars after the leading sign. It is either a
+        // decimal or it is not a real. (cannot be any of 0x0, etc).
+        return ne._first_real_span_dec(skip_start);
     }
 
     /** true if the character is a delimiter character *at the end* */
-    static constexpr C4_ALWAYS_INLINE bool _is_delim_char(char c) noexcept
+    static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_delim_char(char c) noexcept
     {
-        return c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\0'
+        return c == ' ' || c == '\n'
             || c == ']' || c == ')'  || c == '}'
-            || c == ',' || c == ';';
+            || c == ',' || c == ';' || c == '\r' || c == '\t' || c == '\0';
     }
 
     /** true if the character is in [0-9a-fA-F] */
-    static constexpr C4_ALWAYS_INLINE bool _is_hex_char(char c) noexcept
+    static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_hex_char(char c) noexcept
     {
         return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
     }
 
-    /** true if the character is in [0-9a-fA-F] */
-    static constexpr C4_ALWAYS_INLINE bool _is_oct_char(char c) noexcept
+    C4_NO_INLINE C4_PURE basic_substring _word_follows(size_t pos, csubstr word) const noexcept
     {
-        return (c >= '0' && c <= '7');
+        size_t posend = pos + word.len;
+        if(len >= posend && sub(pos, word.len) == word)
+            if(len == posend || _is_delim_char(str[posend]))
+                return first(posend);
+        return first(0);
+    }
+
+    // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
+    C4_NO_INLINE C4_PURE basic_substring _first_real_span_dec(size_t pos) const noexcept
+    {
+        bool intchars = false;
+        bool fracchars = false;
+        bool powchars;
+        // integral part
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+            {
+                intchars = true;
+            }
+            else if(c == '.')
+            {
+                ++pos;
+                goto fractional_part_dec;
+            }
+            else if(c == 'e' || c == 'E')
+            {
+                ++pos;
+                goto power_part_dec;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        // no . or p were found; this is either an integral number
+        // or not a number at all
+        return intchars ?
+            *this :
+            first(0);
+    fractional_part_dec:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == '.');
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+            {
+                fracchars = true;
+            }
+            else if(c == 'e' || c == 'E')
+            {
+                ++pos;
+                goto power_part_dec;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars || fracchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        return intchars || fracchars ?
+            *this :
+            first(0);
+    power_part_dec:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == 'e' || str[pos - 1] == 'E');
+        // either a + or a - is expected here, followed by more chars.
+        // also, using (pos+1) in this check will cause an early
+        // return when no more chars follow the sign.
+        if(len <= (pos+1) || ((!intchars) && (!fracchars)))
+            return first(0);
+        ++pos; // this was the sign.
+        // ... so the (pos+1) ensures that we enter the loop and
+        // hence that there exist chars in the power part
+        powchars = false;
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+                powchars = true;
+            else if(powchars && _is_delim_char(c))
+                return first(pos);
+            else
+                return first(0);
+        }
+        return *this;
+    }
+
+    // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
+    C4_NO_INLINE C4_PURE basic_substring _first_real_span_hex(size_t pos) const noexcept
+    {
+        bool intchars = false;
+        bool fracchars = false;
+        bool powchars;
+        // integral part
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(_is_hex_char(c))
+            {
+                intchars = true;
+            }
+            else if(c == '.')
+            {
+                ++pos;
+                goto fractional_part_hex;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_hex;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        // no . or p were found; this is either an integral number
+        // or not a number at all
+        return intchars ?
+            *this :
+            first(0);
+    fractional_part_hex:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == '.');
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(_is_hex_char(c))
+            {
+                fracchars = true;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_hex;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars || fracchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        return intchars || fracchars ?
+            *this :
+            first(0);
+    power_part_hex:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
+        // either a + or a - is expected here, followed by more chars.
+        // also, using (pos+1) in this check will cause an early
+        // return when no more chars follow the sign.
+        if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
+            return first(0);
+        ++pos; // this was the sign.
+        // ... so the (pos+1) ensures that we enter the loop and
+        // hence that there exist chars in the power part
+        powchars = false;
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+                powchars = true;
+            else if(powchars && _is_delim_char(c))
+                return first(pos);
+            else
+                return first(0);
+        }
+        return *this;
+    }
+
+    // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
+    C4_NO_INLINE C4_PURE basic_substring _first_real_span_bin(size_t pos) const noexcept
+    {
+        bool intchars = false;
+        bool fracchars = false;
+        bool powchars;
+        // integral part
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c == '0' || c == '1')
+            {
+                intchars = true;
+            }
+            else if(c == '.')
+            {
+                ++pos;
+                goto fractional_part_bin;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_bin;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        // no . or p were found; this is either an integral number
+        // or not a number at all
+        return intchars ?
+            *this :
+            first(0);
+    fractional_part_bin:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == '.');
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c == '0' || c == '1')
+            {
+                fracchars = true;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_bin;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars || fracchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        return intchars || fracchars ?
+            *this :
+            first(0);
+    power_part_bin:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
+        // either a + or a - is expected here, followed by more chars.
+        // also, using (pos+1) in this check will cause an early
+        // return when no more chars follow the sign.
+        if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
+            return first(0);
+        ++pos; // this was the sign.
+        // ... so the (pos+1) ensures that we enter the loop and
+        // hence that there exist chars in the power part
+        powchars = false;
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+                powchars = true;
+            else if(powchars && _is_delim_char(c))
+                return first(pos);
+            else
+                return first(0);
+        }
+        return *this;
+    }
+
+    // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
+    C4_NO_INLINE C4_PURE basic_substring _first_real_span_oct(size_t pos) const noexcept
+    {
+        bool intchars = false;
+        bool fracchars = false;
+        bool powchars;
+        // integral part
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '7')
+            {
+                intchars = true;
+            }
+            else if(c == '.')
+            {
+                ++pos;
+                goto fractional_part_oct;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_oct;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        // no . or p were found; this is either an integral number
+        // or not a number at all
+        return intchars ?
+            *this :
+            first(0);
+    fractional_part_oct:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == '.');
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '7')
+            {
+                fracchars = true;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_oct;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars || fracchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        return intchars || fracchars ?
+            *this :
+            first(0);
+    power_part_oct:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
+        // either a + or a - is expected here, followed by more chars.
+        // also, using (pos+1) in this check will cause an early
+        // return when no more chars follow the sign.
+        if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
+            return first(0);
+        ++pos; // this was the sign.
+        // ... so the (pos+1) ensures that we enter the loop and
+        // hence that there exist chars in the power part
+        powchars = false;
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+                powchars = true;
+            else if(powchars && _is_delim_char(c))
+                return first(pos);
+            else
+                return first(0);
+        }
+        return *this;
     }
 
     /** @} */
@@ -6265,7 +7059,11 @@ public:
         num = num != npos ? num : len - ifirst;
         num = num < that.len ? num : that.len;
         C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);
-        memcpy(str + sizeof(C) * ifirst, that.str, sizeof(C) * num);
+        // calling memcpy with null strings is undefined behavior
+        // and will wreak havoc in calling code's branches.
+        // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+        if(num)
+            memcpy(str + sizeof(C) * ifirst, that.str, sizeof(C) * num);
     }
 
 public:
@@ -6383,7 +7181,7 @@ public:
             {                                                           \
                 C4_ASSERT((last) >= (first));                           \
                 size_t num = static_cast<size_t>((last) - (first));     \
-                if(sz + num <= dst.len)                                 \
+                if(num > 0 && sz + num <= dst.len)                      \
                 {                                                       \
                     memcpy(dst.str + sz, first, num * sizeof(C));       \
                 }                                                       \
@@ -6586,44 +7384,40 @@ inline OStream& operator<< (OStream& os, basic_substring<C> s)
 
 // fast_float by Daniel Lemire
 // fast_float by João Paulo Magalhaes
-//
+
+
 // with contributions from Eugene Golushkov
 // with contributions from Maksim Kita
 // with contributions from Marcin Wojdyr
 // with contributions from Neal Richardson
 // with contributions from Tim Paine
 // with contributions from Fabio Pellacini
-//
-// MIT License Notice
-//
-//    MIT License
-//    
-//    Copyright (c) 2021 The fast_float authors
-//    
-//    Permission is hereby granted, free of charge, to any
-//    person obtaining a copy of this software and associated
-//    documentation files (the "Software"), to deal in the
-//    Software without restriction, including without
-//    limitation the rights to use, copy, modify, merge,
-//    publish, distribute, sublicense, and/or sell copies of
-//    the Software, and to permit persons to whom the Software
-//    is furnished to do so, subject to the following
-//    conditions:
-//    
-//    The above copyright notice and this permission notice
-//    shall be included in all copies or substantial portions
-//    of the Software.
-//    
-//    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
-//    ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-//    TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
-//    PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
-//    SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-//    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-//    OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
-//    IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-//    DEALINGS IN THE SOFTWARE.
-//
+
+
+// Permission is hereby granted, free of charge, to any
+// person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the
+// Software without restriction, including without
+// limitation the rights to use, copy, modify, merge,
+// publish, distribute, sublicense, and/or sell copies of
+// the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following
+// conditions:
+// 
+// The above copyright notice and this permission notice
+// shall be included in all copies or substantial portions
+// of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+
 
 #ifndef FASTFLOAT_FAST_FLOAT_H
 #define FASTFLOAT_FAST_FLOAT_H
@@ -6688,6 +7482,7 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 }
 #endif // FASTFLOAT_FAST_FLOAT_H
 
+
 #ifndef FASTFLOAT_FLOAT_COMMON_H
 #define FASTFLOAT_FLOAT_COMMON_H
 
@@ -6697,8 +7492,6 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 #include <cassert>
 //included above:
 //#include <cstring>
-//included above:
-//#include <type_traits>
 
 #if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64)   \
        || defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \
@@ -6727,16 +7520,15 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 #endif
 
 #if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__))
-#include <intrin.h>
+//included above:
+//#include <intrin.h>
 #endif
 
 #if defined(_MSC_VER) && !defined(__clang__)
 #define FASTFLOAT_VISUAL_STUDIO 1
 #endif
 
-#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
-#define FASTFLOAT_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-#elif defined _WIN32
+#ifdef _WIN32
 #define FASTFLOAT_IS_BIG_ENDIAN 0
 #else
 #if defined(__APPLE__) || defined(__FreeBSD__)
@@ -6916,8 +7708,6 @@ constexpr static float powers_of_ten_float[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5,
                                                 1e6, 1e7, 1e8, 1e9, 1e10};
 
 template <typename T> struct binary_format {
-  using equiv_uint = typename std::conditional<sizeof(T) == 4, uint32_t, uint64_t>::type;
-
   static inline constexpr int mantissa_explicit_bits();
   static inline constexpr int minimum_exponent();
   static inline constexpr int infinite_power();
@@ -6931,9 +7721,6 @@ template <typename T> struct binary_format {
   static inline constexpr int smallest_power_of_ten();
   static inline constexpr T exact_power_of_ten(int64_t power);
   static inline constexpr size_t max_digits();
-  static inline constexpr equiv_uint exponent_mask();
-  static inline constexpr equiv_uint mantissa_mask();
-  static inline constexpr equiv_uint hidden_bit_mask();
 };
 
 template <> inline constexpr int binary_format<double>::mantissa_explicit_bits() {
@@ -7041,33 +7828,6 @@ template <> inline constexpr size_t binary_format<float>::max_digits() {
   return 114;
 }
 
-template <> inline constexpr binary_format<float>::equiv_uint
-    binary_format<float>::exponent_mask() {
-  return 0x7F800000;
-}
-template <> inline constexpr binary_format<double>::equiv_uint
-    binary_format<double>::exponent_mask() {
-  return 0x7FF0000000000000;
-}
-
-template <> inline constexpr binary_format<float>::equiv_uint
-    binary_format<float>::mantissa_mask() {
-  return 0x007FFFFF;
-}
-template <> inline constexpr binary_format<double>::equiv_uint
-    binary_format<double>::mantissa_mask() {
-  return 0x000FFFFFFFFFFFFF;
-}
-
-template <> inline constexpr binary_format<float>::equiv_uint
-    binary_format<float>::hidden_bit_mask() {
-  return 0x00800000;
-}
-template <> inline constexpr binary_format<double>::equiv_uint
-    binary_format<double>::hidden_bit_mask() {
-  return 0x0010000000000000;
-}
-
 template<typename T>
 fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value) {
   uint64_t word = am.mantissa;
@@ -7090,6 +7850,7 @@ fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &va
 
 #endif
 
+
 #ifndef FASTFLOAT_ASCII_NUMBER_H
 #define FASTFLOAT_ASCII_NUMBER_H
 
@@ -7324,6 +8085,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
 
 #endif
 
+
 #ifndef FASTFLOAT_FAST_TABLE_H
 #define FASTFLOAT_FAST_TABLE_H
 
@@ -8025,6 +8787,7 @@ using powers = powers_template<>;
 
 #endif
 
+
 #ifndef FASTFLOAT_DECIMAL_TO_BINARY_H
 #define FASTFLOAT_DECIMAL_TO_BINARY_H
 
@@ -8221,6 +8984,7 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w)  noexcept  {
 
 #endif
 
+
 #ifndef FASTFLOAT_BIGINT_H
 #define FASTFLOAT_BIGINT_H
 
@@ -8814,6 +9578,7 @@ struct bigint {
 
 #endif
 
+
 #ifndef FASTFLOAT_ASCII_NUMBER_H
 #define FASTFLOAT_ASCII_NUMBER_H
 
@@ -9049,6 +9814,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
 
 #endif
 
+
 #ifndef FASTFLOAT_DIGIT_COMPARISON_H
 #define FASTFLOAT_DIGIT_COMPARISON_H
 
@@ -9096,24 +9862,40 @@ fastfloat_really_inline int32_t scientific_exponent(parsed_number_string& num) n
 // this converts a native floating-point number to an extended-precision float.
 template <typename T>
 fastfloat_really_inline adjusted_mantissa to_extended(T value) noexcept {
-  using equiv_uint = typename binary_format<T>::equiv_uint;
-  constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
-  constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
-  constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
-
   adjusted_mantissa am;
   int32_t bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
-  equiv_uint bits;
-  ::memcpy(&bits, &value, sizeof(T));
-  if ((bits & exponent_mask) == 0) {
-    // denormal
-    am.power2 = 1 - bias;
-    am.mantissa = bits & mantissa_mask;
+  if (std::is_same<T, float>::value) {
+    constexpr uint32_t exponent_mask = 0x7F800000;
+    constexpr uint32_t mantissa_mask = 0x007FFFFF;
+    constexpr uint64_t hidden_bit_mask = 0x00800000;
+    uint32_t bits;
+    ::memcpy(&bits, &value, sizeof(T));
+    if ((bits & exponent_mask) == 0) {
+      // denormal
+      am.power2 = 1 - bias;
+      am.mantissa = bits & mantissa_mask;
+    } else {
+      // normal
+      am.power2 = int32_t((bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
+      am.power2 -= bias;
+      am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
+    }
   } else {
-    // normal
-    am.power2 = int32_t((bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
-    am.power2 -= bias;
-    am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
+    constexpr uint64_t exponent_mask = 0x7FF0000000000000;
+    constexpr uint64_t mantissa_mask = 0x000FFFFFFFFFFFFF;
+    constexpr uint64_t hidden_bit_mask = 0x0010000000000000;
+    uint64_t bits;
+    ::memcpy(&bits, &value, sizeof(T));
+    if ((bits & exponent_mask) == 0) {
+      // denormal
+      am.power2 = 1 - bias;
+      am.mantissa = bits & mantissa_mask;
+    } else {
+      // normal
+      am.power2 = int32_t((bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
+      am.power2 -= bias;
+      am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
+    }
   }
 
   return am;
@@ -9138,7 +9920,7 @@ fastfloat_really_inline void round(adjusted_mantissa& am, callback cb) noexcept
   if (-am.power2 >= mantissa_shift) {
     // have a denormal float
     int32_t shift = -am.power2 + 1;
-    cb(am, std::min<int32_t>(shift, 64));
+    cb(am, std::min(shift, 64));
     // check for round-up: if rounding-nearest carried us to the hidden bit.
     am.power2 = (am.mantissa < (uint64_t(1) << binary_format<T>::mantissa_explicit_bits())) ? 0 : 1;
     return;
@@ -9458,6 +10240,7 @@ inline adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa
 
 #endif
 
+
 #ifndef FASTFLOAT_PARSE_NUMBER_H
 #define FASTFLOAT_PARSE_NUMBER_H
 
@@ -9605,10 +10388,17 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 
 // forward declarations for std::vector
 #if defined(__GLIBCXX__) || defined(__GLIBCPP__) || defined(_MSC_VER)
+#if defined(_MSC_VER)
+__pragma(warning(push))
+__pragma(warning(disable : 4643))
+#endif
 namespace std {
 template<typename> class allocator;
 template<typename T, typename Alloc> class vector;
 } // namespace std
+#if defined(_MSC_VER)
+__pragma(warning(pop))
+#endif
 #elif defined(_LIBCPP_ABI_NAMESPACE)
 namespace std {
 inline namespace _LIBCPP_ABI_NAMESPACE {
@@ -9707,8 +10497,8 @@ using string = basic_string<char, char_traits<char>, allocator<char>>;
 
 namespace c4 {
 
-c4::substr to_substr(std::string &s);
-c4::csubstr to_csubstr(std::string const& s);
+C4_ALWAYS_INLINE c4::substr to_substr(std::string &s) noexcept;
+C4_ALWAYS_INLINE c4::csubstr to_csubstr(std::string const& s) noexcept;
 
 bool operator== (c4::csubstr ss, std::string const& s);
 bool operator!= (c4::csubstr ss, std::string const& s);
@@ -9805,6 +10595,10 @@ bool from_chars(c4::csubstr buf, std::string * s);
  * // Read a value from the string, which must be
  * // trimmed to the value (ie, no leading/trailing whitespace).
  * // return true if the conversion succeeded.
+ * // There is no check for overflow; the value wraps around in a way similar
+ * // to the standard C/C++ overflow behavior. For example,
+ * // from_chars<int8_t>("128", &val) returns true and val will be
+ * // set tot 0.
  * template<class T> bool c4::from_chars(csubstr buf, T * C4_RESTRICT val);
  *
  *
@@ -9870,44 +10664,61 @@ bool from_chars(c4::csubstr buf, std::string * s);
 
 
 #ifndef C4CORE_NO_FAST_FLOAT
-    C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wsign-conversion")
-    C4_SUPPRESS_WARNING_GCC("-Warray-bounds")
-#if __GNUC__ >= 5
-    C4_SUPPRESS_WARNING_GCC("-Wshift-count-overflow")
-#endif
-// amalgamate: removed include of
-// https://github.com/biojppm/c4core/src/c4/ext/fast_float.hpp
-//#   include "c4/ext/fast_float.hpp"
-#if !defined(C4_EXT_FAST_FLOAT_HPP_) && !defined(_C4_EXT_FAST_FLOAT_HPP_)
-#error "amalgamate: file c4/ext/fast_float.hpp must have been included at this point"
-#endif /* C4_EXT_FAST_FLOAT_HPP_ */
-
-    C4_SUPPRESS_WARNING_GCC_POP
-#   define C4CORE_HAVE_FAST_FLOAT 1
-#   define C4CORE_HAVE_STD_FROMCHARS 0
 #   if (C4_CPP >= 17)
 #       if defined(_MSC_VER)
-#           if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019)
+#           if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) // VS2017 and lower do not have these macros
 #               include <charconv>
 #               define C4CORE_HAVE_STD_TOCHARS 1
+#               define C4CORE_HAVE_STD_FROMCHARS 0 // prefer fast_float with MSVC
+#               define C4CORE_HAVE_FAST_FLOAT 1
 #           else
 #               define C4CORE_HAVE_STD_TOCHARS 0
+#               define C4CORE_HAVE_STD_FROMCHARS 0
+#               define C4CORE_HAVE_FAST_FLOAT 1
 #           endif
-#       else  // VS2017 and lower do not have these macros
-#           if __has_include(<charconv>) && __cpp_lib_to_chars
-#               define C4CORE_HAVE_STD_TOCHARS 1
+#       else
+#           if __has_include(<charconv>)
 //included above:
 //#               include <charconv>
+#               if defined(__cpp_lib_to_chars)
+#                   define C4CORE_HAVE_STD_TOCHARS 1
+#                   define C4CORE_HAVE_STD_FROMCHARS 0 // glibc uses fast_float internally
+#                   define C4CORE_HAVE_FAST_FLOAT 1
+#               else
+#                   define C4CORE_HAVE_STD_TOCHARS 0
+#                   define C4CORE_HAVE_STD_FROMCHARS 0
+#                   define C4CORE_HAVE_FAST_FLOAT 1
+#               endif
 #           else
 #               define C4CORE_HAVE_STD_TOCHARS 0
+#               define C4CORE_HAVE_STD_FROMCHARS 0
+#               define C4CORE_HAVE_FAST_FLOAT 1
 #           endif
 #       endif
 #   else
 #       define C4CORE_HAVE_STD_TOCHARS 0
+#       define C4CORE_HAVE_STD_FROMCHARS 0
+#       define C4CORE_HAVE_FAST_FLOAT 1
+#   endif
+#   if C4CORE_HAVE_FAST_FLOAT
+        C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wsign-conversion")
+        C4_SUPPRESS_WARNING_GCC("-Warray-bounds")
+#       if __GNUC__ >= 5
+            C4_SUPPRESS_WARNING_GCC("-Wshift-count-overflow")
+#       endif
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/ext/fast_float.hpp
+//#       include "c4/ext/fast_float.hpp"
+#if !defined(C4_EXT_FAST_FLOAT_HPP_) && !defined(_C4_EXT_FAST_FLOAT_HPP_)
+#error "amalgamate: file c4/ext/fast_float.hpp must have been included at this point"
+#endif /* C4_EXT_FAST_FLOAT_HPP_ */
+
+        C4_SUPPRESS_WARNING_GCC_POP
 #   endif
 #elif (C4_CPP >= 17)
+#   define C4CORE_HAVE_FAST_FLOAT 0
 #   if defined(_MSC_VER)
-#       if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019)
+#       if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) // VS2017 and lower do not have these macros
 //included above:
 //#           include <charconv>
 #           define C4CORE_HAVE_STD_TOCHARS 1
@@ -9916,12 +10727,17 @@ bool from_chars(c4::csubstr buf, std::string * s);
 #           define C4CORE_HAVE_STD_TOCHARS 0
 #           define C4CORE_HAVE_STD_FROMCHARS 0
 #       endif
-#   else  // VS2017 and lower do not have these macros
-#       if __has_include(<charconv>) && __cpp_lib_to_chars
-#           define C4CORE_HAVE_STD_TOCHARS 1
-#           define C4CORE_HAVE_STD_FROMCHARS 1
+#   else
+#       if __has_include(<charconv>)
 //included above:
 //#           include <charconv>
+#           if defined(__cpp_lib_to_chars)
+#               define C4CORE_HAVE_STD_TOCHARS 1
+#               define C4CORE_HAVE_STD_FROMCHARS 1 // glibc uses fast_float internally
+#           else
+#               define C4CORE_HAVE_STD_TOCHARS 0
+#               define C4CORE_HAVE_STD_FROMCHARS 0
+#           endif
 #       else
 #           define C4CORE_HAVE_STD_TOCHARS 0
 #           define C4CORE_HAVE_STD_FROMCHARS 0
@@ -9930,10 +10746,11 @@ bool from_chars(c4::csubstr buf, std::string * s);
 #else
 #   define C4CORE_HAVE_STD_TOCHARS 0
 #   define C4CORE_HAVE_STD_FROMCHARS 0
+#   define C4CORE_HAVE_FAST_FLOAT 0
 #endif
 
 
-#if !C4CORE_HAVE_STD_FROMCHARS && !defined(C4CORE_HAVE_FAST_FLOAT)
+#if !C4CORE_HAVE_STD_FROMCHARS
 #include <cstdio>
 #endif
 
@@ -9959,52 +10776,33 @@ bool from_chars(c4::csubstr buf, std::string * s);
 
 namespace c4 {
 
-typedef enum : uint8_t {
+#if C4CORE_HAVE_STD_TOCHARS
+/** @warning Use only the symbol. Do not rely on the type or naked value of this enum. */
+typedef enum : std::underlying_type<std::chars_format>::type {
     /** print the real number in floating point format (like %f) */
-    FTOA_FLOAT = 0,
+    FTOA_FLOAT = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::fixed),
     /** print the real number in scientific format (like %e) */
-    FTOA_SCIENT = 1,
+    FTOA_SCIENT = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::scientific),
     /** print the real number in flexible format (like %g) */
-    FTOA_FLEX = 2,
+    FTOA_FLEX = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::general),
     /** print the real number in hexadecimal format (like %a) */
-    FTOA_HEXA = 3,
-    _FTOA_COUNT
+    FTOA_HEXA = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::hex),
 } RealFormat_e;
+#else
+/** @warning Use only the symbol. Do not rely on the type or naked value of this enum. */
+typedef enum : char {
+    /** print the real number in floating point format (like %f) */
+    FTOA_FLOAT = 'f',
+    /** print the real number in scientific format (like %e) */
+    FTOA_SCIENT = 'e',
+    /** print the real number in flexible format (like %g) */
+    FTOA_FLEX = 'g',
+    /** print the real number in hexadecimal format (like %a) */
+    FTOA_HEXA = 'a',
+} RealFormat_e;
+#endif
 
 
-inline C4_CONSTEXPR14 char to_c_fmt(RealFormat_e f)
-{
-    constexpr const char fmt[] = {
-        'f',  // FTOA_FLOAT
-        'e',  // FTOA_SCIENT
-        'g',  // FTOA_FLEX
-        'a',  // FTOA_HEXA
-    };
-    C4_STATIC_ASSERT(C4_COUNTOF(fmt) == _FTOA_COUNT);
-    #if C4_CPP > 14
-    C4_ASSERT(f < _FTOA_COUNT);
-    #endif
-    return fmt[f];
-}
-
-
-#if C4CORE_HAVE_STD_TOCHARS
-inline C4_CONSTEXPR14 std::chars_format to_std_fmt(RealFormat_e f)
-{
-    constexpr const std::chars_format fmt[] = {
-        std::chars_format::fixed,       // FTOA_FLOAT
-        std::chars_format::scientific,  // FTOA_SCIENT
-        std::chars_format::general,     // FTOA_FLEX
-        std::chars_format::hex,         // FTOA_HEXA
-    };
-    C4_STATIC_ASSERT(C4_COUNTOF(fmt) == _FTOA_COUNT);
-    #if C4_CPP >= 14
-    C4_ASSERT(f < _FTOA_COUNT);
-    #endif
-    return fmt[f];
-}
-#endif // C4CORE_HAVE_STD_TOCHARS
-
 /** in some platforms, int,unsigned int
  *  are not any of int8_t...int64_t and
  *  long,unsigned long are not any of uint8_t...uint64_t */
@@ -10046,105 +10844,494 @@ struct is_fixed_length
 #   endif
 #endif
 
-// Helper macros, undefined below
+namespace detail {
+
+/* python command to get the values below:
+def dec(v):
+    return str(v)
+for bits in (8, 16, 32, 64):
+    imin, imax, umax = (-(1 << (bits - 1))), (1 << (bits - 1)) - 1, (1 << bits) - 1
+    for vname, v in (("imin", imin), ("imax", imax), ("umax", umax)):
+        for f in (bin, oct, dec, hex):
+            print(f"{bits}b: {vname}={v} {f.__name__}: len={len(f(v)):2d}: {v} {f(v)}")
+*/
+
+// do not use the type as the template argument because in some
+// platforms long!=int32 and long!=int64. Just use the numbytes
+// which is more generic and spares lengthy SFINAE code.
+template<size_t num_bytes, bool is_signed> struct charconv_digits_;
+template<class T> using charconv_digits = charconv_digits_<sizeof(T), std::is_signed<T>::value>;
+
+template<> struct charconv_digits_<1u, true> // int8_t
+{
+    enum : size_t {
+        maxdigits_bin       = 1 + 2 + 8, // -128==-0b10000000
+        maxdigits_oct       = 1 + 2 + 3, // -128==-0o200
+        maxdigits_dec       = 1     + 3, // -128
+        maxdigits_hex       = 1 + 2 + 2, // -128==-0x80
+        maxdigits_bin_nopfx =         8, // -128==-0b10000000
+        maxdigits_oct_nopfx =         3, // -128==-0o200
+        maxdigits_dec_nopfx =         3, // -128
+        maxdigits_hex_nopfx =         2, // -128==-0x80
+    };
+    // min values without sign!
+    static constexpr csubstr min_value_dec() noexcept { return csubstr("128"); }
+    static constexpr csubstr min_value_hex() noexcept { return csubstr("80"); }
+    static constexpr csubstr min_value_oct() noexcept { return csubstr("200"); }
+    static constexpr csubstr min_value_bin() noexcept { return csubstr("10000000"); }
+    static constexpr csubstr max_value_dec() noexcept { return csubstr("127"); }
+    static constexpr bool    is_oct_overflow(csubstr str) noexcept { return !((str.len < 3) || (str.len == 3 && str[0] <= '1')); }
+};
+template<> struct charconv_digits_<1u, false> // uint8_t
+{
+    enum : size_t {
+        maxdigits_bin       = 2 + 8, // 255 0b11111111
+        maxdigits_oct       = 2 + 3, // 255 0o377
+        maxdigits_dec       =     3, // 255
+        maxdigits_hex       = 2 + 2, // 255 0xff
+        maxdigits_bin_nopfx =     8, // 255 0b11111111
+        maxdigits_oct_nopfx =     3, // 255 0o377
+        maxdigits_dec_nopfx =     3, // 255
+        maxdigits_hex_nopfx =     2, // 255 0xff
+    };
+    static constexpr csubstr max_value_dec() noexcept { return csubstr("255"); }
+    static constexpr bool    is_oct_overflow(csubstr str) noexcept { return !((str.len < 3) || (str.len == 3 && str[0] <= '3')); }
+};
+template<> struct charconv_digits_<2u, true> // int16_t
+{
+    enum : size_t {
+        maxdigits_bin       = 1 + 2 + 16, // -32768 -0b1000000000000000
+        maxdigits_oct       = 1 + 2 +  6, // -32768 -0o100000
+        maxdigits_dec       = 1     +  5, // -32768 -32768
+        maxdigits_hex       = 1 + 2 +  4, // -32768 -0x8000
+        maxdigits_bin_nopfx =         16, // -32768 -0b1000000000000000
+        maxdigits_oct_nopfx =          6, // -32768 -0o100000
+        maxdigits_dec_nopfx =          5, // -32768 -32768
+        maxdigits_hex_nopfx =          4, // -32768 -0x8000
+    };
+    // min values without sign!
+    static constexpr csubstr min_value_dec() noexcept { return csubstr("32768"); }
+    static constexpr csubstr min_value_hex() noexcept { return csubstr("8000"); }
+    static constexpr csubstr min_value_oct() noexcept { return csubstr("100000"); }
+    static constexpr csubstr min_value_bin() noexcept { return csubstr("1000000000000000"); }
+    static constexpr csubstr max_value_dec() noexcept { return csubstr("32767"); }
+    static constexpr bool    is_oct_overflow(csubstr str) noexcept { return !((str.len < 6)); }
+};
+template<> struct charconv_digits_<2u, false> // uint16_t
+{
+    enum : size_t {
+        maxdigits_bin       = 2 + 16, // 65535 0b1111111111111111
+        maxdigits_oct       = 2 +  6, // 65535 0o177777
+        maxdigits_dec       =      6, // 65535 65535
+        maxdigits_hex       = 2 +  4, // 65535 0xffff
+        maxdigits_bin_nopfx =     16, // 65535 0b1111111111111111
+        maxdigits_oct_nopfx =      6, // 65535 0o177777
+        maxdigits_dec_nopfx =      6, // 65535 65535
+        maxdigits_hex_nopfx =      4, // 65535 0xffff
+    };
+    static constexpr csubstr max_value_dec() noexcept { return csubstr("65535"); }
+    static constexpr bool    is_oct_overflow(csubstr str) noexcept { return !((str.len < 6) || (str.len == 6 && str[0] <= '1')); }
+};
+template<> struct charconv_digits_<4u, true> // int32_t
+{
+    enum : size_t {
+        maxdigits_bin       = 1 + 2 + 32, // len=35: -2147483648 -0b10000000000000000000000000000000
+        maxdigits_oct       = 1 + 2 + 11, // len=14: -2147483648 -0o20000000000
+        maxdigits_dec       = 1     + 10, // len=11: -2147483648 -2147483648
+        maxdigits_hex       = 1 + 2 +  8, // len=11: -2147483648 -0x80000000
+        maxdigits_bin_nopfx =         32, // len=35: -2147483648 -0b10000000000000000000000000000000
+        maxdigits_oct_nopfx =         11, // len=14: -2147483648 -0o20000000000
+        maxdigits_dec_nopfx =         10, // len=11: -2147483648 -2147483648
+        maxdigits_hex_nopfx =          8, // len=11: -2147483648 -0x80000000
+    };
+    // min values without sign!
+    static constexpr csubstr min_value_dec() noexcept { return csubstr("2147483648"); }
+    static constexpr csubstr min_value_hex() noexcept { return csubstr("80000000"); }
+    static constexpr csubstr min_value_oct() noexcept { return csubstr("20000000000"); }
+    static constexpr csubstr min_value_bin() noexcept { return csubstr("10000000000000000000000000000000"); }
+    static constexpr csubstr max_value_dec() noexcept { return csubstr("2147483647"); }
+    static constexpr bool    is_oct_overflow(csubstr str) noexcept { return !((str.len < 11) || (str.len == 11 && str[0] <= '1')); }
+};
+template<> struct charconv_digits_<4u, false> // uint32_t
+{
+    enum : size_t {
+        maxdigits_bin       = 2 + 32, // len=34: 4294967295 0b11111111111111111111111111111111
+        maxdigits_oct       = 2 + 11, // len=13: 4294967295 0o37777777777
+        maxdigits_dec       =     10, // len=10: 4294967295 4294967295
+        maxdigits_hex       = 2 +  8, // len=10: 4294967295 0xffffffff
+        maxdigits_bin_nopfx =     32, // len=34: 4294967295 0b11111111111111111111111111111111
+        maxdigits_oct_nopfx =     11, // len=13: 4294967295 0o37777777777
+        maxdigits_dec_nopfx =     10, // len=10: 4294967295 4294967295
+        maxdigits_hex_nopfx =      8, // len=10: 4294967295 0xffffffff
+    };
+    static constexpr csubstr max_value_dec() noexcept { return csubstr("4294967295"); }
+    static constexpr bool is_oct_overflow(csubstr str) noexcept { return !((str.len < 11) || (str.len == 11 && str[0] <= '3')); }
+};
+template<> struct charconv_digits_<8u, true> // int32_t
+{
+    enum : size_t {
+        maxdigits_bin       = 1 + 2 + 64, // len=67: -9223372036854775808 -0b1000000000000000000000000000000000000000000000000000000000000000
+        maxdigits_oct       = 1 + 2 + 22, // len=25: -9223372036854775808 -0o1000000000000000000000
+        maxdigits_dec       = 1     + 19, // len=20: -9223372036854775808 -9223372036854775808
+        maxdigits_hex       = 1 + 2 + 16, // len=19: -9223372036854775808 -0x8000000000000000
+        maxdigits_bin_nopfx =         64, // len=67: -9223372036854775808 -0b1000000000000000000000000000000000000000000000000000000000000000
+        maxdigits_oct_nopfx =         22, // len=25: -9223372036854775808 -0o1000000000000000000000
+        maxdigits_dec_nopfx =         19, // len=20: -9223372036854775808 -9223372036854775808
+        maxdigits_hex_nopfx =         16, // len=19: -9223372036854775808 -0x8000000000000000
+    };
+    static constexpr csubstr min_value_dec() noexcept { return csubstr("9223372036854775808"); }
+    static constexpr csubstr min_value_hex() noexcept { return csubstr("8000000000000000"); }
+    static constexpr csubstr min_value_oct() noexcept { return csubstr("1000000000000000000000"); }
+    static constexpr csubstr min_value_bin() noexcept { return csubstr("1000000000000000000000000000000000000000000000000000000000000000"); }
+    static constexpr csubstr max_value_dec() noexcept { return csubstr("9223372036854775807"); }
+    static constexpr bool    is_oct_overflow(csubstr str) noexcept { return !((str.len < 22)); }
+};
+template<> struct charconv_digits_<8u, false>
+{
+    enum : size_t {
+        maxdigits_bin       = 2 + 64, // len=66: 18446744073709551615 0b1111111111111111111111111111111111111111111111111111111111111111
+        maxdigits_oct       = 2 + 22, // len=24: 18446744073709551615 0o1777777777777777777777
+        maxdigits_dec       =     20, // len=20: 18446744073709551615 18446744073709551615
+        maxdigits_hex       = 2 + 16, // len=18: 18446744073709551615 0xffffffffffffffff
+        maxdigits_bin_nopfx =     64, // len=66: 18446744073709551615 0b1111111111111111111111111111111111111111111111111111111111111111
+        maxdigits_oct_nopfx =     22, // len=24: 18446744073709551615 0o1777777777777777777777
+        maxdigits_dec_nopfx =     20, // len=20: 18446744073709551615 18446744073709551615
+        maxdigits_hex_nopfx =     16, // len=18: 18446744073709551615 0xffffffffffffffff
+    };
+    static constexpr csubstr max_value_dec() noexcept { return csubstr("18446744073709551615"); }
+    static constexpr bool    is_oct_overflow(csubstr str) noexcept { return !((str.len < 22) || (str.len == 22 && str[0] <= '1')); }
+};
+} // namespace detail
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
+// Helper macros, undefined below
 #define _c4append(c) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = static_cast<char>(c); } else { ++pos; } }
 #define _c4appendhex(i) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = hexchars[i]; } else { ++pos; } }
 
+/** @name digits_dec return the number of digits required to encode a
+ * decimal number.
+ *
+ * @note At first sight this code may look heavily branchy and
+ * therefore inefficient. However, measurements revealed this to be
+ * the fastest among the alternatives.
+ *
+ * @see https://github.com/biojppm/c4core/pull/77 */
+/** @{ */
+
+template<class T>
+C4_CONSTEXPR14 C4_ALWAYS_INLINE
+auto digits_dec(T v) noexcept
+    -> typename std::enable_if<sizeof(T) == 1u, unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    C4_ASSERT(v >= 0);
+    return ((v >= 100) ? 3u : ((v >= 10) ? 2u : 1u));
 }
-#include <iostream>
-namespace c4 {
+
+template<class T>
+C4_CONSTEXPR14 C4_ALWAYS_INLINE
+auto digits_dec(T v) noexcept
+    -> typename std::enable_if<sizeof(T) == 2u, unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    C4_ASSERT(v >= 0);
+    return ((v >= 10000) ? 5u : (v >= 1000) ? 4u : (v >= 100) ? 3u : (v >= 10) ? 2u : 1u);
+}
+
+template<class T>
+C4_CONSTEXPR14 C4_ALWAYS_INLINE
+auto digits_dec(T v) noexcept
+    -> typename std::enable_if<sizeof(T) == 4u, unsigned>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    C4_ASSERT(v >= 0);
+    return ((v >= 1000000000) ? 10u : (v >= 100000000) ? 9u : (v >= 10000000) ? 8u :
+            (v >= 1000000) ? 7u : (v >= 100000) ? 6u : (v >= 10000) ? 5u :
+            (v >= 1000) ? 4u : (v >= 100) ? 3u : (v >= 10) ? 2u : 1u);
+}
+
+template<class T>
+C4_CONSTEXPR14 C4_ALWAYS_INLINE
+auto digits_dec(T v) noexcept
+    -> typename std::enable_if<sizeof(T) == 8u, unsigned>::type
+{
+    // thanks @fargies!!!
+    // https://github.com/biojppm/c4core/pull/77#issuecomment-1063753568
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    C4_ASSERT(v >= 0);
+    if(v >= 1000000000) // 10
+    {
+        if(v >= 100000000000000) // 15 [15-20] range
+        {
+            if(v >= 100000000000000000) // 18 (15 + (20 - 15) / 2)
+            {
+                if((typename std::make_unsigned<T>::type)v >= 10000000000000000000u) // 20
+                    return 20u;
+                else
+                    return (v >= 1000000000000000000) ? 19u : 18u;
+            }
+            else if(v >= 10000000000000000) // 17
+                return 17u;
+            else
+                return(v >= 1000000000000000) ? 16u : 15u;
+        }
+        else if(v >= 1000000000000) // 13
+            return (v >= 10000000000000) ? 14u : 13u;
+        else if(v >= 100000000000) // 12
+            return 12;
+        else
+            return(v >= 10000000000) ? 11u : 10u;
+    }
+    else if(v >= 10000) // 5 [5-9] range
+    {
+        if(v >= 10000000) // 8
+            return (v >= 100000000) ? 9u : 8u;
+        else if(v >= 1000000) // 7
+            return 7;
+        else
+            return (v >= 100000) ? 6u : 5u;
+    }
+    else if(v >= 100)
+        return (v >= 1000) ? 4u : 3u;
+    else
+        return (v >= 10) ? 2u : 1u;
+}
+
+/** @} */
+
+
+template<class T>
+C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_hex(T v) noexcept
+{
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    C4_ASSERT(v >= 0);
+    return v ? 1u + (msb((typename std::make_unsigned<T>::type)v) >> 2u) : 1u;
+}
+
+template<class T>
+C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_bin(T v) noexcept
+{
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    C4_ASSERT(v >= 0);
+    return v ? 1u + msb((typename std::make_unsigned<T>::type)v) : 1u;
+}
+
+template<class T>
+C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_oct(T v_) noexcept
+{
+    // TODO: is there a better way?
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    C4_ASSERT(v_ >= 0);
+    using U = typename
+        std::conditional<sizeof(T) <= sizeof(unsigned),
+                         unsigned,
+                         typename std::make_unsigned<T>::type>::type;
+    U v = (U) v_;  // safe because we require v_ >= 0
+    unsigned __n = 1;
+    const unsigned __b2 = 64u;
+    const unsigned __b3 = __b2 * 8u;
+    const unsigned long __b4 = __b3 * 8u;
+    while(true)
+	{
+        if(v < 8u)
+            return __n;
+        if(v < __b2)
+            return __n + 1;
+        if(v < __b3)
+            return __n + 2;
+        if(v < __b4)
+            return __n + 3;
+        v /= (U) __b4;
+        __n += 4;
+	}
+}
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+namespace detail {
 C4_INLINE_CONSTEXPR const char hexchars[] = "0123456789abcdef";
+C4_INLINE_CONSTEXPR const char digits0099[] =
+    "0001020304050607080910111213141516171819"
+    "2021222324252627282930313233343536373839"
+    "4041424344454647484950515253545556575859"
+    "6061626364656667686970717273747576777879"
+    "8081828384858687888990919293949596979899";
+} // namespace detail
+
+C4_SUPPRESS_WARNING_GCC_PUSH
+C4_SUPPRESS_WARNING_GCC("-Warray-bounds")  // gcc has false positives here
+#if (defined(__GNUC__) && (__GNUC__ >= 7))
+C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow")  // gcc has false positives here
+#endif
 
-/** write an integer to a string in decimal format. This is the
- * lowest level (and the fastest) function to do this task.
- * @note does not accept negative numbers
- * @return the number of characters required for the string,
- * even if the string is not long enough for the result.
- * No writes are done past the end of the string. */
 template<class T>
-size_t write_dec(substr buf, T v)
+C4_HOT C4_ALWAYS_INLINE
+void write_dec_unchecked(substr buf, T v, unsigned digits_v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
     C4_ASSERT(v >= 0);
-    size_t pos = 0;
+    C4_ASSERT(buf.len >= digits_v);
+    C4_XASSERT(digits_v == digits_dec(v));
+    // in bm_xtoa: checkoncelog_singlediv_write2
+    while(v >= T(100))
+    {
+        const T quo = v / T(100);
+        const auto num = (v - quo * T(100)) << 1u;
+        v = quo;
+        buf.str[--digits_v] = detail::digits0099[num + 1];
+        buf.str[--digits_v] = detail::digits0099[num];
+    }
+    if(v >= T(10))
+    {
+        C4_ASSERT(digits_v == 2);
+        const auto num = v << 1u;
+        buf.str[1] = detail::digits0099[num + 1];
+        buf.str[0] = detail::digits0099[num];
+    }
+    else
+    {
+        C4_ASSERT(digits_v == 1);
+        buf.str[0] = (char)('0' + v);
+    }
+}
+
+
+template<class T>
+C4_HOT C4_ALWAYS_INLINE
+void write_hex_unchecked(substr buf, T v, unsigned digits_v) noexcept
+{
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    C4_ASSERT(v >= 0);
+    C4_ASSERT(buf.len >= digits_v);
+    C4_XASSERT(digits_v == digits_hex(v));
     do {
-        _c4append('0' + (v % T(10)));
-        v /= T(10);
+        buf.str[--digits_v] = detail::hexchars[v & T(15)];
+        v >>= 4;
     } while(v);
-    buf.reverse_range(0, pos <= buf.len ? pos : buf.len);
-    return pos;
+    C4_ASSERT(digits_v == 0);
 }
 
 
+template<class T>
+C4_HOT C4_ALWAYS_INLINE
+void write_oct_unchecked(substr buf, T v, unsigned digits_v) noexcept
+{
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    C4_ASSERT(v >= 0);
+    C4_ASSERT(buf.len >= digits_v);
+    C4_XASSERT(digits_v == digits_oct(v));
+    do {
+        buf.str[--digits_v] = (char)('0' + (v & T(7)));
+        v >>= 3;
+    } while(v);
+    C4_ASSERT(digits_v == 0);
+}
+
+
+template<class T>
+C4_HOT C4_ALWAYS_INLINE
+void write_bin_unchecked(substr buf, T v, unsigned digits_v) noexcept
+{
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    C4_ASSERT(v >= 0);
+    C4_ASSERT(buf.len >= digits_v);
+    C4_XASSERT(digits_v == digits_bin(v));
+    do {
+        buf.str[--digits_v] = (char)('0' + (v & T(1)));
+        v >>= 1;
+    } while(v);
+    C4_ASSERT(digits_v == 0);
+}
+
+
+/** write an integer to a string in decimal format. This is the
+ * lowest level (and the fastest) function to do this task.
+ * @note does not accept negative numbers
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the required size will be returned
+ * @return the number of characters required for the buffer. */
+template<class T>
+C4_ALWAYS_INLINE size_t write_dec(substr buf, T v) noexcept
+{
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    C4_ASSERT(v >= 0);
+    unsigned digits = digits_dec(v);
+    if(C4_LIKELY(buf.len >= digits))
+        write_dec_unchecked(buf, v, digits);
+    return digits;
+}
+
 /** write an integer to a string in hexadecimal format. This is the
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
- * @return the number of characters required for the string,
- * even if the string is not long enough for the result.
- * No writes are done past the end of the string. */
+ * @note does not prefix with 0x
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the required size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
-size_t write_hex(substr buf, T v)
+C4_ALWAYS_INLINE size_t write_hex(substr buf, T v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
     C4_ASSERT(v >= 0);
-    size_t pos = 0;
-    do {
-        _c4appendhex(v & T(15));
-        v >>= 4;
-    } while(v);
-    buf.reverse_range(0, pos <= buf.len ? pos : buf.len);
-    return pos;
+    unsigned digits = digits_hex(v);
+    if(C4_LIKELY(buf.len >= digits))
+        write_hex_unchecked(buf, v, digits);
+    return digits;
 }
 
 /** write an integer to a string in octal format. This is the
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
  * @note does not prefix with 0o
- * @return the number of characters required for the string,
- * even if the string is not long enough for the result.
- * No writes are done past the end of the string. */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the required size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
-size_t write_oct(substr buf, T v)
+C4_ALWAYS_INLINE size_t write_oct(substr buf, T v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
     C4_ASSERT(v >= 0);
-    size_t pos = 0;
-    do {
-        _c4append('0' + (v & T(7)));
-        v >>= 3;
-    } while(v);
-    buf.reverse_range(0, pos <= buf.len ? pos : buf.len);
-    return pos;
+    unsigned digits = digits_oct(v);
+    if(C4_LIKELY(buf.len >= digits))
+        write_oct_unchecked(buf, v, digits);
+    return digits;
 }
 
 /** write an integer to a string in binary format. This is the
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
  * @note does not prefix with 0b
- * @return the number of characters required for the string,
- * even if the string is not long enough for the result.
- * No writes are done past the end of the string. */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the required size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
-size_t write_bin(substr buf, T v)
+C4_ALWAYS_INLINE size_t write_bin(substr buf, T v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
     C4_ASSERT(v >= 0);
-    size_t pos = 0;
-    do {
-        _c4append('0' + (v & T(1)));
-        v >>= 1;
-    } while(v);
-    buf.reverse_range(0, pos <= buf.len ? pos : buf.len);
-    return pos;
+    unsigned digits = digits_bin(v);
+    C4_ASSERT(digits > 0);
+    if(C4_LIKELY(buf.len >= digits))
+        write_bin_unchecked(buf, v, digits);
+    return digits;
 }
 
 
 namespace detail {
 template<class U> using NumberWriter = size_t (*)(substr, U);
-/** @todo pass the writer as a template parameter */
 template<class T, NumberWriter<T> writer>
-size_t write_num_digits(substr buf, T v, size_t num_digits)
+size_t write_num_digits(substr buf, T v, size_t num_digits) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
     size_t ret = writer(buf, v);
@@ -10163,40 +11350,42 @@ size_t write_num_digits(substr buf, T v, size_t num_digits)
 
 /** same as c4::write_dec(), but pad with zeroes on the left
  * such that the resulting string is @p num_digits wide.
- * If the given number is wider than num_digits, then the number prevails. */
+ * If the given number is requires more than num_digits, then the number prevails. */
 template<class T>
-size_t write_dec(substr buf, T val, size_t num_digits)
+C4_ALWAYS_INLINE size_t write_dec(substr buf, T val, size_t num_digits) noexcept
 {
     return detail::write_num_digits<T, &write_dec<T>>(buf, val, num_digits);
 }
 
 /** same as c4::write_hex(), but pad with zeroes on the left
  * such that the resulting string is @p num_digits wide.
- * If the given number is wider than num_digits, then the number prevails. */
+ * If the given number is requires more than num_digits, then the number prevails. */
 template<class T>
-size_t write_hex(substr buf, T val, size_t num_digits)
+C4_ALWAYS_INLINE size_t write_hex(substr buf, T val, size_t num_digits) noexcept
 {
     return detail::write_num_digits<T, &write_hex<T>>(buf, val, num_digits);
 }
 
 /** same as c4::write_bin(), but pad with zeroes on the left
  * such that the resulting string is @p num_digits wide.
- * If the given number is wider than num_digits, then the number prevails. */
+ * If the given number is requires more than num_digits, then the number prevails. */
 template<class T>
-size_t write_bin(substr buf, T val, size_t num_digits)
+C4_ALWAYS_INLINE size_t write_bin(substr buf, T val, size_t num_digits) noexcept
 {
     return detail::write_num_digits<T, &write_bin<T>>(buf, val, num_digits);
 }
 
 /** same as c4::write_oct(), but pad with zeroes on the left
  * such that the resulting string is @p num_digits wide.
- * If the given number is wider than num_digits, then the number prevails. */
+ * If the given number is requires more than num_digits, then the number prevails. */
 template<class T>
-size_t write_oct(substr buf, T val, size_t num_digits)
+C4_ALWAYS_INLINE size_t write_oct(substr buf, T val, size_t num_digits) noexcept
 {
     return detail::write_num_digits<T, &write_oct<T>>(buf, val, num_digits);
 }
 
+C4_SUPPRESS_WARNING_GCC_POP
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
@@ -10206,11 +11395,18 @@ size_t write_oct(substr buf, T val, size_t num_digits)
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
  * @note The string must be trimmed. Whitespace is not accepted.
- * @return true if the conversion was successful */
+ * @note the string must not be empty
+ * @note there is no check for overflow; the value wraps around
+ * in a way similar to the standard C/C++ overflow behavior.
+ * For example, `read_dec<int8_t>("128", &val)` returns true
+ * and val will be set to 0 because 127 is the max i8 value.
+ * @see overflows<T>() to find out if a number string overflows a type range
+ * @return true if the conversion was successful (no overflow check) */
 template<class I>
-C4_ALWAYS_INLINE bool read_dec(csubstr s, I *C4_RESTRICT v)
+C4_ALWAYS_INLINE bool read_dec(csubstr s, I *C4_RESTRICT v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<I>::value);
+    C4_ASSERT(!s.empty());
     *v = 0;
     for(char c : s)
     {
@@ -10225,12 +11421,19 @@ C4_ALWAYS_INLINE bool read_dec(csubstr s, I *C4_RESTRICT v)
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
  * @note does not accept leading 0x or 0X
+ * @note the string must not be empty
  * @note the string must be trimmed. Whitespace is not accepted.
- * @return true if the conversion was successful */
+ * @note there is no check for overflow; the value wraps around
+ * in a way similar to the standard C/C++ overflow behavior.
+ * For example, `read_hex<int8_t>("80", &val)` returns true
+ * and val will be set to 0 because 7f is the max i8 value.
+ * @see overflows<T>() to find out if a number string overflows a type range
+ * @return true if the conversion was successful (no overflow check) */
 template<class I>
-C4_ALWAYS_INLINE bool read_hex(csubstr s, I *C4_RESTRICT v)
+C4_ALWAYS_INLINE bool read_hex(csubstr s, I *C4_RESTRICT v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<I>::value);
+    C4_ASSERT(!s.empty());
     *v = 0;
     for(char c : s)
     {
@@ -10252,12 +11455,19 @@ C4_ALWAYS_INLINE bool read_hex(csubstr s, I *C4_RESTRICT v)
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
  * @note does not accept leading 0b or 0B
+ * @note the string must not be empty
  * @note the string must be trimmed. Whitespace is not accepted.
- * @return true if the conversion was successful */
+ * @note there is no check for overflow; the value wraps around
+ * in a way similar to the standard C/C++ overflow behavior.
+ * For example, `read_bin<int8_t>("10000000", &val)` returns true
+ * and val will be set to 0 because 1111111 is the max i8 value.
+ * @see overflows<T>() to find out if a number string overflows a type range
+ * @return true if the conversion was successful (no overflow check) */
 template<class I>
-C4_ALWAYS_INLINE bool read_bin(csubstr s, I *C4_RESTRICT v)
+C4_ALWAYS_INLINE bool read_bin(csubstr s, I *C4_RESTRICT v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<I>::value);
+    C4_ASSERT(!s.empty());
     *v = 0;
     for(char c : s)
     {
@@ -10274,12 +11484,19 @@ C4_ALWAYS_INLINE bool read_bin(csubstr s, I *C4_RESTRICT v)
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
  * @note does not accept leading 0o or 0O
+ * @note the string must not be empty
  * @note the string must be trimmed. Whitespace is not accepted.
- * @return true if the conversion was successful */
+ * @note there is no check for overflow; the value wraps around
+ * in a way similar to the standard C/C++ overflow behavior.
+ * For example, `read_oct<int8_t>("200", &val)` returns true
+ * and val will be set to 0 because 177 is the max i8 value.
+ * @see overflows<T>() to find out if a number string overflows a type range
+ * @return true if the conversion was successful (no overflow check) */
 template<class I>
-C4_ALWAYS_INLINE bool read_oct(csubstr s, I *C4_RESTRICT v)
+C4_ALWAYS_INLINE bool read_oct(csubstr s, I *C4_RESTRICT v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<I>::value);
+    C4_ASSERT(!s.empty());
     *v = 0;
     for(char c : s)
     {
@@ -10296,177 +11513,219 @@ C4_ALWAYS_INLINE bool read_oct(csubstr s, I *C4_RESTRICT v)
 //-----------------------------------------------------------------------------
 
 namespace detail {
-// do not use the type as the template argument because in some
-// platforms long!=int32 and long!=int64. Just use the numbytes
-// which is more generic and spares lengthy SFINAE code.
-template<size_t numbytes> struct itoa_min;
-template<> struct itoa_min<1>
-{
-    static csubstr value_dec() { return csubstr("128"); }
-    static csubstr value_hex() { return csubstr("80"); }
-    static csubstr value_oct() { return csubstr("200"); }
-    static csubstr value_bin() { return csubstr("10000000"); }
-};
-template<> struct itoa_min<2>
+inline size_t _itoa2buf(substr buf, size_t pos, csubstr val) noexcept
 {
-    static csubstr value_dec() { return csubstr("32768"); }
-    static csubstr value_hex() { return csubstr("8000"); }
-    static csubstr value_oct() { return csubstr("100000"); }
-    static csubstr value_bin() { return csubstr("1000000000000000"); }
-};
-template<> struct itoa_min<4>
-{
-    static csubstr value_dec() { return csubstr("2147483648"); }
-    static csubstr value_hex() { return csubstr("80000000"); }
-    static csubstr value_oct() { return csubstr("20000000000"); }
-    static csubstr value_bin() { return csubstr("10000000000000000000000000000000"); }
-};
-template<> struct itoa_min<8>
-{
-    static csubstr value_dec() { return csubstr("9223372036854775808"); }
-    static csubstr value_hex() { return csubstr("8000000000000000"); }
-    static csubstr value_oct() { return csubstr("1000000000000000000000"); }
-    static csubstr value_bin() { return csubstr("1000000000000000000000000000000000000000000000000000000000000000"); }
-};
-inline size_t _itoa2buf(substr buf, size_t pos, csubstr val)
-{
-    if(C4_LIKELY(pos + val.len <= buf.len))
-        memcpy(buf.str + pos, val.str, val.len);
+    C4_ASSERT(pos + val.len <= buf.len);
+    memcpy(buf.str + pos, val.str, val.len);
     return pos + val.len;
 }
-inline size_t _itoa2bufwithdigits(substr buf, size_t pos, size_t num_digits, csubstr val)
+inline size_t _itoa2bufwithdigits(substr buf, size_t pos, size_t num_digits, csubstr val) noexcept
 {
     num_digits = num_digits > val.len ? num_digits - val.len : 0;
+    C4_ASSERT(num_digits + val.len <= buf.len);
     for(size_t i = 0; i < num_digits; ++i)
         _c4append('0');
-    return _itoa2buf(buf, pos, val);
+    return detail::_itoa2buf(buf, pos, val);
 }
-template<class T>
-size_t _itoadec2buf(substr buf)
+template<class I>
+C4_NO_INLINE size_t _itoadec2buf(substr buf) noexcept
 {
-    if(C4_LIKELY(buf.len > 0))
-    {
-        buf.str[0] = '-';
-        return detail::_itoa2buf(buf, 1, detail::itoa_min<sizeof(T)>::value_dec());
-    }
-    else
-    {
-        return detail::_itoa2buf({}, 1, detail::itoa_min<sizeof(T)>::value_dec());
-    }
-    C4_UNREACHABLE();
+    using digits_type = detail::charconv_digits<I>;
+    if(C4_UNLIKELY(buf.len < digits_type::maxdigits_dec))
+        return digits_type::maxdigits_dec;
+    buf.str[0] = '-';
+    return detail::_itoa2buf(buf, 1, digits_type::min_value_dec());
 }
 template<class I>
-size_t _itoa2buf(substr buf, I radix)
+C4_NO_INLINE size_t _itoa2buf(substr buf, I radix) noexcept
 {
+    using digits_type = detail::charconv_digits<I>;
     size_t pos = 0;
-    _c4append('-');
+    if(C4_LIKELY(buf.len > 0))
+        buf.str[pos++] = '-';
     switch(radix)
     {
     case I(10):
-        /*...........................*/ return _itoa2buf(buf, pos, itoa_min<sizeof(I)>::value_dec());
+        if(C4_UNLIKELY(buf.len < digits_type::maxdigits_dec))
+            return digits_type::maxdigits_dec;
+        pos =_itoa2buf(buf, pos, digits_type::min_value_dec());
+        break;
     case I(16):
-        _c4append('0'); _c4append('x'); return _itoa2buf(buf, pos, itoa_min<sizeof(I)>::value_hex());
+        if(C4_UNLIKELY(buf.len < digits_type::maxdigits_hex))
+            return digits_type::maxdigits_hex;
+        buf.str[pos++] = '0';
+        buf.str[pos++] = 'x';
+        pos = _itoa2buf(buf, pos, digits_type::min_value_hex());
+        break;
     case I( 2):
-        _c4append('0'); _c4append('b'); return _itoa2buf(buf, pos, itoa_min<sizeof(I)>::value_bin());
+        if(C4_UNLIKELY(buf.len < digits_type::maxdigits_bin))
+            return digits_type::maxdigits_bin;
+        buf.str[pos++] = '0';
+        buf.str[pos++] = 'b';
+        pos = _itoa2buf(buf, pos, digits_type::min_value_bin());
+        break;
     case I( 8):
-        _c4append('0'); _c4append('o'); return _itoa2buf(buf, pos, itoa_min<sizeof(I)>::value_oct());
+        if(C4_UNLIKELY(buf.len < digits_type::maxdigits_oct))
+            return digits_type::maxdigits_oct;
+        buf.str[pos++] = '0';
+        buf.str[pos++] = 'o';
+        pos = _itoa2buf(buf, pos, digits_type::min_value_oct());
+        break;
     }
-    C4_ERROR("unknown radix");
-    return 0;
+    return pos;
 }
 template<class I>
-size_t _itoa2buf(substr buf, I radix, size_t num_digits)
+C4_NO_INLINE size_t _itoa2buf(substr buf, I radix, size_t num_digits) noexcept
 {
+    using digits_type = detail::charconv_digits<I>;
     size_t pos = 0;
-    _c4append('-');
+    size_t needed_digits = 0;
+    if(C4_LIKELY(buf.len > 0))
+        buf.str[pos++] = '-';
     switch(radix)
     {
     case I(10):
-        /*...........................*/ return _itoa2bufwithdigits(buf, pos, num_digits, itoa_min<sizeof(I)>::value_dec());
+        // add 1 to account for -
+        needed_digits = num_digits+1 > digits_type::maxdigits_dec ? num_digits+1 : digits_type::maxdigits_dec;
+        if(C4_UNLIKELY(buf.len < needed_digits))
+            return needed_digits;
+        pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_dec());
+        break;
     case I(16):
-        _c4append('0'); _c4append('x'); return _itoa2bufwithdigits(buf, pos, num_digits, itoa_min<sizeof(I)>::value_hex());
+        // add 3 to account for -0x
+        needed_digits = num_digits+3 > digits_type::maxdigits_hex ? num_digits+3 : digits_type::maxdigits_hex;
+        if(C4_UNLIKELY(buf.len < needed_digits))
+            return needed_digits;
+        buf.str[pos++] = '0';
+        buf.str[pos++] = 'x';
+        pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_hex());
+        break;
     case I( 2):
-        _c4append('0'); _c4append('b'); return _itoa2bufwithdigits(buf, pos, num_digits, itoa_min<sizeof(I)>::value_bin());
+        // add 3 to account for -0b
+        needed_digits = num_digits+3 > digits_type::maxdigits_bin ? num_digits+3 : digits_type::maxdigits_bin;
+        if(C4_UNLIKELY(buf.len < needed_digits))
+            return needed_digits;
+        C4_ASSERT(buf.len >= digits_type::maxdigits_bin);
+        buf.str[pos++] = '0';
+        buf.str[pos++] = 'b';
+        pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_bin());
+        break;
     case I( 8):
-        _c4append('0'); _c4append('o'); return _itoa2bufwithdigits(buf, pos, num_digits, itoa_min<sizeof(I)>::value_oct());
+        // add 3 to account for -0o
+        needed_digits = num_digits+3 > digits_type::maxdigits_oct ? num_digits+3 : digits_type::maxdigits_oct;
+        if(C4_UNLIKELY(buf.len < needed_digits))
+            return needed_digits;
+        C4_ASSERT(buf.len >= digits_type::maxdigits_oct);
+        buf.str[pos++] = '0';
+        buf.str[pos++] = 'o';
+        pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_oct());
+        break;
     }
-    C4_ERROR("unknown radix");
-    return 0;
+    return pos;
 }
 } // namespace detail
 
 
 /** convert an integral signed decimal to a string.
- * The resulting string is NOT zero-terminated.
- * Writing stops at the buffer's end.
- * @return the number of characters needed for the result, even if the buffer size is insufficient */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
-size_t itoa(substr buf, T v)
+C4_ALWAYS_INLINE size_t itoa(substr buf, T v) noexcept
 {
     C4_STATIC_ASSERT(std::is_signed<T>::value);
-    if(v >= 0)
+    if(v >= T(0))
     {
+        // write_dec() checks the buffer size, so no need to check here
         return write_dec(buf, v);
     }
-    else
+    // when T is the min value (eg i8: -128), negating it
+    // will overflow, so treat the min as a special case
+    else if(C4_LIKELY(v != std::numeric_limits<T>::min()))
     {
-        if(C4_LIKELY(v != std::numeric_limits<T>::min()))
+        v = -v;
+        unsigned digits = digits_dec(v);
+        if(C4_LIKELY(buf.len >= digits + 1u))
         {
-            if(C4_LIKELY(buf.len > 0))
-            {
-                buf.str[0] = '-';
-                return size_t(1) + write_dec(buf.sub(1), -v);
-            }
-            else
-            {
-                return size_t(1) + write_dec({}, -v);
-            }
-            C4_UNREACHABLE();
+            buf.str[0] = '-';
+            write_dec_unchecked(buf.sub(1), v, digits);
         }
-        else
-        {
-            // when T is the min value (eg i8: -128), negating it
-            // will overflow. so we just use the explicit value
-            return detail::_itoadec2buf<T>(buf);
-        }
-        C4_UNREACHABLE();
+        return digits + 1u;
     }
-    C4_UNREACHABLE();
+    return detail::_itoadec2buf<T>(buf);
 }
 
 /** convert an integral signed integer to a string, using a specific
  * radix. The radix must be 2, 8, 10 or 16.
  *
- * The resulting string is NOT zero-terminated.
- * Writing stops at the buffer's end.
- * @return the number of characters needed for the result, even if the buffer size is insufficient */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
-size_t itoa(substr buf, T v, T radix)
+C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix) noexcept
 {
     C4_STATIC_ASSERT(std::is_signed<T>::value);
     C4_ASSERT(radix == 2 || radix == 8 || radix == 10 || radix == 16);
+    C4_SUPPRESS_WARNING_GCC_PUSH
+    #if (defined(__GNUC__) && (__GNUC__ >= 7))
+        C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow")  // gcc has a false positive here
+    #endif
     // when T is the min value (eg i8: -128), negating it
-    // will overflow
+    // will overflow, so treat the min as a special case
     if(C4_LIKELY(v != std::numeric_limits<T>::min()))
     {
-        size_t pos = 0;
+        unsigned pos = 0;
         if(v < 0)
         {
             v = -v;
-            _c4append('-');
+            if(C4_LIKELY(buf.len > 0))
+                buf.str[pos] = '-';
+            ++pos;
         }
+        unsigned digits = 0;
         switch(radix)
         {
-        case 10:
-            /*............................*/return pos + write_dec(pos < buf.len ? buf.sub(pos) : substr(), v);
-        case 16:
-            _c4append('0'); _c4append('x'); return pos + write_hex(pos < buf.len ? buf.sub(pos) : substr(), v);
-        case 2:
-            _c4append('0'); _c4append('b'); return pos + write_bin(pos < buf.len ? buf.sub(pos) : substr(), v);
-        case 8:
-            _c4append('0'); _c4append('o'); return pos + write_oct(pos < buf.len ? buf.sub(pos) : substr(), v);
+        case T(10):
+            digits = digits_dec(v);
+            if(C4_LIKELY(buf.len >= pos + digits))
+                write_dec_unchecked(buf.sub(pos), v, digits);
+            break;
+        case T(16):
+            digits = digits_hex(v);
+            if(C4_LIKELY(buf.len >= pos + 2u + digits))
+            {
+                buf.str[pos + 0] = '0';
+                buf.str[pos + 1] = 'x';
+                write_hex_unchecked(buf.sub(pos + 2), v, digits);
+            }
+            digits += 2u;
+            break;
+        case T(2):
+            digits = digits_bin(v);
+            if(C4_LIKELY(buf.len >= pos + 2u + digits))
+            {
+                buf.str[pos + 0] = '0';
+                buf.str[pos + 1] = 'b';
+                write_bin_unchecked(buf.sub(pos + 2), v, digits);
+            }
+            digits += 2u;
+            break;
+        case T(8):
+            digits = digits_oct(v);
+            if(C4_LIKELY(buf.len >= pos + 2u + digits))
+            {
+                buf.str[pos + 0] = '0';
+                buf.str[pos + 1] = 'o';
+                write_oct_unchecked(buf.sub(pos + 2), v, digits);
+            }
+            digits += 2u;
+            break;
         }
+        return pos + digits;
     }
+    C4_SUPPRESS_WARNING_GCC_POP
     // when T is the min value (eg i8: -128), negating it
     // will overflow
     return detail::_itoa2buf<T>(buf, radix);
@@ -10474,37 +11733,77 @@ size_t itoa(substr buf, T v, T radix)
 
 
 /** same as c4::itoa(), but pad with zeroes on the left such that the
- * resulting string is @p num_digits wide. The @p radix must be 2,
- * 8, 10 or 16.  The resulting string is NOT zero-terminated.  Writing
- * stops at the buffer's end.
+ * resulting string is @p num_digits wide, not accounting for radix
+ * prefix (0x,0o,0b). The @p radix must be 2, 8, 10 or 16.
  *
- * @return the number of characters needed for the result, even if
- * the buffer size is insufficient */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
-size_t itoa(substr buf, T v, T radix, size_t num_digits)
+C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix, size_t num_digits) noexcept
 {
     C4_STATIC_ASSERT(std::is_signed<T>::value);
     C4_ASSERT(radix == 2 || radix == 8 || radix == 10 || radix == 16);
+    C4_SUPPRESS_WARNING_GCC_PUSH
+    #if (defined(__GNUC__) && (__GNUC__ >= 7))
+        C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow")  // gcc has a false positive here
+    #endif
+    // when T is the min value (eg i8: -128), negating it
+    // will overflow, so treat the min as a special case
     if(C4_LIKELY(v != std::numeric_limits<T>::min()))
     {
-        size_t pos = 0;
+        unsigned pos = 0;
         if(v < 0)
         {
             v = -v;
-            _c4append('-');
+            if(C4_LIKELY(buf.len > 0))
+                buf.str[pos] = '-';
+            ++pos;
         }
+        unsigned total_digits = 0;
         switch(radix)
         {
-        case 10:
-            /*............................*/return pos + write_dec(pos < buf.len ? buf.sub(pos) : substr(), v, num_digits);
-        case 16:
-            _c4append('0'); _c4append('x'); return pos + write_hex(pos < buf.len ? buf.sub(pos) : substr(), v, num_digits);
-        case 2:
-            _c4append('0'); _c4append('b'); return pos + write_bin(pos < buf.len ? buf.sub(pos) : substr(), v, num_digits);
-        case 8:
-            _c4append('0'); _c4append('o'); return pos + write_oct(pos < buf.len ? buf.sub(pos) : substr(), v, num_digits);
+        case T(10):
+            total_digits = digits_dec(v);
+            total_digits = pos + (unsigned)(num_digits > total_digits ? num_digits : total_digits);
+            if(C4_LIKELY(buf.len >= total_digits))
+                write_dec(buf.sub(pos), v, num_digits);
+            break;
+        case T(16):
+            total_digits = digits_hex(v);
+            total_digits = pos + 2u + (unsigned)(num_digits > total_digits ? num_digits : total_digits);
+            if(C4_LIKELY(buf.len >= total_digits))
+            {
+                buf.str[pos + 0] = '0';
+                buf.str[pos + 1] = 'x';
+                write_hex(buf.sub(pos + 2), v, num_digits);
+            }
+            break;
+        case T(2):
+            total_digits = digits_bin(v);
+            total_digits = pos + 2u + (unsigned)(num_digits > total_digits ? num_digits : total_digits);
+            if(C4_LIKELY(buf.len >= total_digits))
+            {
+                buf.str[pos + 0] = '0';
+                buf.str[pos + 1] = 'b';
+                write_bin(buf.sub(pos + 2), v, num_digits);
+            }
+            break;
+        case T(8):
+            total_digits = digits_oct(v);
+            total_digits = pos + 2u + (unsigned)(num_digits > total_digits ? num_digits : total_digits);
+            if(C4_LIKELY(buf.len >= total_digits))
+            {
+                buf.str[pos + 0] = '0';
+                buf.str[pos + 1] = 'o';
+                write_oct(buf.sub(pos + 2), v, num_digits);
+            }
+            break;
         }
+        return total_digits;
     }
+    C4_SUPPRESS_WARNING_GCC_POP
     // when T is the min value (eg i8: -128), negating it
     // will overflow
     return detail::_itoa2buf<T>(buf, radix, num_digits);
@@ -10516,67 +11815,127 @@ size_t itoa(substr buf, T v, T radix, size_t num_digits)
 //-----------------------------------------------------------------------------
 
 /** convert an integral unsigned decimal to a string.
- * The resulting string is NOT zero-terminated.
- * Writing stops at the buffer's end.
- * @return the number of characters needed for the result, even if the buffer size is insufficient */
+ *
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
-size_t utoa(substr buf, T v)
+C4_ALWAYS_INLINE size_t utoa(substr buf, T v) noexcept
 {
     C4_STATIC_ASSERT(std::is_unsigned<T>::value);
+    // write_dec() does the buffer length check, so no need to check here
     return write_dec(buf, v);
 }
 
-/** convert an integral unsigned integer to a string, using a specific radix. The radix must be 2, 8, 10 or 16.
- * The resulting string is NOT zero-terminated.
- * Writing stops at the buffer's end.
- * @return the number of characters needed for the result, even if the buffer size is insufficient */
+/** convert an integral unsigned integer to a string, using a specific
+ * radix. The radix must be 2, 8, 10 or 16.
+ *
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
-size_t utoa(substr buf, T v, T radix)
+C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix) noexcept
 {
     C4_STATIC_ASSERT(std::is_unsigned<T>::value);
     C4_ASSERT(radix == 10 || radix == 16 || radix == 2 || radix == 8);
-    size_t pos = 0;
+    unsigned digits = 0;
     switch(radix)
     {
-    case 10:
-        /*............................*/return pos + write_dec(pos < buf.len ? buf.sub(pos) : substr(), v);
-    case 16:
-        _c4append('0'); _c4append('x'); return pos + write_hex(pos < buf.len ? buf.sub(pos) : substr(), v);
-    case 2:
-        _c4append('0'); _c4append('b'); return pos + write_bin(pos < buf.len ? buf.sub(pos) : substr(), v);
-    case 8:
-        _c4append('0'); _c4append('o'); return pos + write_oct(pos < buf.len ? buf.sub(pos) : substr(), v);
+    case T(10):
+        digits = digits_dec(v);
+        if(C4_LIKELY(buf.len >= digits))
+            write_dec_unchecked(buf, v, digits);
+        break;
+    case T(16):
+        digits = digits_hex(v);
+        if(C4_LIKELY(buf.len >= digits+2u))
+        {
+            buf.str[0] = '0';
+            buf.str[1] = 'x';
+            write_hex_unchecked(buf.sub(2), v, digits);
+        }
+        digits += 2u;
+        break;
+    case T(2):
+        digits = digits_bin(v);
+        if(C4_LIKELY(buf.len >= digits+2u))
+        {
+            buf.str[0] = '0';
+            buf.str[1] = 'b';
+            write_bin_unchecked(buf.sub(2), v, digits);
+        }
+        digits += 2u;
+        break;
+    case T(8):
+        digits = digits_oct(v);
+        if(C4_LIKELY(buf.len >= digits+2u))
+        {
+            buf.str[0] = '0';
+            buf.str[1] = 'o';
+            write_oct_unchecked(buf.sub(2), v, digits);
+        }
+        digits += 2u;
+        break;
     }
-    C4_UNREACHABLE();
-    return substr::npos;
+    return digits;
 }
 
 /** same as c4::utoa(), but pad with zeroes on the left such that the
  * resulting string is @p num_digits wide. The @p radix must be 2,
- * 8, 10 or 16.  The resulting string is NOT zero-terminated.  Writing
- * stops at the buffer's end.
+ * 8, 10 or 16.
  *
- * @return the number of characters needed for the result, even if
- * the buffer size is insufficient */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
-size_t utoa(substr buf, T v, T radix, size_t num_digits)
+C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix, size_t num_digits) noexcept
 {
     C4_STATIC_ASSERT(std::is_unsigned<T>::value);
     C4_ASSERT(radix == 10 || radix == 16 || radix == 2 || radix == 8);
-    size_t pos = 0;
+    unsigned total_digits = 0;
     switch(radix)
     {
-    case 10:
-        /*............................*/return pos + write_dec(pos < buf.len ? buf.sub(pos) : substr(), v, num_digits);
-    case 16:
-        _c4append('0'); _c4append('x'); return pos + write_hex(pos < buf.len ? buf.sub(pos) : substr(), v, num_digits);
-    case 2:
-        _c4append('0'); _c4append('b'); return pos + write_bin(pos < buf.len ? buf.sub(pos) : substr(), v, num_digits);
-    case 8:
-        _c4append('0'); _c4append('o'); return pos + write_oct(pos < buf.len ? buf.sub(pos) : substr(), v, num_digits);
+    case T(10):
+        total_digits = digits_dec(v);
+        total_digits = (unsigned)(num_digits > total_digits ? num_digits : total_digits);
+        if(C4_LIKELY(buf.len >= total_digits))
+            write_dec(buf, v, num_digits);
+        break;
+    case T(16):
+        total_digits = digits_hex(v);
+        total_digits = 2u + (unsigned)(num_digits > total_digits ? num_digits : total_digits);
+        if(C4_LIKELY(buf.len >= total_digits))
+        {
+            buf.str[0] = '0';
+            buf.str[1] = 'x';
+            write_hex(buf.sub(2), v, num_digits);
+        }
+        break;
+    case T(2):
+        total_digits = digits_bin(v);
+        total_digits = 2u + (unsigned)(num_digits > total_digits ? num_digits : total_digits);
+        if(C4_LIKELY(buf.len >= total_digits))
+        {
+            buf.str[0] = '0';
+            buf.str[1] = 'b';
+            write_bin(buf.sub(2), v, num_digits);
+        }
+        break;
+    case T(8):
+        total_digits = digits_oct(v);
+        total_digits = 2u + (unsigned)(num_digits > total_digits ? num_digits : total_digits);
+        if(C4_LIKELY(buf.len >= total_digits))
+        {
+            buf.str[0] = '0';
+            buf.str[1] = 'o';
+            write_oct(buf.sub(2), v, num_digits);
+        }
+        break;
     }
-    C4_UNREACHABLE();
-    return substr::npos;
+    return total_digits;
 }
 
 
@@ -10584,12 +11943,13 @@ size_t utoa(substr buf, T v, T radix, size_t num_digits)
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-/** Convert a trimmed string to a signed integral value. The string
- * can be formatted as decimal, binary (prefix 0b or 0B), octal
+/** Convert a trimmed string to a signed integral value. The input
+ * string can be formatted as decimal, binary (prefix 0b or 0B), octal
  * (prefix 0o or 0O) or hexadecimal (prefix 0x or 0X). Strings with
- * leading zeroes are considered as decimal. Every character in the
- * input string is read for the conversion; it must not contain any
- * leading or trailing whitespace.
+ * leading zeroes are considered as decimal and not octal (unlike the
+ * C/C++ convention). Every character in the input string is read for
+ * the conversion; the input string must not contain any leading or
+ * trailing whitespace.
  *
  * @return true if the conversion was successful.
  *
@@ -10598,9 +11958,12 @@ size_t utoa(substr buf, T v, T radix, size_t num_digits)
  * which case the result will wrap around the type's range.
  * This is similar to native behavior.
  *
+ * @note a positive sign is not accepted. ie, the string must not
+ * start with '+'
+ *
  * @see atoi_first() if the string is not trimmed to the value to read. */
 template<class T>
-bool atoi(csubstr str, T * C4_RESTRICT v)
+C4_ALWAYS_INLINE bool atoi(csubstr str, T * C4_RESTRICT v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
     C4_STATIC_ASSERT(std::is_signed<T>::value);
@@ -10608,70 +11971,42 @@ bool atoi(csubstr str, T * C4_RESTRICT v)
     if(C4_UNLIKELY(str.len == 0))
         return false;
 
+    C4_ASSERT(str.str[0] != '+');
+
     T sign = 1;
     size_t start = 0;
     if(str.str[0] == '-')
     {
-        if(C4_UNLIKELY(str.len == 1))
+        if(C4_UNLIKELY(str.len == ++start))
             return false;
-        ++start;
         sign = -1;
     }
 
-    if(str.str[start] != '0')
+    bool parsed_ok = true;
+    if(str.str[start] != '0') // this should be the common case, so put it first
     {
-        if(C4_UNLIKELY( ! read_dec(str.sub(start), v)))
-            return false;
+        parsed_ok = read_dec(str.sub(start), v);
     }
-    else
+    else if(str.len > start + 1)
     {
-        if(str.len == start+1)
-        {
-            *v = 0; // because the first character is 0
-            return true;
-        }
+        // starts with 0: is it 0x, 0o, 0b?
+        const char pfx = str.str[start + 1];
+        if(pfx == 'x' || pfx == 'X')
+            parsed_ok = str.len > start + 2 && read_hex(str.sub(start + 2), v);
+        else if(pfx == 'b' || pfx == 'B')
+            parsed_ok = str.len > start + 2 && read_bin(str.sub(start + 2), v);
+        else if(pfx == 'o' || pfx == 'O')
+            parsed_ok = str.len > start + 2 && read_oct(str.sub(start + 2), v);
         else
-        {
-            char pfx = str.str[start+1];
-            if(pfx == 'x' || pfx == 'X') // hexadecimal
-            {
-                if(C4_UNLIKELY(str.len <= start + 2))
-                    return false;
-                if(C4_UNLIKELY( ! read_hex(str.sub(start + 2), v)))
-                    return false;
-            }
-            else if(pfx == 'b' || pfx == 'B') // binary
-            {
-                if(C4_UNLIKELY(str.len <= start + 2))
-                    return false;
-                if(C4_UNLIKELY( ! read_bin(str.sub(start + 2), v)))
-                    return false;
-            }
-            else if(pfx == 'o' || pfx == 'O') // octal
-            {
-                if(C4_UNLIKELY(str.len <= start + 2))
-                    return false;
-                if(C4_UNLIKELY( ! read_oct(str.sub(start + 2), v)))
-                    return false;
-            }
-            else
-            {
-                // we know the first character is 0
-                auto fno = str.first_not_of('0', start + 1);
-                if(fno == csubstr::npos)
-                {
-                    *v = 0;
-                    return true;
-                }
-                if(C4_UNLIKELY( ! read_dec(str.sub(fno), v)))
-                {
-                    return false;
-                }
-            }
-        }
+            parsed_ok = read_dec(str.sub(start + 1), v);
     }
-    *v *= sign;
-    return true;
+    else
+    {
+        parsed_ok = read_dec(str.sub(start), v);
+    }
+    if(C4_LIKELY(parsed_ok))
+        *v *= sign;
+    return parsed_ok;
 }
 
 
@@ -10682,7 +12017,7 @@ bool atoi(csubstr str, T * C4_RESTRICT v)
  * @see atoi() if the string is already trimmed to the value to read.
  * @see csubstr::first_int_span() */
 template<class T>
-inline size_t atoi_first(csubstr str, T * C4_RESTRICT v)
+C4_ALWAYS_INLINE size_t atoi_first(csubstr str, T * C4_RESTRICT v)
 {
     csubstr trimmed = str.first_int_span();
     if(trimmed.len == 0)
@@ -10711,60 +12046,38 @@ inline size_t atoi_first(csubstr str, T * C4_RESTRICT v)
  *
  * @see atou_first() if the string is not trimmed to the value to read. */
 template<class T>
-bool atou(csubstr str, T * C4_RESTRICT v)
+bool atou(csubstr str, T * C4_RESTRICT v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
 
     if(C4_UNLIKELY(str.len == 0 || str.front() == '-'))
         return false;
 
+    bool parsed_ok = true;
     if(str.str[0] != '0')
     {
-        if(C4_UNLIKELY( ! read_dec(str, v)))
-            return false;
+        parsed_ok = read_dec(str, v);
     }
     else
     {
-        if(str.len == 1)
+        if(str.len > 1)
         {
-            *v = 0; // we know the first character is 0
-            return true;
+            const char pfx = str.str[1];
+            if(pfx == 'x' || pfx == 'X')
+                parsed_ok = str.len > 2 && read_hex(str.sub(2), v);
+            else if(pfx == 'b' || pfx == 'B')
+                parsed_ok = str.len > 2 && read_bin(str.sub(2), v);
+            else if(pfx == 'o' || pfx == 'O')
+                parsed_ok = str.len > 2 && read_oct(str.sub(2), v);
+            else
+                parsed_ok = read_dec(str, v);
         }
         else
         {
-            char pfx = str.str[1];
-            if(pfx == 'x' || pfx == 'X') // hexadecimal
-            {
-                if(C4_UNLIKELY(str.len <= 2))
-                    return false;
-                return read_hex(str.sub(2), v);
-            }
-            else if(pfx == 'b' || pfx == 'B') // binary
-            {
-                if(C4_UNLIKELY(str.len <= 2))
-                    return false;
-                return read_bin(str.sub(2), v);
-            }
-            else if(pfx == 'o' || pfx == 'O') // octal
-            {
-                if(C4_UNLIKELY(str.len <= 2))
-                    return false;
-                return read_oct(str.sub(2), v);
-            }
-            else
-            {
-                // we know the first character is 0
-                auto fno = str.first_not_of('0');
-                if(fno == csubstr::npos)
-                {
-                    *v = 0;
-                    return true;
-                }
-                return read_dec(str.sub(fno), v);
-            }
+            *v = 0; // we know the first character is 0
         }
     }
-    return true;
+    return parsed_ok;
 }
 
 
@@ -10775,7 +12088,7 @@ bool atou(csubstr str, T * C4_RESTRICT v)
  * @see atou() if the string is already trimmed to the value to read.
  * @see csubstr::first_uint_span() */
 template<class T>
-inline size_t atou_first(csubstr str, T *v)
+C4_ALWAYS_INLINE size_t atou_first(csubstr str, T *v)
 {
     csubstr trimmed = str.first_uint_span();
     if(trimmed.len == 0)
@@ -10798,21 +12111,209 @@ inline size_t atou_first(csubstr str, T *v)
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
+namespace detail {
+inline bool check_overflow(csubstr str, csubstr limit) noexcept
+{
+    if(str.len == limit.len)
+    {
+        for(size_t i = 0; i < limit.len; ++i)
+        {
+            if(str[i] < limit[i])
+                return false;
+            else if(str[i] > limit[i])
+                return true;
+        }
+        return false;
+    }
+    else
+        return str.len > limit.len;
+}
+} // namespace detail
+
+
+/** Test if the following string would overflow when converted to associated
+ * types.
+ * @return true if number will overflow, false if it fits (or doesn't parse)
+ */
+template<class T>
+auto overflows(csubstr str) noexcept
+    -> typename std::enable_if<std::is_unsigned<T>::value, bool>::type 
+{
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+
+    if(C4_UNLIKELY(str.len == 0))
+    {
+        return false;
+    }
+    else if(str.str[0] == '0')
+    {
+        if (str.len == 1)
+            return false;
+        switch (str.str[1])
+        {
+            case 'x':
+            case 'X':
+            {
+                size_t fno = str.first_not_of('0', 2);
+                if (fno == csubstr::npos)
+                    return false;
+                return !(str.len <= fno + (sizeof(T) * 2));
+            }
+            case 'b':
+            case 'B':
+            {
+                size_t fno = str.first_not_of('0', 2);
+                if (fno == csubstr::npos)
+                    return false;
+                return !(str.len <= fno +(sizeof(T) * 8));
+            }
+            case 'o':
+            case 'O':
+            {
+                size_t fno = str.first_not_of('0', 2);
+                if(fno == csubstr::npos)
+                    return false;
+                return detail::charconv_digits<T>::is_oct_overflow(str.sub(fno));
+            }
+            default:
+            {
+                size_t fno = str.first_not_of('0', 1);
+                if(fno == csubstr::npos)
+                    return false;
+                return detail::check_overflow(str.sub(fno), detail::charconv_digits<T>::max_value_dec());
+            }
+        }
+    }
+    else if(C4_UNLIKELY(str[0] == '-'))
+    {
+        return true;
+    }
+    else
+    {
+        return detail::check_overflow(str, detail::charconv_digits<T>::max_value_dec());
+    }
+}
+
+
+/** Test if the following string would overflow when converted to associated
+ * types.
+ * @return true if number will overflow, false if it fits (or doesn't parse)
+ */
+template<class T>
+auto overflows(csubstr str)
+    -> typename std::enable_if<std::is_signed<T>::value, bool>::type 
+{
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
+    if(C4_UNLIKELY(str.len == 0))
+        return false;
+    if(str.str[0] == '-')
+    {
+        if(str.str[1] == '0')
+        {
+            if(str.len == 2)
+                return false;
+            switch(str.str[2])
+            {
+                case 'x':
+                case 'X':
+                {
+                    size_t fno = str.first_not_of('0', 3);
+                    if (fno == csubstr::npos)
+                        return false;
+                    return detail::check_overflow(str.sub(fno), detail::charconv_digits<T>::min_value_hex());
+                }
+                case 'b':
+                case 'B':
+                {
+                    size_t fno = str.first_not_of('0', 3);
+                    if (fno == csubstr::npos)
+                        return false;
+                    return detail::check_overflow(str.sub(fno), detail::charconv_digits<T>::min_value_bin());
+                }
+                case 'o':
+                case 'O':
+                {
+                    size_t fno = str.first_not_of('0', 3);
+                    if(fno == csubstr::npos)
+                        return false;
+                    return detail::check_overflow(str.sub(fno), detail::charconv_digits<T>::min_value_oct());
+                }
+                default:
+                {
+                    size_t fno = str.first_not_of('0', 2);
+                    if(fno == csubstr::npos)
+                        return false;
+                    return detail::check_overflow(str.sub(fno), detail::charconv_digits<T>::min_value_dec());
+                }
+            }
+        }
+        else
+            return detail::check_overflow(str.sub(1), detail::charconv_digits<T>::min_value_dec());
+    }
+    else if(str.str[0] == '0')
+    {
+        if (str.len == 1)
+            return false;
+        switch(str.str[1])
+        {
+            case 'x':
+            case 'X':
+            {
+                size_t fno = str.first_not_of('0', 2);
+                if (fno == csubstr::npos)
+                    return false;
+                const size_t len = str.len - fno;
+                return !((len < sizeof (T) * 2) || (len == sizeof(T) * 2 && str[fno] <= '7'));
+            }
+            case 'b':
+            case 'B':
+            {
+                size_t fno = str.first_not_of('0', 2);
+                if (fno == csubstr::npos)
+                    return false;
+                return !(str.len <= fno + (sizeof(T) * 8 - 1));
+            }
+            case 'o':
+            case 'O':
+            {
+                size_t fno = str.first_not_of('0', 2);
+                if(fno == csubstr::npos)
+                    return false;
+                return detail::charconv_digits<T>::is_oct_overflow(str.sub(fno));
+            }
+            default:
+            {
+                size_t fno = str.first_not_of('0', 1);
+                if(fno == csubstr::npos)
+                    return false;
+                return detail::check_overflow(str.sub(fno), detail::charconv_digits<T>::max_value_dec());
+            }
+        }
+    }
+    else
+        return detail::check_overflow(str, detail::charconv_digits<T>::max_value_dec());
+}
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
 namespace detail {
 
 
+#if (!C4CORE_HAVE_STD_FROMCHARS)
 /** @see http://www.exploringbinary.com/ for many good examples on float-str conversion */
 template<size_t N>
 void get_real_format_str(char (& C4_RESTRICT fmt)[N], int precision, RealFormat_e formatting, const char* length_modifier="")
 {
     int iret;
     if(precision == -1)
-        iret = snprintf(fmt, sizeof(fmt), "%%%s%c", length_modifier, to_c_fmt(formatting));
+        iret = snprintf(fmt, sizeof(fmt), "%%%s%c", length_modifier, formatting);
     else if(precision == 0)
-        iret = snprintf(fmt, sizeof(fmt), "%%.%s%c", length_modifier, to_c_fmt(formatting));
+        iret = snprintf(fmt, sizeof(fmt), "%%.%s%c", length_modifier, formatting);
     else
-        iret = snprintf(fmt, sizeof(fmt), "%%.%d%s%c", precision, length_modifier, to_c_fmt(formatting));
+        iret = snprintf(fmt, sizeof(fmt), "%%.%d%s%c", precision, length_modifier, formatting);
     C4_ASSERT(iret >= 2 && size_t(iret) < sizeof(fmt));
     C4_UNUSED(iret);
 }
@@ -10857,8 +12358,10 @@ size_t print_one(substr str, const char* full_fmt, T v)
     return ret;
 #endif
 }
+#endif // (!C4CORE_HAVE_STD_FROMCHARS)
+
 
-#if !C4CORE_HAVE_STD_FROMCHARS && !defined(C4CORE_HAVE_FAST_FLOAT)
+#if (!C4CORE_HAVE_STD_FROMCHARS) && (!C4CORE_HAVE_FAST_FLOAT)
 /** scans a string using the given type format, while at the same time
  * allowing non-null-terminated strings AND guaranteeing that the given
  * string length is strictly respected, so that no buffer overflows
@@ -10895,24 +12398,28 @@ inline size_t scan_one(csubstr str, const char *type_fmt, T *v)
     C4_ASSERT(num_chars >= 0);
     return (size_t)(num_chars);
 }
-#endif
+#endif // (!C4CORE_HAVE_STD_FROMCHARS) && (!C4CORE_HAVE_FAST_FLOAT)
 
 
 #if C4CORE_HAVE_STD_TOCHARS
 template<class T>
-size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX)
+C4_ALWAYS_INLINE size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept
 {
     std::to_chars_result result;
     size_t pos = 0;
     if(formatting == FTOA_HEXA)
     {
-        _c4append('0');
-        _c4append('x');
+        if(buf.len > size_t(2))
+        {
+            buf.str[0] = '0';
+            buf.str[1] = 'x';
+        }
+        pos += size_t(2);
     }
     if(precision == -1)
-        result = std::to_chars(buf.str + pos, buf.str + buf.len, v, to_std_fmt(formatting));
+        result = std::to_chars(buf.str + pos, buf.str + buf.len, v, (std::chars_format)formatting);
     else
-        result = std::to_chars(buf.str + pos, buf.str + buf.len, v, to_std_fmt(formatting), precision);
+        result = std::to_chars(buf.str + pos, buf.str + buf.len, v, (std::chars_format)formatting, precision);
     if(result.ec == std::errc())
     {
         // all good, no errors.
@@ -10936,6 +12443,85 @@ size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX
 }
 #endif // C4CORE_HAVE_STD_TOCHARS
 
+
+#if C4CORE_HAVE_FAST_FLOAT
+template<class T>
+C4_ALWAYS_INLINE bool scan_rhex(csubstr s, T *C4_RESTRICT val) noexcept
+{
+    C4_ASSERT(s.len > 0);
+    C4_ASSERT(s.str[0] != '-');
+    C4_ASSERT(s.str[0] != '+');
+    C4_ASSERT(!s.begins_with("0x"));
+    C4_ASSERT(!s.begins_with("0X"));
+    size_t pos = 0;
+    // integer part
+    for( ; pos < s.len; ++pos)
+    {
+        const char c = s.str[pos];
+        if(c >= '0' && c <= '9')
+            *val = *val * T(16) + T(c - '0');
+        else if(c >= 'a' && c <= 'f')
+            *val = *val * T(16) + T(c - 'a');
+        else if(c >= 'A' && c <= 'F')
+            *val = *val * T(16) + T(c - 'A');
+        else if(c == '.')
+        {
+            ++pos;
+            break; // follow on to mantissa
+        }
+        else if(c == 'p' || c == 'P')
+        {
+            ++pos;
+            goto power; // no mantissa given, jump to power
+        }
+        else
+        {
+            return false;
+        }
+    }
+    // mantissa
+    {
+        // 0.0625 == 1/16 == value of first digit after the comma
+        for(T digit = T(0.0625); pos < s.len; ++pos, digit /= T(16))
+        {
+            const char c = s.str[pos];
+            if(c >= '0' && c <= '9')
+                *val += digit * T(c - '0');
+            else if(c >= 'a' && c <= 'f')
+                *val += digit * T(c - 'a');
+            else if(c >= 'A' && c <= 'F')
+                *val += digit * T(c - 'A');
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power; // mantissa finished, jump to power
+            }
+            else
+            {
+                return false;
+            }
+        }
+    }
+    return true;
+power:
+    if(C4_LIKELY(pos < s.len))
+    {
+        if(s.str[pos] == '+') // atoi() cannot handle a leading '+'
+            ++pos;
+        if(C4_LIKELY(pos < s.len))
+        {
+            int16_t powval = {};
+            if(C4_LIKELY(atoi(s.sub(pos), &powval)))
+            {
+                *val *= ipow<T, int16_t, 16>(powval);
+                return true;
+            }
+        }
+    }
+    return false;
+}
+#endif
+
 } // namespace detail
 
 
@@ -10943,11 +12529,15 @@ size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX
 #undef _c4append
 
 
-/** Convert a single-precision real number to string.
- * The string will in general be NOT null-terminated.
- * For FTOA_FLEX, \p precision is the number of significand digits. Otherwise
- * \p precision is the number of decimals. */
-inline size_t ftoa(substr str, float v, int precision=-1, RealFormat_e formatting=FTOA_FLEX)
+/** Convert a single-precision real number to string.  The string will
+ * in general be NOT null-terminated.  For FTOA_FLEX, \p precision is
+ * the number of significand digits. Otherwise \p precision is the
+ * number of decimals. It is safe to call this function with an empty
+ * or too-small buffer.
+ *
+ * @return the size of the buffer needed to write the number
+ */
+C4_ALWAYS_INLINE size_t ftoa(substr str, float v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept
 {
 #if C4CORE_HAVE_STD_TOCHARS
     return detail::rtoa(str, v, precision, formatting);
@@ -10959,14 +12549,15 @@ inline size_t ftoa(substr str, float v, int precision=-1, RealFormat_e formattin
 }
 
 
-/** Convert a double-precision real number to string.
- * The string will in general be NOT null-terminated.
- * For FTOA_FLEX, \p precision is the number of significand digits. Otherwise
- * \p precision is the number of decimals.
+/** Convert a double-precision real number to string.  The string will
+ * in general be NOT null-terminated.  For FTOA_FLEX, \p precision is
+ * the number of significand digits. Otherwise \p precision is the
+ * number of decimals. It is safe to call this function with an empty
+ * or too-small buffer.
  *
- * @return the number of characters written.
+ * @return the size of the buffer needed to write the number
  */
-inline size_t dtoa(substr str, double v, int precision=-1, RealFormat_e formatting=FTOA_FLEX)
+C4_ALWAYS_INLINE size_t dtoa(substr str, double v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept
 {
 #if C4CORE_HAVE_STD_TOCHARS
     return detail::rtoa(str, v, precision, formatting);
@@ -10984,20 +12575,36 @@ inline size_t dtoa(substr str, double v, int precision=-1, RealFormat_e formatti
  * @return true iff the conversion succeeded
  * @see atof_first() if the string is not trimmed
  */
-inline bool atof(csubstr str, float * C4_RESTRICT v)
+C4_ALWAYS_INLINE bool atof(csubstr str, float * C4_RESTRICT v) noexcept
 {
+    C4_ASSERT(str.len > 0);
     C4_ASSERT(str.triml(" \r\t\n").len == str.len);
 #if C4CORE_HAVE_FAST_FLOAT
-    fast_float::from_chars_result result;
-    result = fast_float::from_chars(str.str, str.str + str.len, *v);
-    return result.ec == std::errc();
+    // fastfloat cannot parse hexadecimal floats
+    bool isneg = (str.str[0] == '-');
+    csubstr rem = str.sub(isneg || str.str[0] == '+');
+    if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X'))))
+    {
+        fast_float::from_chars_result result;
+        result = fast_float::from_chars(str.str, str.str + str.len, *v);
+        return result.ec == std::errc();
+    }
+    else if(detail::scan_rhex(rem.sub(2), v))
+    {
+        *v *= isneg ? -1.f : 1.f;
+        return true;
+    }
+    return false;
 #elif C4CORE_HAVE_STD_FROMCHARS
     std::from_chars_result result;
     result = std::from_chars(str.str, str.str + str.len, *v);
     return result.ec == std::errc();
 #else
-    size_t ret = detail::scan_one(str, "f", v);
-    return ret != csubstr::npos;
+    csubstr rem = str.sub(str.str[0] == '-' || str.str[0] == '+');
+    if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X'))))
+        return detail::scan_one(str, "f", v) != csubstr::npos;
+    else
+        return detail::scan_one(str, "a", v) != csubstr::npos;
 #endif
 }
 
@@ -11008,20 +12615,35 @@ inline bool atof(csubstr str, float * C4_RESTRICT v)
  * @return true iff the conversion succeeded
  * @see atod_first() if the string is not trimmed
  */
-inline bool atod(csubstr str, double * C4_RESTRICT v)
+C4_ALWAYS_INLINE bool atod(csubstr str, double * C4_RESTRICT v) noexcept
 {
     C4_ASSERT(str.triml(" \r\t\n").len == str.len);
 #if C4CORE_HAVE_FAST_FLOAT
-    fast_float::from_chars_result result;
-    result = fast_float::from_chars(str.str, str.str + str.len, *v);
-    return result.ec == std::errc();
+    // fastfloat cannot parse hexadecimal floats
+    bool isneg = (str.str[0] == '-');
+    csubstr rem = str.sub(isneg || str.str[0] == '+');
+    if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X'))))
+    {
+        fast_float::from_chars_result result;
+        result = fast_float::from_chars(str.str, str.str + str.len, *v);
+        return result.ec == std::errc();
+    }
+    else if(detail::scan_rhex(rem.sub(2), v))
+    {
+        *v *= isneg ? -1. : 1.;
+        return true;
+    }
+    return false;
 #elif C4CORE_HAVE_STD_FROMCHARS
     std::from_chars_result result;
     result = std::from_chars(str.str, str.str + str.len, *v);
     return result.ec == std::errc();
 #else
-    size_t ret = detail::scan_one(str, "lf", v);
-    return ret != csubstr::npos;
+    csubstr rem = str.sub(str.str[0] == '-' || str.str[0] == '+');
+    if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X'))))
+        return detail::scan_one(str, "lf", v) != csubstr::npos;
+    else
+        return detail::scan_one(str, "la", v) != csubstr::npos;
 #endif
 }
 
@@ -11030,7 +12652,7 @@ inline bool atod(csubstr str, double * C4_RESTRICT v)
  * Leading whitespace is skipped until valid characters are found.
  * @return the number of characters read from the string, or npos if
  * conversion was not successful or if the string was empty */
-inline size_t atof_first(csubstr str, float * C4_RESTRICT v)
+inline size_t atof_first(csubstr str, float * C4_RESTRICT v) noexcept
 {
     csubstr trimmed = str.first_real_span();
     if(trimmed.len == 0)
@@ -11045,7 +12667,7 @@ inline size_t atof_first(csubstr str, float * C4_RESTRICT v)
  * Leading whitespace is skipped until valid characters are found.
  * @return the number of characters read from the string, or npos if
  * conversion was not successful or if the string was empty */
-inline size_t atod_first(csubstr str, double * C4_RESTRICT v)
+inline size_t atod_first(csubstr str, double * C4_RESTRICT v) noexcept
 {
     csubstr trimmed = str.first_real_span();
     if(trimmed.len == 0)
@@ -11061,60 +12683,81 @@ inline size_t atod_first(csubstr str, double * C4_RESTRICT v)
 //-----------------------------------------------------------------------------
 // generic versions
 
-C4_ALWAYS_INLINE size_t xtoa(substr s,  uint8_t v) { return utoa(s, v); }
-C4_ALWAYS_INLINE size_t xtoa(substr s, uint16_t v) { return utoa(s, v); }
-C4_ALWAYS_INLINE size_t xtoa(substr s, uint32_t v) { return utoa(s, v); }
-C4_ALWAYS_INLINE size_t xtoa(substr s, uint64_t v) { return utoa(s, v); }
-C4_ALWAYS_INLINE size_t xtoa(substr s,   int8_t v) { return itoa(s, v); }
-C4_ALWAYS_INLINE size_t xtoa(substr s,  int16_t v) { return itoa(s, v); }
-C4_ALWAYS_INLINE size_t xtoa(substr s,  int32_t v) { return itoa(s, v); }
-C4_ALWAYS_INLINE size_t xtoa(substr s,  int64_t v) { return itoa(s, v); }
-C4_ALWAYS_INLINE size_t xtoa(substr s,    float v) { return ftoa(s, v); }
-C4_ALWAYS_INLINE size_t xtoa(substr s,   double v) { return dtoa(s, v); }
-
-C4_ALWAYS_INLINE bool atox(csubstr s,  uint8_t *C4_RESTRICT v) { return atou(s, v); }
-C4_ALWAYS_INLINE bool atox(csubstr s, uint16_t *C4_RESTRICT v) { return atou(s, v); }
-C4_ALWAYS_INLINE bool atox(csubstr s, uint32_t *C4_RESTRICT v) { return atou(s, v); }
-C4_ALWAYS_INLINE bool atox(csubstr s, uint64_t *C4_RESTRICT v) { return atou(s, v); }
-C4_ALWAYS_INLINE bool atox(csubstr s,   int8_t *C4_RESTRICT v) { return atoi(s, v); }
-C4_ALWAYS_INLINE bool atox(csubstr s,  int16_t *C4_RESTRICT v) { return atoi(s, v); }
-C4_ALWAYS_INLINE bool atox(csubstr s,  int32_t *C4_RESTRICT v) { return atoi(s, v); }
-C4_ALWAYS_INLINE bool atox(csubstr s,  int64_t *C4_RESTRICT v) { return atoi(s, v); }
-C4_ALWAYS_INLINE bool atox(csubstr s,    float *C4_RESTRICT v) { return atof(s, v); }
-C4_ALWAYS_INLINE bool atox(csubstr s,   double *C4_RESTRICT v) { return atod(s, v); }
-
-C4_ALWAYS_INLINE size_t to_chars(substr buf,  uint8_t v) { return utoa(buf, v); }
-C4_ALWAYS_INLINE size_t to_chars(substr buf, uint16_t v) { return utoa(buf, v); }
-C4_ALWAYS_INLINE size_t to_chars(substr buf, uint32_t v) { return utoa(buf, v); }
-C4_ALWAYS_INLINE size_t to_chars(substr buf, uint64_t v) { return utoa(buf, v); }
-C4_ALWAYS_INLINE size_t to_chars(substr buf,   int8_t v) { return itoa(buf, v); }
-C4_ALWAYS_INLINE size_t to_chars(substr buf,  int16_t v) { return itoa(buf, v); }
-C4_ALWAYS_INLINE size_t to_chars(substr buf,  int32_t v) { return itoa(buf, v); }
-C4_ALWAYS_INLINE size_t to_chars(substr buf,  int64_t v) { return itoa(buf, v); }
-C4_ALWAYS_INLINE size_t to_chars(substr buf,    float v) { return ftoa(buf, v); }
-C4_ALWAYS_INLINE size_t to_chars(substr buf,   double v) { return dtoa(buf, v); }
-
-C4_ALWAYS_INLINE bool from_chars(csubstr buf,  uint8_t *C4_RESTRICT v) { return atou(buf, v); }
-C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint16_t *C4_RESTRICT v) { return atou(buf, v); }
-C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint32_t *C4_RESTRICT v) { return atou(buf, v); }
-C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint64_t *C4_RESTRICT v) { return atou(buf, v); }
-C4_ALWAYS_INLINE bool from_chars(csubstr buf,   int8_t *C4_RESTRICT v) { return atoi(buf, v); }
-C4_ALWAYS_INLINE bool from_chars(csubstr buf,  int16_t *C4_RESTRICT v) { return atoi(buf, v); }
-C4_ALWAYS_INLINE bool from_chars(csubstr buf,  int32_t *C4_RESTRICT v) { return atoi(buf, v); }
-C4_ALWAYS_INLINE bool from_chars(csubstr buf,  int64_t *C4_RESTRICT v) { return atoi(buf, v); }
-C4_ALWAYS_INLINE bool from_chars(csubstr buf,    float *C4_RESTRICT v) { return atof(buf, v); }
-C4_ALWAYS_INLINE bool from_chars(csubstr buf,   double *C4_RESTRICT v) { return atod(buf, v); }
-
-C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,  uint8_t *C4_RESTRICT v) { return atou_first(buf, v); }
-C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint16_t *C4_RESTRICT v) { return atou_first(buf, v); }
-C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint32_t *C4_RESTRICT v) { return atou_first(buf, v); }
-C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint64_t *C4_RESTRICT v) { return atou_first(buf, v); }
-C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,   int8_t *C4_RESTRICT v) { return atoi_first(buf, v); }
-C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,  int16_t *C4_RESTRICT v) { return atoi_first(buf, v); }
-C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,  int32_t *C4_RESTRICT v) { return atoi_first(buf, v); }
-C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,  int64_t *C4_RESTRICT v) { return atoi_first(buf, v); }
-C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,    float *C4_RESTRICT v) { return atof_first(buf, v); }
-C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,   double *C4_RESTRICT v) { return atod_first(buf, v); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,  uint8_t v) noexcept { return write_dec(s, v); }
+C4_ALWAYS_INLINE size_t xtoa(substr s, uint16_t v) noexcept { return write_dec(s, v); }
+C4_ALWAYS_INLINE size_t xtoa(substr s, uint32_t v) noexcept { return write_dec(s, v); }
+C4_ALWAYS_INLINE size_t xtoa(substr s, uint64_t v) noexcept { return write_dec(s, v); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,   int8_t v) noexcept { return itoa(s, v); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,  int16_t v) noexcept { return itoa(s, v); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,  int32_t v) noexcept { return itoa(s, v); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,  int64_t v) noexcept { return itoa(s, v); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,    float v) noexcept { return ftoa(s, v); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,   double v) noexcept { return dtoa(s, v); }
+
+C4_ALWAYS_INLINE size_t xtoa(substr s,  uint8_t v,  uint8_t radix) noexcept { return utoa(s, v, radix); }
+C4_ALWAYS_INLINE size_t xtoa(substr s, uint16_t v, uint16_t radix) noexcept { return utoa(s, v, radix); }
+C4_ALWAYS_INLINE size_t xtoa(substr s, uint32_t v, uint32_t radix) noexcept { return utoa(s, v, radix); }
+C4_ALWAYS_INLINE size_t xtoa(substr s, uint64_t v, uint64_t radix) noexcept { return utoa(s, v, radix); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,   int8_t v,   int8_t radix) noexcept { return itoa(s, v, radix); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,  int16_t v,  int16_t radix) noexcept { return itoa(s, v, radix); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,  int32_t v,  int32_t radix) noexcept { return itoa(s, v, radix); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,  int64_t v,  int64_t radix) noexcept { return itoa(s, v, radix); }
+
+C4_ALWAYS_INLINE size_t xtoa(substr s,  uint8_t v,  uint8_t radix, size_t num_digits) noexcept { return utoa(s, v, radix, num_digits); }
+C4_ALWAYS_INLINE size_t xtoa(substr s, uint16_t v, uint16_t radix, size_t num_digits) noexcept { return utoa(s, v, radix, num_digits); }
+C4_ALWAYS_INLINE size_t xtoa(substr s, uint32_t v, uint32_t radix, size_t num_digits) noexcept { return utoa(s, v, radix, num_digits); }
+C4_ALWAYS_INLINE size_t xtoa(substr s, uint64_t v, uint64_t radix, size_t num_digits) noexcept { return utoa(s, v, radix, num_digits); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,   int8_t v,   int8_t radix, size_t num_digits) noexcept { return itoa(s, v, radix, num_digits); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,  int16_t v,  int16_t radix, size_t num_digits) noexcept { return itoa(s, v, radix, num_digits); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,  int32_t v,  int32_t radix, size_t num_digits) noexcept { return itoa(s, v, radix, num_digits); }
+C4_ALWAYS_INLINE size_t xtoa(substr s,  int64_t v,  int64_t radix, size_t num_digits) noexcept { return itoa(s, v, radix, num_digits); }
+
+C4_ALWAYS_INLINE size_t xtoa(substr s,  float v, int precision, RealFormat_e formatting=FTOA_FLEX) noexcept { return ftoa(s, v, precision, formatting); }
+C4_ALWAYS_INLINE size_t xtoa(substr s, double v, int precision, RealFormat_e formatting=FTOA_FLEX) noexcept { return dtoa(s, v, precision, formatting); }
+
+C4_ALWAYS_INLINE bool atox(csubstr s,  uint8_t *C4_RESTRICT v) noexcept { return atou(s, v); }
+C4_ALWAYS_INLINE bool atox(csubstr s, uint16_t *C4_RESTRICT v) noexcept { return atou(s, v); }
+C4_ALWAYS_INLINE bool atox(csubstr s, uint32_t *C4_RESTRICT v) noexcept { return atou(s, v); }
+C4_ALWAYS_INLINE bool atox(csubstr s, uint64_t *C4_RESTRICT v) noexcept { return atou(s, v); }
+C4_ALWAYS_INLINE bool atox(csubstr s,   int8_t *C4_RESTRICT v) noexcept { return atoi(s, v); }
+C4_ALWAYS_INLINE bool atox(csubstr s,  int16_t *C4_RESTRICT v) noexcept { return atoi(s, v); }
+C4_ALWAYS_INLINE bool atox(csubstr s,  int32_t *C4_RESTRICT v) noexcept { return atoi(s, v); }
+C4_ALWAYS_INLINE bool atox(csubstr s,  int64_t *C4_RESTRICT v) noexcept { return atoi(s, v); }
+C4_ALWAYS_INLINE bool atox(csubstr s,    float *C4_RESTRICT v) noexcept { return atof(s, v); }
+C4_ALWAYS_INLINE bool atox(csubstr s,   double *C4_RESTRICT v) noexcept { return atod(s, v); }
+
+C4_ALWAYS_INLINE size_t to_chars(substr buf,  uint8_t v) noexcept { return write_dec(buf, v); }
+C4_ALWAYS_INLINE size_t to_chars(substr buf, uint16_t v) noexcept { return write_dec(buf, v); }
+C4_ALWAYS_INLINE size_t to_chars(substr buf, uint32_t v) noexcept { return write_dec(buf, v); }
+C4_ALWAYS_INLINE size_t to_chars(substr buf, uint64_t v) noexcept { return write_dec(buf, v); }
+C4_ALWAYS_INLINE size_t to_chars(substr buf,   int8_t v) noexcept { return itoa(buf, v); }
+C4_ALWAYS_INLINE size_t to_chars(substr buf,  int16_t v) noexcept { return itoa(buf, v); }
+C4_ALWAYS_INLINE size_t to_chars(substr buf,  int32_t v) noexcept { return itoa(buf, v); }
+C4_ALWAYS_INLINE size_t to_chars(substr buf,  int64_t v) noexcept { return itoa(buf, v); }
+C4_ALWAYS_INLINE size_t to_chars(substr buf,    float v) noexcept { return ftoa(buf, v); }
+C4_ALWAYS_INLINE size_t to_chars(substr buf,   double v) noexcept { return dtoa(buf, v); }
+
+C4_ALWAYS_INLINE bool from_chars(csubstr buf,  uint8_t *C4_RESTRICT v) noexcept { return atou(buf, v); }
+C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint16_t *C4_RESTRICT v) noexcept { return atou(buf, v); }
+C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint32_t *C4_RESTRICT v) noexcept { return atou(buf, v); }
+C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint64_t *C4_RESTRICT v) noexcept { return atou(buf, v); }
+C4_ALWAYS_INLINE bool from_chars(csubstr buf,   int8_t *C4_RESTRICT v) noexcept { return atoi(buf, v); }
+C4_ALWAYS_INLINE bool from_chars(csubstr buf,  int16_t *C4_RESTRICT v) noexcept { return atoi(buf, v); }
+C4_ALWAYS_INLINE bool from_chars(csubstr buf,  int32_t *C4_RESTRICT v) noexcept { return atoi(buf, v); }
+C4_ALWAYS_INLINE bool from_chars(csubstr buf,  int64_t *C4_RESTRICT v) noexcept { return atoi(buf, v); }
+C4_ALWAYS_INLINE bool from_chars(csubstr buf,    float *C4_RESTRICT v) noexcept { return atof(buf, v); }
+C4_ALWAYS_INLINE bool from_chars(csubstr buf,   double *C4_RESTRICT v) noexcept { return atod(buf, v); }
+
+C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,  uint8_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); }
+C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint16_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); }
+C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint32_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); }
+C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint64_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); }
+C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,   int8_t *C4_RESTRICT v) noexcept { return atoi_first(buf, v); }
+C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,  int16_t *C4_RESTRICT v) noexcept { return atoi_first(buf, v); }
+C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,  int32_t *C4_RESTRICT v) noexcept { return atoi_first(buf, v); }
+C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,  int64_t *C4_RESTRICT v) noexcept { return atoi_first(buf, v); }
+C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,    float *C4_RESTRICT v) noexcept { return atof_first(buf, v); }
+C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,   double *C4_RESTRICT v) noexcept { return atod_first(buf, v); }
 
 
 //-----------------------------------------------------------------------------
@@ -11124,20 +12767,20 @@ C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,   double *C4_RESTRICT v) {
 #define _C4_IF_NOT_FIXED_LENGTH_I(T, ty) C4_ALWAYS_INLINE typename std::enable_if<std::  is_signed<T>::value && !is_fixed_length<T>::value_i, ty>
 #define _C4_IF_NOT_FIXED_LENGTH_U(T, ty) C4_ALWAYS_INLINE typename std::enable_if<std::is_unsigned<T>::value && !is_fixed_length<T>::value_u, ty>
 
-template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type xtoa(substr buf, T v) { return itoa(buf, v); }
-template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type xtoa(substr buf, T v) { return utoa(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type xtoa(substr buf, T v) noexcept { return itoa(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type xtoa(substr buf, T v) noexcept { return write_dec(buf, v); }
 
-template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, bool  )::type atox(csubstr buf, T *C4_RESTRICT v) { return atoi(buf, v); }
-template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, bool  )::type atox(csubstr buf, T *C4_RESTRICT v) { return atou(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, bool  )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, bool  )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); }
 
-template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type to_chars(substr buf, T v) { return itoa(buf, v); }
-template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type to_chars(substr buf, T v) { return utoa(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type to_chars(substr buf, T v) noexcept { return itoa(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type to_chars(substr buf, T v) noexcept { return write_dec(buf, v); }
 
-template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, bool  )::type from_chars(csubstr buf, T *C4_RESTRICT v) { return atoi(buf, v); }
-template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, bool  )::type from_chars(csubstr buf, T *C4_RESTRICT v) { return atou(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, bool  )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, bool  )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); }
 
-template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type from_chars_first(csubstr buf, T *C4_RESTRICT v) { return atoi_first(buf, v); }
-template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type from_chars_first(csubstr buf, T *C4_RESTRICT v) { return atou_first(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type from_chars_first(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi_first(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type from_chars_first(csubstr buf, T *C4_RESTRICT v) noexcept { return atou_first(buf, v); }
 
 #undef _C4_IF_NOT_FIXED_LENGTH_I
 #undef _C4_IF_NOT_FIXED_LENGTH_U
@@ -11146,11 +12789,11 @@ template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type from_chars_first(c
 //-----------------------------------------------------------------------------
 // for pointers
 
-template <class T> C4_ALWAYS_INLINE size_t xtoa(substr s, T *v) { return itoa(s, (intptr_t)v, (intptr_t)16); }
-template <class T> C4_ALWAYS_INLINE bool   atox(csubstr s, T **v) { intptr_t tmp; bool ret = atox(s, &tmp); if(ret) { *v = (T*)tmp; } return ret; }
-template <class T> C4_ALWAYS_INLINE size_t to_chars(substr s, T *v) { return itoa(s, (intptr_t)v, (intptr_t)16); }
-template <class T> C4_ALWAYS_INLINE bool   from_chars(csubstr buf, T **v) { intptr_t tmp; bool ret = from_chars(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; }
-template <class T> C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, T **v) { intptr_t tmp; bool ret = from_chars_first(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; }
+template <class T> C4_ALWAYS_INLINE size_t xtoa(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, (intptr_t)16); }
+template <class T> C4_ALWAYS_INLINE bool   atox(csubstr s, T **v) noexcept { intptr_t tmp; bool ret = atox(s, &tmp); if(ret) { *v = (T*)tmp; } return ret; }
+template <class T> C4_ALWAYS_INLINE size_t to_chars(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, (intptr_t)16); }
+template <class T> C4_ALWAYS_INLINE bool   from_chars(csubstr buf, T **v) noexcept { intptr_t tmp; bool ret = from_chars(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; }
+template <class T> C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, T **v) noexcept { intptr_t tmp; bool ret = from_chars_first(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; }
 
 
 //-----------------------------------------------------------------------------
@@ -11162,7 +12805,7 @@ template <class T> C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, T **v)
  *
  * @see to_chars() */
 template<class T>
-inline substr to_chars_sub(substr buf, T const& C4_RESTRICT v)
+C4_ALWAYS_INLINE substr to_chars_sub(substr buf, T const& C4_RESTRICT v) noexcept
 {
     size_t sz = to_chars(buf, v);
     return buf.left_of(sz <= buf.len ? sz : buf.len);
@@ -11173,13 +12816,13 @@ inline substr to_chars_sub(substr buf, T const& C4_RESTRICT v)
 //-----------------------------------------------------------------------------
 // bool implementation
 
-inline size_t to_chars(substr buf, bool v)
+C4_ALWAYS_INLINE size_t to_chars(substr buf, bool v) noexcept
 {
     int val = v;
     return to_chars(buf, val);
 }
 
-inline bool from_chars(csubstr buf, bool * C4_RESTRICT v)
+inline bool from_chars(csubstr buf, bool * C4_RESTRICT v) noexcept
 {
     if(buf == '0')
     {
@@ -11223,7 +12866,7 @@ inline bool from_chars(csubstr buf, bool * C4_RESTRICT v)
     return ret;
 }
 
-inline size_t from_chars_first(csubstr buf, bool * C4_RESTRICT v)
+inline size_t from_chars_first(csubstr buf, bool * C4_RESTRICT v) noexcept
 {
     csubstr trimmed = buf.first_non_empty_span();
     if(trimmed.len == 0 || !from_chars(buf, v))
@@ -11235,7 +12878,7 @@ inline size_t from_chars_first(csubstr buf, bool * C4_RESTRICT v)
 //-----------------------------------------------------------------------------
 // single-char implementation
 
-inline size_t to_chars(substr buf, char v)
+inline size_t to_chars(substr buf, char v) noexcept
 {
     if(buf.len > 0)
         buf[0] = v;
@@ -11244,7 +12887,7 @@ inline size_t to_chars(substr buf, char v)
 
 /** extract a single character from a substring
  * @note to extract a string instead and not just a single character, use the csubstr overload */
-inline bool from_chars(csubstr buf, char * C4_RESTRICT v)
+inline bool from_chars(csubstr buf, char * C4_RESTRICT v) noexcept
 {
     if(buf.len != 1)
         return false;
@@ -11252,7 +12895,7 @@ inline bool from_chars(csubstr buf, char * C4_RESTRICT v)
     return true;
 }
 
-inline size_t from_chars_first(csubstr buf, char * C4_RESTRICT v)
+inline size_t from_chars_first(csubstr buf, char * C4_RESTRICT v) noexcept
 {
     if(buf.len < 1)
         return csubstr::npos;
@@ -11264,21 +12907,29 @@ inline size_t from_chars_first(csubstr buf, char * C4_RESTRICT v)
 //-----------------------------------------------------------------------------
 // csubstr implementation
 
-inline size_t to_chars(substr buf, csubstr v)
+inline size_t to_chars(substr buf, csubstr v) noexcept
 {
     C4_ASSERT(!buf.overlaps(v));
     size_t len = buf.len < v.len ? buf.len : v.len;
-    memcpy(buf.str, v.str, len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(len)
+    {
+        C4_ASSERT(buf.str != nullptr);
+        C4_ASSERT(v.str != nullptr);
+        memcpy(buf.str, v.str, len);
+    }
     return v.len;
 }
 
-inline bool from_chars(csubstr buf, csubstr *C4_RESTRICT v)
+inline bool from_chars(csubstr buf, csubstr *C4_RESTRICT v) noexcept
 {
     *v = buf;
     return true;
 }
 
-inline size_t from_chars_first(substr buf, csubstr * C4_RESTRICT v)
+inline size_t from_chars_first(substr buf, csubstr * C4_RESTRICT v) noexcept
 {
     csubstr trimmed = buf.first_non_empty_span();
     if(trimmed.len == 0)
@@ -11291,35 +12942,59 @@ inline size_t from_chars_first(substr buf, csubstr * C4_RESTRICT v)
 //-----------------------------------------------------------------------------
 // substr
 
-inline size_t to_chars(substr buf, substr v)
+inline size_t to_chars(substr buf, substr v) noexcept
 {
     C4_ASSERT(!buf.overlaps(v));
     size_t len = buf.len < v.len ? buf.len : v.len;
-    memcpy(buf.str, v.str, len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(len)
+    {
+        C4_ASSERT(buf.str != nullptr);
+        C4_ASSERT(v.str != nullptr);
+        memcpy(buf.str, v.str, len);
+    }
     return v.len;
 }
 
-inline bool from_chars(csubstr buf, substr * C4_RESTRICT v)
+inline bool from_chars(csubstr buf, substr * C4_RESTRICT v) noexcept
 {
     C4_ASSERT(!buf.overlaps(*v));
-    if(buf.len <= v->len)
+    // is the destination buffer wide enough?
+    if(v->len >= buf.len)
     {
-        memcpy(v->str, buf.str, buf.len);
+        // calling memcpy with null strings is undefined behavior
+        // and will wreak havoc in calling code's branches.
+        // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+        if(buf.len)
+        {
+            C4_ASSERT(buf.str != nullptr);
+            C4_ASSERT(v->str != nullptr);
+            memcpy(v->str, buf.str, buf.len);
+        }
         v->len = buf.len;
         return true;
     }
-    memcpy(v->str, buf.str, v->len);
     return false;
 }
 
-inline size_t from_chars_first(csubstr buf, substr * C4_RESTRICT v)
+inline size_t from_chars_first(csubstr buf, substr * C4_RESTRICT v) noexcept
 {
     csubstr trimmed = buf.first_non_empty_span();
     C4_ASSERT(!trimmed.overlaps(*v));
     if(C4_UNLIKELY(trimmed.len == 0))
         return csubstr::npos;
     size_t len = trimmed.len > v->len ? v->len : trimmed.len;
-    memcpy(v->str, trimmed.str, len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(len)
+    {
+        C4_ASSERT(buf.str != nullptr);
+        C4_ASSERT(v->str != nullptr);
+        memcpy(v->str, trimmed.str, len);
+    }
     if(C4_UNLIKELY(trimmed.len > v->len))
         return csubstr::npos;
     return static_cast<size_t>(trimmed.end() - buf.begin());
@@ -11329,13 +13004,13 @@ inline size_t from_chars_first(csubstr buf, substr * C4_RESTRICT v)
 //-----------------------------------------------------------------------------
 
 template<size_t N>
-inline size_t to_chars(substr buf, const char (& C4_RESTRICT v)[N])
+inline size_t to_chars(substr buf, const char (& C4_RESTRICT v)[N]) noexcept
 {
     csubstr sp(v);
     return to_chars(buf, sp);
 }
 
-inline size_t to_chars(substr buf, const char * C4_RESTRICT v)
+inline size_t to_chars(substr buf, const char * C4_RESTRICT v) noexcept
 {
     return to_chars(buf, to_csubstr(v));
 }
@@ -11624,15 +13299,28 @@ inline integral_<intptr_t> bin(std::nullptr_t)
     return integral_<intptr_t>(intptr_t(0), intptr_t(2));
 }
 /** format the integral_ argument as a binary 0-1 value
- * @see c4::raw() if you want to use a binary memcpy instead of 0-1 formatting */
+ * @see c4::raw() if you want to use a raw memcpy-based binary dump instead of 0-1 formatting */
 template<class T>
 inline integral_<T> bin(T v)
 {
     return integral_<T>(v, T(2));
 }
 
-} // namespace fmt
 
+template<class T>
+struct overflow_checked_
+{
+    static_assert(std::is_integral<T>::value, "range checking only for integral types");
+    C4_ALWAYS_INLINE overflow_checked_(T &val_) : val(&val_) {}
+    T *val;
+};
+template<class T>
+C4_ALWAYS_INLINE overflow_checked_<T> overflow_checked(T &val)
+{
+   return overflow_checked_<T>(val);
+}
+
+} // namespace fmt
 
 /** format an integral_ signed type */
 template<typename T>
@@ -11668,6 +13356,14 @@ to_chars(substr buf, fmt::integral_padded_<T> fmt)
     return utoa(buf, fmt.val, fmt.radix, fmt.num_digits);
 }
 
+template<class T>
+C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_<T> wrapper)
+{
+    if(C4_LIKELY(!overflows<T>(s)))
+        return atox(s, wrapper.val);
+    return false;
+}
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
@@ -14400,18 +16096,32 @@ namespace c4 {
 
 //-----------------------------------------------------------------------------
 
-/** get a writeable view to an existing std::string */
-inline c4::substr to_substr(std::string &s)
+/** get a writeable view to an existing std::string.
+ * When the string is empty, the returned view will be pointing
+ * at the character with value '\0', but the size will be zero.
+ * @see https://en.cppreference.com/w/cpp/string/basic_string/operator_at
+ */
+C4_ALWAYS_INLINE c4::substr to_substr(std::string &s) noexcept
 {
-    char* data = ! s.empty() ? &s[0] : nullptr;
-    return c4::substr(data, s.size());
+    #if C4_CPP < 11
+    #error this function will do undefined behavior
+    #endif
+    // since c++11 it is legal to call s[s.size()].
+    return c4::substr(&s[0], s.size());
 }
 
-/** get a readonly view to an existing std::string */
-inline c4::csubstr to_csubstr(std::string const& s)
+/** get a readonly view to an existing std::string.
+ * When the string is empty, the returned view will be pointing
+ * at the character with value '\0', but the size will be zero.
+ * @see https://en.cppreference.com/w/cpp/string/basic_string/operator_at
+ */
+C4_ALWAYS_INLINE c4::csubstr to_csubstr(std::string const& s) noexcept
 {
-    const char* data = ! s.empty() ? &s[0] : nullptr;
-    return c4::csubstr(data, s.size());
+    #if C4_CPP < 11
+    #error this function will do undefined behavior
+    #endif
+    // since c++11 it is legal to call s[s.size()].
+    return c4::csubstr(&s[0], s.size());
 }
 
 //-----------------------------------------------------------------------------
@@ -14437,7 +16147,15 @@ inline size_t to_chars(c4::substr buf, std::string const& s)
 {
     C4_ASSERT(!buf.overlaps(to_csubstr(s)));
     size_t len = buf.len < s.size() ? buf.len : s.size();
-    memcpy(buf.str, s.data(), len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(len)
+    {
+        C4_ASSERT(s.data() != nullptr);
+        C4_ASSERT(buf.str != nullptr);
+        memcpy(buf.str, s.data(), len);
+    }
     return s.size(); // return the number of needed chars
 }
 
@@ -14446,7 +16164,14 @@ inline bool from_chars(c4::csubstr buf, std::string * s)
 {
     s->resize(buf.len);
     C4_ASSERT(!buf.overlaps(to_csubstr(*s)));
-    memcpy(&(*s)[0], buf.str, buf.len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(buf.len)
+    {
+        C4_ASSERT(buf.str != nullptr);
+        memcpy(&(*s)[0], buf.str, buf.len);
+    }
     return true;
 }
 
@@ -14531,7 +16256,13 @@ inline size_t to_chars(c4::substr buf, std::vector<char, Alloc> const& s)
 {
     C4_ASSERT(!buf.overlaps(to_csubstr(s)));
     size_t len = buf.len < s.size() ? buf.len : s.size();
-    memcpy(buf.str, s.data(), len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(len > 0)
+    {
+        memcpy(buf.str, s.data(), len);
+    }
     return s.size(); // return the number of needed chars
 }
 
@@ -14541,7 +16272,13 @@ inline bool from_chars(c4::csubstr buf, std::vector<char, Alloc> * s)
 {
     s->resize(buf.len);
     C4_ASSERT(!buf.overlaps(to_csubstr(*s)));
-    memcpy(&(*s)[0], buf.str, buf.len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(buf.len > 0)
+    {
+        memcpy(&(*s)[0], buf.str, buf.len);
+    }
     return true;
 }
 
@@ -14791,11 +16528,13 @@ public:
     friend bool operator!=(splitmix const &, splitmix const &);
 
     splitmix() : m_seed(1) {}
+    explicit splitmix(uint64_t s) : m_seed(s) {}
     explicit splitmix(std::random_device &rd)
     {
         seed(rd);
     }
 
+    void seed(uint64_t s) { m_seed = s; }
     void seed(std::random_device &rd)
     {
         m_seed = uint64_t(rd()) << 31 | uint64_t(rd());
@@ -14848,6 +16587,7 @@ public:
         seed(rd);
     }
 
+    void seed(uint64_t s) { m_seed = s; }
     void seed(std::random_device &rd)
     {
         m_seed = uint64_t(rd()) << 31 | uint64_t(rd());
@@ -14899,11 +16639,13 @@ public:
         : m_state(0x853c49e6748fea9bULL)
         , m_inc(0xda3e39cb94b95bdbULL)
     {}
+    explicit pcg(uint64_t s) { m_state = s; m_inc = m_state << 1; }
     explicit pcg(std::random_device &rd)
     {
         seed(rd);
     }
 
+    void seed(uint64_t s) { m_state = s; }
     void seed(std::random_device &rd)
     {
         uint64_t s0 = uint64_t(rd()) << 31 | uint64_t(rd());
@@ -15462,27 +17204,6 @@ bool from_chars(csubstr buf, fmt::raw_wrapper *r)
 
 namespace c4 {
 
-/** returns true if the memory overlaps */
-bool mem_overlaps(void const* a, void const* b, size_t sza, size_t szb)
-{
-    if(a < b)
-    {
-        if(size_t(a) + sza > size_t(b))
-            return true;
-    }
-    else if(a > b)
-    {
-        if(size_t(b) + szb > size_t(a))
-            return true;
-    }
-    else if(a == b)
-    {
-        if(sza != 0 && szb != 0)
-            return true;
-    }
-    return false;
-}
-
 /** Fills 'dest' with the first 'pattern_size' bytes at 'pattern', 'num_times'. */
 void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num_times)
 {
@@ -15984,6 +17705,7 @@ substr decode_code_point(substr out, csubstr code_point)
     C4_ASSERT(!code_point.begins_with("\\U"));
     C4_ASSERT(!code_point.begins_with('0'));
     C4_ASSERT(code_point.len <= 8);
+    C4_ASSERT(code_point.len > 0);
     uint32_t code_point_val;
     C4_CHECK(read_hex(code_point, &code_point_val));
     size_t ret = decode_code_point((uint8_t*)out.str, out.len, code_point_val);
@@ -16787,11 +18509,24 @@ bool is_debugger_attached()
 #endif
 
 
+#if defined(NDEBUG) || defined(C4_NO_DEBUG_BREAK)
+#   define RYML_DEBUG_BREAK()
+#else
+#   define RYML_DEBUG_BREAK()                               \
+    {                                                       \
+        if(c4::get_error_flags() & c4::ON_ERROR_DEBUGBREAK) \
+        {                                                   \
+            C4_DEBUG_BREAK();                               \
+        }                                                   \
+    }
+#endif
+
+
 #define RYML_CHECK(cond)                                                \
     do {                                                                \
         if(!(cond))                                                     \
         {                                                               \
-            C4_DEBUG_BREAK();                                           \
+            RYML_DEBUG_BREAK()                                          \
             c4::yml::error("check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \
         }                                                               \
     } while(0)
@@ -16801,7 +18536,7 @@ bool is_debugger_attached()
     {                                                                   \
         if(!(cond))                                                     \
         {                                                               \
-            C4_DEBUG_BREAK();                                           \
+            RYML_DEBUG_BREAK()                                          \
             c4::yml::error(msg ": check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \
         }                                                               \
     } while(0)
@@ -16811,9 +18546,9 @@ bool is_debugger_attached()
 #   define RYML_DEPRECATED(msg) [[deprecated(msg)]]
 #else
 #   if defined(_MSC_VER)
-#       define RYML_DEPRECATED(msg) __declspec(deprecated)
+#       define RYML_DEPRECATED(msg) __declspec(deprecated(msg))
 #   else // defined(__GNUC__) || defined(__clang__)
-#       define RYML_DEPRECATED(msg) __attribute__((deprecated))
+#       define RYML_DEPRECATED(msg) __attribute__((deprecated(msg)))
 #   endif
 #endif
 
@@ -16875,7 +18610,11 @@ struct RYML_EXPORT Location : public LineCol
 
 /** the type of the function used to report errors. This function must
  * interrupt execution, either by raising an exception or calling
- * std::abort(). */
+ * std::abort().
+ *
+ * @warning the error callback must never return: it must either abort
+ * or throw an exception. Otherwise, the parser will enter into an
+ * infinite loop, or the program may crash. */
 using pfn_error = void (*)(const char* msg, size_t msg_len, Location location, void *user_data);
 /** the type of the function used to allocate memory */
 using pfn_allocate = void* (*)(size_t len, void* hint, void *user_data);
@@ -16904,7 +18643,11 @@ inline void error(const char (&msg)[N])
 
 //-----------------------------------------------------------------------------
 
-/// a c-style callbacks class
+/** a c-style callbacks class
+ *
+ * @warning the error callback must never return: it must either abort
+ * or throw an exception. Otherwise, the parser will enter into an
+ * infinite loop, or the program may crash. */
 struct RYML_EXPORT Callbacks
 {
     void *       m_user_data;
@@ -16925,11 +18668,15 @@ struct RYML_EXPORT Callbacks
     }
 };
 
+/** set the global callbacks.
+ *
+ * @warning the error callback must never return: it must either abort
+ * or throw an exception. Otherwise, the parser will enter into an
+ * infinite loop, or the program may crash. */
+RYML_EXPORT void set_callbacks(Callbacks const& c);
 /// get the global callbacks
 RYML_EXPORT Callbacks const& get_callbacks();
-/// set the global callbacks
-RYML_EXPORT void set_callbacks(Callbacks const& c);
-/// set the global callbacks to their defaults
+/// set the global callbacks back to their defaults
 RYML_EXPORT void reset_callbacks();
 
 /// @cond dev
@@ -16937,7 +18684,7 @@ RYML_EXPORT void reset_callbacks();
 do                                                                      \
 {                                                                       \
     const char msg[] = msg_literal;                                     \
-    C4_DEBUG_BREAK();                                                   \
+    RYML_DEBUG_BREAK()                                                  \
     (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \
 } while(0)
 #define _RYML_CB_CHECK(cb, cond)                                        \
@@ -16946,7 +18693,7 @@ do                                                                      \
         if(!(cond))                                                     \
         {                                                               \
             const char msg[] = "check failed: " #cond;                  \
-            C4_DEBUG_BREAK();                                           \
+            RYML_DEBUG_BREAK()                                          \
             (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \
         }                                                               \
     } while(0)
@@ -17086,6 +18833,7 @@ struct NodeScalar;
 struct NodeInit;
 struct NodeData;
 class NodeRef;
+class ConstNodeRef;
 class Tree;
 
 
@@ -17225,6 +18973,8 @@ typedef enum : type_bits {
     DOCMAP  = DOC|MAP,
     DOCSEQ  = DOC|SEQ,
     DOCVAL  = DOC|VAL,
+    _KEYMASK = KEY | KEYQUO | KEYANCH | KEYREF | KEYTAG,
+    _VALMASK = VAL | VALQUO | VALANCH | VALREF | VALTAG,
     // these flags are from a work in progress and should not be used yet
     _WIP_STYLE_FLOW_SL = c4bit(14), ///< mark container with single-line flow format (seqs as '[val1,val2], maps as '{key: val, key2: val2}')
     _WIP_STYLE_FLOW_ML = c4bit(15), ///< mark container with multi-line flow format (seqs as '[val1,\nval2], maps as '{key: val,\nkey2: val2}')
@@ -17264,9 +19014,6 @@ public:
 
 public:
 
-    C4_ALWAYS_INLINE operator NodeType_e      & C4_RESTRICT ()       { return type; }
-    C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const { return type; }
-
     C4_ALWAYS_INLINE NodeType() : type(NOTYPE) {}
     C4_ALWAYS_INLINE NodeType(NodeType_e t) : type(t) {}
     C4_ALWAYS_INLINE NodeType(type_bits t) : type((NodeType_e)t) {}
@@ -17287,6 +19034,14 @@ public:
 
 public:
 
+    C4_ALWAYS_INLINE operator NodeType_e      & C4_RESTRICT ()       { return type; }
+    C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const { return type; }
+
+    C4_ALWAYS_INLINE bool operator== (NodeType_e t) const { return type == t; }
+    C4_ALWAYS_INLINE bool operator!= (NodeType_e t) const { return type != t; }
+
+public:
+
     #if defined(__clang__)
     #   pragma clang diagnostic push
     #   pragma clang diagnostic ignored "-Wnull-dereference"
@@ -17297,17 +19052,18 @@ public:
     #   endif
     #endif
 
+    C4_ALWAYS_INLINE bool is_notype() const { return type == NOTYPE; }
     C4_ALWAYS_INLINE bool is_stream() const { return ((type & STREAM) == STREAM) != 0; }
     C4_ALWAYS_INLINE bool is_doc() const { return (type & DOC) != 0; }
     C4_ALWAYS_INLINE bool is_container() const { return (type & (MAP|SEQ|STREAM)) != 0; }
     C4_ALWAYS_INLINE bool is_map() const { return (type & MAP) != 0; }
     C4_ALWAYS_INLINE bool is_seq() const { return (type & SEQ) != 0; }
-    C4_ALWAYS_INLINE bool has_val() const { return (type & VAL) != 0; }
     C4_ALWAYS_INLINE bool has_key() const { return (type & KEY) != 0; }
-    C4_ALWAYS_INLINE bool is_val() const { return (type & (KEYVAL)) == VAL; }
+    C4_ALWAYS_INLINE bool has_val() const { return (type & VAL) != 0; }
+    C4_ALWAYS_INLINE bool is_val() const { return (type & KEYVAL) == VAL; }
     C4_ALWAYS_INLINE bool is_keyval() const { return (type & KEYVAL) == KEYVAL; }
     C4_ALWAYS_INLINE bool has_key_tag() const { return (type & (KEY|KEYTAG)) == (KEY|KEYTAG); }
-    C4_ALWAYS_INLINE bool has_val_tag() const { return ((type & (VALTAG)) && (type & (VAL|MAP|SEQ))); }
+    C4_ALWAYS_INLINE bool has_val_tag() const { return ((type & VALTAG) && (type & (VAL|MAP|SEQ))); }
     C4_ALWAYS_INLINE bool has_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); }
     C4_ALWAYS_INLINE bool is_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); }
     C4_ALWAYS_INLINE bool has_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; }
@@ -17525,14 +19281,10 @@ public:
 
     inline bool   empty() const { return m_size == 0; }
 
-    inline size_t size () const { return m_size; }
+    inline size_t size() const { return m_size; }
     inline size_t capacity() const { return m_cap; }
     inline size_t slack() const { RYML_ASSERT(m_cap >= m_size); return m_cap - m_size; }
 
-    inline size_t arena_size() const { return m_arena_pos; }
-    inline size_t arena_capacity() const { return m_arena.len; }
-    inline size_t arena_slack() const { RYML_ASSERT(m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; }
-
     Callbacks const& callbacks() const { return m_callbacks; }
     void callbacks(Callbacks const& cb) { m_callbacks = cb; }
 
@@ -17587,35 +19339,43 @@ public:
     size_t root_id() const {                                 RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; }
 
     //! Get a NodeRef of a node by id
-    NodeRef       ref(size_t id);
+    NodeRef      ref(size_t id);
+    //! Get a NodeRef of a node by id
+    ConstNodeRef ref(size_t id) const;
     //! Get a NodeRef of a node by id
-    NodeRef const ref(size_t id) const;
+    ConstNodeRef cref(size_t id);
+    //! Get a NodeRef of a node by id
+    ConstNodeRef cref(size_t id) const;
 
     //! Get the root as a NodeRef
-    NodeRef       rootref();
+    NodeRef      rootref();
+    //! Get the root as a NodeRef
+    ConstNodeRef rootref() const;
     //! Get the root as a NodeRef
-    NodeRef const rootref() const;
+    ConstNodeRef crootref();
+    //! Get the root as a NodeRef
+    ConstNodeRef crootref() const;
 
     //! find a root child by name, return it as a NodeRef
     //! @note requires the root to be a map.
-    NodeRef       operator[] (csubstr key);
+    NodeRef      operator[] (csubstr key);
     //! find a root child by name, return it as a NodeRef
     //! @note requires the root to be a map.
-    NodeRef const operator[] (csubstr key) const;
+    ConstNodeRef operator[] (csubstr key) const;
 
     //! find a root child by index: return the root node's @p i-th child as a NodeRef
     //! @note @i is NOT the node id, but the child's position
-    NodeRef       operator[] (size_t i);
+    NodeRef      operator[] (size_t i);
     //! find a root child by index: return the root node's @p i-th child as a NodeRef
     //! @note @i is NOT the node id, but the child's position
-    NodeRef const operator[] (size_t i) const;
+    ConstNodeRef operator[] (size_t i) const;
 
     //! get the i-th document of the stream
     //! @note @i is NOT the node id, but the doc position within the stream
-    NodeRef       docref(size_t i);
+    NodeRef      docref(size_t i);
     //! get the i-th document of the stream
     //! @note @i is NOT the node id, but the doc position within the stream
-    NodeRef const docref(size_t i) const;
+    ConstNodeRef docref(size_t i) const;
 
     /** @} */
 
@@ -17639,14 +19399,11 @@ public:
     csubstr    const& val_anchor(size_t node) const { RYML_ASSERT( ! is_val_ref(node) && has_val_anchor(node)); return _p(node)->m_val.anchor; }
     NodeScalar const& valsc     (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val; }
 
-    bool key_is_null(size_t node) const { RYML_ASSERT(has_key(node)); if(is_key_quoted(node)) return false; csubstr s = _p(node)->m_key.scalar; return s == nullptr || s == "~" || s == "null" || s == "Null" || s == "NULL"; }
-    bool val_is_null(size_t node) const { RYML_ASSERT(has_val(node)); if(is_val_quoted(node)) return false; csubstr s = _p(node)->m_val.scalar; return s == nullptr || s == "~" || s == "null" || s == "Null" || s == "NULL"; }
-
     /** @} */
 
 public:
 
-    /** @name node type predicates */
+    /** @name node predicates */
     /** @{ */
 
     C4_ALWAYS_INLINE bool is_stream(size_t node) const { return _p(node)->m_type.is_stream(); }
@@ -17678,9 +19435,20 @@ public:
     C4_ALWAYS_INLINE bool parent_is_map(size_t node) const { RYML_ASSERT(has_parent(node)); return is_map(_p(node)->m_parent); }
 
     /** true when key and val are empty, and has no children */
-    bool empty(size_t node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! (_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); }
+    C4_ALWAYS_INLINE bool empty(size_t node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! (_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); }
     /** true when the node has an anchor named a */
-    bool has_anchor(size_t node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; }
+    C4_ALWAYS_INLINE bool has_anchor(size_t node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; }
+
+    C4_ALWAYS_INLINE bool key_is_null(size_t node) const { RYML_ASSERT(has_key(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_key_quoted() && _is_null(n->m_key.scalar); }
+    C4_ALWAYS_INLINE bool val_is_null(size_t node) const { RYML_ASSERT(has_val(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_val_quoted() && _is_null(n->m_val.scalar); }
+    static bool _is_null(csubstr s) noexcept
+    {
+        return s.str == nullptr ||
+            s == "~" ||
+            s == "null" ||
+            s == "Null" ||
+            s == "NULL";
+    }
 
     /** @} */
 
@@ -17693,16 +19461,30 @@ public:
 
     bool has_parent(size_t node) const { return _p(node)->m_parent != NONE; }
 
+    /** true if @p node has a child with id @p ch */
+    bool has_child(size_t node, size_t ch) const { return _p(ch)->m_parent == node; }
+    /** true if @p node has a child with key @p key */
     bool has_child(size_t node, csubstr key) const { return find_child(node, key) != npos; }
-    bool has_child(size_t node, size_t ch) const { return child_pos(node, ch) != npos; }
+    /** true if @p node has any children key */
     bool has_children(size_t node) const { return _p(node)->m_first_child != NONE; }
 
-    bool has_sibling(size_t node, size_t sib) const { return is_root(node) ? sib==node : child_pos(_p(node)->m_parent, sib) != npos; }
+    /** true if @p node has a sibling with id @p sib */
+    bool has_sibling(size_t node, size_t sib) const { return _p(node)->m_parent == _p(sib)->m_parent; }
+    /** true if one of the node's siblings has the given key */
     bool has_sibling(size_t node, csubstr key) const { return find_sibling(node, key) != npos; }
-    /** counts with *this */
-    bool has_siblings(size_t /*node*/) const { return true; }
-    /** does not count with *this */
-    bool has_other_siblings(size_t node) const { return is_root(node) ? false : (_p(_p(node)->m_parent)->m_first_child != _p(_p(node)->m_parent)->m_last_child); }
+    /** true if node is not a single child */
+    bool has_other_siblings(size_t node) const
+    {
+        NodeData const *n = _p(node);
+        if(C4_LIKELY(n->m_parent != NONE))
+        {
+            n = _p(n->m_parent);
+            return n->m_first_child != n->m_last_child;
+        }
+        return false;
+    }
+
+    RYML_DEPRECATED("use has_other_siblings()") bool has_siblings(size_t /*node*/) const { return true; }
 
     /** @} */
 
@@ -17843,20 +19625,22 @@ public:
     /** @name modifying hierarchy */
     /** @{ */
 
-    /** create and insert a new child of "parent". insert after the (to-be)
-     * sibling "after", which must be a child of "parent". To insert as the
+    /** create and insert a new child of @p parent. insert after the (to-be)
+     * sibling @p after, which must be a child of @p parent. To insert as the
      * first child, set after to NONE */
-    inline size_t insert_child(size_t parent, size_t after)
+    C4_ALWAYS_INLINE size_t insert_child(size_t parent, size_t after)
     {
         RYML_ASSERT(parent != NONE);
         RYML_ASSERT(is_container(parent) || is_root(parent));
-        RYML_ASSERT(after == NONE || has_child(parent, after));
+        RYML_ASSERT(after == NONE || (_p(after)->m_parent == parent));
         size_t child = _claim();
         _set_hierarchy(child, parent, after);
         return child;
     }
-    inline size_t prepend_child(size_t parent) { return insert_child(parent, NONE); }
-    inline size_t  append_child(size_t parent) { return insert_child(parent, last_child(parent)); }
+    /** create and insert a node as the first child of @p parent */
+    C4_ALWAYS_INLINE size_t prepend_child(size_t parent) { return insert_child(parent, NONE); }
+    /** create and insert a node as the last child of @p parent */
+    C4_ALWAYS_INLINE size_t  append_child(size_t parent) { return insert_child(parent, _p(parent)->m_last_child); }
 
 public:
 
@@ -17871,17 +19655,13 @@ public:
     #endif
 
     //! create and insert a new sibling of n. insert after "after"
-    inline size_t insert_sibling(size_t node, size_t after)
+    C4_ALWAYS_INLINE size_t insert_sibling(size_t node, size_t after)
     {
-        RYML_ASSERT(node != NONE);
-        RYML_ASSERT( ! is_root(node));
-        RYML_ASSERT(parent(node) != NONE);
-        RYML_ASSERT(after == NONE || (has_sibling(node, after) && has_sibling(after, node)));
-        RYML_ASSERT(get(node) != nullptr);
-        return insert_child(get(node)->m_parent, after);
+        return insert_child(_p(node)->m_parent, after);
     }
-    inline size_t prepend_sibling(size_t node) { return insert_sibling(node, NONE); }
-    inline size_t  append_sibling(size_t node) { return insert_sibling(node, last_sibling(node)); }
+    /** create and insert a node as the first node of @p parent */
+    C4_ALWAYS_INLINE size_t prepend_sibling(size_t node) { return prepend_child(_p(node)->m_parent); }
+    C4_ALWAYS_INLINE size_t  append_sibling(size_t node) { return append_child(_p(node)->m_parent); }
 
 public:
 
@@ -17994,7 +19774,13 @@ public:
     /** @{ */
 
     /** get the current size of the tree's internal arena */
-    size_t arena_pos() const { return m_arena_pos; }
+    RYML_DEPRECATED("use arena_size() instead") size_t arena_pos() const { return m_arena_pos; }
+    /** get the current size of the tree's internal arena */
+    inline size_t arena_size() const { return m_arena_pos; }
+    /** get the current capacity of the tree's internal arena */
+    inline size_t arena_capacity() const { return m_arena.len; }
+    /** get the current slack of the tree's internal arena */
+    inline size_t arena_slack() const { RYML_ASSERT(m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; }
 
     /** get the current arena */
     substr arena() const { return m_arena.first(m_arena_pos); }
@@ -18005,51 +19791,117 @@ public:
         return m_arena.is_super(s);
     }
 
-    /** serialize the given non-floating-point variable to the tree's arena, growing it as
-     * needed to accomodate the serialization.
+    /** serialize the given floating-point variable to the tree's
+     * arena, growing it as needed to accomodate the serialization.
+     *
      * @note Growing the arena may cause relocation of the entire
-     * existing arena, and thus change the contents of individual nodes.
+     * existing arena, and thus change the contents of individual
+     * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this
+     * cost, ensure that the arena is reserved to an appropriate size
+     * using .reserve_arena()
+     *
      * @see alloc_arena() */
     template<class T>
-    typename std::enable_if<!std::is_floating_point<T>::value, csubstr>::type
+    typename std::enable_if<std::is_floating_point<T>::value, csubstr>::type
     to_arena(T const& C4_RESTRICT a)
     {
         substr rem(m_arena.sub(m_arena_pos));
-        size_t num = to_chars(rem, a);
+        size_t num = to_chars_float(rem, a);
         if(num > rem.len)
         {
             rem = _grow_arena(num);
-            num = to_chars(rem, a);
+            num = to_chars_float(rem, a);
             RYML_ASSERT(num <= rem.len);
         }
         rem = _request_span(num);
         return rem;
     }
 
-    /** serialize the given floating-point variable to the tree's arena, growing it as
-     * needed to accomodate the serialization.
+    /** serialize the given non-floating-point variable to the tree's
+     * arena, growing it as needed to accomodate the serialization.
+     *
      * @note Growing the arena may cause relocation of the entire
-     * existing arena, and thus change the contents of individual nodes.
+     * existing arena, and thus change the contents of individual
+     * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this
+     * cost, ensure that the arena is reserved to an appropriate size
+     * using .reserve_arena()
+     *
      * @see alloc_arena() */
     template<class T>
-    typename std::enable_if<std::is_floating_point<T>::value, csubstr>::type
+    typename std::enable_if<!std::is_floating_point<T>::value, csubstr>::type
     to_arena(T const& C4_RESTRICT a)
     {
         substr rem(m_arena.sub(m_arena_pos));
-        size_t num = to_chars_float(rem, a);
+        size_t num = to_chars(rem, a);
         if(num > rem.len)
         {
             rem = _grow_arena(num);
-            num = to_chars_float(rem, a);
+            num = to_chars(rem, a);
             RYML_ASSERT(num <= rem.len);
         }
         rem = _request_span(num);
         return rem;
     }
 
-    /** copy the given substr to the tree's arena, growing it by the required size
+    /** serialize the given csubstr to the tree's arena, growing the
+     * arena as needed to accomodate the serialization.
+     *
      * @note Growing the arena may cause relocation of the entire
-     * existing arena, and thus change the contents of individual nodes.
+     * existing arena, and thus change the contents of individual
+     * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this
+     * cost, ensure that the arena is reserved to an appropriate size
+     * using .reserve_arena()
+     *
+     * @see alloc_arena() */
+    csubstr to_arena(csubstr a)
+    {
+        if(a.len > 0)
+        {
+            substr rem(m_arena.sub(m_arena_pos));
+            size_t num = to_chars(rem, a);
+            if(num > rem.len)
+            {
+                rem = _grow_arena(num);
+                num = to_chars(rem, a);
+                RYML_ASSERT(num <= rem.len);
+            }
+            return _request_span(num);
+        }
+        else
+        {
+            if(a.str == nullptr)
+            {
+                return csubstr{};
+            }
+            else if(m_arena.str == nullptr)
+            {
+                // Arena is empty and we want to store a non-null
+                // zero-length string.
+                // Even though the string has zero length, we need
+                // some "memory" to store a non-nullptr string
+                _grow_arena(1);
+            }
+            return _request_span(0);
+        }
+    }
+    C4_ALWAYS_INLINE csubstr to_arena(const char *s)
+    {
+        return to_arena(to_csubstr(s));
+    }
+    C4_ALWAYS_INLINE csubstr to_arena(std::nullptr_t)
+    {
+        return csubstr{};
+    }
+
+    /** copy the given substr to the tree's arena, growing it by the
+     * required size
+     *
+     * @note Growing the arena may cause relocation of the entire
+     * existing arena, and thus change the contents of individual
+     * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this
+     * cost, ensure that the arena is reserved to an appropriate size
+     * using .reserve_arena()
+     *
      * @see alloc_arena() */
     substr copy_to_arena(csubstr s)
     {
@@ -18061,7 +19913,8 @@ public:
         C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow=") // no need for terminating \0
         C4_SUPPRESS_WARNING_GCC( "-Wrestrict") // there's an assert to ensure no violation of restrict behavior
         #endif
-        memcpy(cp.str, s.str, s.len);
+        if(s.len)
+            memcpy(cp.str, s.str, s.len);
         #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10)
         C4_SUPPRESS_WARNING_GCC_POP
         #endif
@@ -18070,8 +19923,14 @@ public:
 
     /** grow the tree's string arena by the given size and return a substr
      * of the added portion
+     *
      * @note Growing the arena may cause relocation of the entire
-     * existing arena, and thus change the contents of individual nodes. */
+     * existing arena, and thus change the contents of individual
+     * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this
+     * cost, ensure that the arena is reserved to an appropriate size
+     * using .reserve_arena().
+     *
+     * @see reserve_arena() */
     substr alloc_arena(size_t sz)
     {
         if(sz > arena_slack())
@@ -18081,7 +19940,8 @@ public:
     }
 
     /** ensure the tree's internal string arena is at least the given capacity
-     * @note Growing the arena may cause relocation of the entire
+     * @note This operation has a potential complexity of O(numNodes)+O(arenasize).
+     * Growing the arena may cause relocation of the entire
      * existing arena, and thus change the contents of individual nodes. */
     void reserve_arena(size_t arena_cap)
     {
@@ -18106,7 +19966,7 @@ private:
 
     substr _grow_arena(size_t more)
     {
-        size_t cap = m_arena_pos + more;
+        size_t cap = m_arena.len + more;
         cap = cap < 2 * m_arena.len ? 2 * m_arena.len : cap;
         cap = cap < 64 ? 64 : cap;
         reserve_arena(cap);
@@ -18341,21 +20201,14 @@ public:
     void _swap_hierarchy(size_t n_, size_t m_);
     void _copy_hierarchy(size_t dst_, size_t src_);
 
-    void _copy_props(size_t dst_, size_t src_)
+    inline void _copy_props(size_t dst_, size_t src_)
     {
-        auto      & C4_RESTRICT dst = *_p(dst_);
-        auto const& C4_RESTRICT src = *_p(src_);
-        dst.m_type = src.m_type;
-        dst.m_key  = src.m_key;
-        dst.m_val  = src.m_val;
+        _copy_props(dst_, this, src_);
     }
 
-    void _copy_props_wo_key(size_t dst_, size_t src_)
+    inline void _copy_props_wo_key(size_t dst_, size_t src_)
     {
-        auto      & C4_RESTRICT dst = *_p(dst_);
-        auto const& C4_RESTRICT src = *_p(src_);
-        dst.m_type = src.m_type;
-        dst.m_val  = src.m_val;
+        _copy_props_wo_key(dst_, this, src_);
     }
 
     void _copy_props(size_t dst_, Tree const* that_tree, size_t src_)
@@ -18371,7 +20224,7 @@ public:
     {
         auto      & C4_RESTRICT dst = *_p(dst_);
         auto const& C4_RESTRICT src = *that_tree->_p(src_);
-        dst.m_type = src.m_type;
+        dst.m_type = (src.m_type & ~_KEYMASK) | (dst.m_type & _KEYMASK);
         dst.m_val  = src.m_val;
     }
 
@@ -18399,7 +20252,7 @@ public:
 
     inline void _clear_val(size_t node)
     {
-        _p(node)->m_key.clear();
+        _p(node)->m_val.clear();
         _rem_flags(node, VAL);
     }
 
@@ -18520,213 +20373,705 @@ read(NodeRef const& n, T *v);
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-/** a reference to a node in an existing yaml tree, offering a more
- * convenient API than the index-based API used in the tree. */
-class RYML_EXPORT NodeRef
+// forward decls
+class NodeRef;
+class ConstNodeRef;
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+namespace detail {
+
+template<class NodeRefType>
+struct child_iterator
 {
-private:
+    using value_type = NodeRefType;
+    using tree_type = typename NodeRefType::tree_type;
 
-    // require valid: a helper macro, undefined at the end
-    #define _C4RV() RYML_ASSERT(valid() && !is_seed())
+    tree_type * C4_RESTRICT m_tree;
+    size_t m_child_id;
 
-    Tree *C4_RESTRICT m_tree;
-    size_t m_id;
+    child_iterator(tree_type * t, size_t id) : m_tree(t), m_child_id(id) {}
 
-    /** This member is used to enable lazy operator[] writing. When a child
-     * with a key or index is not found, m_id is set to the id of the parent
-     * and the asked-for key or index are stored in this member until a write
-     * does happen. Then it is given as key or index for creating the child.
-     * When a key is used, the csubstr stores it (so the csubstr's string is
-     * non-null and the csubstr's size is different from NONE). When an index is
-     * used instead, the csubstr's string is set to null, and only the csubstr's
-     * size is set to a value different from NONE. Otherwise, when operator[]
-     * does find the child then this member is empty: the string is null and
-     * the size is NONE. */
-    csubstr m_seed;
+    child_iterator& operator++ () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->next_sibling(m_child_id); return *this; }
+    child_iterator& operator-- () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->prev_sibling(m_child_id); return *this; }
+
+    NodeRefType operator*  () const { return NodeRefType(m_tree, m_child_id); }
+    NodeRefType operator-> () const { return NodeRefType(m_tree, m_child_id); }
+
+    bool operator!= (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id != that.m_child_id; }
+    bool operator== (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id == that.m_child_id; }
+};
+
+template<class NodeRefType>
+struct children_view_
+{
+    using n_iterator = child_iterator<NodeRefType>;
+
+    n_iterator b, e;
+
+    inline children_view_(n_iterator const& C4_RESTRICT b_,
+                          n_iterator const& C4_RESTRICT e_) : b(b_), e(e_) {}
+
+    inline n_iterator begin() const { return b; }
+    inline n_iterator end  () const { return e; }
+};
+
+template<class NodeRefType, class Visitor>
+bool _visit(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false)
+{
+    size_t increment = 0;
+    if( ! (node.is_root() && skip_root))
+    {
+        if(fn(node, indentation_level))
+            return true;
+        ++increment;
+    }
+    if(node.has_children())
+    {
+        for(auto ch : node.children())
+        {
+            if(_visit(ch, fn, indentation_level + increment, false)) // no need to forward skip_root as it won't be root
+            {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+template<class NodeRefType, class Visitor>
+bool _visit_stacked(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false)
+{
+    size_t increment = 0;
+    if( ! (node.is_root() && skip_root))
+    {
+        if(fn(node, indentation_level))
+        {
+            return true;
+        }
+        ++increment;
+    }
+    if(node.has_children())
+    {
+        fn.push(node, indentation_level);
+        for(auto ch : node.children())
+        {
+            if(_visit_stacked(ch, fn, indentation_level + increment, false)) // no need to forward skip_root as it won't be root
+            {
+                fn.pop(node, indentation_level);
+                return true;
+            }
+        }
+        fn.pop(node, indentation_level);
+    }
+    return false;
+}
+
+
+//-----------------------------------------------------------------------------
+
+/** a CRTP base for read-only node methods */
+template<class Impl, class ConstImpl>
+struct RoNodeMethods
+{
+    C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wcast-align")
+    // helper CRTP macros, undefined at the end
+    #define tree_ ((ConstImpl const* C4_RESTRICT)this)->m_tree
+    #define id_ ((ConstImpl const* C4_RESTRICT)this)->m_id
+    #define tree__ ((Impl const* C4_RESTRICT)this)->m_tree
+    #define id__ ((Impl const* C4_RESTRICT)this)->m_id
+    // require valid
+    #define _C4RV()                                       \
+        RYML_ASSERT(tree_ != nullptr);                    \
+        _RYML_CB_ASSERT(tree_->m_callbacks, id_ != NONE)
+    #define _C4_IF_MUTABLE(ty) typename std::enable_if<!std::is_same<U, ConstImpl>::value, ty>::type
 
 public:
 
-    /** @name node construction */
+    /** @name node property getters */
     /** @{ */
 
-    NodeRef() : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); }
-    NodeRef(Tree &t) : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); }
-    NodeRef(Tree *t) : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); }
-    NodeRef(Tree *t, size_t id) : m_tree(t), m_id(id), m_seed() { _clear_seed(); }
-    NodeRef(Tree *t, size_t id, size_t seed_pos) : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = seed_pos; }
-    NodeRef(Tree *t, size_t id, csubstr  seed_key) : m_tree(t), m_id(id), m_seed(seed_key) {}
-    NodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE), m_seed() {}
+    /** returns the data or null when the id is NONE */
+    C4_ALWAYS_INLINE C4_PURE NodeData const* get() const noexcept { RYML_ASSERT(tree_ != nullptr); return tree_->get(id_); }
+    /** returns the data or null when the id is NONE */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto get() noexcept -> _C4_IF_MUTABLE(NodeData*) { RYML_ASSERT(tree_ != nullptr); return tree__->get(id__); }
 
-    NodeRef(NodeRef const&) = default;
-    NodeRef(NodeRef     &&) = default;
+    C4_ALWAYS_INLINE C4_PURE NodeType    type() const noexcept { _C4RV(); return tree_->type(id_); }
+    C4_ALWAYS_INLINE C4_PURE const char* type_str() const noexcept { return tree_->type_str(id_); }
 
-    NodeRef& operator= (NodeRef const&) = default;
-    NodeRef& operator= (NodeRef     &&) = default;
+    C4_ALWAYS_INLINE C4_PURE csubstr key()        const noexcept { _C4RV(); return tree_->key(id_); }
+    C4_ALWAYS_INLINE C4_PURE csubstr key_tag()    const noexcept { _C4RV(); return tree_->key_tag(id_); }
+    C4_ALWAYS_INLINE C4_PURE csubstr key_ref()    const noexcept { _C4RV(); return tree_->key_ref(id_); }
+    C4_ALWAYS_INLINE C4_PURE csubstr key_anchor() const noexcept { _C4RV(); return tree_->key_anchor(id_); }
+
+    C4_ALWAYS_INLINE C4_PURE csubstr val()        const noexcept { _C4RV(); return tree_->val(id_); }
+    C4_ALWAYS_INLINE C4_PURE csubstr val_tag()    const noexcept { _C4RV(); return tree_->val_tag(id_); }
+    C4_ALWAYS_INLINE C4_PURE csubstr val_ref()    const noexcept { _C4RV(); return tree_->val_ref(id_); }
+    C4_ALWAYS_INLINE C4_PURE csubstr val_anchor() const noexcept { _C4RV(); return tree_->val_anchor(id_); }
+
+    C4_ALWAYS_INLINE C4_PURE NodeScalar const& keysc() const noexcept { _C4RV(); return tree_->keysc(id_); }
+    C4_ALWAYS_INLINE C4_PURE NodeScalar const& valsc() const noexcept { _C4RV(); return tree_->valsc(id_); }
+
+    C4_ALWAYS_INLINE C4_PURE bool key_is_null() const noexcept { _C4RV(); return tree_->key_is_null(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool val_is_null() const noexcept { _C4RV(); return tree_->val_is_null(id_); }
 
     /** @} */
 
 public:
 
-    inline Tree      * tree()       { return m_tree; }
-    inline Tree const* tree() const { return m_tree; }
+    /** @name node property predicates */
+    /** @{ */
 
-    inline size_t id() const { return m_id; }
+    C4_ALWAYS_INLINE C4_PURE bool empty()            const noexcept { _C4RV(); return tree_->empty(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_stream()        const noexcept { _C4RV(); return tree_->is_stream(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_doc()           const noexcept { _C4RV(); return tree_->is_doc(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_container()     const noexcept { _C4RV(); return tree_->is_container(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_map()           const noexcept { _C4RV(); return tree_->is_map(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_seq()           const noexcept { _C4RV(); return tree_->is_seq(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool has_val()          const noexcept { _C4RV(); return tree_->has_val(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool has_key()          const noexcept { _C4RV(); return tree_->has_key(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_val()           const noexcept { _C4RV(); return tree_->is_val(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_keyval()        const noexcept { _C4RV(); return tree_->is_keyval(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool has_key_tag()      const noexcept { _C4RV(); return tree_->has_key_tag(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool has_val_tag()      const noexcept { _C4RV(); return tree_->has_val_tag(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool has_key_anchor()   const noexcept { _C4RV(); return tree_->has_key_anchor(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_key_anchor()    const noexcept { _C4RV(); return tree_->is_key_anchor(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool has_val_anchor()   const noexcept { _C4RV(); return tree_->has_val_anchor(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_val_anchor()    const noexcept { _C4RV(); return tree_->is_val_anchor(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool has_anchor()       const noexcept { _C4RV(); return tree_->has_anchor(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_anchor()        const noexcept { _C4RV(); return tree_->is_anchor(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_key_ref()       const noexcept { _C4RV(); return tree_->is_key_ref(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_val_ref()       const noexcept { _C4RV(); return tree_->is_val_ref(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_ref()           const noexcept { _C4RV(); return tree_->is_ref(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_anchor_or_ref() const noexcept { _C4RV(); return tree_->is_anchor_or_ref(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_key_quoted()    const noexcept { _C4RV(); return tree_->is_key_quoted(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_val_quoted()    const noexcept { _C4RV(); return tree_->is_val_quoted(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool is_quoted()        const noexcept { _C4RV(); return tree_->is_quoted(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool parent_is_seq()    const noexcept { _C4RV(); return tree_->parent_is_seq(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool parent_is_map()    const noexcept { _C4RV(); return tree_->parent_is_map(id_); }
 
-    inline NodeData      * get()       { return m_tree->get(m_id); }
-    inline NodeData const* get() const { return m_tree->get(m_id); }
+    /** @} */
 
-    inline bool operator== (NodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid() && !that.is_seed()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; }
-    inline bool operator!= (NodeRef const& that) const { return ! this->operator==(that); }
+public:
 
-    inline bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); }
-    inline bool operator!= (std::nullptr_t) const { return ! this->operator== (nullptr); }
+    /** @name hierarchy predicates */
+    /** @{ */
 
-    inline bool operator== (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; }
-    inline bool operator!= (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; }
+    C4_ALWAYS_INLINE C4_PURE bool is_root()    const noexcept { _C4RV(); return tree_->is_root(id_); }
+    C4_ALWAYS_INLINE C4_PURE bool has_parent() const noexcept { _C4RV(); return tree_->has_parent(id_); }
 
-    //inline operator bool () const { return m_tree == nullptr || m_id == NONE || is_seed(); }
+    C4_ALWAYS_INLINE C4_PURE bool has_child(ConstImpl const& ch) const noexcept { _C4RV(); return tree_->has_child(id_, ch.m_id); }
+    C4_ALWAYS_INLINE C4_PURE bool has_child(csubstr name) const noexcept { _C4RV(); return tree_->has_child(id_, name); }
+    C4_ALWAYS_INLINE C4_PURE bool has_children() const noexcept { _C4RV(); return tree_->has_children(id_); }
+
+    C4_ALWAYS_INLINE C4_PURE bool has_sibling(ConstImpl const& n) const noexcept { _C4RV(); return tree_->has_sibling(id_, n.m_id); }
+    C4_ALWAYS_INLINE C4_PURE bool has_sibling(csubstr name) const noexcept { _C4RV(); return tree_->has_sibling(id_, name); }
+    /** counts with this */
+    C4_ALWAYS_INLINE C4_PURE bool has_siblings() const noexcept { _C4RV(); return tree_->has_siblings(id_); }
+    /** does not count with this */
+    C4_ALWAYS_INLINE C4_PURE bool has_other_siblings() const noexcept { _C4RV(); return tree_->has_other_siblings(id_); }
+
+    /** @} */
 
 public:
 
-    inline bool valid() const { return m_tree != nullptr && m_id != NONE; }
-    inline bool is_seed() const { return m_seed.str != nullptr || m_seed.len != NONE; }
+    /** @name hierarchy getters */
+    /** @{ */
 
-    inline void _clear_seed() { /*do this manually or an assert is triggered*/ m_seed.str = nullptr; m_seed.len = NONE; }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto doc(size_t num) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->doc(num)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl doc(size_t num) const noexcept { _C4RV(); return {tree_, tree_->doc(num)}; }
+
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto parent() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->parent(id__)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl parent() const noexcept { _C4RV(); return {tree_, tree_->parent(id_)}; }
+
+
+    /** O(#num_children) */
+    C4_ALWAYS_INLINE C4_PURE size_t child_pos(ConstImpl const& n) const noexcept { _C4RV(); return tree_->child_pos(id_, n.m_id); }
+    C4_ALWAYS_INLINE C4_PURE size_t num_children() const noexcept { _C4RV(); return tree_->num_children(id_); }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto first_child() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->first_child(id__)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl first_child() const noexcept { _C4RV(); return {tree_, tree_->first_child(id_)}; }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto last_child() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->last_child(id__)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl last_child () const noexcept { _C4RV(); return {tree_, tree_->last_child (id_)}; }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto child(size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->child(id__, pos)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl child(size_t pos) const noexcept { _C4RV(); return {tree_, tree_->child(id_, pos)}; }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto find_child(csubstr name)  noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->find_child(id__, name)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl find_child(csubstr name) const noexcept { _C4RV(); return {tree_, tree_->find_child(id_, name)}; }
+
+
+    /** O(#num_siblings) */
+    C4_ALWAYS_INLINE C4_PURE size_t num_siblings() const noexcept { _C4RV(); return tree_->num_siblings(id_); }
+    C4_ALWAYS_INLINE C4_PURE size_t num_other_siblings() const noexcept { _C4RV(); return tree_->num_other_siblings(id_); }
+    C4_ALWAYS_INLINE C4_PURE size_t sibling_pos(ConstImpl const& n) const noexcept { _C4RV(); return tree_->child_pos(tree_->parent(id_), n.m_id); }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto prev_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->prev_sibling(id__)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl prev_sibling() const noexcept { _C4RV(); return {tree_, tree_->prev_sibling(id_)}; }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto next_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->next_sibling(id__)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl next_sibling() const noexcept { _C4RV(); return {tree_, tree_->next_sibling(id_)}; }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto first_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->first_sibling(id__)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl first_sibling() const noexcept { _C4RV(); return {tree_, tree_->first_sibling(id_)}; }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto last_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->last_sibling(id__)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl last_sibling () const noexcept { _C4RV(); return {tree_, tree_->last_sibling(id_)}; }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto sibling(size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->sibling(id__, pos)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl sibling(size_t pos) const noexcept { _C4RV(); return {tree_, tree_->sibling(id_, pos)}; }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto find_sibling(csubstr name) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->find_sibling(id__, name)}; }
+    C4_ALWAYS_INLINE C4_PURE ConstImpl find_sibling(csubstr name) const noexcept { _C4RV(); return {tree_, tree_->find_sibling(id_, name)}; }
+
+
+    /** O(num_children) */
+    C4_ALWAYS_INLINE C4_PURE ConstImpl operator[] (csubstr k) const noexcept
+    {
+        _C4RV();
+        size_t ch = tree_->find_child(id_, k);
+        _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE);
+        return {tree_, ch};
+    }
+    /** Find child by key. O(num_children). returns a seed node if no such child is found.  */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto operator[] (csubstr k) noexcept -> _C4_IF_MUTABLE(Impl)
+    {
+        _C4RV();
+        size_t ch = tree__->find_child(id__, k);
+        return ch != NONE ? Impl(tree__, ch) : NodeRef(tree__, id__, k);
+    }
+
+    /** O(num_children) */
+    C4_ALWAYS_INLINE C4_PURE ConstImpl operator[] (size_t pos) const noexcept
+    {
+        _C4RV();
+        size_t ch = tree_->child(id_, pos);
+        _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE);
+        return {tree_, ch};
+    }
+
+    /** Find child by position. O(pos). returns a seed node if no such child is found.  */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto operator[] (size_t pos) noexcept -> _C4_IF_MUTABLE(Impl)
+    {
+        _C4RV();
+        size_t ch = tree__->child(id__, pos);
+        return ch != NONE ? Impl(tree__, ch) : NodeRef(tree__, id__, pos);
+    }
+
+    /** @} */
 
 public:
 
-    /** @name node property getters */
+    /** deserialization */
     /** @{ */
 
-    inline NodeType     type() const { _C4RV(); return m_tree->type(m_id); }
-    inline const char*  type_str() const { _C4RV(); RYML_ASSERT(valid() && ! is_seed()); return m_tree->type_str(m_id); }
+    template<class T>
+    ConstImpl const& operator>> (T &v) const
+    {
+        _C4RV();
+        if( ! read((ConstImpl const&)*this, &v))
+            _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize value");
+        return *((ConstImpl const*)this);
+    }
 
-    inline csubstr    key()        const { _C4RV(); return m_tree->key(m_id); }
-    inline csubstr    key_tag()    const { _C4RV(); return m_tree->key_tag(m_id); }
-    inline csubstr    key_ref()    const { _C4RV(); return m_tree->key_ref(m_id); }
-    inline csubstr    key_anchor() const { _C4RV(); return m_tree->key_anchor(m_id); }
-    inline NodeScalar keysc()      const { _C4RV(); return m_tree->keysc(m_id); }
+    /** deserialize the node's key to the given variable */
+    template<class T>
+    ConstImpl const& operator>> (Key<T> v) const
+    {
+        _C4RV();
+        if( ! from_chars(key(), &v.k))
+            _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize key");
+        return *((ConstImpl const*)this);
+    }
 
-    inline csubstr    val()        const { _C4RV(); return m_tree->val(m_id); }
-    inline csubstr    val_tag()    const { _C4RV(); return m_tree->val_tag(m_id); }
-    inline csubstr    val_ref()    const { _C4RV(); return m_tree->val_ref(m_id); }
-    inline csubstr    val_anchor() const { _C4RV(); return m_tree->val_anchor(m_id); }
-    inline NodeScalar valsc()      const { _C4RV(); return m_tree->valsc(m_id); }
+    /** deserialize the node's key as base64 */
+    ConstImpl const& operator>> (Key<fmt::base64_wrapper> w) const
+    {
+        deserialize_key(w.wrapper);
+        return *((ConstImpl const*)this);
+    }
 
-    inline bool key_is_null() const { _C4RV(); return m_tree->key_is_null(m_id); }
-    inline bool val_is_null() const { _C4RV(); return m_tree->val_is_null(m_id); }
+    /** deserialize the node's val as base64 */
+    ConstImpl const& operator>> (fmt::base64_wrapper w) const
+    {
+        deserialize_val(w);
+        return *((ConstImpl const*)this);
+    }
 
-    /** decode the base64-encoded key deserialize and assign the
+    /** decode the base64-encoded key and assign the
      * decoded blob to the given buffer/
      * @return the size of base64-decoded blob */
-    size_t deserialize_key(fmt::base64_wrapper v) const;
-    /** decode the base64-encoded key deserialize and assign the
+    size_t deserialize_key(fmt::base64_wrapper v) const
+    {
+        _C4RV();
+        return from_chars(key(), &v);
+    }
+    /** decode the base64-encoded key and assign the
      * decoded blob to the given buffer/
      * @return the size of base64-decoded blob */
-    size_t deserialize_val(fmt::base64_wrapper v) const;
+    size_t deserialize_val(fmt::base64_wrapper v) const
+    {
+        _C4RV();
+        return from_chars(val(), &v);
+    };
+
+    template<class T>
+    bool get_if(csubstr name, T *var) const
+    {
+        auto ch = find_child(name);
+        if(!ch.valid())
+            return false;
+        ch >> *var;
+        return true;
+    }
+
+    template<class T>
+    bool get_if(csubstr name, T *var, T const& fallback) const
+    {
+        auto ch = find_child(name);
+        if(ch.valid())
+        {
+            ch >> *var;
+            return true;
+        }
+        else
+        {
+            *var = fallback;
+            return false;
+        }
+    }
 
     /** @} */
 
 public:
 
-    /** @name node property predicates */
+    #if defined(__clang__)
+    #   pragma clang diagnostic push
+    #   pragma clang diagnostic ignored "-Wnull-dereference"
+    #elif defined(__GNUC__)
+    #   pragma GCC diagnostic push
+    #   if __GNUC__ >= 6
+    #       pragma GCC diagnostic ignored "-Wnull-dereference"
+    #   endif
+    #endif
+
+    /** @name iteration */
+    /** @{ */
+
+    using iterator = detail::child_iterator<Impl>;
+    using const_iterator = detail::child_iterator<ConstImpl>;
+    using children_view = detail::children_view_<Impl>;
+    using const_children_view = detail::children_view_<ConstImpl>;
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto begin() noexcept -> _C4_IF_MUTABLE(iterator) { _C4RV(); return iterator(tree__, tree__->first_child(id__)); }
+    C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); }
+    C4_ALWAYS_INLINE C4_PURE const_iterator cbegin() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); }
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto end() noexcept -> _C4_IF_MUTABLE(iterator) { _C4RV(); return iterator(tree__, NONE); }
+    C4_ALWAYS_INLINE C4_PURE const_iterator end() const noexcept { _C4RV(); return const_iterator(tree_, NONE); }
+    C4_ALWAYS_INLINE C4_PURE const_iterator cend() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); }
+
+    /** get an iterable view over children */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto children() noexcept -> _C4_IF_MUTABLE(children_view) { _C4RV(); return children_view(begin(), end()); }
+    /** get an iterable view over children */
+    C4_ALWAYS_INLINE C4_PURE const_children_view children() const noexcept { _C4RV(); return const_children_view(begin(), end()); }
+    /** get an iterable view over children */
+    C4_ALWAYS_INLINE C4_PURE const_children_view cchildren() const noexcept { _C4RV(); return const_children_view(begin(), end()); }
+
+    /** get an iterable view over all siblings (including the calling node) */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE C4_PURE auto siblings() noexcept -> _C4_IF_MUTABLE(children_view)
+    {
+        _C4RV();
+        NodeData const *nd = tree__->get(id__);
+        return (nd->m_parent != NONE) ? // does it have a parent?
+            children_view(iterator(tree__, tree_->get(nd->m_parent)->m_first_child), iterator(tree__, NONE))
+            :
+            children_view(end(), end());
+    }
+    /** get an iterable view over all siblings (including the calling node) */
+    C4_ALWAYS_INLINE C4_PURE const_children_view siblings() const noexcept
+    {
+        _C4RV();
+        NodeData const *nd = tree_->get(id_);
+        return (nd->m_parent != NONE) ? // does it have a parent?
+            const_children_view(const_iterator(tree_, tree_->get(nd->m_parent)->m_first_child), const_iterator(tree_, NONE))
+            :
+            const_children_view(end(), end());
+    }
+    /** get an iterable view over all siblings (including the calling node) */
+    C4_ALWAYS_INLINE C4_PURE const_children_view csiblings() const noexcept { return siblings(); }
+
+    /** visit every child node calling fn(node) */
+    template<class Visitor>
+    C4_ALWAYS_INLINE C4_PURE bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) const noexcept
+    {
+        return detail::_visit(*(ConstImpl*)this, fn, indentation_level, skip_root);
+    }
+    /** visit every child node calling fn(node) */
+    template<class Visitor, class U=Impl>
+    auto visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) noexcept
+        -> _C4_IF_MUTABLE(bool)
+    {
+        return detail::_visit(*(Impl*)this, fn, indentation_level, skip_root);
+    }
+
+    /** visit every child node calling fn(node, level) */
+    template<class Visitor>
+    C4_ALWAYS_INLINE C4_PURE bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) const noexcept
+    {
+        return detail::_visit_stacked(*(ConstImpl*)this, fn, indentation_level, skip_root);
+    }
+    /** visit every child node calling fn(node, level) */
+    template<class Visitor, class U=Impl>
+    auto visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) noexcept
+        -> _C4_IF_MUTABLE(bool)
+    {
+        return detail::_visit_stacked(*(Impl*)this, fn, indentation_level, skip_root);
+    }
+
+    /** @} */
+
+    #if defined(__clang__)
+    #   pragma clang diagnostic pop
+    #elif defined(__GNUC__)
+    #   pragma GCC diagnostic pop
+    #endif
+
+    #undef _C4_IF_MUTABLE
+    #undef _C4RV
+    #undef tree_
+    #undef tree__
+    #undef id_
+    #undef id__
+
+    C4_SUPPRESS_WARNING_GCC_CLANG_POP
+};
+
+} // namespace detail
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+class RYML_EXPORT ConstNodeRef : public detail::RoNodeMethods<ConstNodeRef, ConstNodeRef>
+{
+public:
+
+    using tree_type = Tree const;
+
+public:
+
+    Tree const* C4_RESTRICT m_tree;
+    size_t m_id;
+
+    friend NodeRef;
+    friend struct detail::RoNodeMethods<ConstNodeRef, ConstNodeRef>;
+
+public:
+
+    /** @name construction */
     /** @{ */
 
-    C4_ALWAYS_INLINE bool is_stream()        const { _C4RV(); return m_tree->is_stream(m_id); }
-    C4_ALWAYS_INLINE bool is_doc()           const { _C4RV(); return m_tree->is_doc(m_id); }
-    C4_ALWAYS_INLINE bool is_container()     const { _C4RV(); return m_tree->is_container(m_id); }
-    C4_ALWAYS_INLINE bool is_map()           const { _C4RV(); return m_tree->is_map(m_id); }
-    C4_ALWAYS_INLINE bool is_seq()           const { _C4RV(); return m_tree->is_seq(m_id); }
-    C4_ALWAYS_INLINE bool has_val()          const { _C4RV(); return m_tree->has_val(m_id); }
-    C4_ALWAYS_INLINE bool has_key()          const { _C4RV(); return m_tree->has_key(m_id); }
-    C4_ALWAYS_INLINE bool is_val()           const { _C4RV(); return m_tree->is_val(m_id); }
-    C4_ALWAYS_INLINE bool is_keyval()        const { _C4RV(); return m_tree->is_keyval(m_id); }
-    C4_ALWAYS_INLINE bool has_key_tag()      const { _C4RV(); return m_tree->has_key_tag(m_id); }
-    C4_ALWAYS_INLINE bool has_val_tag()      const { _C4RV(); return m_tree->has_val_tag(m_id); }
-    C4_ALWAYS_INLINE bool has_key_anchor()   const { _C4RV(); return m_tree->has_key_anchor(m_id); }
-    C4_ALWAYS_INLINE bool is_key_anchor()    const { _C4RV(); return m_tree->is_key_anchor(m_id); }
-    C4_ALWAYS_INLINE bool has_val_anchor()   const { _C4RV(); return m_tree->has_val_anchor(m_id); }
-    C4_ALWAYS_INLINE bool is_val_anchor()    const { _C4RV(); return m_tree->is_val_anchor(m_id); }
-    C4_ALWAYS_INLINE bool has_anchor()       const { _C4RV(); return m_tree->has_anchor(m_id); }
-    C4_ALWAYS_INLINE bool is_anchor()        const { _C4RV(); return m_tree->is_anchor(m_id); }
-    C4_ALWAYS_INLINE bool is_key_ref()       const { _C4RV(); return m_tree->is_key_ref(m_id); }
-    C4_ALWAYS_INLINE bool is_val_ref()       const { _C4RV(); return m_tree->is_val_ref(m_id); }
-    C4_ALWAYS_INLINE bool is_ref()           const { _C4RV(); return m_tree->is_ref(m_id); }
-    C4_ALWAYS_INLINE bool is_anchor_or_ref() const { _C4RV(); return m_tree->is_anchor_or_ref(m_id); }
-    C4_ALWAYS_INLINE bool is_key_quoted()    const { _C4RV(); return m_tree->is_key_quoted(m_id); }
-    C4_ALWAYS_INLINE bool is_val_quoted()    const { _C4RV(); return m_tree->is_val_quoted(m_id); }
-    C4_ALWAYS_INLINE bool is_quoted()        const { _C4RV(); return m_tree->is_quoted(m_id); }
-
-    C4_ALWAYS_INLINE bool parent_is_seq()    const { _C4RV(); return m_tree->parent_is_seq(m_id); }
-    C4_ALWAYS_INLINE bool parent_is_map()    const { _C4RV(); return m_tree->parent_is_map(m_id); }
-
-    /** true when name and value are empty, and has no children */
-    C4_ALWAYS_INLINE bool empty() const { _C4RV(); return m_tree->empty(m_id); }
+    ConstNodeRef() : m_tree(nullptr), m_id(NONE) {}
+    ConstNodeRef(Tree const &t) : m_tree(&t), m_id(t .root_id()) {}
+    ConstNodeRef(Tree const *t) : m_tree(t ), m_id(t->root_id()) {}
+    ConstNodeRef(Tree const *t, size_t id) : m_tree(t), m_id(id) {}
+    ConstNodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE) {}
+
+    ConstNodeRef(ConstNodeRef const&) = default;
+    ConstNodeRef(ConstNodeRef     &&) = default;
+
+    ConstNodeRef(NodeRef const&);
+    ConstNodeRef(NodeRef     &&);
 
     /** @} */
 
 public:
 
-    /** @name hierarchy predicates */
+    /** @name assignment */
     /** @{ */
 
-    inline bool is_root()    const { _C4RV(); return m_tree->is_root(m_id); }
-    inline bool has_parent() const { _C4RV(); return m_tree->has_parent(m_id); }
+    ConstNodeRef& operator= (std::nullptr_t) { m_tree = nullptr; m_id = NONE; return *this; }
 
-    inline bool has_child(NodeRef const& ch) const { _C4RV(); return m_tree->has_child(m_id, ch.m_id); }
-    inline bool has_child(csubstr name) const { _C4RV();  return m_tree->has_child(m_id, name); }
-    inline bool has_children() const { _C4RV(); return m_tree->has_children(m_id); }
+    ConstNodeRef& operator= (ConstNodeRef const&) = default;
+    ConstNodeRef& operator= (ConstNodeRef     &&) = default;
+
+    ConstNodeRef& operator= (NodeRef const&);
+    ConstNodeRef& operator= (NodeRef     &&);
 
-    inline bool has_sibling(NodeRef const& n) const { _C4RV(); return m_tree->has_sibling(m_id, n.m_id); }
-    inline bool has_sibling(csubstr name) const { _C4RV();  return m_tree->has_sibling(m_id, name); }
-    /** counts with this */
-    inline bool has_siblings() const { _C4RV(); return m_tree->has_siblings(m_id); }
-    /** does not count with this */
-    inline bool has_other_siblings() const { _C4RV(); return m_tree->has_other_siblings(m_id); }
 
     /** @} */
 
 public:
 
-    /** @name hierarchy getters */
+    /** @name state queries */
     /** @{ */
 
-    NodeRef       parent()       { _C4RV(); return {m_tree, m_tree->parent(m_id)}; }
-    NodeRef const parent() const { _C4RV(); return {m_tree, m_tree->parent(m_id)}; }
+    C4_ALWAYS_INLINE C4_PURE bool valid() const noexcept { return m_tree != nullptr && m_id != NONE; }
 
-    NodeRef       prev_sibling()       { _C4RV(); return {m_tree, m_tree->prev_sibling(m_id)}; }
-    NodeRef const prev_sibling() const { _C4RV(); return {m_tree, m_tree->prev_sibling(m_id)}; }
+    /** @} */
 
-    NodeRef       next_sibling()       { _C4RV(); return {m_tree, m_tree->next_sibling(m_id)}; }
-    NodeRef const next_sibling() const { _C4RV(); return {m_tree, m_tree->next_sibling(m_id)}; }
+public:
 
-    /** O(#num_children) */
-    size_t  num_children() const { _C4RV(); return m_tree->num_children(m_id); }
-    size_t  child_pos(NodeRef const& n) const { _C4RV(); return m_tree->child_pos(m_id, n.m_id); }
-    NodeRef       first_child()       { _C4RV(); return {m_tree, m_tree->first_child(m_id)}; }
-    NodeRef const first_child() const { _C4RV(); return {m_tree, m_tree->first_child(m_id)}; }
-    NodeRef       last_child ()       { _C4RV(); return {m_tree, m_tree->last_child (m_id)}; }
-    NodeRef const last_child () const { _C4RV(); return {m_tree, m_tree->last_child (m_id)}; }
-    NodeRef       child(size_t pos)       { _C4RV(); return {m_tree, m_tree->child(m_id, pos)}; }
-    NodeRef const child(size_t pos) const { _C4RV(); return {m_tree, m_tree->child(m_id, pos)}; }
-    NodeRef       find_child(csubstr name)       { _C4RV(); return {m_tree, m_tree->find_child(m_id, name)}; }
-    NodeRef const find_child(csubstr name) const { _C4RV(); return {m_tree, m_tree->find_child(m_id, name)}; }
+    /** @name member getters */
+    /** @{ */
 
-    /** O(#num_siblings) */
-    size_t  num_siblings() const { _C4RV(); return m_tree->num_siblings(m_id); }
-    size_t  num_other_siblings() const { _C4RV(); return m_tree->num_other_siblings(m_id); }
-    size_t  sibling_pos(NodeRef const& n) const { _C4RV(); return m_tree->child_pos(m_tree->parent(m_id), n.m_id); }
-    NodeRef       first_sibling()       { _C4RV(); return {m_tree, m_tree->first_sibling(m_id)}; }
-    NodeRef const first_sibling() const { _C4RV(); return {m_tree, m_tree->first_sibling(m_id)}; }
-    NodeRef       last_sibling ()       { _C4RV(); return {m_tree, m_tree->last_sibling(m_id)}; }
-    NodeRef const last_sibling () const { _C4RV(); return {m_tree, m_tree->last_sibling(m_id)}; }
-    NodeRef       sibling(size_t pos)       { _C4RV(); return {m_tree, m_tree->sibling(m_id, pos)}; }
-    NodeRef const sibling(size_t pos) const { _C4RV(); return {m_tree, m_tree->sibling(m_id, pos)}; }
-    NodeRef       find_sibling(csubstr name)       { _C4RV(); return {m_tree, m_tree->find_sibling(m_id, name)}; }
-    NodeRef const find_sibling(csubstr name) const { _C4RV(); return {m_tree, m_tree->find_sibling(m_id, name)}; }
-
-    NodeRef       doc(size_t num)       { _C4RV(); return {m_tree, m_tree->doc(num)}; }
-    NodeRef const doc(size_t num) const { _C4RV(); return {m_tree, m_tree->doc(num)}; }
+    C4_ALWAYS_INLINE C4_PURE Tree const* tree() const noexcept { return m_tree; }
+    C4_ALWAYS_INLINE C4_PURE size_t id() const noexcept { return m_id; }
+
+    /** @} */
+
+public:
+
+    /** @name comparisons */
+    /** @{ */
+
+    C4_ALWAYS_INLINE C4_PURE bool operator== (ConstNodeRef const& that) const noexcept { RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; }
+    C4_ALWAYS_INLINE C4_PURE bool operator!= (ConstNodeRef const& that) const noexcept { RYML_ASSERT(that.m_tree == m_tree); return ! this->operator==(that); }
+
+    C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return m_tree == nullptr || m_id == NONE; }
+    C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return ! this->operator== (nullptr); }
+
+    C4_ALWAYS_INLINE C4_PURE bool operator== (csubstr val) const noexcept { RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; }
+    C4_ALWAYS_INLINE C4_PURE bool operator!= (csubstr val) const noexcept { RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; }
+
+    /** @} */
+
+};
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+/** a reference to a node in an existing yaml tree, offering a more
+ * convenient API than the index-based API used in the tree. */
+class RYML_EXPORT NodeRef : public detail::RoNodeMethods<NodeRef, ConstNodeRef>
+{
+public:
+
+    using tree_type = Tree;
+    using base_type = detail::RoNodeMethods<NodeRef, ConstNodeRef>;
+
+private:
+
+    Tree *C4_RESTRICT m_tree;
+    size_t m_id;
+
+    /** This member is used to enable lazy operator[] writing. When a child
+     * with a key or index is not found, m_id is set to the id of the parent
+     * and the asked-for key or index are stored in this member until a write
+     * does happen. Then it is given as key or index for creating the child.
+     * When a key is used, the csubstr stores it (so the csubstr's string is
+     * non-null and the csubstr's size is different from NONE). When an index is
+     * used instead, the csubstr's string is set to null, and only the csubstr's
+     * size is set to a value different from NONE. Otherwise, when operator[]
+     * does find the child then this member is empty: the string is null and
+     * the size is NONE. */
+    csubstr m_seed;
+
+    friend ConstNodeRef;
+    friend struct detail::RoNodeMethods<NodeRef, ConstNodeRef>;
+
+    // require valid: a helper macro, undefined at the end
+    #define _C4RV()                                                         \
+        RYML_ASSERT(m_tree != nullptr);                                     \
+        _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE && !is_seed())
+
+public:
+
+    /** @name construction */
+    /** @{ */
+
+    NodeRef() : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); }
+    NodeRef(Tree &t) : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); }
+    NodeRef(Tree *t) : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); }
+    NodeRef(Tree *t, size_t id) : m_tree(t), m_id(id), m_seed() { _clear_seed(); }
+    NodeRef(Tree *t, size_t id, size_t seed_pos) : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = seed_pos; }
+    NodeRef(Tree *t, size_t id, csubstr  seed_key) : m_tree(t), m_id(id), m_seed(seed_key) {}
+    NodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE), m_seed() {}
+
+    /** @} */
+
+public:
+
+    /** @name assignment */
+    /** @{ */
+
+    NodeRef(NodeRef const&) = default;
+    NodeRef(NodeRef     &&) = default;
+
+    NodeRef& operator= (NodeRef const&) = default;
+    NodeRef& operator= (NodeRef     &&) = default;
+
+    /** @} */
+
+public:
+
+    /** @name state queries */
+    /** @{ */
+
+    inline bool valid() const { return m_tree != nullptr && m_id != NONE; }
+    inline bool is_seed() const { return m_seed.str != nullptr || m_seed.len != NONE; }
+
+    inline void _clear_seed() { /*do this manually or an assert is triggered*/ m_seed.str = nullptr; m_seed.len = NONE; }
+
+    /** @} */
+
+public:
+
+    /** @name comparisons */
+    /** @{ */
+
+    inline bool operator== (NodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid() && !that.is_seed()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; }
+    inline bool operator!= (NodeRef const& that) const { return ! this->operator==(that); }
+
+    inline bool operator== (ConstNodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; }
+    inline bool operator!= (ConstNodeRef const& that) const { return ! this->operator==(that); }
+
+    inline bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); }
+    inline bool operator!= (std::nullptr_t) const { return m_tree != nullptr && m_id != NONE && !is_seed(); }
+
+    inline bool operator== (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; }
+    inline bool operator!= (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; }
+
+    //inline operator bool () const { return m_tree == nullptr || m_id == NONE || is_seed(); }
+
+    /** @} */
+
+public:
+
+    /** @name node property getters */
+    /** @{ */
+
+    C4_ALWAYS_INLINE C4_PURE Tree * tree() noexcept { return m_tree; }
+    C4_ALWAYS_INLINE C4_PURE Tree const* tree() const noexcept { return m_tree; }
+
+    C4_ALWAYS_INLINE C4_PURE size_t id() const noexcept { return m_id; }
 
     /** @} */
 
@@ -18736,6 +21081,7 @@ public:
     /** @{ */
 
     void change_type(NodeType t) { _C4RV(); m_tree->change_type(m_id, t); }
+
     void set_type(NodeType t) { _C4RV(); m_tree->_set_flags(m_id, t); }
     void set_key(csubstr key) { _C4RV(); m_tree->_set_key(m_id, key); }
     void set_val(csubstr val) { _C4RV(); m_tree->_set_val(m_id, val); }
@@ -18762,6 +21108,12 @@ public:
         m_tree->_set_val(m_id, s);
         return s.len;
     }
+    size_t set_val_serialized(std::nullptr_t)
+    {
+        _C4RV();
+        m_tree->_set_val(m_id, csubstr{});
+        return 0;
+    }
 
     /** encode a blob as base64, then assign the result to the node's key
      * @return the size of base64-encoded blob */
@@ -18801,62 +21153,6 @@ public:
         m_tree->remove_children(m_id);
     }
 
-    /** @} */
-
-public:
-
-    /** hierarchy getters */
-    /** @{ */
-
-    /** O(num_children) */
-    NodeRef operator[] (csubstr k)
-    {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        size_t ch = m_tree->find_child(m_id, k);
-        NodeRef r = ch != NONE ? NodeRef(m_tree, ch) : NodeRef(m_tree, m_id, k);
-        return r;
-    }
-
-    /** O(num_children) */
-    NodeRef const operator[] (csubstr k) const
-    {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        size_t ch = m_tree->find_child(m_id, k);
-        RYML_ASSERT(ch != NONE);
-        NodeRef const r(m_tree, ch);
-        return r;
-    }
-
-    /** O(num_children) */
-    NodeRef operator[] (size_t pos)
-    {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        size_t ch = m_tree->child(m_id, pos);
-        NodeRef r = ch != NONE ? NodeRef(m_tree, ch) : NodeRef(m_tree, m_id, pos);
-        return r;
-    }
-
-    /** O(num_children) */
-    NodeRef const operator[] (size_t pos) const
-    {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        size_t ch = m_tree->child(m_id, pos);
-        RYML_ASSERT(ch != NONE);
-        NodeRef const r(m_tree, ch);
-        return r;
-    }
-
-    /** @} */
-
-public:
-
-    /** node modification */
-    /** @{ */
-
     void create() { _apply_seed(); }
 
     inline void operator= (NodeType_e t)
@@ -18883,6 +21179,12 @@ public:
         _apply(v);
     }
 
+    inline void operator= (std::nullptr_t)
+    {
+        _apply_seed();
+        _apply(csubstr{});
+    }
+
     inline void operator= (csubstr v)
     {
         _apply_seed();
@@ -18902,9 +21204,12 @@ public:
 
 public:
 
+    /** @name serialization */
+    /** @{ */
+
     /** serialize a variable to the arena */
     template<class T>
-    inline csubstr to_arena(T const& C4_RESTRICT s) const
+    inline csubstr to_arena(T const& C4_RESTRICT s)
     {
         _C4RV();
         return m_tree->to_arena(s);
@@ -18929,21 +21234,6 @@ public:
         return *this;
     }
 
-    template<class T>
-    inline NodeRef const& operator>> (T &v) const
-    {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        RYML_ASSERT(get() != nullptr);
-        if( ! read(*this, &v))
-        {
-            c4::yml::error("could not deserialize value");
-        }
-        return *this;
-    }
-
-public:
-
     /** serialize a variable, then assign the result to the node's key */
     template<class T>
     inline NodeRef& operator<< (Key<const T> const& C4_RESTRICT v)
@@ -18962,19 +21252,6 @@ public:
         return *this;
     }
 
-    /** deserialize the node's key to the given variable */
-    template<class T>
-    inline NodeRef const& operator>> (Key<T> v) const
-    {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        RYML_ASSERT(get() != nullptr);
-        from_chars(key(), &v.k);
-        return *this;
-    }
-
-public:
-
     NodeRef& operator<< (Key<fmt::const_base64_wrapper> w)
     {
         set_key_serialized(w.wrapper);
@@ -18987,43 +21264,7 @@ public:
         return *this;
     }
 
-    NodeRef const& operator>> (Key<fmt::base64_wrapper> w) const
-    {
-        deserialize_key(w.wrapper);
-        return *this;
-    }
-
-    NodeRef const& operator>> (fmt::base64_wrapper w) const
-    {
-        deserialize_val(w);
-        return *this;
-    }
-
-public:
-
-    template<class T>
-    void get_if(csubstr name, T *var) const
-    {
-        auto ch = find_child(name);
-        if(ch.valid())
-        {
-            ch >> *var;
-        }
-    }
-
-    template<class T>
-    void get_if(csubstr name, T *var, T fallback) const
-    {
-        auto ch = find_child(name);
-        if(ch.valid())
-        {
-            ch >> *var;
-        }
-        else
-        {
-            *var = fallback;
-        }
-    }
+    /** @} */
 
 private:
 
@@ -19067,6 +21308,9 @@ private:
 
 public:
 
+    /** @name modification of hierarchy */
+    /** @{ */
+
     inline NodeRef insert_child(NodeRef after)
     {
         _C4RV();
@@ -19116,7 +21360,7 @@ public:
 
 public:
 
-    inline NodeRef insert_sibling(NodeRef const after)
+    inline NodeRef insert_sibling(ConstNodeRef const& after)
     {
         _C4RV();
         RYML_ASSERT(after.m_tree == m_tree);
@@ -19124,7 +21368,7 @@ public:
         return r;
     }
 
-    inline NodeRef insert_sibling(NodeInit const& i, NodeRef const after)
+    inline NodeRef insert_sibling(NodeInit const& i, ConstNodeRef const& after)
     {
         _C4RV();
         RYML_ASSERT(after.m_tree == m_tree);
@@ -19195,20 +21439,23 @@ public:
 
 public:
 
-    /** change the node's position within its parent */
-    inline void move(NodeRef const after)
+    /** change the node's position within its parent, placing it after
+     * @p after. To move to the first position in the parent, simply
+     * pass an empty or default-constructed reference like this:
+     * `n.move({})`. */
+    inline void move(ConstNodeRef const& after)
     {
         _C4RV();
         m_tree->move(m_id, after.m_id);
     }
 
-    /** move the node to a different parent, which may belong to a different
-     * tree. When this is the case, then this node's tree pointer is reset to
-     * the tree of the parent node. */
-    inline void move(NodeRef const parent, NodeRef const after)
+    /** move the node to a different @p parent (which may belong to a
+     * different tree), placing it after @p after. When the
+     * destination parent is in a new tree, then this node's tree
+     * pointer is reset to the tree of the parent node. */
+    inline void move(NodeRef const& parent, ConstNodeRef const& after)
     {
         _C4RV();
-        RYML_ASSERT(parent.m_tree == after.m_tree);
         if(parent.m_tree == m_tree)
         {
             m_tree->move(m_id, parent.m_id, after.m_id);
@@ -19220,10 +21467,28 @@ public:
         }
     }
 
-    inline NodeRef duplicate(NodeRef const parent, NodeRef const after) const
+    /** duplicate the current node somewhere within its parent, and
+     * place it after the node @p after. To place into the first
+     * position of the parent, simply pass an empty or
+     * default-constructed reference like this: `n.move({})`. */
+    inline NodeRef duplicate(ConstNodeRef const& after) const
     {
         _C4RV();
-        RYML_ASSERT(parent.m_tree == after.m_tree);
+        RYML_ASSERT(m_tree == after.m_tree || after.m_id == NONE);
+        size_t dup = m_tree->duplicate(m_id, m_tree->parent(m_id), after.m_id);
+        NodeRef r(m_tree, dup);
+        return r;
+    }
+
+    /** duplicate the current node somewhere into a different @p parent
+     * (possibly from a different tree), and place it after the node
+     * @p after. To place into the first position of the parent,
+     * simply pass an empty or default-constructed reference like
+     * this: `n.move({})`. */
+    inline NodeRef duplicate(NodeRef const& parent, ConstNodeRef const& after) const
+    {
+        _C4RV();
+        RYML_ASSERT(parent.m_tree == after.m_tree || after.m_id == NONE);
         if(parent.m_tree == m_tree)
         {
             size_t dup = m_tree->duplicate(m_id, parent.m_id, after.m_id);
@@ -19238,7 +21503,7 @@ public:
         }
     }
 
-    inline void duplicate_children(NodeRef const parent, NodeRef const after) const
+    inline void duplicate_children(NodeRef const& parent, ConstNodeRef const& after) const
     {
         _C4RV();
         RYML_ASSERT(parent.m_tree == after.m_tree);
@@ -19252,97 +21517,44 @@ public:
         }
     }
 
-private:
-
-    template<class Nd>
-    struct child_iterator
-    {
-        Tree * m_tree;
-        size_t m_child_id;
-
-        using value_type = NodeRef;
-
-        child_iterator(Tree * t, size_t id) : m_tree(t), m_child_id(id) {}
-
-        child_iterator& operator++ () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->next_sibling(m_child_id); return *this; }
-        child_iterator& operator-- () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->prev_sibling(m_child_id); return *this; }
-
-        Nd operator*  () const { return Nd(m_tree, m_child_id); }
-        Nd operator-> () const { return Nd(m_tree, m_child_id); }
-
-        bool operator!= (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id != that.m_child_id; }
-        bool operator== (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id == that.m_child_id; }
-    };
-
-public:
-
-    using       iterator = child_iterator<      NodeRef>;
-    using const_iterator = child_iterator<const NodeRef>;
-
-    inline iterator begin() { return iterator(m_tree, m_tree->first_child(m_id)); }
-    inline iterator end  () { return iterator(m_tree, NONE); }
-
-    inline const_iterator begin() const { return const_iterator(m_tree, m_tree->first_child(m_id)); }
-    inline const_iterator end  () const { return const_iterator(m_tree, NONE); }
-
-private:
-
-    template<class Nd>
-    struct children_view_
-    {
-        using n_iterator = child_iterator<Nd>;
-
-        n_iterator b, e;
-
-        inline children_view_(n_iterator const& b_, n_iterator const& e_) : b(b_), e(e_) {}
-
-        inline n_iterator begin() const { return b; }
-        inline n_iterator end  () const { return e; }
-    };
-
-public:
+    /** @} */
 
-    using       children_view = children_view_<      NodeRef>;
-    using const_children_view = children_view_<const NodeRef>;
+#undef _C4RV
+};
 
-          children_view children()       { return       children_view(begin(), end()); }
-    const_children_view children() const { return const_children_view(begin(), end()); }
 
-    #if defined(__clang__)
-    #   pragma clang diagnostic push
-    #   pragma clang diagnostic ignored "-Wnull-dereference"
-    #elif defined(__GNUC__)
-    #   pragma GCC diagnostic push
-    #   if __GNUC__ >= 6
-    #       pragma GCC diagnostic ignored "-Wnull-dereference"
-    #   endif
-    #endif
+//-----------------------------------------------------------------------------
 
-          children_view siblings()       { if(is_root()) { return       children_view(end(), end()); } else { size_t p = get()->m_parent; return       children_view(iterator(m_tree, m_tree->get(p)->m_first_child), iterator(m_tree, NONE)); } }
-    const_children_view siblings() const { if(is_root()) { return const_children_view(end(), end()); } else { size_t p = get()->m_parent; return const_children_view(const_iterator(m_tree, m_tree->get(p)->m_first_child), const_iterator(m_tree, NONE)); } }
+inline ConstNodeRef::ConstNodeRef(NodeRef const& that)
+    : m_tree(that.m_tree)
+    , m_id(!that.is_seed() ? that.id() : NONE)
+{
+}
 
-    #if defined(__clang__)
-    #   pragma clang diagnostic pop
-    #elif defined(__GNUC__)
-    #   pragma GCC diagnostic pop
-    #endif
+inline ConstNodeRef::ConstNodeRef(NodeRef && that)
+    : m_tree(that.m_tree)
+    , m_id(!that.is_seed() ? that.id() : NONE)
+{
+}
 
-public:
 
-    /** visit every child node calling fn(node) */
-    template<class Visitor> bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true);
-    /** visit every child node calling fn(node) */
-    template<class Visitor> bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) const;
+inline ConstNodeRef& ConstNodeRef::operator= (NodeRef const& that)
+{
+    m_tree = (that.m_tree);
+    m_id = (!that.is_seed() ? that.id() : NONE);
+    return *this;
+}
 
-    /** visit every child node calling fn(node, level) */
-    template<class Visitor> bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true);
-    /** visit every child node calling fn(node, level) */
-    template<class Visitor> bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) const;
+inline ConstNodeRef& ConstNodeRef::operator= (NodeRef && that)
+{
+    m_tree = (that.m_tree);
+    m_id = (!that.is_seed() ? that.id() : NONE);
+    return *this;
+}
 
-#undef _C4RV
-};
 
 //-----------------------------------------------------------------------------
+
 template<class T>
 inline void write(NodeRef *n, T const& v)
 {
@@ -19355,82 +21567,27 @@ inline read(NodeRef const& n, T *v)
 {
     return from_chars(n.val(), v);
 }
-
 template<class T>
-typename std::enable_if< std::is_floating_point<T>::value, bool>::type
-inline read(NodeRef const& n, T *v)
+typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type
+inline read(ConstNodeRef const& n, T *v)
 {
-    return from_chars_float(n.val(), v);
+    return from_chars(n.val(), v);
 }
 
-
-//-----------------------------------------------------------------------------
-template<class Visitor>
-bool NodeRef::visit(Visitor fn, size_t indentation_level, bool skip_root)
+template<class T>
+typename std::enable_if<std::is_floating_point<T>::value, bool>::type
+inline read(NodeRef const& n, T *v)
 {
-    return const_cast<NodeRef const*>(this)->visit(fn, indentation_level, skip_root);
+    return from_chars_float(n.val(), v);
 }
-
-template<class Visitor>
-bool NodeRef::visit(Visitor fn, size_t indentation_level, bool skip_root) const
+template<class T>
+typename std::enable_if<std::is_floating_point<T>::value, bool>::type
+inline read(ConstNodeRef const& n, T *v)
 {
-    size_t increment = 0;
-    if( ! (is_root() && skip_root))
-    {
-        if(fn(this, indentation_level))
-        {
-            return true;
-        }
-        ++increment;
-    }
-    if(has_children())
-    {
-        for(auto ch : children())
-        {
-            if(ch.visit(fn, indentation_level + increment)) // no need to forward skip_root as it won't be root
-            {
-                return true;
-            }
-        }
-    }
-    return false;
+    return from_chars_float(n.val(), v);
 }
 
 
-template<class Visitor>
-bool NodeRef::visit_stacked(Visitor fn, size_t indentation_level, bool skip_root)
-{
-    return const_cast< NodeRef const* >(this)->visit_stacked(fn, indentation_level, skip_root);
-}
-
-template<class Visitor>
-bool NodeRef::visit_stacked(Visitor fn, size_t indentation_level, bool skip_root) const
-{
-    size_t increment = 0;
-    if( ! (is_root() && skip_root))
-    {
-        if(fn(this, indentation_level))
-        {
-            return true;
-        }
-        ++increment;
-    }
-    if(has_children())
-    {
-        fn.push(this, indentation_level);
-        for(auto ch : children())
-        {
-            if(ch.visit(fn, indentation_level + increment)) // no need to forward skip_root as it won't be root
-            {
-                fn.pop(this, indentation_level);
-                return true;
-            }
-        }
-        fn.pop(this, indentation_level);
-    }
-    return false;
-}
-
 } // namespace yml
 } // namespace c4
 
@@ -19881,6 +22038,20 @@ inline void __c4presc(const char *s, size_t len)
 #include "./node.hpp"
 #endif
 
+
+#define RYML_DEPRECATE_EMIT                                             \
+    RYML_DEPRECATED("use emit_yaml() instead. See https://github.com/biojppm/rapidyaml/issues/120")
+#ifdef emit
+#error "emit is defined, likely from a Qt include. This will cause a compilation error. See https://github.com/biojppm/rapidyaml/issues/120"
+#endif
+#define RYML_DEPRECATE_EMITRS                                           \
+    RYML_DEPRECATED("use emitrs_yaml() instead. See https://github.com/biojppm/rapidyaml/issues/120")
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
 namespace c4 {
 namespace yml {
 
@@ -19904,7 +22075,7 @@ struct as_json
     size_t node;
     as_json(Tree const& t) : tree(&t), node(t.empty() ? NONE : t.root_id()) {}
     as_json(Tree const& t, size_t id) : tree(&t), node(id) {}
-    as_json(NodeRef const& n) : tree(n.tree()), node(n.id()) {}
+    as_json(ConstNodeRef const& n) : tree(n.tree()), node(n.id()) {}
 };
 
 
@@ -19928,11 +22099,11 @@ public:
      *
      * When writing to a file, the returned substr will be null, but its
      * length will be set to the number of bytes written. */
-    substr emit(EmitType_e type, Tree const& t, size_t id, bool error_on_excess);
+    substr emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess);
     /** emit starting at the root node */
-    substr emit(EmitType_e type, Tree const& t, bool error_on_excess=true);
+    substr emit_as(EmitType_e type, Tree const& t, bool error_on_excess=true);
     /** emit the given node */
-    substr emit(EmitType_e type, NodeRef const& n, bool error_on_excess=true);
+    substr emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess=true);
 
 private:
 
@@ -19988,27 +22159,36 @@ private:
 
 /** emit YAML to the given file. A null file defaults to stdout.
  * Return the number of bytes written. */
-inline size_t emit(Tree const& t, size_t id, FILE *f)
+inline size_t emit_yaml(Tree const& t, size_t id, FILE *f)
 {
     EmitterFile em(f);
-    return em.emit(EMIT_YAML, t, id, /*error_on_excess*/true).len;
+    return em.emit_as(EMIT_YAML, t, id, /*error_on_excess*/true).len;
 }
+RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, size_t id, FILE *f)
+{
+    return emit_yaml(t, id, f);
+}
+
 /** emit JSON to the given file. A null file defaults to stdout.
  * Return the number of bytes written. */
 inline size_t emit_json(Tree const& t, size_t id, FILE *f)
 {
     EmitterFile em(f);
-    return em.emit(EMIT_JSON, t, id, /*error_on_excess*/true).len;
+    return em.emit_as(EMIT_JSON, t, id, /*error_on_excess*/true).len;
 }
 
 
 /** emit YAML to the given file. A null file defaults to stdout.
  * Return the number of bytes written.
  * @overload */
-inline size_t emit(Tree const& t, FILE *f=nullptr)
+inline size_t emit_yaml(Tree const& t, FILE *f=nullptr)
 {
     EmitterFile em(f);
-    return em.emit(EMIT_YAML, t, /*error_on_excess*/true).len;
+    return em.emit_as(EMIT_YAML, t, /*error_on_excess*/true).len;
+}
+RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, FILE *f=nullptr)
+{
+    return emit_yaml(t, f);
 }
 
 /** emit JSON to the given file. A null file defaults to stdout.
@@ -20017,26 +22197,30 @@ inline size_t emit(Tree const& t, FILE *f=nullptr)
 inline size_t emit_json(Tree const& t, FILE *f=nullptr)
 {
     EmitterFile em(f);
-    return em.emit(EMIT_JSON, t, /*error_on_excess*/true).len;
+    return em.emit_as(EMIT_JSON, t, /*error_on_excess*/true).len;
 }
 
 
 /** emit YAML to the given file. A null file defaults to stdout.
  * Return the number of bytes written.
  * @overload */
-inline size_t emit(NodeRef const& r, FILE *f=nullptr)
+inline size_t emit_yaml(ConstNodeRef const& r, FILE *f=nullptr)
 {
     EmitterFile em(f);
-    return em.emit(EMIT_YAML, r, /*error_on_excess*/true).len;
+    return em.emit_as(EMIT_YAML, r, /*error_on_excess*/true).len;
+}
+RYML_DEPRECATE_EMIT inline size_t emit(ConstNodeRef const& r, FILE *f=nullptr)
+{
+    return emit_yaml(r, f);
 }
 
 /** emit JSON to the given file. A null file defaults to stdout.
  * Return the number of bytes written.
  * @overload */
-inline size_t emit_json(NodeRef const& r, FILE *f=nullptr)
+inline size_t emit_json(ConstNodeRef const& r, FILE *f=nullptr)
 {
     EmitterFile em(f);
-    return em.emit(EMIT_JSON, r, /*error_on_excess*/true).len;
+    return em.emit_as(EMIT_JSON, r, /*error_on_excess*/true).len;
 }
 
 
@@ -20047,17 +22231,17 @@ template<class OStream>
 inline OStream& operator<< (OStream& s, Tree const& t)
 {
     EmitterOStream<OStream> em(s);
-    em.emit(EMIT_YAML, t);
+    em.emit_as(EMIT_YAML, t);
     return s;
 }
 
 /** emit YAML to an STL-like ostream
  * @overload */
 template<class OStream>
-inline OStream& operator<< (OStream& s, NodeRef const& n)
+inline OStream& operator<< (OStream& s, ConstNodeRef const& n)
 {
     EmitterOStream<OStream> em(s);
-    em.emit(EMIT_YAML, n);
+    em.emit_as(EMIT_YAML, n);
     return s;
 }
 
@@ -20066,7 +22250,7 @@ template<class OStream>
 inline OStream& operator<< (OStream& s, as_json const& j)
 {
     EmitterOStream<OStream> em(s);
-    em.emit(EMIT_JSON, *j.tree, j.node, true);
+    em.emit_as(EMIT_JSON, *j.tree, j.node, true);
     return s;
 }
 
@@ -20077,10 +22261,14 @@ inline OStream& operator<< (OStream& s, as_json const& j)
 /** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML.
  * @param error_on_excess Raise an error if the space in the buffer is insufficient.
  * @overload */
-inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=true)
+inline substr emit_yaml(Tree const& t, size_t id, substr buf, bool error_on_excess=true)
 {
     EmitterBuf em(buf);
-    return em.emit(EMIT_YAML, t, id, error_on_excess);
+    return em.emit_as(EMIT_YAML, t, id, error_on_excess);
+}
+RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=true)
+{
+    return emit_yaml(t, id, buf, error_on_excess);
 }
 
 /** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON.
@@ -20089,17 +22277,21 @@ inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=tr
 inline substr emit_json(Tree const& t, size_t id, substr buf, bool error_on_excess=true)
 {
     EmitterBuf em(buf);
-    return em.emit(EMIT_JSON, t, id, error_on_excess);
+    return em.emit_as(EMIT_JSON, t, id, error_on_excess);
 }
 
 
 /** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML.
  * @param error_on_excess Raise an error if the space in the buffer is insufficient.
  * @overload */
-inline substr emit(Tree const& t, substr buf, bool error_on_excess=true)
+inline substr emit_yaml(Tree const& t, substr buf, bool error_on_excess=true)
 {
     EmitterBuf em(buf);
-    return em.emit(EMIT_YAML, t, error_on_excess);
+    return em.emit_as(EMIT_YAML, t, error_on_excess);
+}
+RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, substr buf, bool error_on_excess=true)
+{
+    return emit_yaml(t, buf, error_on_excess);
 }
 
 /** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON.
@@ -20108,7 +22300,7 @@ inline substr emit(Tree const& t, substr buf, bool error_on_excess=true)
 inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true)
 {
     EmitterBuf em(buf);
-    return em.emit(EMIT_JSON, t, error_on_excess);
+    return em.emit_as(EMIT_JSON, t, error_on_excess);
 }
 
 
@@ -20116,20 +22308,24 @@ inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true)
  * @param error_on_excess Raise an error if the space in the buffer is insufficient.
  * @overload
  */
-inline substr emit(NodeRef const& r, substr buf, bool error_on_excess=true)
+inline substr emit_yaml(ConstNodeRef const& r, substr buf, bool error_on_excess=true)
 {
     EmitterBuf em(buf);
-    return em.emit(EMIT_YAML, r, error_on_excess);
+    return em.emit_as(EMIT_YAML, r, error_on_excess);
+}
+RYML_DEPRECATE_EMIT inline substr emit(ConstNodeRef const& r, substr buf, bool error_on_excess=true)
+{
+    return emit_yaml(r, buf, error_on_excess);
 }
 
 /** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON.
  * @param error_on_excess Raise an error if the space in the buffer is insufficient.
  * @overload
  */
-inline substr emit_json(NodeRef const& r, substr buf, bool error_on_excess=true)
+inline substr emit_json(ConstNodeRef const& r, substr buf, bool error_on_excess=true)
 {
     EmitterBuf em(buf);
-    return em.emit(EMIT_JSON, r, error_on_excess);
+    return em.emit_as(EMIT_JSON, r, error_on_excess);
 }
 
 
@@ -20138,18 +22334,23 @@ inline substr emit_json(NodeRef const& r, substr buf, bool error_on_excess=true)
 /** emit+resize: emit YAML to the given std::string/std::vector-like
  * container, resizing it as needed to fit the emitted YAML. */
 template<class CharOwningContainer>
-substr emitrs(Tree const& t, size_t id, CharOwningContainer * cont)
+substr emitrs_yaml(Tree const& t, size_t id, CharOwningContainer * cont)
 {
     substr buf = to_substr(*cont);
-    substr ret = emit(t, id, buf, /*error_on_excess*/false);
+    substr ret = emit_yaml(t, id, buf, /*error_on_excess*/false);
     if(ret.str == nullptr && ret.len > 0)
     {
         cont->resize(ret.len);
         buf = to_substr(*cont);
-        ret = emit(t, id, buf, /*error_on_excess*/true);
+        ret = emit_yaml(t, id, buf, /*error_on_excess*/true);
     }
     return ret;
 }
+template<class CharOwningContainer>
+RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, size_t id, CharOwningContainer * cont)
+{
+    return emitrs_yaml(t, id, cont);
+}
 
 /** emit+resize: emit JSON to the given std::string/std::vector-like
  * container, resizing it as needed to fit the emitted JSON. */
@@ -20171,15 +22372,22 @@ substr emitrs_json(Tree const& t, size_t id, CharOwningContainer * cont)
 /** emit+resize: emit YAML to the given std::string/std::vector-like
  * container, resizing it as needed to fit the emitted YAML. */
 template<class CharOwningContainer>
-CharOwningContainer emitrs(Tree const& t, size_t id)
+CharOwningContainer emitrs_yaml(Tree const& t, size_t id)
 {
     CharOwningContainer c;
-    emitrs(t, id, &c);
+    emitrs_yaml(t, id, &c);
+    return c;
+}
+template<class CharOwningContainer>
+RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t, size_t id)
+{
+    CharOwningContainer c;
+    emitrs_yaml(t, id, &c);
     return c;
 }
 
-/** emit+resize: emit JSON to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted JSON. */
+/** emit+resize: emit JSON to the given std::string/std::vector-like
+ * container, resizing it as needed to fit the emitted JSON. */
 template<class CharOwningContainer>
 CharOwningContainer emitrs_json(Tree const& t, size_t id)
 {
@@ -20189,18 +22397,23 @@ CharOwningContainer emitrs_json(Tree const& t, size_t id)
 }
 
 
-/** emit+resize: YAML to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted YAML. */
+/** emit+resize: YAML to the given std::string/std::vector-like
+ * container, resizing it as needed to fit the emitted YAML. */
 template<class CharOwningContainer>
-substr emitrs(Tree const& t, CharOwningContainer * cont)
+substr emitrs_yaml(Tree const& t, CharOwningContainer * cont)
 {
     if(t.empty())
         return {};
-    return emitrs(t, t.root_id(), cont);
+    return emitrs_yaml(t, t.root_id(), cont);
+}
+template<class CharOwningContainer>
+RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, CharOwningContainer * cont)
+{
+    return emitrs_yaml(t, cont);
 }
 
-/** emit+resize: JSON to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted JSON. */
+/** emit+resize: JSON to the given std::string/std::vector-like
+ * container, resizing it as needed to fit the emitted JSON. */
 template<class CharOwningContainer>
 substr emitrs_json(Tree const& t, CharOwningContainer * cont)
 {
@@ -20213,14 +22426,19 @@ substr emitrs_json(Tree const& t, CharOwningContainer * cont)
 /** emit+resize: YAML to the given std::string/std::vector-like container,
  * resizing it as needed to fit the emitted YAML. */
 template<class CharOwningContainer>
-CharOwningContainer emitrs(Tree const& t)
+CharOwningContainer emitrs_yaml(Tree const& t)
 {
     CharOwningContainer c;
     if(t.empty())
         return c;
-    emitrs(t, t.root_id(), &c);
+    emitrs_yaml(t, t.root_id(), &c);
     return c;
 }
+template<class CharOwningContainer>
+RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t)
+{
+    return emitrs_yaml<CharOwningContainer>(t);
+}
 
 /** emit+resize: JSON to the given std::string/std::vector-like container,
  * resizing it as needed to fit the emitted JSON. */
@@ -20238,16 +22456,21 @@ CharOwningContainer emitrs_json(Tree const& t)
 /** emit+resize: YAML to the given std::string/std::vector-like container,
  * resizing it as needed to fit the emitted YAML. */
 template<class CharOwningContainer>
-substr emitrs(NodeRef const& n, CharOwningContainer * cont)
+substr emitrs_yaml(ConstNodeRef const& n, CharOwningContainer * cont)
 {
     _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
-    return emitrs(*n.tree(), n.id(), cont);
+    return emitrs_yaml(*n.tree(), n.id(), cont);
+}
+template<class CharOwningContainer>
+RYML_DEPRECATE_EMITRS substr emitrs(ConstNodeRef const& n, CharOwningContainer * cont)
+{
+    return emitrs_yaml(n, cont);
 }
 
 /** emit+resize: JSON to the given std::string/std::vector-like container,
  * resizing it as needed to fit the emitted JSON. */
 template<class CharOwningContainer>
-substr emitrs_json(NodeRef const& n, CharOwningContainer * cont)
+substr emitrs_json(ConstNodeRef const& n, CharOwningContainer * cont)
 {
     _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
     return emitrs_json(*n.tree(), n.id(), cont);
@@ -20257,18 +22480,23 @@ substr emitrs_json(NodeRef const& n, CharOwningContainer * cont)
 /** emit+resize: YAML to the given std::string/std::vector-like container,
  * resizing it as needed to fit the emitted YAML. */
 template<class CharOwningContainer>
-CharOwningContainer emitrs(NodeRef const& n)
+CharOwningContainer emitrs_yaml(ConstNodeRef const& n)
 {
     _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
     CharOwningContainer c;
-    emitrs(*n.tree(), n.id(), &c);
+    emitrs_yaml(*n.tree(), n.id(), &c);
     return c;
 }
+template<class CharOwningContainer>
+RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(ConstNodeRef const& n)
+{
+    return emitrs_yaml<CharOwningContainer>(n);
+}
 
 /** emit+resize: JSON to the given std::string/std::vector-like container,
  * resizing it as needed to fit the emitted JSON. */
 template<class CharOwningContainer>
-CharOwningContainer emitrs_json(NodeRef const& n)
+CharOwningContainer emitrs_json(ConstNodeRef const& n)
 {
     _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
     CharOwningContainer c;
@@ -20279,6 +22507,9 @@ CharOwningContainer emitrs_json(NodeRef const& n)
 } // namespace yml
 } // namespace c4
 
+#undef RYML_DEPRECATE_EMIT
+#undef RYML_DEPRECATE_EMITRS
+
 // amalgamate: removed include of
 // https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp
 //#include "c4/yml/emit.def.hpp"
@@ -20318,7 +22549,7 @@ namespace c4 {
 namespace yml {
 
 template<class Writer>
-substr Emitter<Writer>::emit(EmitType_e type, Tree const& t, size_t id, bool error_on_excess)
+substr Emitter<Writer>::emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess)
 {
     if(t.empty())
     {
@@ -20337,18 +22568,18 @@ substr Emitter<Writer>::emit(EmitType_e type, Tree const& t, size_t id, bool err
 }
 
 template<class Writer>
-substr Emitter<Writer>::emit(EmitType_e type, Tree const& t, bool error_on_excess)
+substr Emitter<Writer>::emit_as(EmitType_e type, Tree const& t, bool error_on_excess)
 {
     if(t.empty())
         return {};
-    return emit(type, t, t.root_id(), error_on_excess);
+    return this->emit_as(type, t, t.root_id(), error_on_excess);
 }
 
 template<class Writer>
-substr Emitter<Writer>::emit(EmitType_e type, NodeRef const& n, bool error_on_excess)
+substr Emitter<Writer>::emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess)
 {
     _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
-    return emit(type, *n.tree(), n.id(), error_on_excess);
+    return this->emit_as(type, *n.tree(), n.id(), error_on_excess);
 }
 
 
@@ -21076,6 +23307,13 @@ void Emitter<Writer>::_write_scalar_dquo(csubstr s, size_t ilevel)
                 pos = i;
             }
         }
+        else if(C4_UNLIKELY(curr == '\r'))
+        {
+            csubstr sub = s.range(pos, i);
+            this->Writer::_do_write(sub);  // write everything up to (excluding) this char
+            this->Writer::_do_write("\\r"); // write the escaped char
+            pos = i+1;
+        }
     }
     // write missing characters at the end of the string
     if(pos < s.len)
@@ -21121,7 +23359,7 @@ void Emitter<Writer>::_write_scalar(csubstr s, bool was_quoted)
     // was evaluated as true even if s.str was actually a nullptr (!!!)
     if(s.len == size_t(0))
     {
-        if(was_quoted)
+        if(was_quoted || s.str != nullptr)
             this->Writer::_do_write("''");
         return;
     }
@@ -21134,10 +23372,10 @@ void Emitter<Writer>::_write_scalar(csubstr s, bool was_quoted)
             &&
             (
                 // has leading whitespace
-                s.begins_with_any(" \n\t\r")
-                ||
-                // looks like reference or anchor or would be treated as a directive
-                s.begins_with_any("*&%")
+                // looks like reference or anchor
+                // would be treated as a directive
+                // see https://www.yaml.info/learn/quote.html#noplain
+                s.begins_with_any(" \n\t\r*&%@`")
                 ||
                 s.begins_with("<<")
                 ||
@@ -21178,16 +23416,27 @@ void Emitter<Writer>::_write_scalar(csubstr s, bool was_quoted)
     }
 }
 template<class Writer>
-void Emitter<Writer>::_write_scalar_json(csubstr s, bool as_key, bool was_quoted)
-{
-    if(was_quoted)
-    {
-        this->Writer::_do_write('"');
-        this->Writer::_do_write(s);
-        this->Writer::_do_write('"');
-    }
-    // json only allows strings as keys
-    else if(!as_key && (s.is_number() || s == "true" || s == "null" || s == "false"))
+void Emitter<Writer>::_write_scalar_json(csubstr s, bool as_key, bool use_quotes)
+{
+    if((!use_quotes)
+       // json keys require quotes
+       && (!as_key)
+       && (
+           // do not quote special cases
+           (s == "true" || s == "false" || s == "null")
+           || (
+               // do not quote numbers
+               (s.is_number()
+                && (
+                    // quote integral numbers if they have a leading 0
+                    // https://github.com/biojppm/rapidyaml/issues/291
+                    (!(s.len > 1 && s.begins_with('0')))
+                    // do not quote reals with leading 0
+                    // https://github.com/biojppm/rapidyaml/issues/313
+                    || (s.find('.') != csubstr::npos) ))
+               )
+           )
+        )
     {
         this->Writer::_do_write(s);
     }
@@ -21197,26 +23446,43 @@ void Emitter<Writer>::_write_scalar_json(csubstr s, bool as_key, bool was_quoted
         this->Writer::_do_write('"');
         for(size_t i = 0; i < s.len; ++i)
         {
-            switch (s[i])
+            switch(s.str[i])
             {
-                case '"':
-                case '\n': {
-                    if(i > 0)
-                    {
-                        csubstr sub = s.range(pos, i);
-                        this->Writer::_do_write(sub);
-                    }
-                    pos = i + 1;
-                    switch (s[i]) {
-                        case '"':
-                            this->Writer::_do_write("\\\"");
-                            break;
-                        case '\n':
-                            this->Writer::_do_write("\\n");
-                            break;
-                    }
-                    break;
-                }
+            case '"':
+              this->Writer ::_do_write(s.range(pos, i));
+              this->Writer ::_do_write("\\\"");
+              pos = i + 1;
+              break;
+            case '\n':
+              this->Writer ::_do_write(s.range(pos, i));
+              this->Writer ::_do_write("\\n");
+              pos = i + 1;
+              break;
+            case '\t':
+              this->Writer ::_do_write(s.range(pos, i));
+              this->Writer ::_do_write("\\t");
+              pos = i + 1;
+              break;
+            case '\\':
+              this->Writer ::_do_write(s.range(pos, i));
+              this->Writer ::_do_write("\\\\");
+              pos = i + 1;
+              break;
+            case '\r':
+              this->Writer ::_do_write(s.range(pos, i));
+              this->Writer ::_do_write("\\r");
+              pos = i + 1;
+              break;
+            case '\b':
+              this->Writer ::_do_write(s.range(pos, i));
+              this->Writer ::_do_write("\\b");
+              pos = i + 1;
+              break;
+            case '\f':
+              this->Writer ::_do_write(s.range(pos, i));
+              this->Writer ::_do_write("\\f");
+              pos = i + 1;
+              break;
             }
         }
         if(pos < s.len)
@@ -21579,6 +23845,36 @@ void stack<T, N>::_cb(Callbacks const& cb)
 namespace c4 {
 namespace yml {
 
+struct RYML_EXPORT ParserOptions
+{
+private:
+
+    typedef enum : uint32_t {
+        LOCATIONS = (1 << 0),
+        DEFAULTS = 0,
+    } Flags_e;
+
+    uint32_t flags = DEFAULTS;
+public:
+    ParserOptions() = default;
+
+    /** @name source location tracking */
+    /** @{ */
+
+    /** enable/disable source location tracking */
+    ParserOptions& locations(bool enabled)
+    {
+        if(enabled)
+            flags |= LOCATIONS;
+        else
+            flags &= ~LOCATIONS;
+        return *this;
+    }
+    bool locations() const { return (flags & LOCATIONS) != 0u; }
+
+    /** @} */
+};
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
@@ -21590,8 +23886,8 @@ public:
     /** @name construction and assignment */
     /** @{ */
 
-    Parser() : Parser(get_callbacks()) {}
-    Parser(Callbacks const& cb);
+    Parser(Callbacks const& cb, ParserOptions opts={});
+    Parser(ParserOptions opts={}) : Parser(get_callbacks(), opts) {}
     ~Parser();
 
     Parser(Parser &&);
@@ -21661,6 +23957,8 @@ public:
     size_t locations_capacity() const { return m_newline_offsets_capacity; }
     size_t filter_arena_capacity() const { return m_filter_arena.len; }
 
+    ParserOptions const& options() const { return m_options; }
+
     /** @} */
 
 public:
@@ -21724,7 +24022,7 @@ public:
     /** @{ */
 
     // READ THE NOTE ABOVE!
-    #define RYML_DONT_PARSE_SUBSTR_IN_ARENA "Do not pass a (mutable) substr to parse_in_arena(); if you have a substr, it should be parsed in place. Consider using parse_in_place() instead, or convert the buffer to csubstr prior to calling. This function is deliberately left undefined and will cause a compiler error."
+    #define RYML_DONT_PARSE_SUBSTR_IN_ARENA "Do not pass a (mutable) substr to parse_in_arena(); if you have a substr, it should be parsed in place. Consider using parse_in_place() instead, or convert the buffer to csubstr prior to calling. This function is deliberately left undefined and will cause a linker error."
     RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr csrc);
     RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t);
     RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t, size_t node_id);
@@ -21792,7 +24090,7 @@ public:
     /** Get the location of a node of the last tree to be parsed by this parser. */
     Location location(Tree const& tree, size_t node_id) const;
     /** Get the location of a node of the last tree to be parsed by this parser. */
-    Location location(NodeRef node) const;
+    Location location(ConstNodeRef node) const;
     /** Get the string starting at a particular location, to the end
      * of the parsed source buffer. */
     csubstr location_contents(Location const& loc) const;
@@ -21838,7 +24136,11 @@ private:
      * Will only be written to if this method returns true.
      * Will be set to true if the scanned scalar was quoted, by '', "", > or |.
      */
-    bool    _scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted);
+    bool    _scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted);
+    bool    _scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted);
+    bool    _scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted);
+    bool    _scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted);
+    bool    _scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted);
 
     csubstr _scan_comment();
     csubstr _scan_squot_scalar();
@@ -21908,9 +24210,9 @@ private:
     csubstr _consume_scalar();
     void  _move_scalar_from_top();
 
-    inline NodeData* _append_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_val({str, size_t(0)}); }
-    inline NodeData* _append_key_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_key_val({str, size_t(0)}); }
-    inline void      _store_scalar_null(const char *str) {  _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); _store_scalar({str, size_t(0)}, false); }
+    inline NodeData* _append_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_val({nullptr, size_t(0)}); }
+    inline NodeData* _append_key_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_key_val({nullptr, size_t(0)}); }
+    inline void      _store_scalar_null(const char *str) {  _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); _store_scalar({nullptr, size_t(0)}, false); }
 
     void  _set_indentation(size_t behind);
     void  _save_indentation(size_t behind=0);
@@ -22068,11 +24370,13 @@ private:
     void _grow_filter_arena(size_t num_characters);
     substr _finish_filter_arena(substr dst, size_t pos);
 
-    void _prepare_locations() const;         // only changes mutable members
-    void _resize_locations(size_t sz) const; // only changes mutable members
-    void _mark_locations_dirty();
+    void _prepare_locations();
+    void _resize_locations(size_t sz);
     bool _locations_dirty() const;
 
+    bool _location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const;
+    bool _location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const;
+
 private:
 
     void _free();
@@ -22089,6 +24393,8 @@ private:
 
 private:
 
+    ParserOptions m_options;
+
     csubstr m_file;
      substr m_buf;
 
@@ -22113,10 +24419,10 @@ private:
 
     substr m_filter_arena;
 
-    mutable size_t *m_newline_offsets;
-    mutable size_t  m_newline_offsets_size;
-    mutable size_t  m_newline_offsets_capacity;
-    mutable csubstr m_newline_offsets_buf;
+    size_t *m_newline_offsets;
+    size_t  m_newline_offsets_size;
+    size_t  m_newline_offsets_capacity;
+    csubstr m_newline_offsets_buf;
 };
 
 
@@ -22258,7 +24564,7 @@ template<class K, class V, class Less, class Alloc>
 void write(c4::yml::NodeRef *n, std::map<K, V, Less, Alloc> const& m)
 {
     *n |= c4::yml::MAP;
-    for(auto const& p : m)
+    for(auto const& C4_RESTRICT p : m)
     {
         auto ch = n->append_child();
         ch << c4::yml::key(p.first);
@@ -22267,11 +24573,11 @@ void write(c4::yml::NodeRef *n, std::map<K, V, Less, Alloc> const& m)
 }
 
 template<class K, class V, class Less, class Alloc>
-bool read(c4::yml::NodeRef const& n, std::map<K, V, Less, Alloc> * m)
+bool read(c4::yml::ConstNodeRef const& n, std::map<K, V, Less, Alloc> * m)
 {
     K k{};
-    V v;
-    for(auto const ch : n)
+    V v{};
+    for(auto const& C4_RESTRICT ch : n)
     {
         ch >> c4::yml::key(k);
         ch >> v;
@@ -22352,24 +24658,37 @@ namespace yml {
 // in the data tree hierarchy (a SEQ node in ryml parlance).
 // So it should be serialized via write()/read().
 
+
 template<class V, class Alloc>
 void write(c4::yml::NodeRef *n, std::vector<V, Alloc> const& vec)
 {
     *n |= c4::yml::SEQ;
     for(auto const& v : vec)
-    {
         n->append_child() << v;
-    }
 }
 
 template<class V, class Alloc>
-bool read(c4::yml::NodeRef const& n, std::vector<V, Alloc> *vec)
+bool read(c4::yml::ConstNodeRef const& n, std::vector<V, Alloc> *vec)
 {
     vec->resize(n.num_children());
     size_t pos = 0;
     for(auto const ch : n)
-    {
         ch >> (*vec)[pos++];
+    return true;
+}
+
+/** specialization: std::vector<bool> uses std::vector<bool>::reference as
+ * the return value of its operator[]. */
+template<class Alloc>
+bool read(c4::yml::ConstNodeRef const& n, std::vector<bool, Alloc> *vec)
+{
+    vec->resize(n.num_children());
+    size_t pos = 0;
+    bool tmp;
+    for(auto const ch : n)
+    {
+        ch >> tmp;
+        (*vec)[pos++] = tmp;
     }
     return true;
 }
@@ -22450,7 +24769,7 @@ namespace c4 {
 namespace yml {
 
 namespace {
-thread_local Callbacks s_default_callbacks;
+Callbacks s_default_callbacks;
 } // anon namespace
 
 #ifndef RYML_NO_DEFAULT_CALLBACKS
@@ -22838,9 +25157,18 @@ NodeRef Tree::rootref()
 {
     return NodeRef(this, root_id());
 }
-NodeRef const Tree::rootref() const
+ConstNodeRef Tree::rootref() const
 {
-    return NodeRef(const_cast<Tree*>(this), root_id());
+    return ConstNodeRef(this, root_id());
+}
+
+ConstNodeRef Tree::crootref()
+{
+    return ConstNodeRef(this, root_id());
+}
+ConstNodeRef Tree::crootref() const
+{
+    return ConstNodeRef(this, root_id());
 }
 
 NodeRef Tree::ref(size_t id)
@@ -22848,17 +25176,28 @@ NodeRef Tree::ref(size_t id)
     _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size);
     return NodeRef(this, id);
 }
-NodeRef const Tree::ref(size_t id) const
+ConstNodeRef Tree::ref(size_t id) const
 {
     _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size);
-    return NodeRef(const_cast<Tree*>(this), id);
+    return ConstNodeRef(this, id);
+}
+
+ConstNodeRef Tree::cref(size_t id)
+{
+    _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size);
+    return ConstNodeRef(this, id);
+}
+ConstNodeRef Tree::cref(size_t id) const
+{
+    _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size);
+    return ConstNodeRef(this, id);
 }
 
 NodeRef Tree::operator[] (csubstr key)
 {
     return rootref()[key];
 }
-NodeRef const Tree::operator[] (csubstr key) const
+ConstNodeRef Tree::operator[] (csubstr key) const
 {
     return rootref()[key];
 }
@@ -22867,7 +25206,7 @@ NodeRef Tree::operator[] (size_t i)
 {
     return rootref()[i];
 }
-NodeRef const Tree::operator[] (size_t i) const
+ConstNodeRef Tree::operator[] (size_t i) const
 {
     return rootref()[i];
 }
@@ -22876,9 +25215,9 @@ NodeRef Tree::docref(size_t i)
 {
     return ref(doc(i));
 }
-NodeRef const Tree::docref(size_t i) const
+ConstNodeRef Tree::docref(size_t i) const
 {
-    return ref(doc(i));
+    return cref(doc(i));
 }
 
 
@@ -23563,8 +25902,9 @@ void Tree::_swap_props(size_t n_, size_t m_)
 void Tree::move(size_t node, size_t after)
 {
     _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    _RYML_CB_ASSERT(m_callbacks, node != after);
     _RYML_CB_ASSERT(m_callbacks,  ! is_root(node));
-    _RYML_CB_ASSERT(m_callbacks, has_sibling(node, after) && has_sibling(after, node));
+    _RYML_CB_ASSERT(m_callbacks, (after == NONE) || (has_sibling(node, after) && has_sibling(after, node)));
 
     _rem_hierarchy(node);
     _set_hierarchy(node, parent(node), after);
@@ -23575,7 +25915,10 @@ void Tree::move(size_t node, size_t after)
 void Tree::move(size_t node, size_t new_parent, size_t after)
 {
     _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    _RYML_CB_ASSERT(m_callbacks, node != after);
     _RYML_CB_ASSERT(m_callbacks, new_parent != NONE);
+    _RYML_CB_ASSERT(m_callbacks, new_parent != node);
+    _RYML_CB_ASSERT(m_callbacks, new_parent != after);
     _RYML_CB_ASSERT(m_callbacks,  ! is_root(node));
 
     _rem_hierarchy(node);
@@ -23584,8 +25927,10 @@ void Tree::move(size_t node, size_t new_parent, size_t after)
 
 size_t Tree::move(Tree *src, size_t node, size_t new_parent, size_t after)
 {
+    _RYML_CB_ASSERT(m_callbacks, src != nullptr);
     _RYML_CB_ASSERT(m_callbacks, node != NONE);
     _RYML_CB_ASSERT(m_callbacks, new_parent != NONE);
+    _RYML_CB_ASSERT(m_callbacks, new_parent != after);
 
     size_t dup = duplicate(src, node, new_parent, after);
     src->remove(node);
@@ -23786,15 +26131,17 @@ size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t pare
                     remove(rep);
                     prev = duplicate(src, i, parent, prev);
                 }
-                else if(after_pos == NONE || rep_pos >= after_pos)
+                else if(prev == NONE)
+                {
+                    // first iteration with prev = after = NONE and repetition
+                    prev = rep;
+                }
+                else if(rep != prev)
                 {
                     // rep is located after the node which will be inserted
                     // and overrides it. So move the rep into this node's place.
-                    if(rep != prev)
-                    {
-                        move(rep, prev);
-                        prev = rep;
-                    }
+                    move(rep, prev);
+                    prev = rep;
                 }
             } // there's a repetition
         }
@@ -24135,9 +26482,7 @@ size_t Tree::num_children(size_t node) const
 {
     size_t count = 0;
     for(size_t i = first_child(node); i != NONE; i = next_sibling(i))
-    {
         ++count;
-    }
     return count;
 }
 
@@ -24433,7 +26778,7 @@ void Tree::resolve_tags()
         return;
     size_t needed_size = _count_resolved_tags_size(this, root_id());
     if(needed_size)
-        reserve_arena(arena_pos() + needed_size);
+        reserve_arena(arena_size() + needed_size);
     _resolve_tags(this, root_id());
 }
 
@@ -24875,7 +27220,7 @@ void _parse_dump(DumpFn dumpfn, c4::csubstr fmt, Args&& ...args)
 
 bool _is_scalar_next__runk(csubstr s)
 {
-    return !(s.begins_with(": ") || s.begins_with_any("#,:{}[]%&") || s.begins_with("? ") || s == "-" || s.begins_with("- "));
+    return !(s.begins_with(": ") || s.begins_with_any("#,{}[]%&") || s.begins_with("? ") || s == "-" || s.begins_with("- ") || s.begins_with(":\"") || s.begins_with(":'"));
 }
 
 bool _is_scalar_next__rseq_rval(csubstr s)
@@ -24948,8 +27293,9 @@ Parser::~Parser()
     _clr();
 }
 
-Parser::Parser(Callbacks const& cb)
-    : m_file()
+Parser::Parser(Callbacks const& cb, ParserOptions opts)
+    : m_options(opts)
+    , m_file()
     , m_buf()
     , m_root_id(NONE)
     , m_tree()
@@ -24977,7 +27323,8 @@ Parser::Parser(Callbacks const& cb)
 }
 
 Parser::Parser(Parser &&that)
-    : m_file(that.m_file)
+    : m_options(that.m_options)
+    , m_file(that.m_file)
     , m_buf(that.m_buf)
     , m_root_id(that.m_root_id)
     , m_tree(that.m_tree)
@@ -25004,7 +27351,8 @@ Parser::Parser(Parser &&that)
 }
 
 Parser::Parser(Parser const& that)
-    : m_file(that.m_file)
+    : m_options(that.m_options)
+    , m_file(that.m_file)
     , m_buf(that.m_buf)
     , m_root_id(that.m_root_id)
     , m_tree(that.m_tree)
@@ -25043,6 +27391,7 @@ Parser::Parser(Parser const& that)
 Parser& Parser::operator=(Parser &&that)
 {
     _free();
+    m_options = (that.m_options);
     m_file = (that.m_file);
     m_buf = (that.m_buf);
     m_root_id = (that.m_root_id);
@@ -25072,6 +27421,7 @@ Parser& Parser::operator=(Parser &&that)
 Parser& Parser::operator=(Parser const& that)
 {
     _free();
+    m_options = (that.m_options);
     m_file = (that.m_file);
     m_buf = (that.m_buf);
     m_root_id = (that.m_root_id);
@@ -25103,6 +27453,7 @@ Parser& Parser::operator=(Parser const& that)
 
 void Parser::_clr()
 {
+    m_options = {};
     m_file = {};
     m_buf = {};
     m_root_id = {};
@@ -25167,7 +27518,10 @@ void Parser::_reset()
     m_val_anchor_indentation = 0;
     m_val_anchor.clear();
 
-    _mark_locations_dirty();
+    if(m_options.locations())
+    {
+        _prepare_locations();
+    }
 }
 
 //-----------------------------------------------------------------------------
@@ -25473,7 +27827,7 @@ bool Parser::_handle_unk()
 
         csubstr saved_scalar;
         bool is_quoted;
-        if(_scan_scalar(&saved_scalar, &is_quoted))
+        if(_scan_scalar_unk(&saved_scalar, &is_quoted))
         {
             rem = m_state->line_contents.rem;
             _c4dbgpf("... and there's also a scalar next! '{}'", saved_scalar);
@@ -25590,7 +27944,7 @@ bool Parser::_handle_unk()
         csubstr scalar;
         size_t indentation = m_state->line_contents.indentation; // save
         bool is_quoted;
-        if(_scan_scalar(&scalar, &is_quoted))
+        if(_scan_scalar_unk(&scalar, &is_quoted))
         {
             _c4dbgpf("got a {} scalar", is_quoted ? "quoted" : "");
             rem = m_state->line_contents.rem;
@@ -25714,7 +28068,7 @@ bool Parser::_handle_seq_flow()
     {
         _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));
         bool is_quoted;
-        if(_scan_scalar(&rem, &is_quoted))
+        if(_scan_scalar_seq_flow(&rem, &is_quoted))
         {
             _c4dbgp("it's a scalar");
             addrem_flags(RNXT, RVAL);
@@ -25858,7 +28212,6 @@ bool Parser::_handle_seq_blck()
         rem = _scan_comment();
         return true;
     }
-
     if(has_any(RNXT))
     {
         _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
@@ -25912,7 +28265,7 @@ bool Parser::_handle_seq_blck()
 
         csubstr s;
         bool is_quoted;
-        if(_scan_scalar(&s, &is_quoted)) // this also progresses the line
+        if(_scan_scalar_seq_blck(&s, &is_quoted)) // this also progresses the line
         {
             _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : "");
 
@@ -26171,7 +28524,7 @@ bool Parser::_handle_map_flow()
         _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
 
         bool is_quoted;
-        if(has_none(SSCL) && _scan_scalar(&rem, &is_quoted))
+        if(has_none(SSCL) && _scan_scalar_map_flow(&rem, &is_quoted))
         {
             _c4dbgp("it's a scalar");
             _store_scalar(rem, is_quoted);
@@ -26291,7 +28644,7 @@ bool Parser::_handle_map_flow()
         _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
         _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL));
         bool is_quoted;
-        if(_scan_scalar(&rem, &is_quoted))
+        if(_scan_scalar_map_flow(&rem, &is_quoted))
         {
             _c4dbgp("it's a scalar");
             addrem_flags(RNXT, RVAL|RKEY);
@@ -26375,7 +28728,7 @@ bool Parser::_handle_map_flow()
 //-----------------------------------------------------------------------------
 bool Parser::_handle_map_blck()
 {
-    _c4dbgpf("handle_map_impl: node_id={}  level={}", m_state->node_id, m_state->level);
+    _c4dbgpf("handle_map_blck: node_id={}  level={}", m_state->node_id, m_state->level);
     csubstr rem = m_state->line_contents.rem;
 
     _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP));
@@ -26397,16 +28750,19 @@ bool Parser::_handle_map_blck()
     }
 
     if(_handle_indentation())
+    {
+        _c4dbgp("indentation token");
         return true;
+    }
 
     if(has_any(RKEY))
     {
         _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));
         _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
 
-        _c4dbgp("read scalar?");
+        _c4dbgp("RMAP|RKEY read scalar?");
         bool is_quoted;
-        if(_scan_scalar(&rem, &is_quoted)) // this also progresses the line
+        if(_scan_scalar_map_blck(&rem, &is_quoted)) // this also progresses the line
         {
             _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : "");
             if(has_all(QMRK|SSCL))
@@ -26515,9 +28871,10 @@ bool Parser::_handle_map_blck()
         _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));
         _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
 
+        _c4dbgp("RMAP|RVAL read scalar?");
         csubstr s;
         bool is_quoted;
-        if(_scan_scalar(&s, &is_quoted)) // this also progresses the line
+        if(_scan_scalar_map_blck(&s, &is_quoted)) // this also progresses the line
         {
             _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : "");
 
@@ -26625,6 +28982,13 @@ bool Parser::_handle_map_blck()
             _start_new_doc(rem);
             return true;
         }
+        else if(rem.begins_with("..."))
+        {
+            _c4dbgp("end current document");
+            _end_stream();
+            _line_progressed(3);
+            return true;
+        }
         else
         {
             _c4err("parse error");
@@ -27098,9 +29462,16 @@ csubstr Parser::_slurp_doc_scalar()
     return s;
 }
 
+
 //-----------------------------------------------------------------------------
-bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
+
+bool Parser::_scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
 {
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  has_any(RSEQ));
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  has_any(RVAL));
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  ! has_any(RKEY));
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  ! has_any(FLOW));
+
     csubstr s = m_state->line_contents.rem;
     if(s.len == 0)
         return false;
@@ -27127,136 +29498,339 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
     else if(s.begins_with('|') || s.begins_with('>'))
     {
         *scalar = _scan_block();
-        *quoted = false;
+        *quoted = true;
         return true;
     }
     else if(has_any(RTOP) && _is_doc_sep(s))
     {
         return false;
     }
-    else if(has_any(RSEQ))
+
+    _c4dbgp("RSEQ|RVAL");
+    if( ! _is_scalar_next__rseq_rval(s))
+        return false;
+    _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t"))
+        return false;
+    )
+
+    if(s.ends_with(':'))
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks,  ! has_all(RKEY));
-        if(has_all(RVAL))
+        --s.len;
+    }
+    else
+    {
+        auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #");
+        if(first)
+            s.len = first.pos;
+    }
+    s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
+
+    if(s.empty())
+        return false;
+
+    m_state->scalar_col = m_state->line_contents.current_col(s);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str);
+    _line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len);
+
+    if(_at_line_end() && s != '~')
+    {
+        _c4dbgpf("at line end. curr='{}'", s);
+        s = _extend_scanned_scalar(s);
+    }
+
+    _c4dbgpf("scalar was '{}'", s);
+
+    *scalar = s;
+    *quoted = false;
+    return true;
+}
+
+bool Parser::_scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
+{
+    _c4dbgp("_scan_scalar_map_blck");
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  has_any(RMAP));
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  ! has_any(FLOW));
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  has_any(RKEY|RVAL));
+
+    csubstr s = m_state->line_contents.rem;
+    #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED
+    if(s.len == 0)
+        return false;
+    #endif
+    s = s.trim(" \t");
+    if(s.len == 0)
+        return false;
+
+    if(s.begins_with('\''))
+    {
+        _c4dbgp("got a ': scanning single-quoted scalar");
+        m_state->scalar_col = m_state->line_contents.current_col(s);
+        *scalar = _scan_squot_scalar();
+        *quoted = true;
+        return true;
+    }
+    else if(s.begins_with('"'))
+    {
+        _c4dbgp("got a \": scanning double-quoted scalar");
+        m_state->scalar_col = m_state->line_contents.current_col(s);
+        *scalar = _scan_dquot_scalar();
+        *quoted = true;
+        return true;
+    }
+    else if(s.begins_with('|') || s.begins_with('>'))
+    {
+        *scalar = _scan_block();
+        *quoted = true;
+        return true;
+    }
+    else if(has_any(RTOP) && _is_doc_sep(s))
+    {
+        return false;
+    }
+
+    if( ! _is_scalar_next__rmap(s))
+        return false;
+
+    size_t colon_token = s.find(": ");
+    if(colon_token == npos)
+    {
+        _RYML_WITH_OR_WITHOUT_TAB_TOKENS(
+            // with tab tokens
+            colon_token = s.find(":\t");
+            if(colon_token == npos)
+            {
+                _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);
+                colon_token = s.find(':');
+                if(colon_token != s.len-1)
+                    colon_token = npos;
+            }
+            ,
+            // without tab tokens
+            colon_token = s.find(':');
+            _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);
+            if(colon_token != s.len-1)
+                colon_token = npos;
+        )
+    }
+
+    if(has_all(RKEY))
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' '));
+        if(has_any(QMRK))
         {
-            _c4dbgp("RSEQ|RVAL");
-            if( ! _is_scalar_next__rseq_rval(s))
+            _c4dbgp("RMAP|RKEY|CPLX");
+            _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP));
+            if(s.begins_with("? ") || s == '?')
                 return false;
-            _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t"))
+            s = s.left_of(colon_token);
+            s = s.left_of(s.first_of("#"));
+            s = s.trimr(" \t");
+            if(s.begins_with("---"))
                 return false;
-            )
-            if(s.ends_with(':'))
-            {
-                --s.len;
-            }
-            else
+            else if(s.begins_with("..."))
+                return false;
+        }
+        else
+        {
+            _c4dbgp("RMAP|RKEY");
+            _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{'));
+            if(s.begins_with("? ") || s == '?')
+                return false;
+            s = s.left_of(colon_token);
+            s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
+            if(s.begins_with("---"))
             {
-                auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #");
-                if(first)
-                    s.len = first.pos;
+                return false;
             }
-            if(has_all(FLOW))
+            else if(s.begins_with("..."))
             {
-                _c4dbgp("RSEQ|RVAL|EXPL");
-                s = s.left_of(s.first_of(",]"));
+                return false;
             }
-            s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
+        }
+    }
+    else if(has_all(RVAL))
+    {
+        _c4dbgp("RMAP|RVAL");
+        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK));
+        if( ! _is_scalar_next__rmap_val(s))
+            return false;
+        _RYML_WITH_TAB_TOKENS(
+        else if(s.begins_with("-\t"))
+            return false;
+        )
+        _c4dbgp("RMAP|RVAL: scalar");
+        s = s.left_of(s.find(" #")); // is there a comment?
+        s = s.left_of(s.find("\t#")); // is there a comment?
+        s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
+        if(s.begins_with("---"))
+            return false;
+        #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED
+        else if(s.begins_with("..."))
+            return false;
+        #endif
+    }
+
+    if(s.empty())
+        return false;
+
+    m_state->scalar_col = m_state->line_contents.current_col(s);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str);
+    _line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len);
+
+    if(_at_line_end() && s != '~')
+    {
+        _c4dbgpf("at line end. curr='{}'", s);
+        s = _extend_scanned_scalar(s);
+    }
+
+    _c4dbgpf("scalar was '{}'", s);
+
+    *scalar = s;
+    *quoted = false;
+    return true;
+}
+
+bool Parser::_scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
+{
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  has_any(RSEQ));
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  has_any(FLOW));
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  has_any(RVAL));
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  ! has_any(RKEY));
+
+    csubstr s = m_state->line_contents.rem;
+    if(s.len == 0)
+        return false;
+    s = s.trim(" \t");
+    if(s.len == 0)
+        return false;
+
+    if(s.begins_with('\''))
+    {
+        _c4dbgp("got a ': scanning single-quoted scalar");
+        m_state->scalar_col = m_state->line_contents.current_col(s);
+        *scalar = _scan_squot_scalar();
+        *quoted = true;
+        return true;
+    }
+    else if(s.begins_with('"'))
+    {
+        _c4dbgp("got a \": scanning double-quoted scalar");
+        m_state->scalar_col = m_state->line_contents.current_col(s);
+        *scalar = _scan_dquot_scalar();
+        *quoted = true;
+        return true;
+    }
+
+    if(has_all(RVAL))
+    {
+        _c4dbgp("RSEQ|RVAL");
+        if( ! _is_scalar_next__rseq_rval(s))
+            return false;
+        _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t"))
+            return false;
+        )
+        _c4dbgp("RSEQ|RVAL|FLOW");
+        s = s.left_of(s.first_of(",]"));
+        if(s.ends_with(':'))
+        {
+            --s.len;
         }
         else
         {
-            _c4err("internal error");
+            auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #");
+            if(first)
+                s.len = first.pos;
         }
+        s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
     }
-    else if(has_any(RMAP))
+
+    if(s.empty())
+        return false;
+
+    m_state->scalar_col = m_state->line_contents.current_col(s);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str);
+    _line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len);
+
+    if(_at_line_end() && s != '~')
     {
-        if( ! _is_scalar_next__rmap(s))
-            return false;
-        size_t colon_space = s.find(": ");
-        if(colon_space == npos)
+        _c4dbgpf("at line end. curr='{}'", s);
+        s = _extend_scanned_scalar(s);
+    }
+
+    _c4dbgpf("scalar was '{}'", s);
+
+    *scalar = s;
+    *quoted = false;
+    return true;
+}
+
+bool Parser::_scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
+{
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  has_any(RMAP));
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  has_any(FLOW));
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  has_any(RKEY|RVAL));
+
+    csubstr s = m_state->line_contents.rem;
+    if(s.len == 0)
+        return false;
+    s = s.trim(" \t");
+    if(s.len == 0)
+        return false;
+
+    if(s.begins_with('\''))
+    {
+        _c4dbgp("got a ': scanning single-quoted scalar");
+        m_state->scalar_col = m_state->line_contents.current_col(s);
+        *scalar = _scan_squot_scalar();
+        *quoted = true;
+        return true;
+    }
+    else if(s.begins_with('"'))
+    {
+        _c4dbgp("got a \": scanning double-quoted scalar");
+        m_state->scalar_col = m_state->line_contents.current_col(s);
+        *scalar = _scan_dquot_scalar();
+        *quoted = true;
+        return true;
+    }
+
+    if( ! _is_scalar_next__rmap(s))
+        return false;
+
+    if(has_all(RKEY))
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' '));
+        size_t colon_token = s.find(": ");
+        if(colon_token == npos)
         {
             _RYML_WITH_OR_WITHOUT_TAB_TOKENS(
                 // with tab tokens
-                colon_space = s.find(":\t");
-                if(colon_space == npos)
+                colon_token = s.find(":\t");
+                if(colon_token == npos)
                 {
                     _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);
-                    colon_space = s.find(':');
-                    if(colon_space != s.len-1)
-                        colon_space = npos;
+                    colon_token = s.find(':');
+                    if(colon_token != s.len-1)
+                        colon_token = npos;
                 }
                 ,
                 // without tab tokens
-                colon_space = s.find(':');
+                colon_token = s.find(':');
                 _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);
-                if(colon_space != s.len-1)
-                    colon_space = npos;
+                if(colon_token != s.len-1)
+                    colon_token = npos;
             )
         }
-
-        if(has_all(RKEY))
-        {
-            _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' '));
-            if(has_any(QMRK))
-            {
-                _c4dbgp("RMAP|RKEY|CPLX");
-                _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP));
-                if(s.begins_with("? ") || s == '?')
-                    return false;
-                s = s.left_of(colon_space);
-                s = s.left_of(s.first_of("#"));
-                if(has_any(FLOW))
-                    s = s.left_of(s.first_of(':'));
-                s = s.trimr(" \t");
-                if(s.begins_with("---"))
-                    return false;
-                else if(s.begins_with("..."))
-                    return false;
-            }
-            else
-            {
-                _c4dbgp("RMAP|RKEY");
-                _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{'));
-                if(s.begins_with("? ") || s == '?')
-                    return false;
-                s = s.left_of(colon_space);
-                s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
-                if(has_any(FLOW))
-                {
-                    _c4dbgpf("RMAP|RKEY|EXPL: '{}'", s);
-                    s = s.left_of(s.first_of(",}"));
-                    if(s.ends_with(':'))
-                        s = s.offs(0, 1);
-                }
-                else if(s.begins_with("---"))
-                {
-                    return false;
-                }
-                else if(s.begins_with("..."))
-                {
-                    return false;
-                }
-            }
-        }
-        else if(has_all(RVAL))
-        {
-            _c4dbgp("RMAP|RVAL");
-            _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK));
-            if( ! _is_scalar_next__rmap_val(s))
-                return false;
-            _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t"))
-                return false;
-            )
-            s = s.left_of(s.find(" #")); // is there a comment?
-            s = s.left_of(s.find("\t#")); // is there a comment?
-            if(has_any(FLOW))
-            {
-                _c4dbgp("RMAP|RVAL|EXPL");
-                if(has_none(RSEQIMAP))
-                    s = s.left_of(s.first_of(",}"));
-                else
-                    s = s.left_of(s.first_of(",]"));
-            }
-            s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
+        if(s.begins_with("? ") || s == '?')
+            return false;
+        if(has_any(QMRK))
+        {
+            _c4dbgp("RMAP|RKEY|CPLX");
+            _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP));
+            s = s.left_of(colon_token);
+            s = s.left_of(s.first_of("#"));
+            s = s.left_of(s.first_of(':'));
+            s = s.trimr(" \t");
             if(s.begins_with("---"))
                 return false;
             else if(s.begins_with("..."))
@@ -27264,37 +29838,116 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
         }
         else
         {
-            _c4err("parse error");
+            _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{'));
+            _c4dbgp("RMAP|RKEY");
+            s = s.left_of(colon_token);
+            s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
+            _c4dbgpf("RMAP|RKEY|FLOW: '{}'", s);
+            s = s.left_of(s.first_of(",}"));
+            if(s.ends_with(':'))
+                --s.len;
         }
     }
-    else if(has_all(RUNK))
+    else if(has_all(RVAL))
     {
-        _c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s);
-        if( ! _is_scalar_next__runk(s))
-        {
-            _c4dbgp("RUNK: no scalar next");
+        _c4dbgp("RMAP|RVAL");
+        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK));
+        if( ! _is_scalar_next__rmap_val(s))
+            return false;
+        _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t"))
             return false;
-        }
-        s = s.left_of(s.find(" #"));
-        size_t pos = s.find(": ");
-        if(pos != npos)
-            s = s.left_of(pos);
-        else if(s.ends_with(':'))
-            s = s.left_of(s.len-1);
-        _RYML_WITH_TAB_TOKENS(
-        else if((pos = s.find(":\t")) != npos) // TABS
-            s = s.left_of(pos);
         )
+        _c4dbgp("RMAP|RVAL|FLOW");
+        if(has_none(RSEQIMAP))
+            s = s.left_of(s.first_of(",}"));
         else
-            s = s.left_of(s.first_of(','));
-        s = s.trim(" \t");
-        _c4dbgpf("RUNK: scalar='{}'", s);
+            s = s.left_of(s.first_of(",]"));
+        s = s.left_of(s.find(" #")); // is there a comment?
+        s = s.left_of(s.find("\t#")); // is there a comment?
+        s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
     }
-    else
+
+    if(s.empty())
+        return false;
+
+    m_state->scalar_col = m_state->line_contents.current_col(s);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str);
+    _line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len);
+
+    if(_at_line_end() && s != '~')
+    {
+        _c4dbgpf("at line end. curr='{}'", s);
+        s = _extend_scanned_scalar(s);
+    }
+
+    _c4dbgpf("scalar was '{}'", s);
+
+    *scalar = s;
+    *quoted = false;
+    return true;
+}
+
+bool Parser::_scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
+{
+    _RYML_CB_ASSERT(m_stack.m_callbacks,  has_any(RUNK));
+
+    csubstr s = m_state->line_contents.rem;
+    if(s.len == 0)
+        return false;
+    s = s.trim(" \t");
+    if(s.len == 0)
+        return false;
+
+    if(s.begins_with('\''))
+    {
+        _c4dbgp("got a ': scanning single-quoted scalar");
+        m_state->scalar_col = m_state->line_contents.current_col(s);
+        *scalar = _scan_squot_scalar();
+        *quoted = true;
+        return true;
+    }
+    else if(s.begins_with('"'))
+    {
+        _c4dbgp("got a \": scanning double-quoted scalar");
+        m_state->scalar_col = m_state->line_contents.current_col(s);
+        *scalar = _scan_dquot_scalar();
+        *quoted = true;
+        return true;
+    }
+    else if(s.begins_with('|') || s.begins_with('>'))
+    {
+        *scalar = _scan_block();
+        *quoted = true;
+        return true;
+    }
+    else if(has_any(RTOP) && _is_doc_sep(s))
     {
-        _c4err("not implemented");
+        return false;
     }
 
+    _c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s);
+    if( ! _is_scalar_next__runk(s))
+    {
+        _c4dbgp("RUNK: no scalar next");
+        return false;
+    }
+    size_t pos = s.find(" #");
+    if(pos != npos)
+        s = s.left_of(pos);
+    pos = s.find(": ");
+    if(pos != npos)
+        s = s.left_of(pos);
+    else if(s.ends_with(':'))
+        s = s.left_of(s.len-1);
+    _RYML_WITH_TAB_TOKENS(
+    else if((pos = s.find(":\t")) != npos) // TABS
+        s = s.left_of(pos);
+    )
+    else
+        s = s.left_of(s.first_of(','));
+    s = s.trim(" \t");
+    _c4dbgpf("RUNK: scalar='{}'", s);
+
     if(s.empty())
         return false;
 
@@ -27315,6 +29968,7 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
     return true;
 }
 
+
 //-----------------------------------------------------------------------------
 
 csubstr Parser::_extend_scanned_scalar(csubstr s)
@@ -27381,7 +30035,7 @@ substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line)
             csubstr tpkl = peeked_line.triml(' ').trimr("\r\n");
             if(tpkl.begins_with(": ") || tpkl == ':')
             {
-                _c4dbgpf("rscalar[EXPL]: map value starts on the peeked line: '{}'", peeked_line);
+                _c4dbgpf("rscalar[FLOW]: map value starts on the peeked line: '{}'", peeked_line);
                 peeked_line = peeked_line.first(0);
                 break;
             }
@@ -27391,7 +30045,7 @@ substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line)
                 if(colon_pos && colon_pos.pos < pos)
                 {
                     peeked_line = peeked_line.first(colon_pos.pos);
-                    _c4dbgpf("rscalar[EXPL]: found colon at {}. peeked='{}'", colon_pos.pos, peeked_line);
+                    _c4dbgpf("rscalar[FLOW]: found colon at {}. peeked='{}'", colon_pos.pos, peeked_line);
                     _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin());
                     _line_progressed(static_cast<size_t>(peeked_line.end() - m_state->line_contents.rem.begin()));
                     break;
@@ -27400,13 +30054,13 @@ substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line)
         }
         if(pos != npos)
         {
-            _c4dbgpf("rscalar[EXPL]: found special character '{}' at {}, stopping: '{}'", peeked_line[pos], pos, peeked_line.left_of(pos).trimr("\r\n"));
+            _c4dbgpf("rscalar[FLOW]: found special character '{}' at {}, stopping: '{}'", peeked_line[pos], pos, peeked_line.left_of(pos).trimr("\r\n"));
             peeked_line = peeked_line.left_of(pos);
             _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin());
             _line_progressed(static_cast<size_t>(peeked_line.end() - m_state->line_contents.rem.begin()));
             break;
         }
-        _c4dbgpf("rscalar[EXPL]: append another line, full: '{}'", peeked_line.trimr("\r\n"));
+        _c4dbgpf("rscalar[FLOW]: append another line, full: '{}'", peeked_line.trimr("\r\n"));
         if(!first)
         {
             RYML_CHECK(_advance_to_peeked());
@@ -27728,12 +30382,17 @@ void Parser::_line_ended_undo()
     _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == 1u);
     _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line > 0u);
     _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_state->line_contents.full.len - m_state->line_contents.stripped.len);
-    _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - (m_state->line_contents.full.len - m_state->line_contents.stripped.len));
-    m_state->pos.offset -= m_state->line_contents.full.len - m_state->line_contents.stripped.len;
+    size_t delta = m_state->line_contents.full.len - m_state->line_contents.stripped.len;
+    _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - delta);
+    m_state->pos.offset -= delta;
     --m_state->pos.line;
     m_state->pos.col = m_state->line_contents.stripped.len + 1u;
+    // don't forget to undo also the changes to the remainder of the line
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_buf.len || m_buf[m_state->pos.offset] == '\n' || m_buf[m_state->pos.offset] == '\r');
+    m_state->line_contents.rem = m_buf.sub(m_state->pos.offset, 0);
 }
 
+
 //-----------------------------------------------------------------------------
 void Parser::_set_indentation(size_t indentation)
 {
@@ -28392,7 +31051,8 @@ void Parser::_move_scalar_from_top()
 }
 
 //-----------------------------------------------------------------------------
-/** @todo this function is a monster and needs love. */
+/** @todo this function is a monster and needs love. Likely, it needs
+ * to be split like _scan_scalar_*() */
 bool Parser::_handle_indentation()
 {
     _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW));
@@ -28413,38 +31073,40 @@ bool Parser::_handle_indentation()
     _c4dbgpf("indentation? ind={} indref={}", ind, m_state->indref);
     if(ind == m_state->indref)
     {
-        if(has_all(SSCL|RVAL) && ! rem.sub(ind).begins_with('-'))
+        _c4dbgpf("same indentation: {}", ind);
+        if(!rem.sub(ind).begins_with('-'))
         {
-            if(has_all(RMAP))
-            {
-                _append_key_val_null(rem.str + ind - 1);
-                addrem_flags(RKEY, RVAL);
-            }
-            #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
-            else if(has_all(RSEQ))
+            _c4dbgp("does not begin with -");
+            if(has_any(RMAP))
             {
-                _append_val(_consume_scalar());
-                addrem_flags(RNXT, RVAL);
+                if(has_all(SSCL|RVAL))
+                {
+                    _c4dbgp("add with null val");
+                    _append_key_val_null(rem.str + ind - 1);
+                    addrem_flags(RKEY, RVAL);
+                }
             }
-            else
+            else if(has_any(RSEQ))
             {
-                _c4err("internal error");
-            }
-            #endif
-        }
-        else if(has_all(RSEQ|RNXT) && ! rem.sub(ind).begins_with('-'))
-        {
-            if(m_stack.size() > 2) // do not pop to root level
-            {
-                _c4dbgp("end the indentless seq");
-                _pop_level();
-                return true;
+                if(m_stack.size() > 2) // do not pop to root level
+                {
+                    if(has_any(RNXT))
+                    {
+                        _c4dbgp("end the indentless seq");
+                        _pop_level();
+                        return true;
+                    }
+                    else if(has_any(RVAL))
+                    {
+                        _c4dbgp("add with null val");
+                        _append_val_null(rem.str);
+                        _c4dbgp("end the indentless seq");
+                        _pop_level();
+                        return true;
+                    }
+                }
             }
         }
-        else
-        {
-            _c4dbgpf("same indentation ({}) -- nothing to see here", ind);
-        }
         _line_progressed(ind);
         return ind > 0;
     }
@@ -28632,10 +31294,9 @@ csubstr Parser::_scan_squot_scalar()
 
         // leading whitespace also needs filtering
         needs_filter = needs_filter
-            || numlines > 1
+            || (numlines > 1)
             || line_is_blank
-            || (_at_line_begin() && line.begins_with(' '))
-            || (m_state->line_contents.full.last_of('\r') != csubstr::npos);
+            || (_at_line_begin() && line.begins_with(' '));
 
         if(pos == npos)
         {
@@ -28734,10 +31395,9 @@ csubstr Parser::_scan_dquot_scalar()
 
         // leading whitespace also needs filtering
         needs_filter = needs_filter
-            || numlines > 1
+            || (numlines > 1)
             || line_is_blank
-            || (_at_line_begin() && line.begins_with(' '))
-            || (m_state->line_contents.full.last_of('\r') != csubstr::npos);
+            || (_at_line_begin() && line.begins_with(' '));
 
         if(pos == npos)
         {
@@ -28839,8 +31499,7 @@ csubstr Parser::_scan_block()
     _line_ended();
     _scan_line();
 
-    _c4dbgpf("scanning block: style={}  chomp={}  indentation={}", newline==BLOCK_FOLD ? "fold" : "literal",
-        chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
+    _c4dbgpf("scanning block: style={}  chomp={}  indentation={}", newline==BLOCK_FOLD ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
 
     // start with a zero-length block, already pointing at the right place
     substr raw_block(m_buf.data() + m_state->pos.offset, size_t(0));// m_state->line_contents.full.sub(0, 0);
@@ -28887,15 +31546,17 @@ csubstr Parser::_scan_block()
                 _c4dbgpf("scanning block: line not empty. indref={} indprov={} indentation={}", m_state->indref, provisional_indentation, lc.indentation);
                 if(provisional_indentation == npos)
                 {
-                    #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
                     if(lc.indentation < m_state->indref)
                     {
                         _c4dbgpf("scanning block: block terminated indentation={} < indref={}", lc.indentation, m_state->indref);
+                        if(raw_block.len == 0)
+                        {
+                            _c4dbgp("scanning block: was empty, undo next line");
+                            _line_ended_undo();
+                        }
                         break;
                     }
-                    else
-                    #endif
-                    if(lc.indentation == m_state->indref)
+                    else if(lc.indentation == m_state->indref)
                     {
                         if(has_any(RSEQ|RMAP))
                         {
@@ -28959,7 +31620,7 @@ csubstr Parser::_scan_block()
         _line_ended();
         ++num_lines;
     }
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line == (first + num_lines));
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line == (first + num_lines) || (raw_block.len == 0));
     C4_UNUSED(num_lines);
     C4_UNUSED(first);
 
@@ -29485,7 +32146,7 @@ csubstr Parser::_filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e
 
     _c4dbgfbl(": indentation={} before=[{}]~~~{}~~~", indentation, s.len, s);
 
-    if(chomp != CHOMP_KEEP && s.trim(" \n\r\t").len == 0u)
+    if(chomp != CHOMP_KEEP && s.trim(" \n\r").len == 0u)
     {
         _c4dbgp("filt_block: empty scalar");
         return s.first(0);
@@ -29994,7 +32655,7 @@ csubstr Parser::location_contents(Location const& loc) const
     return m_buf.sub(loc.offset);
 }
 
-Location Parser::location(NodeRef node) const
+Location Parser::location(ConstNodeRef node) const
 {
     _RYML_CB_ASSERT(m_stack.m_callbacks, node.valid());
     return location(*node.tree(), node.id());
@@ -30002,90 +32663,158 @@ Location Parser::location(NodeRef node) const
 
 Location Parser::location(Tree const& tree, size_t node) const
 {
-    _RYML_CB_CHECK(m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
-    _RYML_CB_CHECK(m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
+    // try hard to avoid getting the location from a null string.
+    Location loc;
+    if(_location_from_node(tree, node, &loc, 0))
+        return loc;
+    return val_location(m_buf.str);
+}
+
+bool Parser::_location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const
+{
     if(tree.has_key(node))
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, tree.key(node).is_sub(m_buf));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(tree.key(node)));
-        return val_location(tree.key(node).str);
+        csubstr k = tree.key(node);
+        if(C4_LIKELY(k.str != nullptr))
+        {
+            _RYML_CB_ASSERT(m_stack.m_callbacks, k.is_sub(m_buf));
+            _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(k));
+            *loc = val_location(k.str);
+            return true;
+        }
+    }
+
+    if(tree.has_val(node))
+    {
+        csubstr v = tree.val(node);
+        if(C4_LIKELY(v.str != nullptr))
+        {
+            _RYML_CB_ASSERT(m_stack.m_callbacks, v.is_sub(m_buf));
+            _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(v));
+            *loc = val_location(v.str);
+            return true;
+        }
     }
-    else if(tree.has_val(node))
+
+    if(tree.is_container(node))
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, tree.val(node).is_sub(m_buf));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(tree.val(node)));
-        return val_location(tree.val(node).str);
+        if(_location_from_cont(tree, node, loc))
+            return true;
     }
-    else if(tree.is_container(node))
+
+    if(tree.type(node) != NOTYPE && level == 0)
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, !tree.has_key(node));
-        if(!tree.is_stream(node))
+        // try the prev sibling
         {
-            const char *node_start = tree._p(node)->m_val.scalar.str;  // this was stored in the container
-            if(tree.has_children(node))
+            const size_t prev = tree.prev_sibling(node);
+            if(prev != NONE)
             {
-                size_t child = tree.first_child(node);
-                if(tree.has_key(child))
-                {
-                    // when a map starts, the container was set after the key
-                    csubstr k = tree.key(child);
-                    if(node_start > k.str)
-                        node_start = k.str;
-                }
+                if(_location_from_node(tree, prev, loc, level+1))
+                    return true;
             }
-            return val_location(node_start);
         }
-        else // it's a stream
+        // try the next sibling
         {
-            return val_location(m_buf.str); // just return the front of the buffer
+            const size_t next = tree.next_sibling(node);
+            if(next != NONE)
+            {
+                if(_location_from_node(tree, next, loc, level+1))
+                    return true;
+            }
+        }
+        // try the parent
+        {
+            const size_t parent = tree.parent(node);
+            if(parent != NONE)
+            {
+                if(_location_from_node(tree, parent, loc, level+1))
+                    return true;
+            }
         }
     }
-    _RYML_CB_ASSERT(m_stack.m_callbacks, tree.type(node) == NOTYPE);
-    return val_location(m_buf.str);
+
+    return false;
 }
 
+bool Parser::_location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const
+{
+    _RYML_CB_ASSERT(m_stack.m_callbacks, tree.is_container(node));
+    if(!tree.is_stream(node))
+    {
+        const char *node_start = tree._p(node)->m_val.scalar.str;  // this was stored in the container
+        if(tree.has_children(node))
+        {
+            size_t child = tree.first_child(node);
+            if(tree.has_key(child))
+            {
+                // when a map starts, the container was set after the key
+                csubstr k = tree.key(child);
+                if(k.str && node_start > k.str)
+                    node_start = k.str;
+            }
+        }
+        *loc = val_location(node_start);
+        return true;
+    }
+    else // it's a stream
+    {
+        *loc = val_location(m_buf.str); // just return the front of the buffer
+    }
+    return true;
+}
+
+
 Location Parser::val_location(const char *val) const
 {
-    if(_locations_dirty())
-        _prepare_locations();
-    csubstr src = m_buf;
-    _RYML_CB_CHECK(m_stack.m_callbacks, src.str == m_newline_offsets_buf.str);
-    _RYML_CB_CHECK(m_stack.m_callbacks, src.len == m_newline_offsets_buf.len);
-    _RYML_CB_CHECK(m_stack.m_callbacks, val >= src.begin() && val <= src.end());
+    if(C4_UNLIKELY(val == nullptr))
+        return {m_file, 0, 0, 0};
+
+    _RYML_CB_CHECK(m_stack.m_callbacks, m_options.locations());
+    // NOTE: if any of these checks fails, the parser needs to be
+    // instantiated with locations enabled.
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_options.locations());
+    _RYML_CB_ASSERT(m_stack.m_callbacks, !_locations_dirty());
     _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets != nullptr);
     _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size > 0);
-    using linetype = size_t const* C4_RESTRICT;
-    linetype line = nullptr;
+    // NOTE: the pointer needs to belong to the buffer that was used to parse.
+    csubstr src = m_buf;
+    _RYML_CB_CHECK(m_stack.m_callbacks, val != nullptr || src.str == nullptr);
+    _RYML_CB_CHECK(m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
+    // ok. search the first stored newline after the given ptr
+    using lineptr_type = size_t const* C4_RESTRICT;
+    lineptr_type lineptr = nullptr;
     size_t offset = (size_t)(val - src.begin());
-    if(m_newline_offsets_size < 30)
+    if(m_newline_offsets_size < 30) // TODO magic number
     {
-        // do a linear search if the size is small.
-        for(linetype curr = m_newline_offsets; curr < m_newline_offsets + m_newline_offsets_size; ++curr)
+        // just do a linear search if the size is small.
+        for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
         {
             if(*curr > offset)
             {
-                line = curr;
+                lineptr = curr;
                 break;
             }
         }
     }
     else
     {
-        // Do a bisection search if the size is not small.
+        // do a bisection search if the size is not small.
         //
         // We could use std::lower_bound but this is simple enough and
         // spares the include of <algorithm>.
         size_t count = m_newline_offsets_size;
         size_t step;
-        linetype it;
-        line = m_newline_offsets;
+        lineptr_type it;
+        lineptr = m_newline_offsets;
         while(count)
         {
             step = count >> 1;
-            it = line + step;
+            it = lineptr + step;
             if(*it < offset)
             {
-                line = ++it;
+                lineptr = ++it;
                 count -= step + 1;
             }
             else
@@ -30094,31 +32823,23 @@ Location Parser::val_location(const char *val) const
             }
         }
     }
-    if(line)
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, *line > offset);
-    }
-    else
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.empty());
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == 1);
-        line = m_newline_offsets;
-    }
-    _RYML_CB_ASSERT(m_stack.m_callbacks, line >= m_newline_offsets && line < m_newline_offsets + m_newline_offsets_size);;
-    Location loc = {};
+    _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr >= m_newline_offsets);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, *lineptr > offset);
+    Location loc;
     loc.name = m_file;
     loc.offset = offset;
-    loc.line = (size_t)(line - m_newline_offsets);
-    if(line > m_newline_offsets)
-        loc.col = (offset - *(line-1) - 1u);
+    loc.line = (size_t)(lineptr - m_newline_offsets);
+    if(lineptr > m_newline_offsets)
+        loc.col = (offset - *(lineptr-1) - 1u);
     else
         loc.col = offset;
     return loc;
 }
 
-void Parser::_prepare_locations() const
+void Parser::_prepare_locations()
 {
-    _RYML_CB_ASSERT(m_stack.m_callbacks, !m_file.empty());
+    m_newline_offsets_buf = m_buf;
     size_t numnewlines = 1u + m_buf.count('\n');
     _resize_locations(numnewlines);
     m_newline_offsets_size = 0;
@@ -30129,7 +32850,7 @@ void Parser::_prepare_locations() const
     _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
 }
 
-void Parser::_resize_locations(size_t numnewlines) const
+void Parser::_resize_locations(size_t numnewlines)
 {
     if(numnewlines > m_newline_offsets_capacity)
     {
@@ -30140,12 +32861,6 @@ void Parser::_resize_locations(size_t numnewlines) const
     }
 }
 
-void Parser::_mark_locations_dirty()
-{
-    m_newline_offsets_size = 0u;
-    m_newline_offsets_buf = m_buf;
-}
-
 bool Parser::_locations_dirty() const
 {
     return !m_newline_offsets_size;
@@ -30189,6 +32904,13 @@ bool Parser::_locations_dirty() const
 namespace c4 {
 namespace yml {
 
+
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
 size_t NodeRef::set_key_serialized(c4::fmt::const_base64_wrapper w)
 {
     _apply_seed();
@@ -30205,22 +32927,6 @@ size_t NodeRef::set_val_serialized(c4::fmt::const_base64_wrapper w)
     return encoded.len;
 }
 
-size_t NodeRef::deserialize_key(c4::fmt::base64_wrapper w) const
-{
-    RYML_ASSERT( ! is_seed());
-    RYML_ASSERT(valid());
-    RYML_ASSERT(get() != nullptr);
-    return from_chars(key(), &w);
-}
-
-size_t NodeRef::deserialize_val(c4::fmt::base64_wrapper w) const
-{
-    RYML_ASSERT( ! is_seed());
-    RYML_ASSERT(valid());
-    RYML_ASSERT(get() != nullptr);
-    return from_chars(val(), &w);
-}
-
 } // namespace yml
 } // namespace c4
 
@@ -30823,7 +33529,7 @@ inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bo
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-inline void print_node(NodeRef const& p, int level=0)
+inline void print_node(ConstNodeRef const& p, int level=0)
 {
     print_node(*p.tree(), p.id(), level, 0, true);
 }