4 files changed, 818 insertions, 0 deletions
diff --git a/src/third-party/scnlib/include/scn/unicode/common.h b/src/third-party/scnlib/include/scn/unicode/common.h
new file mode 100644
index 0000000..3807793
--- /dev/null
+++ b/src/third-party/scnlib/include/scn/unicode/common.h
@@ -0,0 +1,139 @@
+// Copyright 2017 Elias Kosunen
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is a part of scnlib:
+//     https://github.com/eliaskosunen/scnlib
+//
+// The contents of this file are based on utfcpp:
+//     https://github.com/nemtrif/utfcpp
+//     Copyright (c) 2006 Nemanja Trifunovic
+//     Distributed under the Boost Software License, version 1.0
+
+#ifndef SCN_UNICODE_COMMON_H
+#define SCN_UNICODE_COMMON_H
+
+#include "../detail/fwd.h"
+
+#include <cstdint>
+
+namespace scn {
+    SCN_BEGIN_NAMESPACE
+
+    /**
+     * Strongly-typed Unicode code point, stored as a 32-bit unsigned value.
+     */
+    enum class code_point : uint32_t {};
+
+    template <typename T>  // equal when both sides match as uint32_t
+    constexpr bool operator==(code_point a, T b)
+    {
+        return static_cast<uint32_t>(b) == static_cast<uint32_t>(a);
+    }
+    template <typename T>  // negation of the uint32_t equality
+    constexpr bool operator!=(code_point a, T b)
+    {
+        return !(static_cast<uint32_t>(a) == static_cast<uint32_t>(b));
+    }
+    template <typename T>  // a < b  <=>  b > a, on the uint32_t values
+    constexpr bool operator<(code_point a, T b)
+    {
+        return static_cast<uint32_t>(b) > static_cast<uint32_t>(a);
+    }
+    template <typename T>  // a > b  <=>  b < a, on the uint32_t values
+    constexpr bool operator>(code_point a, T b)
+    {
+        return static_cast<uint32_t>(b) < static_cast<uint32_t>(a);
+    }
+    template <typename T>  // a <= b  <=>  !(a > b)
+    constexpr bool operator<=(code_point a, T b)
+    {
+        return !(static_cast<uint32_t>(a) > static_cast<uint32_t>(b));
+    }
+    template <typename T>  // a >= b  <=>  !(a < b)
+    constexpr bool operator>=(code_point a, T b)
+    {
+        return !(static_cast<uint32_t>(a) < static_cast<uint32_t>(b));
+    }
+
+    namespace detail {
+        static constexpr uint16_t lead_surrogate_min = 0xD800;
+        static constexpr uint16_t lead_surrogate_max = 0xDBFF;
+        static constexpr uint16_t trail_surrogate_min = 0xDC00;
+        static constexpr uint16_t trail_surrogate_max = 0xDFFF;
+        static constexpr uint16_t lead_offset =
+            lead_surrogate_min - (0x10000u >> 10);
+        static constexpr uint32_t surrogate_offset =
+            0x10000u - (lead_surrogate_min << 10) - trail_surrogate_min;
+        static constexpr uint32_t code_point_max = 0x10FFFF;
+
+        template <typename Byte>  // low 8 bits of `o`
+        constexpr uint8_t mask8(Byte o)
+        {
+            return static_cast<uint8_t>(o & 0xff);
+        }
+        template <typename Unit>  // low 16 bits of `v`
+        constexpr uint16_t mask16(Unit v)
+        {
+            return static_cast<uint16_t>(v & 0xffff);
+        }
+        template <typename Unit>  // within 0xD800..0xDBFF
+        constexpr bool is_lead_surrogate(Unit cp)
+        {
+            return cp <= lead_surrogate_max && cp >= lead_surrogate_min;
+        }
+        template <typename Unit>  // within 0xDC00..0xDFFF
+        constexpr bool is_trail_surrogate(Unit cp)
+        {
+            return cp <= trail_surrogate_max && cp >= trail_surrogate_min;
+        }
+        template <typename Unit>  // within 0xD800..0xDFFF, lead or trail
+        constexpr bool is_surrogate(Unit cp)
+        {
+            return cp <= trail_surrogate_max && cp >= lead_surrogate_min;
+        }
+
+        inline constexpr bool is_code_point_valid(code_point cp)
+        {
+            return !(cp > code_point_max) && !is_surrogate(cp);
+        }
+    }  // namespace detail
+
+    template <typename T>  // converts `ch` with a static_cast
+    constexpr auto make_code_point(T ch) -> code_point
+    {
+        return static_cast<code_point>(ch);
+    }
+
+    /**
+     * Checks whether `cp` is a valid code point, i.e. it does not exceed the
+     * largest code point (U+10FFFF) and does not fall in the surrogate range
+     * (U+D800 to U+DFFF).
+     */
+    inline constexpr bool is_valid_code_point(code_point cp)
+    {
+        return detail::is_code_point_valid(cp);
+    }
+    /**
+     * Checks whether `cp` lies in the ASCII range (U+0 to U+7F), so that it
+     * can be encoded in ASCII as-is.
+     */
+    inline constexpr bool is_ascii_code_point(code_point cp)
+    {
+        return cp < 0x80;
+    }
+
+    SCN_END_NAMESPACE
+}  // namespace scn
+
+#endif
diff --git a/src/third-party/scnlib/include/scn/unicode/unicode.h b/src/third-party/scnlib/include/scn/unicode/unicode.h
new file mode 100644
index 0000000..011b0b9
--- /dev/null
+++ b/src/third-party/scnlib/include/scn/unicode/unicode.h
@@ -0,0 +1,243 @@
+// Copyright 2017 Elias Kosunen
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is a part of scnlib:
+//     https://github.com/eliaskosunen/scnlib
+//
+// The contents of this file are based on utfcpp:
+//     https://github.com/nemtrif/utfcpp
+//     Copyright (c) 2006 Nemanja Trifunovic
+//     Distributed under the Boost Software License, version 1.0
+
+#ifndef SCN_UNICODE_UNICODE_H
+#define SCN_UNICODE_UNICODE_H
+
+#include "utf16.h"
+#include "utf8.h"
+
+namespace scn {
+    SCN_BEGIN_NAMESPACE
+
+    namespace detail {
+        // `true` when wchar_t occupies two bytes
+        inline constexpr bool is_wide_multichar()
+        {
+            return 2 == sizeof(wchar_t);
+        }
+
+        // `char` always reports `true`; `wchar_t` defers to the check above
+        inline constexpr bool is_multichar_type(char)
+        {
+            return true;
+        }
+        inline constexpr bool is_multichar_type(wchar_t)
+        {
+            return is_wide_multichar();
+        }
+
+        using utf8_tag = std::integral_constant<size_t, 1>;
+        using utf16_tag = std::integral_constant<size_t, 2>;
+        using utf32_tag = std::integral_constant<size_t, 4>;
+
+#define SCN_MAKE_UTF_TAG(CharT) \
+    std::integral_constant<size_t, sizeof(CharT)> {}
+
+        // 1-byte code units: decoded as UTF-8
+        template <typename I, typename S>
+        SCN_CONSTEXPR14 expected<I>
+        parse_code_point(I begin, S end, code_point& cp, utf8_tag)
+        {
+            return utf8::parse_code_point(begin, end, cp);
+        }
+        // 2-byte code units: decoded as UTF-16
+        template <typename I, typename S>
+        SCN_CONSTEXPR14 expected<I>
+        parse_code_point(I begin, S end, code_point& cp, utf16_tag)
+        {
+            return utf16::parse_code_point(begin, end, cp);
+        }
+        // 4-byte code units: the unit itself becomes the code point
+        template <typename I, typename S>
+        SCN_CONSTEXPR14 expected<I>
+        parse_code_point(I begin, S end, code_point& cp, utf32_tag)
+        {
+            SCN_EXPECT(begin != end);
+            cp = make_code_point(*begin);
+            ++begin;
+            return {begin};
+        }
+    }  // namespace detail
+
+    /**
+     * Decodes a single Unicode code point from the front of `[begin, end)`
+     * and stores it in `cp`.
+     *
+     * The encoding is picked from the width of the range's value type:
+     * with `n = sizeof(typename std::iterator_traits<I>::value_type)`,
+     * `n == 1` selects UTF-8, `n == 2` UTF-16 and `n == 4` UTF-32.
+     *
+     * Precondition: `begin != end`.
+     *
+     * `cp` is left untouched when an error is returned.
+     *
+     * \return On success, an iterator pointing one past the last code unit
+     * consumed for `cp`. If the input is not correctly encoded, returns
+     * `error::invalid_encoding`.
+     */
+    template <typename I, typename S>
+    SCN_CONSTEXPR14 expected<I> parse_code_point(I begin, S end, code_point& cp)
+    {
+        using code_unit = typename std::iterator_traits<I>::value_type;
+        return detail::parse_code_point(begin, end, cp,
+                                        SCN_MAKE_UTF_TAG(code_unit));
+    }
+
+    namespace detail {
+        // 1-byte code units: forward to the UTF-8 encoder
+        template <typename I, typename S>
+        SCN_CONSTEXPR14 expected<I>
+        encode_code_point(I begin, S end, code_point cp, utf8_tag)
+        {
+            return utf8::encode_code_point(begin, end, cp);
+        }
+        // 2-byte code units: forward to the UTF-16 encoder
+        template <typename I, typename S>
+        SCN_CONSTEXPR14 expected<I>
+        encode_code_point(I begin, S end, code_point cp, utf16_tag)
+        {
+            return utf16::encode_code_point(begin, end, cp);
+        }
+        // 4-byte code units: `cp` is stored as-is in a single code unit.
+        // NOTE(review): unlike the UTF-8 and UTF-16 encoders, this overload
+        // does not check `cp` for validity before writing it.
+        template <typename I, typename S>
+        SCN_CONSTEXPR14 expected<I>
+        encode_code_point(I begin, S end, code_point cp, utf32_tag)
+        {
+            // Precondition: [begin, end) has room for at least one code unit
+            SCN_EXPECT(begin + 1 <= end);
+            *begin++ = static_cast<uint32_t>(cp);
+            return {begin};
+        }
+    }  // namespace detail
+
+    /**
+     * Encodes the code point `cp` and writes it starting at `begin`. The
+     * encoding is chosen from the value type of `begin`.
+     *
+     * See \ref parse_code_point() for how the encoding is determined.
+     *
+     * `end` must be reachable from `begin`, and `[begin, end)` must be large
+     * enough for the code point (4 code units for UTF-8, 2 for UTF-16, and 1
+     * for UTF-32).
+     *
+     * \param begin Start of the output range
+     * \param end End of the output range
+     * \param cp Code point to encode
+     * \return On success, one past the last code unit written. If `cp` is
+     * not a valid code point, returns `error::invalid_encoding`.
+     */
+    template <typename I, typename S>
+    SCN_CONSTEXPR14 expected<I> encode_code_point(I begin, S end, code_point cp)
+    {
+        using code_unit = typename std::iterator_traits<I>::value_type;
+        return detail::encode_code_point(begin, end, cp,
+                                         SCN_MAKE_UTF_TAG(code_unit));
+    }
+
+    namespace detail {
+        template <typename T>  // 1-byte code unit: ask the UTF-8 helper
+        SCN_CONSTEXPR14 int get_sequence_length(T unit, utf8_tag)
+        {
+            return utf8::get_sequence_length(unit);
+        }
+        template <typename T>  // 2-byte code unit: ask the UTF-16 helper
+        SCN_CONSTEXPR14 int get_sequence_length(T unit, utf16_tag)
+        {
+            return utf16::get_sequence_length(unit);
+        }
+        template <typename T>  // 4-byte code unit: always a single unit
+        SCN_CONSTEXPR14 int get_sequence_length(T, utf32_tag)
+        {
+            return 1;
+        }
+    }  // namespace detail
+
+    /**
+     * Computes how many code units make up the code point whose first code
+     * unit is `a`.
+     *
+     * The encoding is selected from `sizeof(T)`; see \ref parse_code_point()
+     * for the mapping.
+     *
+     * \param a The first code unit in a code point.
+     *
+     * \return The length of the code point in code units, or 0 if `a` cannot
+     * start a code point (it is malformed, or is not a first code unit).
+     */
+    template <typename T>
+    SCN_CONSTEXPR14 int get_sequence_length(T a)
+    {
+        const auto tag = SCN_MAKE_UTF_TAG(T);
+        return detail::get_sequence_length(a, tag);
+    }
+
+    namespace detail {
+        // 1-byte code units: count code points with the UTF-8 decoder
+        template <typename I, typename S>
+        SCN_CONSTEXPR14 expected<std::ptrdiff_t>
+        code_point_distance(I begin, S end, utf8_tag)
+        {
+            return utf8::code_point_distance(begin, end);
+        }
+        // 2-byte code units: count code points with the UTF-16 decoder
+        template <typename I, typename S>
+        SCN_CONSTEXPR14 expected<std::ptrdiff_t>
+        code_point_distance(I begin, S end, utf16_tag)
+        {
+            return utf16::code_point_distance(begin, end);
+        }
+        // 4-byte code units: every code unit counts as one code point
+        template <typename I, typename S>
+        SCN_CONSTEXPR14 expected<std::ptrdiff_t>
+        code_point_distance(I begin, S end, utf32_tag)
+        {
+            return {end - begin};
+        }
+    }  // namespace detail
+
+    /**
+     * Counts the code points contained in `[begin, end)`.
+     * The encoding is determined as described in \ref parse_code_point().
+     *
+     * Preconditions: `end >= begin`, and both `begin` and `end` point to
+     * the first code unit of a code point.
+     *
+     * \return The number of code points between `begin` and `end`. If the
+     * range is not correctly encoded, returns `error::invalid_encoding`.
+     */
+    template <typename I, typename S>
+    SCN_CONSTEXPR14 expected<std::ptrdiff_t> code_point_distance(I begin, S end)
+    {
+        using code_unit = typename std::iterator_traits<I>::value_type;
+        return detail::code_point_distance(begin, end,
+                                           SCN_MAKE_UTF_TAG(code_unit));
+    }
+
+#undef SCN_MAKE_UTF_TAG
+
+    SCN_END_NAMESPACE
+}  // namespace scn
+
+#endif
diff --git a/src/third-party/scnlib/include/scn/unicode/utf16.h b/src/third-party/scnlib/include/scn/unicode/utf16.h
new file mode 100644
index 0000000..8d8a400
--- /dev/null
+++ b/src/third-party/scnlib/include/scn/unicode/utf16.h
@@ -0,0 +1,139 @@
+// Copyright 2017 Elias Kosunen
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is a part of scnlib:
+//     https://github.com/eliaskosunen/scnlib
+//
+// The contents of this file are based on utfcpp:
+//     https://github.com/nemtrif/utfcpp
+//     Copyright (c) 2006 Nemanja Trifunovic
+//     Distributed under the Boost Software License, version 1.0
+
+#ifndef SCN_UNICODE_UTF16_H
+#define SCN_UNICODE_UTF16_H
+
+#include "../detail/error.h"
+#include "../util/expected.h"
+#include "common.h"
+
+namespace scn {
+    SCN_BEGIN_NAMESPACE
+
+    namespace detail {
+        namespace utf16 {
+            // Length, in code units, of the sequence starting at `ch`;
+            // 0 when `ch` is a trail surrogate and so cannot start one.
+            template <typename U16>
+            SCN_CONSTEXPR14 int get_sequence_length(U16 ch)
+            {
+                const uint16_t unit = mask16(ch);
+                if (SCN_UNLIKELY(is_trail_surrogate(unit))) {
+                    return 0;
+                }
+                // a lead surrogate opens a two-unit pair
+                return is_lead_surrogate(unit) ? 2 : 1;
+            }
+
+            template <typename I, typename S>  // decode one code point at `it`
+            SCN_CONSTEXPR14 error validate_next(I& it, S end, code_point& cp)
+            {
+                SCN_EXPECT(it != end);
+                // `lead` is the first code unit, masked to 16 bits
+                uint16_t lead = mask16(*it);
+                if (is_lead_surrogate(lead)) {
+                    ++it;  // not rolled back if an error is returned below
+                    if (it == end) {
+                        return {error::invalid_encoding,
+                                "Lone utf16 lead surrogate"};
+                    }
+                    uint16_t trail = mask16(*it);
+                    if (!is_trail_surrogate(trail)) {
+                        return {error::invalid_encoding,
+                                "Invalid utf16 trail surrogate"};
+                    }
+                    ++it;  // consume the trail surrogate as well
+                    cp = static_cast<code_point>(
+                        static_cast<uint32_t>(lead << 10u) + trail +
+                        surrogate_offset);
+                    return {};
+                }
+                if (is_trail_surrogate(lead)) {
+                    return {error::invalid_encoding,
+                            "Lone utf16 trail surrogate"};
+                }
+                // use the masked unit: `*it` could sign-extend into `cp`
+                cp = static_cast<code_point>(lead);
+                ++it;
+                return {};
+            }
+
+            // Decodes one code point into `cp`; see validate_next() for errors
+            template <typename I, typename S>
+            SCN_CONSTEXPR14 expected<I>
+            parse_code_point(I begin, S end, code_point& cp)
+            {
+                auto err = validate_next(begin, end, cp);
+                if (err) {
+                    return {begin};  // one past the consumed code units
+                }
+                return err;
+            }
+
+            // Encodes `cp` as one code unit, or a surrogate pair past U+FFFF
+            template <typename I, typename S>
+            SCN_CONSTEXPR14 expected<I>
+            encode_code_point(I begin, S end, code_point cp)
+            {
+                SCN_EXPECT(begin + 2 <= end);
+
+                if (!is_valid_code_point(cp)) {
+                    return error(error::invalid_encoding,
+                                 "Invalid code point, cannot encode in UTF-16");
+                }
+
+                const uint32_t value = static_cast<uint32_t>(cp);
+                if (value <= 0xffffu) {
+                    *begin++ = static_cast<uint16_t>(value);
+                }
+                else {
+                    *begin++ =
+                        static_cast<uint16_t>((value >> 10u) + lead_offset);
+                    *begin++ = static_cast<uint16_t>(
+                        (value & 0x3ffu) + trail_surrogate_min);
+                }
+                return {begin};
+            }
+
+            template <typename I, typename S>  // counts code points
+            SCN_CONSTEXPR14 expected<std::ptrdiff_t>
+            code_point_distance(I begin, S end)
+            {
+                std::ptrdiff_t count = 0;
+                code_point scratch{};  // decoded values are not needed
+                while (begin < end) {
+                    auto err = validate_next(begin, end, scratch);
+                    if (!err) {
+                        return err;
+                    }
+                    ++count;  // one more well-formed code point
+                }
+                return {count};
+            }
+        }  // namespace utf16
+    }      // namespace detail
+
+    SCN_END_NAMESPACE
+}  // namespace scn
+
+#endif
diff --git a/src/third-party/scnlib/include/scn/unicode/utf8.h b/src/third-party/scnlib/include/scn/unicode/utf8.h
new file mode 100644
index 0000000..d2ee54d
--- /dev/null
+++ b/src/third-party/scnlib/include/scn/unicode/utf8.h
@@ -0,0 +1,297 @@
+// Copyright 2017 Elias Kosunen
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is a part of scnlib:
+//     https://github.com/eliaskosunen/scnlib
+//
+// The contents of this file are based on utfcpp:
+//     https://github.com/nemtrif/utfcpp
+//     Copyright (c) 2006 Nemanja Trifunovic
+//     Distributed under the Boost Software License, version 1.0
+
+#ifndef SCN_UNICODE_UTF8_H
+#define SCN_UNICODE_UTF8_H
+
+#include "../detail/error.h"
+#include "../util/expected.h"
+#include "common.h"
+
+namespace scn {
+    SCN_BEGIN_NAMESPACE
+
+    namespace detail {
+        namespace utf8 {
+            template <typename Octet>  // continuation byte: 10xxxxxx
+            constexpr bool is_trail(Octet o)
+            {
+                return (mask8(o) & 0xc0) == 0x80;
+            }
+
+            template <typename Octet>  // length implied by the lead byte
+            SCN_CONSTEXPR14 int get_sequence_length(Octet ch)
+            {
+                const uint8_t lead = detail::mask8(ch);
+                if ((lead & 0x80) == 0x00) {
+                    return 1;  // 0xxxxxxx
+                }
+                if ((lead & 0xe0) == 0xc0) {
+                    return 2;  // 110xxxxx
+                }
+                if ((lead & 0xf0) == 0xe0) {
+                    return 3;  // 1110xxxx
+                }
+                if ((lead & 0xf8) == 0xf0) {
+                    return 4;  // 11110xxx
+                }
+                return 0;  // no lead-byte pattern matched
+            }
+
+            // Reports whether `cp` was decoded from a sequence whose length
+            // `len` (in code units) differs from the expected one:
+            //   below 0x80    -> expected length 1
+            //   below 0x800   -> expected length 2
+            //   below 0x10000 -> expected length 3
+            SCN_CONSTEXPR14 bool is_overlong_sequence(code_point cp,
+                                                      std::ptrdiff_t len)
+            {
+                if (cp < 0x80) {
+                    return len != 1;
+                }
+                if (cp < 0x800) {
+                    return len != 2;
+                }
+                if (cp < 0x10000) {
+                    return len != 3;
+                }
+                // values from 0x10000 upwards are never reported
+                return false;
+            }
+
+
+            template <typename I, typename S>  // step onto the next trail byte
+            SCN_CONSTEXPR14 error increase_safely(I& it, S end)
+            {
+                if (++it == end) {
+                    return {error::invalid_encoding,
+                            "Unexpected end of range when decoding utf8 "
+                            "(partial codepoint)"};
+                }
+                if (is_trail(*it)) {
+                    return {};
+                }
+                return {error::invalid_encoding,
+                        "Invalid utf8 codepoint parsed"};
+            }
+
+            template <typename I, typename S>  // single-byte sequence
+            SCN_CONSTEXPR14 error get_sequence_1(I& it, S end, code_point& cp)
+            {
+                SCN_EXPECT(it != end);
+                cp = static_cast<code_point>(mask8(*it));
+                return {};
+            }
+            template <typename I, typename S>  // 110xxxxx 10xxxxxx
+            SCN_CONSTEXPR14 error get_sequence_2(I& it, S end, code_point& cp)
+            {
+                SCN_EXPECT(it != end);
+                // lead byte payload, shifted above the six trail bits
+                const uint32_t high = (uint32_t{mask8(*it)} << 6u) & 0x7ffu;
+
+                auto err = increase_safely(it, end);
+                if (!err) {
+                    return err;
+                }
+                // `it` is now on the trail byte; keep its low six bits
+                const uint32_t low = static_cast<uint32_t>(*it) & 0x3fu;
+
+                cp = make_code_point(high + low);
+                return {};
+            }
+            template <typename I, typename S>  // 1110xxxx 10xxxxxx 10xxxxxx
+            SCN_CONSTEXPR14 error get_sequence_3(I& it, S end, code_point& cp)
+            {
+                SCN_EXPECT(it != end);
+                // lead byte payload -> bits 12..15
+                uint32_t value = (uint32_t{mask8(*it)} << 12u) & 0xffffu;
+
+                auto err = increase_safely(it, end);
+                if (!err) {
+                    return err;
+                }
+                // first trail byte -> bits 6..11
+                value += static_cast<uint32_t>(mask8(*it) << 6u) & 0xfffu;
+
+                err = increase_safely(it, end);
+                if (!err) {
+                    return err;
+                }
+                // second trail byte -> bits 0..5
+                value += static_cast<uint32_t>(*it) & 0x3fu;
+
+                cp = make_code_point(value);
+                return {};
+            }
+            template <typename I, typename S>
+            SCN_CONSTEXPR14 error get_sequence_4(I& it, S end, code_point& cp)
+            {
+                SCN_EXPECT(it != end);
+                // 11110xxx lead byte: payload -> bits 18..20
+                uint32_t value = (uint32_t{mask8(*it)} << 18u) & 0x1fffffu;
+
+                auto err = increase_safely(it, end);
+                if (!err) {
+                    return err;
+                }
+                // first trail byte -> bits 12..17
+                value += static_cast<uint32_t>(mask8(*it) << 12u) & 0x3ffffu;
+
+                err = increase_safely(it, end);
+                if (!err) {
+                    return err;
+                }
+                // second trail byte -> bits 6..11
+                value += static_cast<uint32_t>(mask8(*it) << 6u) & 0xfffu;
+
+                err = increase_safely(it, end);
+                if (!err) {
+                    return err;
+                }
+                // third trail byte -> bits 0..5
+                value += static_cast<uint32_t>(*it) & 0x3fu;
+
+                cp = make_code_point(value);
+                return {};
+            }
+
+            // Decodes and validates the code point at `it`, then advances `it`
+            template <typename I, typename S>
+            SCN_CONSTEXPR14 error validate_next(I& it, S end, code_point& cp)
+            {
+                SCN_EXPECT(it != end);
+
+                const int len = get_sequence_length(*it);
+                error err{};
+                if (len == 1) {
+                    err = get_sequence_1(it, end, cp);
+                }
+                else if (len == 2) {
+                    err = get_sequence_2(it, end, cp);
+                }
+                else if (len == 3) {
+                    err = get_sequence_3(it, end, cp);
+                }
+                else if (len == 4) {
+                    err = get_sequence_4(it, end, cp);
+                }
+                else {
+                    return {error::invalid_encoding,
+                            "Invalid lead byte for utf8"};
+                }
+
+                if (!err) {
+                    return err;
+                }
+                if (!is_valid_code_point(cp) || is_overlong_sequence(cp, len)) {
+                    return {error::invalid_encoding, "Invalid utf8 code point"};
+                }
+                ++it;  // step past the last code unit of the sequence
+                return {};
+            }
+
+            // Decodes one code point; `cp` is only assigned on success
+            template <typename I, typename S>
+            SCN_CONSTEXPR14 expected<I>
+            parse_code_point(I begin, S end, code_point& cp)
+            {
+                code_point decoded{};
+                auto err = validate_next(begin, end, decoded);
+                if (!err) {
+                    return err;
+                }
+                cp = decoded;
+                return {begin};
+            }
+
+            // Writes the UTF-8 encoding of `cp` through `it`, one to four code
+            // units depending on the magnitude of `cp`, and returns the
+            // advanced iterator. `cp` is expected to be a valid code point.
+            // Declared SCN_CONSTEXPR14 so that encode_code_point(), which is
+            // SCN_CONSTEXPR14 and calls this, is not held back by it.
+            template <typename I>
+            SCN_CONSTEXPR14 I append(code_point cp, I it)
+            {
+                SCN_EXPECT(is_code_point_valid(cp));
+
+                const uint32_t value = static_cast<uint32_t>(cp);
+                if (value < 0x80u) {
+                    *(it++) = static_cast<uint8_t>(value);  // 0xxxxxxx
+                }
+                else if (value < 0x800u) {
+                    *(it++) = static_cast<uint8_t>((value >> 6u) | 0xc0u);
+                    *(it++) = static_cast<uint8_t>((value & 0x3fu) | 0x80u);
+                }
+                else if (value < 0x10000u) {
+                    *(it++) = static_cast<uint8_t>((value >> 12u) | 0xe0u);
+                    *(it++) = static_cast<uint8_t>(
+                        ((value >> 6u) & 0x3fu) | 0x80u);
+                    *(it++) = static_cast<uint8_t>((value & 0x3fu) | 0x80u);
+                }
+                else {
+                    *(it++) = static_cast<uint8_t>((value >> 18u) | 0xf0u);
+                    *(it++) = static_cast<uint8_t>(
+                        ((value >> 12u) & 0x3fu) | 0x80u);
+                    *(it++) = static_cast<uint8_t>(
+                        ((value >> 6u) & 0x3fu) | 0x80u);
+                    *(it++) = static_cast<uint8_t>((value & 0x3fu) | 0x80u);
+                }
+                return it;
+            }
+
+            // Encodes `cp` as UTF-8; four free code units are always required
+            template <typename I, typename S>
+            SCN_CONSTEXPR14 expected<I>
+            encode_code_point(I begin, S end, code_point cp)
+            {
+                SCN_EXPECT(begin + 4 <= end);
+
+                if (is_code_point_valid(cp)) {
+                    return {append(cp, begin)};
+                }
+                return error(error::invalid_encoding,
+                             "Invalid code point, cannot encode in UTF-8");
+            }
+
+            template <typename I, typename S>  // counts code points
+            SCN_CONSTEXPR14 expected<std::ptrdiff_t>
+            code_point_distance(I begin, S end)
+            {
+                std::ptrdiff_t count = 0;
+                code_point scratch{};  // decoded values are not needed
+                while (begin < end) {
+                    auto err = validate_next(begin, end, scratch);
+                    if (!err) {
+                        return err;
+                    }
+                    ++count;  // one more well-formed code point
+                }
+                return {count};
+            }
+
+        }  // namespace utf8
+    }      // namespace detail
+
+    SCN_END_NAMESPACE
+}  // namespace scn
+
+#endif