summaryrefslogtreecommitdiffstats
path: root/src/third-party/scnlib/include/scn/unicode/common.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/third-party/scnlib/include/scn/unicode/common.h')
-rw-r--r--src/third-party/scnlib/include/scn/unicode/common.h139
1 files changed, 139 insertions, 0 deletions
diff --git a/src/third-party/scnlib/include/scn/unicode/common.h b/src/third-party/scnlib/include/scn/unicode/common.h
new file mode 100644
index 0000000..3807793
--- /dev/null
+++ b/src/third-party/scnlib/include/scn/unicode/common.h
@@ -0,0 +1,139 @@
+// Copyright 2017 Elias Kosunen
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file is a part of scnlib:
+// https://github.com/eliaskosunen/scnlib
+//
+// The contents of this file are based on utfcpp:
+// https://github.com/nemtrif/utfcpp
+// Copyright (c) 2006 Nemanja Trifunovic
+// Distributed under the Boost Software License, version 1.0
+
+#ifndef SCN_UNICODE_COMMON_H
+#define SCN_UNICODE_COMMON_H
+
+#include "../detail/fwd.h"
+
+#include <cstdint>
+
+namespace scn {
+ SCN_BEGIN_NAMESPACE
+
+ /**
+ * A Unicode code point
+ */
+ enum class code_point : uint32_t {};
+
+ template <typename T>
+ constexpr bool operator==(code_point a, T b)
+ {
+ return static_cast<uint32_t>(a) == static_cast<uint32_t>(b);
+ }
+ template <typename T>
+ constexpr bool operator!=(code_point a, T b)
+ {
+ return static_cast<uint32_t>(a) != static_cast<uint32_t>(b);
+ }
+ template <typename T>
+ constexpr bool operator<(code_point a, T b)
+ {
+ return static_cast<uint32_t>(a) < static_cast<uint32_t>(b);
+ }
+ template <typename T>
+ constexpr bool operator>(code_point a, T b)
+ {
+ return static_cast<uint32_t>(a) > static_cast<uint32_t>(b);
+ }
+ template <typename T>
+ constexpr bool operator<=(code_point a, T b)
+ {
+ return static_cast<uint32_t>(a) <= static_cast<uint32_t>(b);
+ }
+ template <typename T>
+ constexpr bool operator>=(code_point a, T b)
+ {
+ return static_cast<uint32_t>(a) >= static_cast<uint32_t>(b);
+ }
+
+ namespace detail {
+ static constexpr const uint16_t lead_surrogate_min = 0xd800;
+ static constexpr const uint16_t lead_surrogate_max = 0xdbff;
+ static constexpr const uint16_t trail_surrogate_min = 0xdc00;
+ static constexpr const uint16_t trail_surrogate_max = 0xdfff;
+ static constexpr const uint16_t lead_offset =
+ lead_surrogate_min - (0x10000u >> 10);
+ static constexpr const uint32_t surrogate_offset =
+ 0x10000u - (lead_surrogate_min << 10) - trail_surrogate_min;
+ static constexpr const uint32_t code_point_max = 0x10ffff;
+
+ template <typename Octet>
+ constexpr uint8_t mask8(Octet o)
+ {
+ return static_cast<uint8_t>(0xff & o);
+ }
+ template <typename U16>
+ constexpr uint16_t mask16(U16 v)
+ {
+ return static_cast<uint16_t>(0xffff & v);
+ }
+ template <typename U16>
+ constexpr bool is_lead_surrogate(U16 cp)
+ {
+ return cp >= lead_surrogate_min && cp <= lead_surrogate_max;
+ }
+ template <typename U16>
+ constexpr bool is_trail_surrogate(U16 cp)
+ {
+ return cp >= trail_surrogate_min && cp <= trail_surrogate_max;
+ }
+ template <typename U16>
+ constexpr bool is_surrogate(U16 cp)
+ {
+ return cp >= lead_surrogate_min && cp <= trail_surrogate_max;
+ }
+
+ constexpr inline bool is_code_point_valid(code_point cp)
+ {
+ return cp <= code_point_max && !is_surrogate(cp);
+ }
+ } // namespace detail
+
+ template <typename T>
+ constexpr code_point make_code_point(T ch)
+ {
+ return static_cast<code_point>(ch);
+ }
+
+ /**
+ * Returns `true`, if `cp` is valid, e.g. is less than or equal to the
+ * maximum value for a code point (U+10FFFF), and is not a surrogate (U+D800
+ * to U+DFFF).
+ */
+ constexpr inline bool is_valid_code_point(code_point cp)
+ {
+ return detail::is_code_point_valid(cp);
+ }
+ /**
+ * Returns `true` if `cp` can be encoded in ASCII as-is (is between U+0 and
+ * U+7F)
+ */
+ constexpr inline bool is_ascii_code_point(code_point cp)
+ {
+ return cp <= 0x7f;
+ }
+
+ SCN_END_NAMESPACE
+} // namespace scn
+
+#endif