From 62e4c68907d8d33709c2c1f92a161dff00b3d5f2 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 22:01:36 +0200 Subject: Adding upstream version 0.11.2. Signed-off-by: Daniel Baumann --- .../scnlib/include/scn/unicode/common.h | 139 +++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 src/third-party/scnlib/include/scn/unicode/common.h (limited to 'src/third-party/scnlib/include/scn/unicode/common.h') diff --git a/src/third-party/scnlib/include/scn/unicode/common.h b/src/third-party/scnlib/include/scn/unicode/common.h new file mode 100644 index 0000000..3807793 --- /dev/null +++ b/src/third-party/scnlib/include/scn/unicode/common.h @@ -0,0 +1,139 @@ +// Copyright 2017 Elias Kosunen +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is a part of scnlib: +// https://github.com/eliaskosunen/scnlib +// +// The contents of this file are based on utfcpp: +// https://github.com/nemtrif/utfcpp +// Copyright (c) 2006 Nemanja Trifunovic +// Distributed under the Boost Software License, version 1.0 + +#ifndef SCN_UNICODE_COMMON_H +#define SCN_UNICODE_COMMON_H + +#include "../detail/fwd.h" + +#include + +namespace scn { + SCN_BEGIN_NAMESPACE + + /** + * A Unicode code point + */ + enum class code_point : uint32_t {}; + + template + constexpr bool operator==(code_point a, T b) + { + return static_cast(a) == static_cast(b); + } + template + constexpr bool operator!=(code_point a, T b) + { + return static_cast(a) != static_cast(b); + } + template + constexpr bool operator<(code_point a, T b) + { + return static_cast(a) < static_cast(b); + } + template + constexpr bool operator>(code_point a, T b) + { + return static_cast(a) > static_cast(b); + } + template + constexpr bool operator<=(code_point a, T b) + { + return static_cast(a) <= static_cast(b); + } + template + constexpr bool operator>=(code_point a, T b) + { + return static_cast(a) >= static_cast(b); + } + + namespace detail { + static constexpr const uint16_t lead_surrogate_min = 0xd800; + static constexpr const uint16_t lead_surrogate_max = 0xdbff; + static constexpr const uint16_t trail_surrogate_min = 0xdc00; + static constexpr const uint16_t trail_surrogate_max = 0xdfff; + static constexpr const uint16_t lead_offset = + lead_surrogate_min - (0x10000u >> 10); + static constexpr const uint32_t surrogate_offset = + 0x10000u - (lead_surrogate_min << 10) - trail_surrogate_min; + static constexpr const uint32_t code_point_max = 0x10ffff; + + template + constexpr uint8_t mask8(Octet o) + { + return static_cast(0xff & o); + } + template + constexpr uint16_t mask16(U16 v) + { + return static_cast(0xffff & v); + } + template + constexpr bool is_lead_surrogate(U16 cp) + { + return cp >= lead_surrogate_min && cp <= lead_surrogate_max; + } + template + constexpr bool is_trail_surrogate(U16 cp) + { + return cp >= trail_surrogate_min && cp <= trail_surrogate_max; + } + template + constexpr bool is_surrogate(U16 cp) + { + return cp >= lead_surrogate_min && cp <= trail_surrogate_max; + } + + constexpr inline bool is_code_point_valid(code_point cp) + { + return cp <= code_point_max && !is_surrogate(cp); + } + } // namespace detail + + template + constexpr code_point make_code_point(T ch) + { + return static_cast(ch); + } + + /** + * Returns `true`, if `cp` is valid, e.g. is less than or equal to the + * maximum value for a code point (U+10FFFF), and is not a surrogate (U+D800 + * to U+DFFF). + */ + constexpr inline bool is_valid_code_point(code_point cp) + { + return detail::is_code_point_valid(cp); + } + /** + * Returns `true` if `cp` can be encoded in ASCII as-is (is between U+0 and + * U+7F) + */ + constexpr inline bool is_ascii_code_point(code_point cp) + { + return cp <= 0x7f; + } + + SCN_END_NAMESPACE +} // namespace scn + +#endif -- cgit v1.2.3