From f215e02bf85f68d3a6106c2a1f4f7f063f819064 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 11 Apr 2024 10:17:27 +0200 Subject: Adding upstream version 7.0.14-dfsg. Signed-off-by: Daniel Baumann --- src/VBox/Runtime/common/string/utf-16-case.cpp | 449 +++++++++++++++++++++++++ 1 file changed, 449 insertions(+) create mode 100644 src/VBox/Runtime/common/string/utf-16-case.cpp (limited to 'src/VBox/Runtime/common/string/utf-16-case.cpp') diff --git a/src/VBox/Runtime/common/string/utf-16-case.cpp b/src/VBox/Runtime/common/string/utf-16-case.cpp new file mode 100644 index 00000000..2f21c833 --- /dev/null +++ b/src/VBox/Runtime/common/string/utf-16-case.cpp @@ -0,0 +1,449 @@ +/* $Id: utf-16-case.cpp $ */ +/** @file + * IPRT - UTF-16, Case Sensitivity. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include +#include "internal/iprt.h" + +#include +#include +#include +#include +#include +#include "internal/string.h" + + +RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2) +{ + if (pwsz1 == pwsz2) + return 0; + if (!pwsz1) + return -1; + if (!pwsz2) + return 1; + + PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */ + for (;;) + { + RTUTF16 wc1 = *pwsz1; + RTUTF16 wc2 = *pwsz2; + int iDiff = wc1 - wc2; + if (iDiff) + { + /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */ + if ( wc1 < 0xd800 + || wc2 < 0xd800 + || wc1 > 0xdfff + || wc2 > 0xdfff) + { + /* simple UCS-2 char */ + iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2); + if (iDiff) + iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2); + } + else + { + /* a damned pair */ + RTUNICP uc1; + RTUNICP uc2; + if (wc1 >= 0xdc00) + { + if (pwsz1Start == pwsz1) + return iDiff; + uc1 = pwsz1[-1]; + if (uc1 < 0xd800 || uc1 >= 0xdc00) + return iDiff; + uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff)); + uc2 = 0x10000 + (((pwsz2[-1] & 0x3ff) << 10) | (wc2 & 0x3ff)); + } + else + { + uc1 = *++pwsz1; + if (uc1 < 0xdc00 || uc1 >= 0xe000) + return iDiff; + uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff)); + uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (*++pwsz2 & 0x3ff)); + } + iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2); + if (iDiff) + iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */ + } + if (iDiff) + return iDiff; + } + if (!wc1) + return 0; + pwsz1++; + pwsz2++; + } +} +RT_EXPORT_SYMBOL(RTUtf16ICmp); + + +RTDECL(int) RTUtf16BigICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2) +{ + if (pwsz1 == pwsz2) + return 0; + if (!pwsz1) + return -1; + if (!pwsz2) + return 1; + + PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */ + for (;;) + { + RTUTF16 wc1 = *pwsz1; + RTUTF16 wc2 = *pwsz2; + int iDiff = wc1 - wc2; + if (iDiff) + { + /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */ + wc1 = RT_BE2H_U16(wc1); + wc2 = RT_BE2H_U16(wc2); + if ( wc1 < 0xd800 + || wc2 < 0xd800 + || wc1 > 0xdfff + || wc2 > 0xdfff) + { + /* simple UCS-2 char */ + iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2); + if (iDiff) + iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2); + } + else + { + /* a damned pair */ + RTUNICP uc1; + RTUNICP uc2; + if (wc1 >= 0xdc00) + { + if (pwsz1Start == pwsz1) + return iDiff; + uc1 = RT_BE2H_U16(pwsz1[-1]); + if (uc1 < 0xd800 || uc1 >= 0xdc00) + return iDiff; + uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff)); + uc2 = 0x10000 + (((RT_BE2H_U16(pwsz2[-1]) & 0x3ff) << 10) | (wc2 & 0x3ff)); + } + else + { + RTUTF16 wcTmp = *++pwsz1; + uc1 = RT_BE2H_U16(wcTmp); + if (uc1 < 0xdc00 || uc1 >= 0xe000) + return iDiff; + uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff)); + wcTmp = *++pwsz2; + uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (RT_BE2H_U16(wcTmp) & 0x3ff)); + } + iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2); + if (iDiff) + iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */ + } + if (iDiff) + return iDiff; + } + if (!wc1) + return 0; + pwsz1++; + pwsz2++; + } +} +RT_EXPORT_SYMBOL(RTUtf16BigICmp); + + +RTDECL(int) RTUtf16ICmpUtf8(PCRTUTF16 pwsz1, const char *psz2) +{ + /* + * NULL and empty strings are all the same. + */ + if (!pwsz1) + return !psz2 || !*psz2 ? 0 : -1; + if (!psz2) + return !*pwsz1 ? 0 : 1; + + /* + * Compare with a UTF-8 string by enumerating them char by char. + */ + for (;;) + { + RTUNICP uc1; + int rc = RTUtf16GetCpEx(&pwsz1, &uc1); + AssertRCReturn(rc, 1); + + RTUNICP uc2; + rc = RTStrGetCpEx(&psz2, &uc2); + AssertRCReturn(rc, -1); + if (uc1 == uc2) + { + if (uc1) + continue; + return 0; + } + + if (RTUniCpToUpper(uc1) == RTUniCpToUpper(uc2)) + continue; + if (RTUniCpToLower(uc1) == RTUniCpToLower(uc2)) + continue; + return uc1 < uc2 ? -1 : 1; + } +} +RT_EXPORT_SYMBOL(RTUtf16CmpIUtf8); + + +RTDECL(int) RTUtf16NICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2, size_t cwcMax) +{ + if (pwsz1 == pwsz2) + return 0; + if (!pwsz1) + return -1; + if (!pwsz2) + return 1; + + PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */ + while (cwcMax-- > 0) + { + RTUTF16 wc1 = *pwsz1; + RTUTF16 wc2 = *pwsz2; + int iDiff = wc1 - wc2; + if (iDiff) + { + /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */ + if ( wc1 < 0xd800 + || wc2 < 0xd800 + || wc1 > 0xdfff + || wc2 > 0xdfff) + { + /* simple UCS-2 char */ + iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2); + if (iDiff) + iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2); + } + else + { + /* a damned pair */ + RTUNICP uc1; + RTUNICP uc2; + if (wc1 >= 0xdc00) + { + if (pwsz1Start == pwsz1) + return iDiff; + uc1 = pwsz1[-1]; + if (uc1 < 0xd800 || uc1 >= 0xdc00) + return iDiff; + uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff)); + uc2 = 0x10000 + (((pwsz2[-1] & 0x3ff) << 10) | (wc2 & 0x3ff)); + } + else if (cwcMax-- > 0) + { + uc1 = *++pwsz1; + if (uc1 < 0xdc00 || uc1 >= 0xe000) + return iDiff; + uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff)); + uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (*++pwsz2 & 0x3ff)); + } + else + { + iDiff = wc1 - wc2; + return iDiff; + } + iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2); + if (iDiff) + iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */ + } + if (iDiff) + return iDiff; + } + if (!wc1) + return 0; + pwsz1++; + pwsz2++; + } + return 0; +} +RT_EXPORT_SYMBOL(RTUtf16NICmp); + + +RTDECL(int) RTUtf16BigNICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2, size_t cwcMax) +{ + if (pwsz1 == pwsz2) + return 0; + if (!pwsz1) + return -1; + if (!pwsz2) + return 1; + + PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */ + while (cwcMax-- > 0) + { + RTUTF16 wc1 = *pwsz1; + RTUTF16 wc2 = *pwsz2; + int iDiff = wc1 - wc2; + if (iDiff) + { + /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */ + wc1 = RT_BE2H_U16(wc1); + wc2 = RT_BE2H_U16(wc2); + if ( wc1 < 0xd800 + || wc2 < 0xd800 + || wc1 > 0xdfff + || wc2 > 0xdfff) + { + /* simple UCS-2 char */ + iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2); + if (iDiff) + iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2); + } + else + { + /* a damned pair */ + RTUNICP uc1; + RTUNICP uc2; + if (wc1 >= 0xdc00) + { + if (pwsz1Start == pwsz1) + return iDiff; + uc1 = RT_BE2H_U16(pwsz1[-1]); + if (uc1 < 0xd800 || uc1 >= 0xdc00) + return iDiff; + uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff)); + uc2 = 0x10000 + (((RT_BE2H_U16(pwsz2[-1]) & 0x3ff) << 10) | (wc2 & 0x3ff)); + } + else if (cwcMax > 0) + { + RTUTF16 wcTmp = *++pwsz1; + uc1 = RT_BE2H_U16(wcTmp); + if (uc1 < 0xdc00 || uc1 >= 0xe000) + return iDiff; + uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff)); + wcTmp = *++pwsz2; + uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (RT_BE2H_U16(wcTmp) & 0x3ff)); + } + else + { + iDiff = wc1 - wc2; + return iDiff; + } + iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2); + if (iDiff) + iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */ + } + if (iDiff) + return iDiff; + } + if (!wc1) + return 0; + pwsz1++; + pwsz2++; + } + return 0; +} +RT_EXPORT_SYMBOL(RTUtf16BigNICmp); + + +RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz) +{ + PRTUTF16 pwc = pwsz; + for (;;) + { + RTUTF16 wc = *pwc; + if (!wc) + break; + if (wc < 0xd800 || wc >= 0xdc00) + { + RTUNICP ucFolded = RTUniCpToLower(wc); + if (ucFolded < 0x10000) + *pwc++ = RTUniCpToLower(wc); + } + else + { + /* surrogate */ + RTUTF16 wc2 = pwc[1]; + if (wc2 >= 0xdc00 && wc2 <= 0xdfff) + { + RTUNICP uc = 0x10000 + (((wc & 0x3ff) << 10) | (wc2 & 0x3ff)); + RTUNICP ucFolded = RTUniCpToLower(uc); + if (uc != ucFolded && ucFolded >= 0x10000) /* we don't support shrinking the string */ + { + uc -= 0x10000; + *pwc++ = 0xd800 | (uc >> 10); + *pwc++ = 0xdc00 | (uc & 0x3ff); + } + } + else /* invalid encoding. */ + pwc++; + } + } + return pwsz; +} +RT_EXPORT_SYMBOL(RTUtf16ToLower); + + +RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz) +{ + PRTUTF16 pwc = pwsz; + for (;;) + { + RTUTF16 wc = *pwc; + if (!wc) + break; + if (wc < 0xd800 || wc >= 0xdc00) + *pwc++ = RTUniCpToUpper(wc); + else + { + /* surrogate */ + RTUTF16 wc2 = pwc[1]; + if (wc2 >= 0xdc00 && wc2 <= 0xdfff) + { + RTUNICP uc = 0x10000 + (((wc & 0x3ff) << 10) | (wc2 & 0x3ff)); + RTUNICP ucFolded = RTUniCpToUpper(uc); + if (uc != ucFolded && ucFolded >= 0x10000) /* we don't support shrinking the string */ + { + uc -= 0x10000; + *pwc++ = 0xd800 | (uc >> 10); + *pwc++ = 0xdc00 | (uc & 0x3ff); + } + } + else /* invalid encoding. */ + pwc++; + } + } + return pwsz; +} +RT_EXPORT_SYMBOL(RTUtf16ToUpper); + -- cgit v1.2.3