diff options
Diffstat (limited to 'intl/unicharutil/util/nsUnicharUtils.h')
-rw-r--r-- | intl/unicharutil/util/nsUnicharUtils.h | 167 |
1 files changed, 167 insertions, 0 deletions
diff --git a/intl/unicharutil/util/nsUnicharUtils.h b/intl/unicharutil/util/nsUnicharUtils.h new file mode 100644 index 0000000000..693e192e41 --- /dev/null +++ b/intl/unicharutil/util/nsUnicharUtils.h @@ -0,0 +1,167 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsUnicharUtils_h__ +#define nsUnicharUtils_h__ + +#include "nsString.h" + +/* (0x3131u <= (u) && (u) <= 0x318eu) => Hangul Compatibility Jamo */ +/* (0xac00u <= (u) && (u) <= 0xd7a3u) => Hangul Syllables */ +#define IS_CJ_CHAR(u) \ + ((0x2e80u <= (u) && (u) <= 0x312fu) || (0x3190u <= (u) && (u) <= 0xabffu) || \ + (0xf900u <= (u) && (u) <= 0xfaffu) || (0xff00u <= (u) && (u) <= 0xffefu)) + +#define IS_ZERO_WIDTH_SPACE(u) ((u) == 0x200B) + +#define IS_ASCII(u) ((u) < 0x80) +#define IS_ASCII_UPPER(u) (('A' <= (u)) && ((u) <= 'Z')) +#define IS_ASCII_LOWER(u) (('a' <= (u)) && ((u) <= 'z')) +#define IS_ASCII_ALPHA(u) (IS_ASCII_UPPER(u) || IS_ASCII_LOWER(u)) +#define IS_ASCII_SPACE(u) (' ' == (u)) + +void ToLowerCase(nsAString& aString); +void ToLowerCaseASCII(nsAString& aString); +void ToUpperCase(nsAString& aString); + +void ToLowerCase(const nsAString& aSource, nsAString& aDest); +void ToLowerCaseASCII(const nsAString& aSource, nsAString& aDest); +void ToUpperCase(const nsAString& aSource, nsAString& aDest); + +uint32_t ToLowerCase(uint32_t aChar); +uint32_t ToUpperCase(uint32_t aChar); +uint32_t ToTitleCase(uint32_t aChar); + +void ToLowerCase(const char16_t* aIn, char16_t* aOut, size_t aLen); +void ToLowerCaseASCII(const char16_t* aIn, char16_t* aOut, size_t aLen); +void ToUpperCase(const char16_t* aIn, char16_t* aOut, size_t aLen); + +char ToLowerCaseASCII(const char aChar); +char16_t ToLowerCaseASCII(const char16_t aChar); +char32_t ToLowerCaseASCII(const char32_t aChar); + +char ToUpperCaseASCII(const char aChar); +char16_t ToUpperCaseASCII(const char16_t aChar); +char32_t ToUpperCaseASCII(const char32_t aChar); + +inline bool IsUpperCase(uint32_t c) { return ToLowerCase(c) != c; } + +inline bool IsLowerCase(uint32_t c) { return ToUpperCase(c) != c; } + +#ifdef MOZILLA_INTERNAL_API + +uint32_t ToFoldedCase(uint32_t aChar); +void ToFoldedCase(nsAString& aString); +void ToFoldedCase(const char16_t* aIn, char16_t* aOut, size_t aLen); + +uint32_t ToNaked(uint32_t aChar); +void ToNaked(nsAString& aString); + +int32_t nsCaseInsensitiveStringComparator(const char16_t*, const char16_t*, + size_t, size_t); + +int32_t nsCaseInsensitiveUTF8StringComparator(const char*, const char*, size_t, + size_t); + +class nsCaseInsensitiveStringArrayComparator { + public: + template <class A, class B> + bool Equals(const A& a, const B& b) const { + return a.Equals(b, nsCaseInsensitiveStringComparator); + } +}; + +int32_t nsASCIICaseInsensitiveStringComparator(const char16_t*, const char16_t*, + size_t, size_t); + +inline bool CaseInsensitiveFindInReadable( + const nsAString& aPattern, nsAString::const_iterator& aSearchStart, + nsAString::const_iterator& aSearchEnd) { + return FindInReadable(aPattern, aSearchStart, aSearchEnd, + nsCaseInsensitiveStringComparator); +} + +inline bool CaseInsensitiveFindInReadable(const nsAString& aPattern, + const nsAString& aHay) { + nsAString::const_iterator searchBegin, searchEnd; + return FindInReadable(aPattern, aHay.BeginReading(searchBegin), + aHay.EndReading(searchEnd), + nsCaseInsensitiveStringComparator); +} + +#endif // MOZILLA_INTERNAL_API + +int32_t CaseInsensitiveCompare(const char16_t* a, const char16_t* b, + size_t len); + +int32_t CaseInsensitiveCompare(const char* aLeft, const char* aRight, + size_t aLeftBytes, size_t aRightBytes); + +/** + * Calculates the lower-case of the codepoint of the UTF8 sequence starting at + * aStr. Sets aNext to the byte following the end of the sequence. + * + * If the sequence is invalid, or if computing the codepoint would take us off + * the end of the string (as marked by aEnd), returns -1 and does not set + * aNext. Note that this function doesn't check that aStr < aEnd -- it assumes + * you've done that already. + */ +uint32_t GetLowerUTF8Codepoint(const char* aStr, const char* aEnd, + const char** aNext); + +/** + * This function determines whether the UTF-8 sequence pointed to by aLeft is + * case insensitively equal to the UTF-8 sequence pointed to by aRight (or + * optionally, case and diacritic insensitively equal), as defined by having + * matching (naked) lower-cased codepoints. + * + * aLeftEnd marks the first memory location past aLeft that is not part of + * aLeft; aRightEnd similarly marks the end of aRight. + * + * The function assumes that aLeft < aLeftEnd and aRight < aRightEnd. + * + * The function stores the addresses of the next characters in the sequence + * into aLeftNext and aRightNext. It's up to the caller to make sure that the + * returned pointers are valid -- i.e. the function may return aLeftNext >= + * aLeftEnd or aRightNext >= aRightEnd. + * + * If the function encounters invalid text, it sets aErr to true and returns + * false, possibly leaving aLeftNext and aRightNext uninitialized. If the + * function returns true, aErr is guaranteed to be false and both aLeftNext and + * aRightNext are guaranteed to be initialized. + * + * If aMatchDiacritics is false, the comparison is neither case-sensitive nor + * diacritic-sensitive. + */ +bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight, + const char* aLeftEnd, const char* aRightEnd, + const char** aLeftNext, + const char** aRightNext, bool* aErr, + bool aMatchDiacritics = true); + +namespace mozilla { + +/** + * Hash a UTF8 string as though it were a UTF16 string. + * + * The value returned is the same as if we converted the string to UTF16 and + * then ran HashString() on the result. + * + * The given |length| is in bytes. + */ +uint32_t HashUTF8AsUTF16(const char* aUTF8, size_t aLength, bool* aErr); + +bool IsSegmentBreakSkipChar(uint32_t u); + +/** + * Return true for all Punctuation categories (Unicode general category P?), + * and also for Symbol categories (S?) except for Modifier Symbol, which is + * kept together with any adjacent letter/number. (Bug 1066756) + */ +bool IsPunctuationForWordSelect(char16_t aCh); + +} // namespace mozilla + +#endif /* nsUnicharUtils_h__ */ |