intl/unicharutil/util/nsUnicharUtils.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsUnicharUtils_h__
#define nsUnicharUtils_h__

#include "nsString.h"

/*
 * IS_CJ_CHAR(u) is true when u lies in one of four inclusive ranges:
 *   0x2e80..0x312f, 0x3190..0xabff, 0xf900..0xfaff, 0xff00..0xffef.
 *
 * Two Hangul blocks fall in the gaps between those ranges and are therefore
 * NOT matched by this macro:
 *   0x3131..0x318e  Hangul Compatibility Jamo
 *   0xac00..0xd7a3  Hangul Syllables
 *
 * The argument is expanded more than once, so it must be free of side effects.
 */
#define IS_CJ_CHAR(u)                    \
  (((u) >= 0x2e80u && (u) <= 0x312fu) || \
   ((u) >= 0x3190u && (u) <= 0xabffu) || \
   ((u) >= 0xf900u && (u) <= 0xfaffu) || \
   ((u) >= 0xff00u && (u) <= 0xffefu))

/* True only when u equals 0x200B; every other value yields false. */
#define IS_ZERO_WIDTH_SPACE(u) (0x200B == (u))

/*
 * ASCII classification helpers.
 *
 * IS_ASCII only checks the upper bound (u < 0x80); it applies no lower bound,
 * so a negative signed argument also satisfies it.
 * IS_ASCII_UPPER / IS_ASCII_LOWER test the inclusive ranges 'A'..'Z' and
 * 'a'..'z'; IS_ASCII_ALPHA is the union of the two.
 * IS_ASCII_SPACE matches the single character ' ' and nothing else.
 *
 * Arguments may be expanded more than once; do not pass expressions with
 * side effects.
 */
#define IS_ASCII(u) (0x80 > (u))
#define IS_ASCII_UPPER(u) (((u) >= 'A') && ((u) <= 'Z'))
#define IS_ASCII_LOWER(u) (((u) >= 'a') && ((u) <= 'z'))
#define IS_ASCII_ALPHA(u) ((IS_ASCII_UPPER(u)) || (IS_ASCII_LOWER(u)))
#define IS_ASCII_SPACE(u) ((u) == ' ')

// Single-argument string overloads: each takes one mutable nsAString
// reference and returns nothing.
// NOTE(review): presumably the conversion is applied to aString in place --
// the definitions are not in this header; confirm against the implementation.
void ToLowerCase(nsAString& aString);
void ToLowerCaseASCII(nsAString& aString);
void ToUpperCase(nsAString& aString);

// Source/destination string overloads: aSource is read-only, aDest is
// mutable, nothing is returned.
// NOTE(review): presumably aDest receives the converted copy of aSource;
// whether aSource and aDest may alias is not stated here -- confirm.
void ToLowerCase(const nsAString& aSource, nsAString& aDest);
void ToLowerCaseASCII(const nsAString& aSource, nsAString& aDest);
void ToUpperCase(const nsAString& aSource, nsAString& aDest);

// Scalar overloads: take a uint32_t and return a uint32_t.
// IsUpperCase()/IsLowerCase() in this header treat a value as cased when the
// result of ToLowerCase()/ToUpperCase() differs from the input.
// NOTE(review): presumably aChar is a Unicode codepoint and the result is its
// lower/upper/title-case mapping -- confirm against the implementation.
uint32_t ToLowerCase(uint32_t aChar);
uint32_t ToUpperCase(uint32_t aChar);
uint32_t ToTitleCase(uint32_t aChar);

// Raw-buffer overloads: aIn is a read-only char16_t buffer, aOut a writable
// one, and aLen a size shared by both.
// NOTE(review): presumably aLen counts char16_t units (not bytes) and aOut
// must have room for aLen units -- confirm against the implementation.
void ToLowerCase(const char16_t* aIn, char16_t* aOut, size_t aLen);
void ToLowerCaseASCII(const char16_t* aIn, char16_t* aOut, size_t aLen);
void ToUpperCase(const char16_t* aIn, char16_t* aOut, size_t aLen);

// Per-character ASCII lower-casing for char, char16_t and char32_t; the
// return type always matches the argument type.
// NOTE(review): presumably only 'A'..'Z' are changed and every other value is
// returned as-is -- confirm against the implementation.
char ToLowerCaseASCII(const char aChar);
char16_t ToLowerCaseASCII(const char16_t aChar);
char32_t ToLowerCaseASCII(const char32_t aChar);

// Per-character ASCII upper-casing for char, char16_t and char32_t; the
// return type always matches the argument type.
// NOTE(review): presumably only 'a'..'z' are changed and every other value is
// returned as-is -- confirm against the implementation.
char ToUpperCaseASCII(const char aChar);
char16_t ToUpperCaseASCII(const char16_t aChar);
char32_t ToUpperCaseASCII(const char32_t aChar);

// A value is reported as upper case exactly when lower-casing it via
// ToLowerCase(uint32_t) produces a different value.
inline bool IsUpperCase(uint32_t c) {
  const uint32_t lowered = ToLowerCase(c);
  return lowered != c;
}

// A value is reported as lower case exactly when upper-casing it via
// ToUpperCase(uint32_t) produces a different value.
inline bool IsLowerCase(uint32_t c) {
  const uint32_t raised = ToUpperCase(c);
  return raised != c;
}

#ifdef MOZILLA_INTERNAL_API

// ToFoldedCase overloads (only declared when MOZILLA_INTERNAL_API is defined):
// a scalar form (uint32_t in, uint32_t out), a single mutable-string form,
// and a raw char16_t buffer form with separate input and output pointers.
// NOTE(review): presumably this performs Unicode case folding, with the
// string form operating in place and aLen counting char16_t units -- confirm
// against the implementation.
uint32_t ToFoldedCase(uint32_t aChar);
void ToFoldedCase(nsAString& aString);
void ToFoldedCase(const char16_t* aIn, char16_t* aOut, size_t aLen);

// ToNaked overloads (only declared when MOZILLA_INTERNAL_API is defined): a
// scalar form (uint32_t in, uint32_t out) and a single mutable-string form.
// NOTE(review): the CaseInsensitiveUTF8CharsEqual() documentation in this
// header associates "naked" codepoints with diacritic-insensitive matching,
// so this presumably strips diacritics -- confirm against the implementation.
uint32_t ToNaked(uint32_t aChar);
void ToNaked(nsAString& aString);

// Comparator over two char16_t buffers, returning an int32_t. In this header
// it is passed as the comparator argument to Equals() and FindInReadable().
// NOTE(review): the parameters are unnamed; presumably they are
// (lhs, rhs, lhsLength, rhsLength) with a <0 / 0 / >0 result -- confirm
// against the implementation.
int32_t nsCaseInsensitiveStringComparator(const char16_t*, const char16_t*,
                                          size_t, size_t);

// Comparator over two char buffers, returning an int32_t.
// NOTE(review): the parameters are unnamed; presumably they are
// (lhs, rhs, lhsLength, rhsLength) over UTF-8 data with a <0 / 0 / >0
// result -- confirm against the implementation.
int32_t nsCaseInsensitiveUTF8StringComparator(const char*, const char*, size_t,
                                              size_t);

/**
 * Comparator object exposing a single Equals() predicate.
 *
 * Equals(a, b) defers to a.Equals(b, nsCaseInsensitiveStringComparator), so
 * the left-hand type must provide an Equals() member accepting the right-hand
 * value together with that comparator function.
 */
class nsCaseInsensitiveStringArrayComparator {
 public:
  template <typename Lhs, typename Rhs>
  bool Equals(const Lhs& a, const Rhs& b) const {
    const bool matches = a.Equals(b, nsCaseInsensitiveStringComparator);
    return matches;
  }
};

// Comparator over two char16_t buffers, returning an int32_t.
// NOTE(review): the parameters are unnamed; presumably they are
// (lhs, rhs, lhsLength, rhsLength) and only ASCII letters are treated
// case-insensitively -- confirm against the implementation.
int32_t nsASCIICaseInsensitiveStringComparator(const char16_t*, const char16_t*,
                                               size_t, size_t);

/**
 * Iterator-range overload: forwards aPattern and the caller's iterators to
 * FindInReadable(), supplying nsCaseInsensitiveStringComparator as the
 * comparator, and returns whatever FindInReadable() reports.
 *
 * aSearchStart and aSearchEnd are handed over by non-const reference; see
 * FindInReadable() for how (or whether) it adjusts them.
 */
inline bool CaseInsensitiveFindInReadable(
    const nsAString& aPattern, nsAString::const_iterator& aSearchStart,
    nsAString::const_iterator& aSearchEnd) {
  const bool found = FindInReadable(aPattern, aSearchStart, aSearchEnd,
                                    nsCaseInsensitiveStringComparator);
  return found;
}

inline bool CaseInsensitiveFindInReadable(const nsAString& aPattern,
                                          const nsAString& aHay) {
  nsAString::const_iterator searchBegin, searchEnd;
  return FindInReadable(aPattern, aHay.BeginReading(searchBegin),
                        aHay.EndReading(searchEnd),
                        nsCaseInsensitiveStringComparator);
}

#endif  // MOZILLA_INTERNAL_API

// Compares two char16_t buffers using a single shared length and returns an
// int32_t.
// NOTE(review): presumably len counts char16_t units and the result is
// <0 / 0 / >0 in the usual comparator sense -- confirm against the
// implementation.
int32_t CaseInsensitiveCompare(const char16_t* a, const char16_t* b,
                               size_t len);

// Compares two char buffers, each with its own length given in bytes
// (aLeftBytes / aRightBytes), and returns an int32_t.
// NOTE(review): presumably the buffers hold UTF-8 and the result is
// <0 / 0 / >0 in the usual comparator sense -- confirm against the
// implementation.
int32_t CaseInsensitiveCompare(const char* aLeft, const char* aRight,
                               size_t aLeftBytes, size_t aRightBytes);

/**
 * Calculates the lower-case of the codepoint of the UTF8 sequence starting at
 * aStr.  Sets aNext to the byte following the end of the sequence.
 *
 * If the sequence is invalid, or if computing the codepoint would take us off
 * the end of the string (as marked by aEnd), returns -1 and does not set
 * aNext.  Because the return type is unsigned, callers observe that error
 * value as uint32_t(-1) and must compare against it accordingly.
 *
 * Note that this function doesn't check that aStr < aEnd -- it assumes
 * you've done that already.
 */
uint32_t GetLowerUTF8Codepoint(const char* aStr, const char* aEnd,
                               const char** aNext);

/**
 * This function determines whether the UTF-8 sequence pointed to by aLeft is
 * case insensitively equal to the UTF-8 sequence pointed to by aRight (or
 * optionally, case and diacritic insensitively equal), as defined by having
 * matching (naked) lower-cased codepoints.
 *
 * aLeftEnd marks the first memory location past aLeft that is not part of
 * aLeft; aRightEnd similarly marks the end of aRight.
 *
 * The function assumes that aLeft < aLeftEnd and aRight < aRightEnd.
 *
 * The function stores the addresses of the next characters in the sequence
 * into aLeftNext and aRightNext.  It's up to the caller to make sure that the
 * returned pointers are valid -- i.e. the function may return aLeftNext >=
 * aLeftEnd or aRightNext >= aRightEnd.
 *
 * If the function encounters invalid text, it sets aErr to true and returns
 * false, possibly leaving aLeftNext and aRightNext uninitialized.  If the
 * function returns true, aErr is guaranteed to be false and both aLeftNext and
 * aRightNext are guaranteed to be initialized.
 *
 * aMatchDiacritics defaults to true, in which case the comparison is
 * case-insensitive only.  If aMatchDiacritics is false, the comparison is
 * neither case-sensitive nor diacritic-sensitive.
 */
bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
                                   const char* aLeftEnd, const char* aRightEnd,
                                   const char** aLeftNext,
                                   const char** aRightNext, bool* aErr,
                                   bool aMatchDiacritics = true);

namespace mozilla {

/**
 * Hash a UTF8 string as though it were a UTF16 string.
 *
 * The value returned is the same as if we converted the string to UTF16 and
 * then ran HashString() on the result.
 *
 * The given |aLength| is in bytes.
 *
 * NOTE(review): |aErr| is not described here; presumably it is an
 * out-parameter set to report invalid UTF-8 input -- confirm against the
 * implementation, including what is returned in that case.
 */
uint32_t HashUTF8AsUTF16(const char* aUTF8, size_t aLength, bool* aErr);

/**
 * Predicate over a single uint32_t value.
 *
 * NOTE(review): undocumented in this header; presumably |u| is a Unicode
 * codepoint and the result says whether a segment break adjacent to it may
 * be skipped -- confirm against the implementation.
 */
bool IsSegmentBreakSkipChar(uint32_t u);

/**
 * Return true for all Punctuation categories (Unicode general category P?),
 * and also for Symbol categories (S?) except for Modifier Symbol, which is
 * kept together with any adjacent letter/number. (Bug 1066756)
 *
 * Note that aCh is a char16_t, so this takes a single UTF-16 code unit
 * rather than a full 32-bit codepoint.
 */
bool IsPunctuationForWordSelect(char16_t aCh);

}  // namespace mozilla

#endif /* nsUnicharUtils_h__ */