1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
|
diff --git a/src/csutil.cxx b/src/csutil.cxx
--- a/src/csutil.cxx
+++ b/src/csutil.cxx
@@ -90,21 +90,17 @@
#else
#ifndef MOZILLA_CLIENT
#include "utf_info.hxx"
#define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info)))
#endif
#endif
#ifdef MOZILLA_CLIENT
-#include "nsCOMPtr.h"
-#include "nsUnicharUtils.h"
-#include "mozilla/Encoding.h"
-
-using namespace mozilla;
+#include "mozHunspellRLBoxGlue.h"
#endif
struct unicode_info2 {
char cletter;
unsigned short cupper;
unsigned short clower;
};
@@ -2277,101 +2273,18 @@ struct cs_info* get_current_cs(const std
"error: unknown encoding %s: using %s as fallback\n", es.c_str(),
encds[0].enc_name);
ccs = encds[0].cs_table;
}
return ccs;
}
#else
-// XXX This function was rewritten for mozilla. Instead of storing the
-// conversion tables static in this file, create them when needed
-// with help the mozilla backend.
struct cs_info* get_current_cs(const std::string& es) {
- struct cs_info* ccs = new cs_info[256];
- // Initialze the array with dummy data so that we wouldn't need
- // to return null in case of failures.
- for (int i = 0; i <= 0xff; ++i) {
- ccs[i].ccase = false;
- ccs[i].clower = i;
- ccs[i].cupper = i;
- }
-
- auto encoding = Encoding::ForLabelNoReplacement(es);
- if (!encoding) {
- return ccs;
- }
- auto encoder = encoding->NewEncoder();
- auto decoder = encoding->NewDecoderWithoutBOMHandling();
-
- for (unsigned int i = 0; i <= 0xff; ++i) {
- bool success = false;
- // We want to find the upper/lowercase equivalents of each byte
- // in this 1-byte character encoding. Call our encoding/decoding
- // APIs separately for each byte since they may reject some of the
- // bytes, and we want to handle errors separately for each byte.
- uint8_t lower, upper;
- do {
- if (i == 0)
- break;
- uint8_t source = uint8_t(i);
- char16_t uni[2];
- char16_t uniCased;
- uint8_t destination[4];
- auto src1 = MakeSpan(&source, 1);
- auto dst1 = MakeSpan(uni);
- auto src2 = MakeSpan(&uniCased, 1);
- auto dst2 = MakeSpan(destination);
-
- uint32_t result;
- size_t read;
- size_t written;
- Tie(result, read, written) =
- decoder->DecodeToUTF16WithoutReplacement(src1, dst1, true);
- if (result != kInputEmpty || read != 1 || written != 1) {
- break;
- }
-
- uniCased = ToLowerCase(uni[0]);
- Tie(result, read, written) =
- encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true);
- if (result != kInputEmpty || read != 1 || written != 1) {
- break;
- }
- lower = destination[0];
-
- uniCased = ToUpperCase(uni[0]);
- Tie(result, read, written) =
- encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true);
- if (result != kInputEmpty || read != 1 || written != 1) {
- break;
- }
- upper = destination[0];
-
- success = true;
- } while (0);
-
- encoding->NewEncoderInto(*encoder);
- encoding->NewDecoderWithoutBOMHandlingInto(*decoder);
-
- if (success) {
- ccs[i].cupper = upper;
- ccs[i].clower = lower;
- } else {
- ccs[i].cupper = i;
- ccs[i].clower = i;
- }
-
- if (ccs[i].clower != (unsigned char)i)
- ccs[i].ccase = true;
- else
- ccs[i].ccase = false;
- }
-
- return ccs;
+ return moz_hunspell_GetCurrentCS(es.c_str());
}
#endif
// primitive isalpha() replacement for tokenization
std::string get_casechars(const char* enc) {
struct cs_info* csconv = get_current_cs(enc);
std::string expw;
for (int i = 0; i <= 255; ++i) {
@@ -2455,34 +2368,34 @@ unsigned short unicodetoupper(unsigned s
// There are a dotless lower case i pair of upper `I',
// and an upper I with dot pair of lower `i'.
if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh)))
return 0x0130;
#ifdef OPENOFFICEORG
return static_cast<unsigned short>(u_toupper(c));
#else
#ifdef MOZILLA_CLIENT
- return ToUpperCase((char16_t)c);
+ return moz_hunspell_ToUpperCase((char16_t)c);
#else
return (utf_tbl) ? utf_tbl[c].cupper : c;
#endif
#endif
}
unsigned short unicodetolower(unsigned short c, int langnum) {
// In Azeri and Turkish, I and i dictinct letters:
// There are a dotless lower case i pair of upper `I',
// and an upper I with dot pair of lower `i'.
if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh)))
return 0x0131;
#ifdef OPENOFFICEORG
return static_cast<unsigned short>(u_tolower(c));
#else
#ifdef MOZILLA_CLIENT
- return ToLowerCase((char16_t)c);
+ return moz_hunspell_ToLowerCase((char16_t)c);
#else
return (utf_tbl) ? utf_tbl[c].clower : c;
#endif
#endif
}
int unicodeisalpha(unsigned short c) {
#ifdef OPENOFFICEORG
diff --git a/src/csutil.hxx b/src/csutil.hxx
--- a/src/csutil.hxx
+++ b/src/csutil.hxx
@@ -77,20 +77,16 @@
#include <fstream>
#include <string>
#include <vector>
#include <string.h>
#include "w_char.hxx"
#include "htypes.hxx"
-#ifdef MOZILLA_CLIENT
-#include "nscore.h" // for mozalloc headers
-#endif
-
// casing
#define NOCAP 0
#define INITCAP 1
#define ALLCAP 2
#define HUHCAP 3
#define HUHINITCAP 4
// default encoding and keystring
|