diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /extensions/spellcheck/hunspell/patches | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
3 files changed, 842 insertions, 0 deletions
diff --git a/extensions/spellcheck/hunspell/patches/bug1410214.patch b/extensions/spellcheck/hunspell/patches/bug1410214.patch new file mode 100644 index 0000000000..4db781b5e7 --- /dev/null +++ b/extensions/spellcheck/hunspell/patches/bug1410214.patch @@ -0,0 +1,37 @@ +diff --git a/extensions/spellcheck/hunspell/src/filemgr.hxx b/extensions/spellcheck/hunspell/src/filemgr.hxx +--- a/extensions/spellcheck/hunspell/src/filemgr.hxx ++++ b/extensions/spellcheck/hunspell/src/filemgr.hxx +@@ -67,32 +67,11 @@ + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + /* file manager class - read lines of files [filename] OR [filename.hz] */ + #ifndef FILEMGR_HXX_ + #define FILEMGR_HXX_ + +-#include "hunzip.hxx" +-#include <stdio.h> +-#include <string> +-#include <fstream> +- +-class FileMgr { +- private: +- FileMgr(const FileMgr&); +- FileMgr& operator=(const FileMgr&); ++#include "mozHunspellRLBoxSandbox.h" + +- protected: +- std::ifstream fin; +- Hunzip* hin; +- char in[BUFSIZE + 50]; // input buffer +- int fail(const char* err, const char* par); +- int linenum; +- +- public: +- FileMgr(const char* filename, const char* key = NULL); +- ~FileMgr(); +- bool getline(std::string&); +- int getlinenum(); +-}; + #endif diff --git a/extensions/spellcheck/hunspell/patches/bug1653659.patch b/extensions/spellcheck/hunspell/patches/bug1653659.patch new file mode 100644 index 0000000000..ab3e752015 --- /dev/null +++ b/extensions/spellcheck/hunspell/patches/bug1653659.patch @@ -0,0 +1,190 @@ +diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx +--- a/extensions/spellcheck/hunspell/src/csutil.cxx ++++ b/extensions/spellcheck/hunspell/src/csutil.cxx +@@ -90,21 +90,17 @@ + #else + #ifndef MOZILLA_CLIENT + #include "utf_info.hxx" + #define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info))) + #endif + #endif + + #ifdef MOZILLA_CLIENT +-#include "nsCOMPtr.h" +-#include "nsUnicharUtils.h" +-#include "mozilla/Encoding.h" +- +-using namespace mozilla; ++#include "mozHunspellRLBoxGlue.h" + #endif + + struct unicode_info2 { + char cletter; + unsigned short cupper; + unsigned short clower; + }; + +@@ -2277,101 +2273,18 @@ struct cs_info* get_current_cs(const std + "error: unknown encoding %s: using %s as fallback\n", es.c_str(), + encds[0].enc_name); + ccs = encds[0].cs_table; + } + + return ccs; + } + #else +-// XXX This function was rewritten for mozilla. Instead of storing the +-// conversion tables static in this file, create them when needed +-// with help the mozilla backend. + struct cs_info* get_current_cs(const std::string& es) { +- struct cs_info* ccs = new cs_info[256]; +- // Initialze the array with dummy data so that we wouldn't need +- // to return null in case of failures. +- for (int i = 0; i <= 0xff; ++i) { +- ccs[i].ccase = false; +- ccs[i].clower = i; +- ccs[i].cupper = i; +- } +- +- auto encoding = Encoding::ForLabelNoReplacement(es); +- if (!encoding) { +- return ccs; +- } +- auto encoder = encoding->NewEncoder(); +- auto decoder = encoding->NewDecoderWithoutBOMHandling(); +- +- for (unsigned int i = 0; i <= 0xff; ++i) { +- bool success = false; +- // We want to find the upper/lowercase equivalents of each byte +- // in this 1-byte character encoding. Call our encoding/decoding +- // APIs separately for each byte since they may reject some of the +- // bytes, and we want to handle errors separately for each byte. +- uint8_t lower, upper; +- do { +- if (i == 0) +- break; +- uint8_t source = uint8_t(i); +- char16_t uni[2]; +- char16_t uniCased; +- uint8_t destination[4]; +- auto src1 = MakeSpan(&source, 1); +- auto dst1 = MakeSpan(uni); +- auto src2 = MakeSpan(&uniCased, 1); +- auto dst2 = MakeSpan(destination); +- +- uint32_t result; +- size_t read; +- size_t written; +- Tie(result, read, written) = +- decoder->DecodeToUTF16WithoutReplacement(src1, dst1, true); +- if (result != kInputEmpty || read != 1 || written != 1) { +- break; +- } +- +- uniCased = ToLowerCase(uni[0]); +- Tie(result, read, written) = +- encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); +- if (result != kInputEmpty || read != 1 || written != 1) { +- break; +- } +- lower = destination[0]; +- +- uniCased = ToUpperCase(uni[0]); +- Tie(result, read, written) = +- encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); +- if (result != kInputEmpty || read != 1 || written != 1) { +- break; +- } +- upper = destination[0]; +- +- success = true; +- } while (0); +- +- encoding->NewEncoderInto(*encoder); +- encoding->NewDecoderWithoutBOMHandlingInto(*decoder); +- +- if (success) { +- ccs[i].cupper = upper; +- ccs[i].clower = lower; +- } else { +- ccs[i].cupper = i; +- ccs[i].clower = i; +- } +- +- if (ccs[i].clower != (unsigned char)i) +- ccs[i].ccase = true; +- else +- ccs[i].ccase = false; +- } +- +- return ccs; ++ return moz_hunspell_GetCurrentCS(es.c_str()); + } + #endif + + // primitive isalpha() replacement for tokenization + std::string get_casechars(const char* enc) { + struct cs_info* csconv = get_current_cs(enc); + std::string expw; + for (int i = 0; i <= 255; ++i) { +@@ -2455,34 +2368,34 @@ unsigned short unicodetoupper(unsigned s + // There are a dotless lower case i pair of upper `I', + // and an upper I with dot pair of lower `i'. + if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh))) + return 0x0130; + #ifdef OPENOFFICEORG + return static_cast<unsigned short>(u_toupper(c)); + #else + #ifdef MOZILLA_CLIENT +- return ToUpperCase((char16_t)c); ++ return moz_hunspell_ToUpperCase((char16_t)c); + #else + return (utf_tbl) ? utf_tbl[c].cupper : c; + #endif + #endif + } + + unsigned short unicodetolower(unsigned short c, int langnum) { + // In Azeri and Turkish, I and i dictinct letters: + // There are a dotless lower case i pair of upper `I', + // and an upper I with dot pair of lower `i'. + if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh))) + return 0x0131; + #ifdef OPENOFFICEORG + return static_cast<unsigned short>(u_tolower(c)); + #else + #ifdef MOZILLA_CLIENT +- return ToLowerCase((char16_t)c); ++ return moz_hunspell_ToLowerCase((char16_t)c); + #else + return (utf_tbl) ? utf_tbl[c].clower : c; + #endif + #endif + } + + int unicodeisalpha(unsigned short c) { + #ifdef OPENOFFICEORG +diff --git a/extensions/spellcheck/hunspell/src/csutil.hxx b/extensions/spellcheck/hunspell/src/csutil.hxx +--- a/extensions/spellcheck/hunspell/src/csutil.hxx ++++ b/extensions/spellcheck/hunspell/src/csutil.hxx +@@ -77,20 +77,16 @@ + + #include <fstream> + #include <string> + #include <vector> + #include <string.h> + #include "w_char.hxx" + #include "htypes.hxx" + +-#ifdef MOZILLA_CLIENT +-#include "nscore.h" // for mozalloc headers +-#endif +- + // casing + #define NOCAP 0 + #define INITCAP 1 + #define ALLCAP 2 + #define HUHCAP 3 + #define HUHINITCAP 4 + + // default encoding and keystring diff --git a/extensions/spellcheck/hunspell/patches/bug1739761.patch b/extensions/spellcheck/hunspell/patches/bug1739761.patch new file mode 100644 index 0000000000..66ffefadb5 --- /dev/null +++ b/extensions/spellcheck/hunspell/patches/bug1739761.patch @@ -0,0 +1,615 @@ +diff --git a/extensions/spellcheck/hunspell/src/hashmgr.cxx b/extensions/spellcheck/hunspell/src/hashmgr.cxx +--- a/extensions/spellcheck/hunspell/src/hashmgr.cxx ++++ b/extensions/spellcheck/hunspell/src/hashmgr.cxx +@@ -63,16 +63,17 @@ + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + ++#include <assert.h> + #include <stdlib.h> + #include <string.h> + #include <stdio.h> + #include <ctype.h> + #include <limits> + #include <sstream> + + #include "hashmgr.hxx" +@@ -118,52 +119,54 @@ HashMgr::~HashMgr() { + // go through column by column of the table + for (int i = 0; i < tablesize; i++) { + struct hentry* pt = tableptr[i]; + struct hentry* nt = NULL; + while (pt) { + nt = pt->next; + if (pt->astr && + (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) +- free(pt->astr); +- free(pt); ++ arena_free(pt->astr); ++ arena_free(pt); + pt = nt; + } + } + free(tableptr); + } + tablesize = 0; + + if (aliasf) { + for (int j = 0; j < (numaliasf); j++) +- free(aliasf[j]); +- free(aliasf); ++ arena_free(aliasf[j]); ++ arena_free(aliasf); + aliasf = NULL; + if (aliasflen) { +- free(aliasflen); ++ arena_free(aliasflen); + aliasflen = NULL; + } + } + if (aliasm) { + for (int j = 0; j < (numaliasm); j++) +- free(aliasm[j]); +- free(aliasm); ++ arena_free(aliasm[j]); ++ arena_free(aliasm); + aliasm = NULL; + } + + #ifndef OPENOFFICEORG + #ifndef MOZILLA_CLIENT + if (utf8) + free_utf_tbl(); + #endif + #endif + + #ifdef MOZILLA_CLIENT + delete[] csconv; + #endif ++ ++ assert(outstanding_arena_allocations == 0); + } + + // lookup a root word in the hashtable + + struct hentry* HashMgr::lookup(const char* word) const { + struct hentry* dp; + if (tableptr) { + dp = tableptr[hash(word)]; +@@ -222,17 +225,17 @@ int HashMgr::add_word(const std::string& + + word = word_copy; + } + + bool upcasehomonym = false; + int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0; + // variable-length hash record with word and optional fields + struct hentry* hp = +- (struct hentry*)malloc(sizeof(struct hentry) + word->size() + descl); ++ (struct hentry*)arena_alloc(sizeof(struct hentry) + word->size() + descl); + if (!hp) { + delete desc_copy; + delete word_copy; + return 1; + } + + char* hpw = hp->word; + strcpy(hpw, word->c_str()); +@@ -366,57 +369,57 @@ int HashMgr::add_word(const std::string& + delete word_copy; + return 0; + } + while (dp->next != NULL) { + if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { + // remove hidden onlyupcase homonym + if (!onlyupcase) { + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { +- free(dp->astr); ++ arena_free(dp->astr); + dp->astr = hp->astr; + dp->alen = hp->alen; +- free(hp); ++ arena_free(hp); + delete desc_copy; + delete word_copy; + return 0; + } else { + dp->next_homonym = hp; + } + } else { + upcasehomonym = true; + } + } + dp = dp->next; + } + if (strcmp(hp->word, dp->word) == 0) { + // remove hidden onlyupcase homonym + if (!onlyupcase) { + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { +- free(dp->astr); ++ arena_free(dp->astr); + dp->astr = hp->astr; + dp->alen = hp->alen; +- free(hp); ++ arena_free(hp); + delete desc_copy; + delete word_copy; + return 0; + } else { + dp->next_homonym = hp; + } + } else { + upcasehomonym = true; + } + } + if (!upcasehomonym) { + dp->next = hp; + } else { + // remove hidden onlyupcase homonym + if (hp->astr) +- free(hp->astr); +- free(hp); ++ arena_free(hp->astr); ++ arena_free(hp); + } + + delete desc_copy; + delete word_copy; + return 0; + } + + int HashMgr::add_hidden_capitalized_word(const std::string& word, +@@ -430,17 +433,17 @@ int HashMgr::add_hidden_capitalized_word + + // add inner capitalized forms to handle the following allcap forms: + // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG + // Allcaps with suffixes: CIA's -> CIA'S + if (((captype == HUHCAP) || (captype == HUHINITCAP) || + ((captype == ALLCAP) && (flagslen != 0))) && + !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) { + unsigned short* flags2 = +- (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1)); ++ (unsigned short*)arena_alloc(sizeof(unsigned short) * (flagslen + 1)); + if (!flags2) + return 1; + if (flagslen) + memcpy(flags2, flags, flagslen * sizeof(unsigned short)); + flags2[flagslen] = ONLYUPCASEFLAG; + if (utf8) { + std::string st; + std::vector<w_char> w; +@@ -479,23 +482,23 @@ int HashMgr::get_clen_and_captype(const + } + + // remove word (personal dictionary function for standalone applications) + int HashMgr::remove(const std::string& word) { + struct hentry* dp = lookup(word.c_str()); + while (dp) { + if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { + unsigned short* flags = +- (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1)); ++ (unsigned short*)arena_alloc(sizeof(unsigned short) * (dp->alen + 1)); + if (!flags) + return 1; + for (int i = 0; i < dp->alen; i++) + flags[i] = dp->astr[i]; + flags[dp->alen] = forbiddenword; +- free(dp->astr); ++ arena_free(dp->astr); + dp->astr = flags; + dp->alen++; + std::sort(flags, flags + dp->alen); + } + dp = dp->next_homonym; + } + return 0; + } +@@ -533,17 +536,17 @@ int HashMgr::add_with_affix(const std::s + remove_forbidden_flag(word); + if (dp && dp->astr) { + int captype; + int wcl = get_clen_and_captype(word, &captype); + if (aliasf) { + add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype); + } else { + unsigned short* flags = +- (unsigned short*)malloc(dp->alen * sizeof(unsigned short)); ++ (unsigned short*) arena_alloc(dp->alen * sizeof(unsigned short)); + if (flags) { + memcpy((void*)flags, (void*)dp->astr, + dp->alen * sizeof(unsigned short)); + add_word(word, wcl, flags, dp->alen, NULL, false, captype); + } else + return 1; + } + return add_hidden_capitalized_word(word, wcl, dp->astr, +@@ -668,17 +671,17 @@ int HashMgr::load_tables(const char* tpa + if (aliasf) { + int index = atoi(ap.c_str()); + al = get_aliasf(index, &flags, dict); + if (!al) { + HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", + dict->getlinenum()); + } + } else { +- al = decode_flags(&flags, ap.c_str(), dict); ++ al = decode_flags(&flags, ap.c_str(), dict, /* arena = */ true); + if (al == -1) { + HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); + delete dict; + return 6; + } + std::sort(flags, flags + al); + } + } else { +@@ -709,47 +712,48 @@ int HashMgr::hash(const char* word) cons + hv = (hv << 8) | (*word++); + while (*word != 0) { + ROTATE(hv, ROTATE_LEN); + hv ^= (*word++); + } + return (unsigned long)hv % tablesize; + } + +-int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const { ++int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const { ++ auto alloc = [arena, this](int n) { return arena ? this->arena_alloc(n) : malloc(n); }; + int len; + if (flags.empty()) { + *result = NULL; + return 0; + } + switch (flag_mode) { + case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) + len = flags.size(); + if (len % 2 == 1) + HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", + af->getlinenum()); + len /= 2; +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + for (int i = 0; i < len; i++) { + (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) + + (unsigned char)flags[i * 2 + 1]; + } + break; + } + case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 + // 23 233) + len = 1; + unsigned short* dest; + for (size_t i = 0; i < flags.size(); ++i) { + if (flags[i] == ',') + len++; + } +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + dest = *result; + const char* src = flags.c_str(); + for (const char* p = src; *p; p++) { + if (*p == ',') { + int i = atoi(src); + if (i >= DEFAULTFLAGS) +@@ -774,26 +778,26 @@ int HashMgr::decode_flags(unsigned short + HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", + af->getlinenum()); + break; + } + case FLAG_UNI: { // UTF-8 characters + std::vector<w_char> w; + u8_u16(w, flags); + len = w.size(); +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + memcpy(*result, w.data(), len * sizeof(short)); + break; + } + default: { // Ispell's one-character flags (erfg -> e r f g) + unsigned short* dest; + len = flags.size(); +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + dest = *result; + for (size_t i = 0; i < flags.size(); ++i) { + *dest = (unsigned char)flags[i]; + dest++; + } + } +@@ -890,16 +894,18 @@ unsigned short HashMgr::decode_flag(cons + default: + s = *(unsigned char*)f; + } + if (s == 0) + HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); + return s; + } + ++// This function is only called by external consumers, and so using the default ++// allocator with mystrdup is correct. + char* HashMgr::encode_flag(unsigned short f) const { + if (f == 0) + return mystrdup("(NULL)"); + std::string ch; + if (flag_mode == FLAG_LONG) { + ch.push_back((unsigned char)(f >> 8)); + ch.push_back((unsigned char)(f - ((f >> 8) << 8))); + } else if (flag_mode == FLAG_NUM) { +@@ -1070,42 +1076,42 @@ bool HashMgr::parse_aliasf(const std::st + numaliasf = 0; + aliasf = NULL; + aliasflen = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } + aliasf = +- (unsigned short**)malloc(numaliasf * sizeof(unsigned short*)); ++ (unsigned short**)arena_alloc(numaliasf * sizeof(unsigned short*)); + aliasflen = +- (unsigned short*)malloc(numaliasf * sizeof(unsigned short)); ++ (unsigned short*)arena_alloc(numaliasf * sizeof(unsigned short)); + if (!aliasf || !aliasflen) { + numaliasf = 0; + if (aliasf) +- free(aliasf); ++ arena_free(aliasf); + if (aliasflen) +- free(aliasflen); ++ arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + return false; + } + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + numaliasf = 0; +- free(aliasf); +- free(aliasflen); ++ arena_free(aliasf); ++ arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numaliasf lines to read in the remainder of the table */ +@@ -1126,33 +1132,33 @@ bool HashMgr::parse_aliasf(const std::st + errored = true; + break; + } + break; + } + case 1: { + std::string piece(start_piece, iter); + aliasflen[j] = +- (unsigned short)decode_flags(&(aliasf[j]), piece, af); ++ (unsigned short)decode_flags(&(aliasf[j]), piece, af, /* arena = */ true); + std::sort(aliasf[j], aliasf[j] + aliasflen[j]); + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + } + if (!aliasf[j]) { + for (int k = 0; k < j; ++k) { +- free(aliasf[k]); ++ arena_free(aliasf[k]); + } +- free(aliasf); +- free(aliasflen); ++ arena_free(aliasf); ++ arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + numaliasf = 0; + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + } +@@ -1193,33 +1199,33 @@ bool HashMgr::parse_aliasm(const std::st + } + case 1: { + numaliasm = atoi(std::string(start_piece, iter).c_str()); + if (numaliasm < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } +- aliasm = (char**)malloc(numaliasm * sizeof(char*)); ++ aliasm = (char**)arena_alloc(numaliasm * sizeof(char*)); + if (!aliasm) { + numaliasm = 0; + return false; + } + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + numaliasm = 0; +- free(aliasm); ++ arena_free(aliasm); + aliasm = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numaliasm lines to read in the remainder of the table */ + for (int j = 0; j < numaliasm; j++) { +@@ -1245,32 +1251,36 @@ bool HashMgr::parse_aliasm(const std::st + std::string::const_iterator end = nl.end(); + std::string chunk(start_piece, end); + if (complexprefixes) { + if (utf8) + reverseword_utf(chunk); + else + reverseword(chunk); + } +- aliasm[j] = mystrdup(chunk.c_str()); ++ size_t sl = chunk.length() + 1; ++ aliasm[j] = (char*)arena_alloc(sl); ++ if (aliasm[j]) { ++ memcpy(aliasm[j], chunk.c_str(), sl); ++ } + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + } + if (!aliasm[j]) { + numaliasm = 0; + for (int k = 0; k < j; ++k) { +- free(aliasm[k]); ++ arena_free(aliasm[k]); + } +- free(aliasm); ++ arena_free(aliasm); + aliasm = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + } + return true; + } +@@ -1379,8 +1389,27 @@ bool HashMgr::parse_reptable(const std:: + } + return true; + } + + // return replacing table + const std::vector<replentry>& HashMgr::get_reptable() const { + return reptable; + } ++ ++void* HashMgr::arena_alloc(int num_bytes) { ++ static const int MIN_CHUNK_SIZE = 4096; ++ if (arena.empty() || (current_chunk_size - current_chunk_offset < num_bytes)) { ++ current_chunk_size = std::max(MIN_CHUNK_SIZE, num_bytes); ++ arena.push_back(std::make_unique<uint8_t[]>(current_chunk_size)); ++ current_chunk_offset = 0; ++ } ++ ++ uint8_t* ptr = &arena.back()[current_chunk_offset]; ++ current_chunk_offset += num_bytes; ++ outstanding_arena_allocations++; ++ return ptr; ++} ++ ++void HashMgr::arena_free(void* ptr) { ++ --outstanding_arena_allocations; ++ assert(outstanding_arena_allocations >= 0); ++} +diff --git a/extensions/spellcheck/hunspell/src/hashmgr.hxx b/extensions/spellcheck/hunspell/src/hashmgr.hxx +--- a/extensions/spellcheck/hunspell/src/hashmgr.hxx ++++ b/extensions/spellcheck/hunspell/src/hashmgr.hxx +@@ -67,16 +67,18 @@ + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + #ifndef HASHMGR_HXX_ + #define HASHMGR_HXX_ + + #include <stdio.h> ++#include <stdint.h> ++#include <memory> + #include <string> + #include <vector> + + #include "htypes.hxx" + #include "filemgr.hxx" + #include "w_char.hxx" + + enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; +@@ -116,17 +118,23 @@ class HashMgr { + + struct hentry* lookup(const char*) const; + int hash(const char*) const; + struct hentry* walk_hashtable(int& col, struct hentry* hp) const; + + int add(const std::string& word); + int add_with_affix(const std::string& word, const std::string& pattern); + int remove(const std::string& word); +- int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const; ++private: ++ // Only internal consumers are allowed to arena-allocate. ++ int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const; ++public: ++ int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const { ++ return decode_flags(result, flags, af, /* arena = */ false); ++ } + bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const; + unsigned short decode_flag(const char* flag) const; + char* encode_flag(unsigned short flag) const; + int is_aliasf() const; + int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const; + int is_aliasm() const; + char* get_aliasm(int index) const; + const std::vector<replentry>& get_reptable() const; +@@ -148,11 +156,27 @@ class HashMgr { + int wcl, + unsigned short* flags, + int al, + const std::string* dp, + int captype); + bool parse_aliasm(const std::string& line, FileMgr* af); + bool parse_reptable(const std::string& line, FileMgr* af); + int remove_forbidden_flag(const std::string& word); ++ ++ // Our Mozilla fork uses a simple arena allocator for certain strings which ++ // persist for the lifetime of the HashMgr in order to avoid heap fragmentation. ++ // It's a simple bump-allocator, so we can't actually free() memory midway ++ // through the lifecycle, but we have a dummy free() implementation to ensure ++ // that our calls to arena_alloc() and arena_free() are balanced. ++ void* arena_alloc(int num_bytes); ++ void* arena_alloc(int num_bytes) const { ++ return const_cast<HashMgr*>(this)->arena_alloc(num_bytes); ++ } ++ void arena_free(void* ptr); ++ ++ std::vector<std::unique_ptr<uint8_t[]>> arena; ++ int current_chunk_size = 0; ++ int current_chunk_offset = 0; ++ int outstanding_arena_allocations = 0; + }; + + #endif |