diff options
Diffstat (limited to 'extensions/spellcheck/hunspell/patches/bug1739761.patch')
-rw-r--r-- | extensions/spellcheck/hunspell/patches/bug1739761.patch | 615 |
1 files changed, 615 insertions, 0 deletions
diff --git a/extensions/spellcheck/hunspell/patches/bug1739761.patch b/extensions/spellcheck/hunspell/patches/bug1739761.patch new file mode 100644 index 0000000000..66ffefadb5 --- /dev/null +++ b/extensions/spellcheck/hunspell/patches/bug1739761.patch @@ -0,0 +1,615 @@ +diff --git a/extensions/spellcheck/hunspell/src/hashmgr.cxx b/extensions/spellcheck/hunspell/src/hashmgr.cxx +--- a/extensions/spellcheck/hunspell/src/hashmgr.cxx ++++ b/extensions/spellcheck/hunspell/src/hashmgr.cxx +@@ -63,16 +63,17 @@ + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + ++#include <assert.h> + #include <stdlib.h> + #include <string.h> + #include <stdio.h> + #include <ctype.h> + #include <limits> + #include <sstream> + + #include "hashmgr.hxx" +@@ -118,52 +119,54 @@ HashMgr::~HashMgr() { + // go through column by column of the table + for (int i = 0; i < tablesize; i++) { + struct hentry* pt = tableptr[i]; + struct hentry* nt = NULL; + while (pt) { + nt = pt->next; + if (pt->astr && + (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) +- free(pt->astr); +- free(pt); ++ arena_free(pt->astr); ++ arena_free(pt); + pt = nt; + } + } + free(tableptr); + } + tablesize = 0; + + if (aliasf) { + for (int j = 0; j < (numaliasf); j++) +- free(aliasf[j]); +- free(aliasf); ++ arena_free(aliasf[j]); ++ arena_free(aliasf); + aliasf = NULL; + if (aliasflen) { +- free(aliasflen); ++ arena_free(aliasflen); + aliasflen = NULL; + } + } + if (aliasm) { + for (int j = 0; j < (numaliasm); j++) +- free(aliasm[j]); +- free(aliasm); ++ arena_free(aliasm[j]); ++ arena_free(aliasm); + aliasm = NULL; + } + + #ifndef OPENOFFICEORG + #ifndef MOZILLA_CLIENT + if (utf8) + free_utf_tbl(); + #endif + #endif + + #ifdef MOZILLA_CLIENT + delete[] csconv; + #endif ++ ++ assert(outstanding_arena_allocations == 0); + } + + // lookup a root word in the hashtable + + struct hentry* HashMgr::lookup(const char* word) const { + struct hentry* dp; + if (tableptr) { + dp = tableptr[hash(word)]; +@@ -222,17 +225,17 @@ int HashMgr::add_word(const std::string& + + word = word_copy; + } + + bool upcasehomonym = false; + int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0; + // variable-length hash record with word and optional fields + struct hentry* hp = +- (struct hentry*)malloc(sizeof(struct hentry) + word->size() + descl); ++ (struct hentry*)arena_alloc(sizeof(struct hentry) + word->size() + descl); + if (!hp) { + delete desc_copy; + delete word_copy; + return 1; + } + + char* hpw = hp->word; + strcpy(hpw, word->c_str()); +@@ -366,57 +369,57 @@ int HashMgr::add_word(const std::string& + delete word_copy; + return 0; + } + while (dp->next != NULL) { + if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { + // remove hidden onlyupcase homonym + if (!onlyupcase) { + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { +- free(dp->astr); ++ arena_free(dp->astr); + dp->astr = hp->astr; + dp->alen = hp->alen; +- free(hp); ++ arena_free(hp); + delete desc_copy; + delete word_copy; + return 0; + } else { + dp->next_homonym = hp; + } + } else { + upcasehomonym = true; + } + } + dp = dp->next; + } + if (strcmp(hp->word, dp->word) == 0) { + // remove hidden onlyupcase homonym + if (!onlyupcase) { + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { +- free(dp->astr); ++ arena_free(dp->astr); + dp->astr = hp->astr; + dp->alen = hp->alen; +- free(hp); ++ arena_free(hp); + delete desc_copy; + delete word_copy; + return 0; + } else { + dp->next_homonym = hp; + } + } else { + upcasehomonym = true; + } + } + if (!upcasehomonym) { + dp->next = hp; + } else { + // remove hidden onlyupcase homonym + if (hp->astr) +- free(hp->astr); +- free(hp); ++ arena_free(hp->astr); ++ arena_free(hp); + } + + delete desc_copy; + delete word_copy; + return 0; + } + + int HashMgr::add_hidden_capitalized_word(const std::string& word, +@@ -430,17 +433,17 @@ int HashMgr::add_hidden_capitalized_word + + // add inner capitalized forms to handle the following allcap forms: + // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG + // Allcaps with suffixes: CIA's -> CIA'S + if (((captype == HUHCAP) || (captype == HUHINITCAP) || + ((captype == ALLCAP) && (flagslen != 0))) && + !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) { + unsigned short* flags2 = +- (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1)); ++ (unsigned short*)arena_alloc(sizeof(unsigned short) * (flagslen + 1)); + if (!flags2) + return 1; + if (flagslen) + memcpy(flags2, flags, flagslen * sizeof(unsigned short)); + flags2[flagslen] = ONLYUPCASEFLAG; + if (utf8) { + std::string st; + std::vector<w_char> w; +@@ -479,23 +482,23 @@ int HashMgr::get_clen_and_captype(const + } + + // remove word (personal dictionary function for standalone applications) + int HashMgr::remove(const std::string& word) { + struct hentry* dp = lookup(word.c_str()); + while (dp) { + if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { + unsigned short* flags = +- (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1)); ++ (unsigned short*)arena_alloc(sizeof(unsigned short) * (dp->alen + 1)); + if (!flags) + return 1; + for (int i = 0; i < dp->alen; i++) + flags[i] = dp->astr[i]; + flags[dp->alen] = forbiddenword; +- free(dp->astr); ++ arena_free(dp->astr); + dp->astr = flags; + dp->alen++; + std::sort(flags, flags + dp->alen); + } + dp = dp->next_homonym; + } + return 0; + } +@@ -533,17 +536,17 @@ int HashMgr::add_with_affix(const std::s + remove_forbidden_flag(word); + if (dp && dp->astr) { + int captype; + int wcl = get_clen_and_captype(word, &captype); + if (aliasf) { + add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype); + } else { + unsigned short* flags = +- (unsigned short*)malloc(dp->alen * sizeof(unsigned short)); ++ (unsigned short*) arena_alloc(dp->alen * sizeof(unsigned short)); + if (flags) { + memcpy((void*)flags, (void*)dp->astr, + dp->alen * sizeof(unsigned short)); + add_word(word, wcl, flags, dp->alen, NULL, false, captype); + } else + return 1; + } + return add_hidden_capitalized_word(word, wcl, dp->astr, +@@ -668,17 +671,17 @@ int HashMgr::load_tables(const char* tpa + if (aliasf) { + int index = atoi(ap.c_str()); + al = get_aliasf(index, &flags, dict); + if (!al) { + HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", + dict->getlinenum()); + } + } else { +- al = decode_flags(&flags, ap.c_str(), dict); ++ al = decode_flags(&flags, ap.c_str(), dict, /* arena = */ true); + if (al == -1) { + HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); + delete dict; + return 6; + } + std::sort(flags, flags + al); + } + } else { +@@ -709,47 +712,48 @@ int HashMgr::hash(const char* word) cons + hv = (hv << 8) | (*word++); + while (*word != 0) { + ROTATE(hv, ROTATE_LEN); + hv ^= (*word++); + } + return (unsigned long)hv % tablesize; + } + +-int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const { ++int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const { ++ auto alloc = [arena, this](int n) { return arena ? this->arena_alloc(n) : malloc(n); }; + int len; + if (flags.empty()) { + *result = NULL; + return 0; + } + switch (flag_mode) { + case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) + len = flags.size(); + if (len % 2 == 1) + HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", + af->getlinenum()); + len /= 2; +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + for (int i = 0; i < len; i++) { + (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) + + (unsigned char)flags[i * 2 + 1]; + } + break; + } + case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 + // 23 233) + len = 1; + unsigned short* dest; + for (size_t i = 0; i < flags.size(); ++i) { + if (flags[i] == ',') + len++; + } +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + dest = *result; + const char* src = flags.c_str(); + for (const char* p = src; *p; p++) { + if (*p == ',') { + int i = atoi(src); + if (i >= DEFAULTFLAGS) +@@ -774,26 +778,26 @@ int HashMgr::decode_flags(unsigned short + HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", + af->getlinenum()); + break; + } + case FLAG_UNI: { // UTF-8 characters + std::vector<w_char> w; + u8_u16(w, flags); + len = w.size(); +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + memcpy(*result, w.data(), len * sizeof(short)); + break; + } + default: { // Ispell's one-character flags (erfg -> e r f g) + unsigned short* dest; + len = flags.size(); +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + dest = *result; + for (size_t i = 0; i < flags.size(); ++i) { + *dest = (unsigned char)flags[i]; + dest++; + } + } +@@ -890,16 +894,18 @@ unsigned short HashMgr::decode_flag(cons + default: + s = *(unsigned char*)f; + } + if (s == 0) + HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); + return s; + } + ++// This function is only called by external consumers, and so using the default ++// allocator with mystrdup is correct. + char* HashMgr::encode_flag(unsigned short f) const { + if (f == 0) + return mystrdup("(NULL)"); + std::string ch; + if (flag_mode == FLAG_LONG) { + ch.push_back((unsigned char)(f >> 8)); + ch.push_back((unsigned char)(f - ((f >> 8) << 8))); + } else if (flag_mode == FLAG_NUM) { +@@ -1070,42 +1076,42 @@ bool HashMgr::parse_aliasf(const std::st + numaliasf = 0; + aliasf = NULL; + aliasflen = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } + aliasf = +- (unsigned short**)malloc(numaliasf * sizeof(unsigned short*)); ++ (unsigned short**)arena_alloc(numaliasf * sizeof(unsigned short*)); + aliasflen = +- (unsigned short*)malloc(numaliasf * sizeof(unsigned short)); ++ (unsigned short*)arena_alloc(numaliasf * sizeof(unsigned short)); + if (!aliasf || !aliasflen) { + numaliasf = 0; + if (aliasf) +- free(aliasf); ++ arena_free(aliasf); + if (aliasflen) +- free(aliasflen); ++ arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + return false; + } + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + numaliasf = 0; +- free(aliasf); +- free(aliasflen); ++ arena_free(aliasf); ++ arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numaliasf lines to read in the remainder of the table */ +@@ -1126,33 +1132,33 @@ bool HashMgr::parse_aliasf(const std::st + errored = true; + break; + } + break; + } + case 1: { + std::string piece(start_piece, iter); + aliasflen[j] = +- (unsigned short)decode_flags(&(aliasf[j]), piece, af); ++ (unsigned short)decode_flags(&(aliasf[j]), piece, af, /* arena = */ true); + std::sort(aliasf[j], aliasf[j] + aliasflen[j]); + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + } + if (!aliasf[j]) { + for (int k = 0; k < j; ++k) { +- free(aliasf[k]); ++ arena_free(aliasf[k]); + } +- free(aliasf); +- free(aliasflen); ++ arena_free(aliasf); ++ arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + numaliasf = 0; + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + } +@@ -1193,33 +1199,33 @@ bool HashMgr::parse_aliasm(const std::st + } + case 1: { + numaliasm = atoi(std::string(start_piece, iter).c_str()); + if (numaliasm < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } +- aliasm = (char**)malloc(numaliasm * sizeof(char*)); ++ aliasm = (char**)arena_alloc(numaliasm * sizeof(char*)); + if (!aliasm) { + numaliasm = 0; + return false; + } + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + numaliasm = 0; +- free(aliasm); ++ arena_free(aliasm); + aliasm = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numaliasm lines to read in the remainder of the table */ + for (int j = 0; j < numaliasm; j++) { +@@ -1245,32 +1251,36 @@ bool HashMgr::parse_aliasm(const std::st + std::string::const_iterator end = nl.end(); + std::string chunk(start_piece, end); + if (complexprefixes) { + if (utf8) + reverseword_utf(chunk); + else + reverseword(chunk); + } +- aliasm[j] = mystrdup(chunk.c_str()); ++ size_t sl = chunk.length() + 1; ++ aliasm[j] = (char*)arena_alloc(sl); ++ if (aliasm[j]) { ++ memcpy(aliasm[j], chunk.c_str(), sl); ++ } + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + } + if (!aliasm[j]) { + numaliasm = 0; + for (int k = 0; k < j; ++k) { +- free(aliasm[k]); ++ arena_free(aliasm[k]); + } +- free(aliasm); ++ arena_free(aliasm); + aliasm = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + } + return true; + } +@@ -1379,8 +1389,27 @@ bool HashMgr::parse_reptable(const std:: + } + return true; + } + + // return replacing table + const std::vector<replentry>& HashMgr::get_reptable() const { + return reptable; + } ++ ++void* HashMgr::arena_alloc(int num_bytes) { ++ static const int MIN_CHUNK_SIZE = 4096; ++ if (arena.empty() || (current_chunk_size - current_chunk_offset < num_bytes)) { ++ current_chunk_size = std::max(MIN_CHUNK_SIZE, num_bytes); ++ arena.push_back(std::make_unique<uint8_t[]>(current_chunk_size)); ++ current_chunk_offset = 0; ++ } ++ ++ uint8_t* ptr = &arena.back()[current_chunk_offset]; ++ current_chunk_offset += num_bytes; ++ outstanding_arena_allocations++; ++ return ptr; ++} ++ ++void HashMgr::arena_free(void* ptr) { ++ --outstanding_arena_allocations; ++ assert(outstanding_arena_allocations >= 0); ++} +diff --git a/extensions/spellcheck/hunspell/src/hashmgr.hxx b/extensions/spellcheck/hunspell/src/hashmgr.hxx +--- a/extensions/spellcheck/hunspell/src/hashmgr.hxx ++++ b/extensions/spellcheck/hunspell/src/hashmgr.hxx +@@ -67,16 +67,18 @@ + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + #ifndef HASHMGR_HXX_ + #define HASHMGR_HXX_ + + #include <stdio.h> ++#include <stdint.h> ++#include <memory> + #include <string> + #include <vector> + + #include "htypes.hxx" + #include "filemgr.hxx" + #include "w_char.hxx" + + enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; +@@ -116,17 +118,23 @@ class HashMgr { + + struct hentry* lookup(const char*) const; + int hash(const char*) const; + struct hentry* walk_hashtable(int& col, struct hentry* hp) const; + + int add(const std::string& word); + int add_with_affix(const std::string& word, const std::string& pattern); + int remove(const std::string& word); +- int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const; ++private: ++ // Only internal consumers are allowed to arena-allocate. ++ int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const; ++public: ++ int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const { ++ return decode_flags(result, flags, af, /* arena = */ false); ++ } + bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const; + unsigned short decode_flag(const char* flag) const; + char* encode_flag(unsigned short flag) const; + int is_aliasf() const; + int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const; + int is_aliasm() const; + char* get_aliasm(int index) const; + const std::vector<replentry>& get_reptable() const; +@@ -148,11 +156,27 @@ class HashMgr { + int wcl, + unsigned short* flags, + int al, + const std::string* dp, + int captype); + bool parse_aliasm(const std::string& line, FileMgr* af); + bool parse_reptable(const std::string& line, FileMgr* af); + int remove_forbidden_flag(const std::string& word); ++ ++ // Our Mozilla fork uses a simple arena allocator for certain strings which ++ // persist for the lifetime of the HashMgr in order to avoid heap fragmentation. ++ // It's a simple bump-allocator, so we can't actually free() memory midway ++ // through the lifecycle, but we have a dummy free() implementation to ensure ++ // that our calls to arena_alloc() and arena_free() are balanced. ++ void* arena_alloc(int num_bytes); ++ void* arena_alloc(int num_bytes) const { ++ return const_cast<HashMgr*>(this)->arena_alloc(num_bytes); ++ } ++ void arena_free(void* ptr); ++ ++ std::vector<std::unique_ptr<uint8_t[]>> arena; ++ int current_chunk_size = 0; ++ int current_chunk_offset = 0; ++ int outstanding_arena_allocations = 0; + }; + + #endif |