summaryrefslogtreecommitdiffstats
path: root/extensions/spellcheck/hunspell/patches/bug1739761.patch
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--extensions/spellcheck/hunspell/patches/bug1739761.patch615
1 files changed, 615 insertions, 0 deletions
diff --git a/extensions/spellcheck/hunspell/patches/bug1739761.patch b/extensions/spellcheck/hunspell/patches/bug1739761.patch
new file mode 100644
index 0000000000..66ffefadb5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/patches/bug1739761.patch
@@ -0,0 +1,615 @@
+diff --git a/extensions/spellcheck/hunspell/src/hashmgr.cxx b/extensions/spellcheck/hunspell/src/hashmgr.cxx
+--- a/extensions/spellcheck/hunspell/src/hashmgr.cxx
++++ b/extensions/spellcheck/hunspell/src/hashmgr.cxx
+@@ -63,16 +63,17 @@
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
++#include <assert.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <stdio.h>
+ #include <ctype.h>
+ #include <limits>
+ #include <sstream>
+
+ #include "hashmgr.hxx"
+@@ -118,52 +119,54 @@ HashMgr::~HashMgr() {
+ // go through column by column of the table
+ for (int i = 0; i < tablesize; i++) {
+ struct hentry* pt = tableptr[i];
+ struct hentry* nt = NULL;
+ while (pt) {
+ nt = pt->next;
+ if (pt->astr &&
+ (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen)))
+- free(pt->astr);
+- free(pt);
++ arena_free(pt->astr);
++ arena_free(pt);
+ pt = nt;
+ }
+ }
+ free(tableptr);
+ }
+ tablesize = 0;
+
+ if (aliasf) {
+ for (int j = 0; j < (numaliasf); j++)
+- free(aliasf[j]);
+- free(aliasf);
++ arena_free(aliasf[j]);
++ arena_free(aliasf);
+ aliasf = NULL;
+ if (aliasflen) {
+- free(aliasflen);
++ arena_free(aliasflen);
+ aliasflen = NULL;
+ }
+ }
+ if (aliasm) {
+ for (int j = 0; j < (numaliasm); j++)
+- free(aliasm[j]);
+- free(aliasm);
++ arena_free(aliasm[j]);
++ arena_free(aliasm);
+ aliasm = NULL;
+ }
+
+ #ifndef OPENOFFICEORG
+ #ifndef MOZILLA_CLIENT
+ if (utf8)
+ free_utf_tbl();
+ #endif
+ #endif
+
+ #ifdef MOZILLA_CLIENT
+ delete[] csconv;
+ #endif
++
++ assert(outstanding_arena_allocations == 0);
+ }
+
+ // lookup a root word in the hashtable
+
+ struct hentry* HashMgr::lookup(const char* word) const {
+ struct hentry* dp;
+ if (tableptr) {
+ dp = tableptr[hash(word)];
+@@ -222,17 +225,17 @@ int HashMgr::add_word(const std::string&
+
+ word = word_copy;
+ }
+
+ bool upcasehomonym = false;
+ int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0;
+ // variable-length hash record with word and optional fields
+ struct hentry* hp =
+- (struct hentry*)malloc(sizeof(struct hentry) + word->size() + descl);
++ (struct hentry*)arena_alloc(sizeof(struct hentry) + word->size() + descl);
+ if (!hp) {
+ delete desc_copy;
+ delete word_copy;
+ return 1;
+ }
+
+ char* hpw = hp->word;
+ strcpy(hpw, word->c_str());
+@@ -366,57 +369,57 @@ int HashMgr::add_word(const std::string&
+ delete word_copy;
+ return 0;
+ }
+ while (dp->next != NULL) {
+ if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
+ // remove hidden onlyupcase homonym
+ if (!onlyupcase) {
+ if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
+- free(dp->astr);
++ arena_free(dp->astr);
+ dp->astr = hp->astr;
+ dp->alen = hp->alen;
+- free(hp);
++ arena_free(hp);
+ delete desc_copy;
+ delete word_copy;
+ return 0;
+ } else {
+ dp->next_homonym = hp;
+ }
+ } else {
+ upcasehomonym = true;
+ }
+ }
+ dp = dp->next;
+ }
+ if (strcmp(hp->word, dp->word) == 0) {
+ // remove hidden onlyupcase homonym
+ if (!onlyupcase) {
+ if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
+- free(dp->astr);
++ arena_free(dp->astr);
+ dp->astr = hp->astr;
+ dp->alen = hp->alen;
+- free(hp);
++ arena_free(hp);
+ delete desc_copy;
+ delete word_copy;
+ return 0;
+ } else {
+ dp->next_homonym = hp;
+ }
+ } else {
+ upcasehomonym = true;
+ }
+ }
+ if (!upcasehomonym) {
+ dp->next = hp;
+ } else {
+ // remove hidden onlyupcase homonym
+ if (hp->astr)
+- free(hp->astr);
+- free(hp);
++ arena_free(hp->astr);
++ arena_free(hp);
+ }
+
+ delete desc_copy;
+ delete word_copy;
+ return 0;
+ }
+
+ int HashMgr::add_hidden_capitalized_word(const std::string& word,
+@@ -430,17 +433,17 @@ int HashMgr::add_hidden_capitalized_word
+
+ // add inner capitalized forms to handle the following allcap forms:
+ // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
+ // Allcaps with suffixes: CIA's -> CIA'S
+ if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
+ ((captype == ALLCAP) && (flagslen != 0))) &&
+ !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {
+ unsigned short* flags2 =
+- (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1));
++ (unsigned short*)arena_alloc(sizeof(unsigned short) * (flagslen + 1));
+ if (!flags2)
+ return 1;
+ if (flagslen)
+ memcpy(flags2, flags, flagslen * sizeof(unsigned short));
+ flags2[flagslen] = ONLYUPCASEFLAG;
+ if (utf8) {
+ std::string st;
+ std::vector<w_char> w;
+@@ -479,23 +482,23 @@ int HashMgr::get_clen_and_captype(const
+ }
+
+ // remove word (personal dictionary function for standalone applications)
+ int HashMgr::remove(const std::string& word) {
+ struct hentry* dp = lookup(word.c_str());
+ while (dp) {
+ if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
+ unsigned short* flags =
+- (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1));
++ (unsigned short*)arena_alloc(sizeof(unsigned short) * (dp->alen + 1));
+ if (!flags)
+ return 1;
+ for (int i = 0; i < dp->alen; i++)
+ flags[i] = dp->astr[i];
+ flags[dp->alen] = forbiddenword;
+- free(dp->astr);
++ arena_free(dp->astr);
+ dp->astr = flags;
+ dp->alen++;
+ std::sort(flags, flags + dp->alen);
+ }
+ dp = dp->next_homonym;
+ }
+ return 0;
+ }
+@@ -533,17 +536,17 @@ int HashMgr::add_with_affix(const std::s
+ remove_forbidden_flag(word);
+ if (dp && dp->astr) {
+ int captype;
+ int wcl = get_clen_and_captype(word, &captype);
+ if (aliasf) {
+ add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype);
+ } else {
+ unsigned short* flags =
+- (unsigned short*)malloc(dp->alen * sizeof(unsigned short));
++ (unsigned short*) arena_alloc(dp->alen * sizeof(unsigned short));
+ if (flags) {
+ memcpy((void*)flags, (void*)dp->astr,
+ dp->alen * sizeof(unsigned short));
+ add_word(word, wcl, flags, dp->alen, NULL, false, captype);
+ } else
+ return 1;
+ }
+ return add_hidden_capitalized_word(word, wcl, dp->astr,
+@@ -668,17 +671,17 @@ int HashMgr::load_tables(const char* tpa
+ if (aliasf) {
+ int index = atoi(ap.c_str());
+ al = get_aliasf(index, &flags, dict);
+ if (!al) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n",
+ dict->getlinenum());
+ }
+ } else {
+- al = decode_flags(&flags, ap.c_str(), dict);
++ al = decode_flags(&flags, ap.c_str(), dict, /* arena = */ true);
+ if (al == -1) {
+ HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
+ delete dict;
+ return 6;
+ }
+ std::sort(flags, flags + al);
+ }
+ } else {
+@@ -709,47 +712,48 @@ int HashMgr::hash(const char* word) cons
+ hv = (hv << 8) | (*word++);
+ while (*word != 0) {
+ ROTATE(hv, ROTATE_LEN);
+ hv ^= (*word++);
+ }
+ return (unsigned long)hv % tablesize;
+ }
+
+-int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const {
++int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const {
++ auto alloc = [arena, this](int n) { return arena ? this->arena_alloc(n) : malloc(n); };
+ int len;
+ if (flags.empty()) {
+ *result = NULL;
+ return 0;
+ }
+ switch (flag_mode) {
+ case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
+ len = flags.size();
+ if (len % 2 == 1)
+ HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
+ af->getlinenum());
+ len /= 2;
+- *result = (unsigned short*)malloc(len * sizeof(unsigned short));
++ *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+ if (!*result)
+ return -1;
+ for (int i = 0; i < len; i++) {
+ (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) +
+ (unsigned char)flags[i * 2 + 1];
+ }
+ break;
+ }
+ case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521
+ // 23 233)
+ len = 1;
+ unsigned short* dest;
+ for (size_t i = 0; i < flags.size(); ++i) {
+ if (flags[i] == ',')
+ len++;
+ }
+- *result = (unsigned short*)malloc(len * sizeof(unsigned short));
++ *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+ if (!*result)
+ return -1;
+ dest = *result;
+ const char* src = flags.c_str();
+ for (const char* p = src; *p; p++) {
+ if (*p == ',') {
+ int i = atoi(src);
+ if (i >= DEFAULTFLAGS)
+@@ -774,26 +778,26 @@ int HashMgr::decode_flags(unsigned short
+ HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
+ af->getlinenum());
+ break;
+ }
+ case FLAG_UNI: { // UTF-8 characters
+ std::vector<w_char> w;
+ u8_u16(w, flags);
+ len = w.size();
+- *result = (unsigned short*)malloc(len * sizeof(unsigned short));
++ *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+ if (!*result)
+ return -1;
+ memcpy(*result, w.data(), len * sizeof(short));
+ break;
+ }
+ default: { // Ispell's one-character flags (erfg -> e r f g)
+ unsigned short* dest;
+ len = flags.size();
+- *result = (unsigned short*)malloc(len * sizeof(unsigned short));
++ *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+ if (!*result)
+ return -1;
+ dest = *result;
+ for (size_t i = 0; i < flags.size(); ++i) {
+ *dest = (unsigned char)flags[i];
+ dest++;
+ }
+ }
+@@ -890,16 +894,18 @@ unsigned short HashMgr::decode_flag(cons
+ default:
+ s = *(unsigned char*)f;
+ }
+ if (s == 0)
+ HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
+ return s;
+ }
+
++// This function is only called by external consumers, and so using the default
++// allocator with mystrdup is correct.
+ char* HashMgr::encode_flag(unsigned short f) const {
+ if (f == 0)
+ return mystrdup("(NULL)");
+ std::string ch;
+ if (flag_mode == FLAG_LONG) {
+ ch.push_back((unsigned char)(f >> 8));
+ ch.push_back((unsigned char)(f - ((f >> 8) << 8)));
+ } else if (flag_mode == FLAG_NUM) {
+@@ -1070,42 +1076,42 @@ bool HashMgr::parse_aliasf(const std::st
+ numaliasf = 0;
+ aliasf = NULL;
+ aliasflen = NULL;
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return false;
+ }
+ aliasf =
+- (unsigned short**)malloc(numaliasf * sizeof(unsigned short*));
++ (unsigned short**)arena_alloc(numaliasf * sizeof(unsigned short*));
+ aliasflen =
+- (unsigned short*)malloc(numaliasf * sizeof(unsigned short));
++ (unsigned short*)arena_alloc(numaliasf * sizeof(unsigned short));
+ if (!aliasf || !aliasflen) {
+ numaliasf = 0;
+ if (aliasf)
+- free(aliasf);
++ arena_free(aliasf);
+ if (aliasflen)
+- free(aliasflen);
++ arena_free(aliasflen);
+ aliasf = NULL;
+ aliasflen = NULL;
+ return false;
+ }
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ numaliasf = 0;
+- free(aliasf);
+- free(aliasflen);
++ arena_free(aliasf);
++ arena_free(aliasflen);
+ aliasf = NULL;
+ aliasflen = NULL;
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return false;
+ }
+
+ /* now parse the numaliasf lines to read in the remainder of the table */
+@@ -1126,33 +1132,33 @@ bool HashMgr::parse_aliasf(const std::st
+ errored = true;
+ break;
+ }
+ break;
+ }
+ case 1: {
+ std::string piece(start_piece, iter);
+ aliasflen[j] =
+- (unsigned short)decode_flags(&(aliasf[j]), piece, af);
++ (unsigned short)decode_flags(&(aliasf[j]), piece, af, /* arena = */ true);
+ std::sort(aliasf[j], aliasf[j] + aliasflen[j]);
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(nl, iter);
+ }
+ }
+ if (!aliasf[j]) {
+ for (int k = 0; k < j; ++k) {
+- free(aliasf[k]);
++ arena_free(aliasf[k]);
+ }
+- free(aliasf);
+- free(aliasflen);
++ arena_free(aliasf);
++ arena_free(aliasflen);
+ aliasf = NULL;
+ aliasflen = NULL;
+ numaliasf = 0;
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ return false;
+ }
+ }
+@@ -1193,33 +1199,33 @@ bool HashMgr::parse_aliasm(const std::st
+ }
+ case 1: {
+ numaliasm = atoi(std::string(start_piece, iter).c_str());
+ if (numaliasm < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return false;
+ }
+- aliasm = (char**)malloc(numaliasm * sizeof(char*));
++ aliasm = (char**)arena_alloc(numaliasm * sizeof(char*));
+ if (!aliasm) {
+ numaliasm = 0;
+ return false;
+ }
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ numaliasm = 0;
+- free(aliasm);
++ arena_free(aliasm);
+ aliasm = NULL;
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return false;
+ }
+
+ /* now parse the numaliasm lines to read in the remainder of the table */
+ for (int j = 0; j < numaliasm; j++) {
+@@ -1245,32 +1251,36 @@ bool HashMgr::parse_aliasm(const std::st
+ std::string::const_iterator end = nl.end();
+ std::string chunk(start_piece, end);
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(chunk);
+ else
+ reverseword(chunk);
+ }
+- aliasm[j] = mystrdup(chunk.c_str());
++ size_t sl = chunk.length() + 1;
++ aliasm[j] = (char*)arena_alloc(sl);
++ if (aliasm[j]) {
++ memcpy(aliasm[j], chunk.c_str(), sl);
++ }
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(nl, iter);
+ }
+ }
+ if (!aliasm[j]) {
+ numaliasm = 0;
+ for (int k = 0; k < j; ++k) {
+- free(aliasm[k]);
++ arena_free(aliasm[k]);
+ }
+- free(aliasm);
++ arena_free(aliasm);
+ aliasm = NULL;
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ return false;
+ }
+ }
+ return true;
+ }
+@@ -1379,8 +1389,27 @@ bool HashMgr::parse_reptable(const std::
+ }
+ return true;
+ }
+
+ // return replacing table
+ const std::vector<replentry>& HashMgr::get_reptable() const {
+ return reptable;
+ }
++
++void* HashMgr::arena_alloc(int num_bytes) {
++ static const int MIN_CHUNK_SIZE = 4096;
++ if (arena.empty() || (current_chunk_size - current_chunk_offset < num_bytes)) {
++ current_chunk_size = std::max(MIN_CHUNK_SIZE, num_bytes);
++ arena.push_back(std::make_unique<uint8_t[]>(current_chunk_size));
++ current_chunk_offset = 0;
++ }
++
++ uint8_t* ptr = &arena.back()[current_chunk_offset];
++ current_chunk_offset += num_bytes;
++ outstanding_arena_allocations++;
++ return ptr;
++}
++
++void HashMgr::arena_free(void* ptr) {
++ --outstanding_arena_allocations;
++ assert(outstanding_arena_allocations >= 0);
++}
+diff --git a/extensions/spellcheck/hunspell/src/hashmgr.hxx b/extensions/spellcheck/hunspell/src/hashmgr.hxx
+--- a/extensions/spellcheck/hunspell/src/hashmgr.hxx
++++ b/extensions/spellcheck/hunspell/src/hashmgr.hxx
+@@ -67,16 +67,18 @@
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+ #ifndef HASHMGR_HXX_
+ #define HASHMGR_HXX_
+
+ #include <stdio.h>
++#include <stdint.h>
++#include <memory>
+ #include <string>
+ #include <vector>
+
+ #include "htypes.hxx"
+ #include "filemgr.hxx"
+ #include "w_char.hxx"
+
+ enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
+@@ -116,17 +118,23 @@ class HashMgr {
+
+ struct hentry* lookup(const char*) const;
+ int hash(const char*) const;
+ struct hentry* walk_hashtable(int& col, struct hentry* hp) const;
+
+ int add(const std::string& word);
+ int add_with_affix(const std::string& word, const std::string& pattern);
+ int remove(const std::string& word);
+- int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const;
++private:
++ // Only internal consumers are allowed to arena-allocate.
++ int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const;
++public:
++ int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const {
++ return decode_flags(result, flags, af, /* arena = */ false);
++ }
+ bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const;
+ unsigned short decode_flag(const char* flag) const;
+ char* encode_flag(unsigned short flag) const;
+ int is_aliasf() const;
+ int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const;
+ int is_aliasm() const;
+ char* get_aliasm(int index) const;
+ const std::vector<replentry>& get_reptable() const;
+@@ -148,11 +156,27 @@ class HashMgr {
+ int wcl,
+ unsigned short* flags,
+ int al,
+ const std::string* dp,
+ int captype);
+ bool parse_aliasm(const std::string& line, FileMgr* af);
+ bool parse_reptable(const std::string& line, FileMgr* af);
+ int remove_forbidden_flag(const std::string& word);
++
++ // Our Mozilla fork uses a simple arena allocator for certain strings which
++ // persist for the lifetime of the HashMgr in order to avoid heap fragmentation.
++ // It's a simple bump-allocator, so we can't actually free() memory midway
++ // through the lifecycle, but we have a dummy free() implementation to ensure
++ // that our calls to arena_alloc() and arena_free() are balanced.
++ void* arena_alloc(int num_bytes);
++ void* arena_alloc(int num_bytes) const {
++ return const_cast<HashMgr*>(this)->arena_alloc(num_bytes);
++ }
++ void arena_free(void* ptr);
++
++ std::vector<std::unique_ptr<uint8_t[]>> arena;
++ int current_chunk_size = 0;
++ int current_chunk_offset = 0;
++ int outstanding_arena_allocations = 0;
+ };
+
+ #endif