Adding upstream version 1:115.7.0.upstream/1%115.7.0 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
commit: 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree: a68f146d7fa01f0134297619fbe7e33db084e0aa /extensions/spellcheck/hunspell/patches
parent: Initial commit. (diff)
download: thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz
thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip
4 files changed, 862 insertions, 0 deletions
diff --git a/extensions/spellcheck/hunspell/patches/bug1410214.patch b/extensions/spellcheck/hunspell/patches/bug1410214.patch
new file mode 100644
index 0000000000..4db781b5e7
--- /dev/null
+++ b/extensions/spellcheck/hunspell/patches/bug1410214.patch
@@ -0,0 +1,37 @@
+diff --git a/extensions/spellcheck/hunspell/src/filemgr.hxx b/extensions/spellcheck/hunspell/src/filemgr.hxx
+--- a/extensions/spellcheck/hunspell/src/filemgr.hxx
++++ b/extensions/spellcheck/hunspell/src/filemgr.hxx
+@@ -67,32 +67,11 @@
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  */
+ 
+ /* file manager class - read lines of files [filename] OR [filename.hz] */
+ #ifndef FILEMGR_HXX_
+ #define FILEMGR_HXX_
+ 
+-#include "hunzip.hxx"
+-#include <stdio.h>
+-#include <string>
+-#include <fstream>
+-
+-class FileMgr {
+- private:
+-  FileMgr(const FileMgr&);
+-  FileMgr& operator=(const FileMgr&);
++#include "mozHunspellRLBoxSandbox.h"
+ 
+- protected:
+-  std::ifstream fin;
+-  Hunzip* hin;
+-  char in[BUFSIZE + 50];  // input buffer
+-  int fail(const char* err, const char* par);
+-  int linenum;
+-
+- public:
+-  FileMgr(const char* filename, const char* key = NULL);
+-  ~FileMgr();
+-  bool getline(std::string&);
+-  int getlinenum();
+-};
+ #endif
diff --git a/extensions/spellcheck/hunspell/patches/bug1653659.patch b/extensions/spellcheck/hunspell/patches/bug1653659.patch
new file mode 100644
index 0000000000..ab3e752015
--- /dev/null
+++ b/extensions/spellcheck/hunspell/patches/bug1653659.patch
@@ -0,0 +1,190 @@
+diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx
+--- a/extensions/spellcheck/hunspell/src/csutil.cxx
++++ b/extensions/spellcheck/hunspell/src/csutil.cxx
+@@ -90,21 +90,17 @@
+ #else
+ #ifndef MOZILLA_CLIENT
+ #include "utf_info.hxx"
+ #define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info)))
+ #endif
+ #endif
+ 
+ #ifdef MOZILLA_CLIENT
+-#include "nsCOMPtr.h"
+-#include "nsUnicharUtils.h"
+-#include "mozilla/Encoding.h"
+-
+-using namespace mozilla;
++#include "mozHunspellRLBoxGlue.h"
+ #endif
+ 
+ struct unicode_info2 {
+   char cletter;
+   unsigned short cupper;
+   unsigned short clower;
+ };
+ 
+@@ -2277,101 +2273,18 @@ struct cs_info* get_current_cs(const std
+                      "error: unknown encoding %s: using %s as fallback\n", es.c_str(),
+                      encds[0].enc_name);
+     ccs = encds[0].cs_table;
+   }
+ 
+   return ccs;
+ }
+ #else
+-// XXX This function was rewritten for mozilla. Instead of storing the
+-// conversion tables static in this file, create them when needed
+-// with help the mozilla backend.
+ struct cs_info* get_current_cs(const std::string& es) {
+-  struct cs_info* ccs = new cs_info[256];
+-  // Initialze the array with dummy data so that we wouldn't need
+-  // to return null in case of failures.
+-  for (int i = 0; i <= 0xff; ++i) {
+-    ccs[i].ccase = false;
+-    ccs[i].clower = i;
+-    ccs[i].cupper = i;
+-  }
+-
+-  auto encoding = Encoding::ForLabelNoReplacement(es);
+-  if (!encoding) {
+-    return ccs;
+-  }
+-  auto encoder = encoding->NewEncoder();
+-  auto decoder = encoding->NewDecoderWithoutBOMHandling();
+-
+-  for (unsigned int i = 0; i <= 0xff; ++i) {
+-    bool success = false;
+-    // We want to find the upper/lowercase equivalents of each byte
+-    // in this 1-byte character encoding.  Call our encoding/decoding
+-    // APIs separately for each byte since they may reject some of the
+-    // bytes, and we want to handle errors separately for each byte.
+-    uint8_t lower, upper;
+-    do {
+-      if (i == 0)
+-        break;
+-      uint8_t source = uint8_t(i);
+-      char16_t uni[2];
+-      char16_t uniCased;
+-      uint8_t destination[4];
+-      auto src1 = MakeSpan(&source, 1);
+-      auto dst1 = MakeSpan(uni);
+-      auto src2 = MakeSpan(&uniCased, 1);
+-      auto dst2 = MakeSpan(destination);
+-
+-      uint32_t result;
+-      size_t read;
+-      size_t written;
+-      Tie(result, read, written) =
+-        decoder->DecodeToUTF16WithoutReplacement(src1, dst1, true);
+-      if (result != kInputEmpty || read != 1 || written != 1) {
+-        break;
+-      }
+-
+-      uniCased = ToLowerCase(uni[0]);
+-      Tie(result, read, written) =
+-        encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true);
+-      if (result != kInputEmpty || read != 1 || written != 1) {
+-        break;
+-      }
+-      lower = destination[0];
+-
+-      uniCased = ToUpperCase(uni[0]);
+-      Tie(result, read, written) =
+-        encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true);
+-      if (result != kInputEmpty || read != 1 || written != 1) {
+-        break;
+-      }
+-      upper = destination[0];
+-
+-      success = true;
+-    } while (0);
+-
+-    encoding->NewEncoderInto(*encoder);
+-    encoding->NewDecoderWithoutBOMHandlingInto(*decoder);
+-
+-    if (success) {
+-      ccs[i].cupper = upper;
+-      ccs[i].clower = lower;
+-    } else {
+-      ccs[i].cupper = i;
+-      ccs[i].clower = i;
+-    }
+-
+-    if (ccs[i].clower != (unsigned char)i)
+-      ccs[i].ccase = true;
+-    else
+-      ccs[i].ccase = false;
+-  }
+-
+-  return ccs;
++  return moz_hunspell_GetCurrentCS(es.c_str());
+ }
+ #endif
+ 
+ // primitive isalpha() replacement for tokenization
+ std::string get_casechars(const char* enc) {
+   struct cs_info* csconv = get_current_cs(enc);
+   std::string expw;
+   for (int i = 0; i <= 255; ++i) {
+@@ -2455,34 +2368,34 @@ unsigned short unicodetoupper(unsigned s
+   // There are a dotless lower case i pair of upper `I',
+   // and an upper I with dot pair of lower `i'.
+   if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh)))
+     return 0x0130;
+ #ifdef OPENOFFICEORG
+   return static_cast<unsigned short>(u_toupper(c));
+ #else
+ #ifdef MOZILLA_CLIENT
+-  return ToUpperCase((char16_t)c);
++  return moz_hunspell_ToUpperCase((char16_t)c);
+ #else
+   return (utf_tbl) ? utf_tbl[c].cupper : c;
+ #endif
+ #endif
+ }
+ 
+ unsigned short unicodetolower(unsigned short c, int langnum) {
+   // In Azeri and Turkish, I and i dictinct letters:
+   // There are a dotless lower case i pair of upper `I',
+   // and an upper I with dot pair of lower `i'.
+   if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh)))
+     return 0x0131;
+ #ifdef OPENOFFICEORG
+   return static_cast<unsigned short>(u_tolower(c));
+ #else
+ #ifdef MOZILLA_CLIENT
+-  return ToLowerCase((char16_t)c);
++  return moz_hunspell_ToLowerCase((char16_t)c);
+ #else
+   return (utf_tbl) ? utf_tbl[c].clower : c;
+ #endif
+ #endif
+ }
+ 
+ int unicodeisalpha(unsigned short c) {
+ #ifdef OPENOFFICEORG
+diff --git a/extensions/spellcheck/hunspell/src/csutil.hxx b/extensions/spellcheck/hunspell/src/csutil.hxx
+--- a/extensions/spellcheck/hunspell/src/csutil.hxx
++++ b/extensions/spellcheck/hunspell/src/csutil.hxx
+@@ -77,20 +77,16 @@
+ 
+ #include <fstream>
+ #include <string>
+ #include <vector>
+ #include <string.h>
+ #include "w_char.hxx"
+ #include "htypes.hxx"
+ 
+-#ifdef MOZILLA_CLIENT
+-#include "nscore.h"  // for mozalloc headers
+-#endif
+-
+ // casing
+ #define NOCAP 0
+ #define INITCAP 1
+ #define ALLCAP 2
+ #define HUHCAP 3
+ #define HUHINITCAP 4
+ 
+ // default encoding and keystring
diff --git a/extensions/spellcheck/hunspell/patches/bug1739761.patch b/extensions/spellcheck/hunspell/patches/bug1739761.patch
new file mode 100644
index 0000000000..66ffefadb5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/patches/bug1739761.patch
@@ -0,0 +1,615 @@
+diff --git a/extensions/spellcheck/hunspell/src/hashmgr.cxx b/extensions/spellcheck/hunspell/src/hashmgr.cxx
+--- a/extensions/spellcheck/hunspell/src/hashmgr.cxx
++++ b/extensions/spellcheck/hunspell/src/hashmgr.cxx
+@@ -63,16 +63,17 @@
+  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  */
+ 
++#include <assert.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <stdio.h>
+ #include <ctype.h>
+ #include <limits>
+ #include <sstream>
+ 
+ #include "hashmgr.hxx"
+@@ -118,52 +119,54 @@ HashMgr::~HashMgr() {
+     // go through column by column of the table
+     for (int i = 0; i < tablesize; i++) {
+       struct hentry* pt = tableptr[i];
+       struct hentry* nt = NULL;
+       while (pt) {
+         nt = pt->next;
+         if (pt->astr &&
+             (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen)))
+-          free(pt->astr);
+-        free(pt);
++          arena_free(pt->astr);
++        arena_free(pt);
+         pt = nt;
+       }
+     }
+     free(tableptr);
+   }
+   tablesize = 0;
+ 
+   if (aliasf) {
+     for (int j = 0; j < (numaliasf); j++)
+-      free(aliasf[j]);
+-    free(aliasf);
++      arena_free(aliasf[j]);
++    arena_free(aliasf);
+     aliasf = NULL;
+     if (aliasflen) {
+-      free(aliasflen);
++      arena_free(aliasflen);
+       aliasflen = NULL;
+     }
+   }
+   if (aliasm) {
+     for (int j = 0; j < (numaliasm); j++)
+-      free(aliasm[j]);
+-    free(aliasm);
++      arena_free(aliasm[j]);
++    arena_free(aliasm);
+     aliasm = NULL;
+   }
+ 
+ #ifndef OPENOFFICEORG
+ #ifndef MOZILLA_CLIENT
+   if (utf8)
+     free_utf_tbl();
+ #endif
+ #endif
+ 
+ #ifdef MOZILLA_CLIENT
+   delete[] csconv;
+ #endif
++
++  assert(outstanding_arena_allocations == 0);
+ }
+ 
+ // lookup a root word in the hashtable
+ 
+ struct hentry* HashMgr::lookup(const char* word) const {
+   struct hentry* dp;
+   if (tableptr) {
+     dp = tableptr[hash(word)];
+@@ -222,17 +225,17 @@ int HashMgr::add_word(const std::string&
+ 
+     word = word_copy;
+   }
+ 
+   bool upcasehomonym = false;
+   int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0;
+   // variable-length hash record with word and optional fields
+   struct hentry* hp =
+-      (struct hentry*)malloc(sizeof(struct hentry) + word->size() + descl);
++      (struct hentry*)arena_alloc(sizeof(struct hentry) + word->size() + descl);
+   if (!hp) {
+     delete desc_copy;
+     delete word_copy;
+     return 1;
+   }
+ 
+   char* hpw = hp->word;
+   strcpy(hpw, word->c_str());
+@@ -366,57 +369,57 @@ int HashMgr::add_word(const std::string&
+     delete word_copy;
+     return 0;
+   }
+   while (dp->next != NULL) {
+     if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
+       // remove hidden onlyupcase homonym
+       if (!onlyupcase) {
+         if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
+-          free(dp->astr);
++          arena_free(dp->astr);
+           dp->astr = hp->astr;
+           dp->alen = hp->alen;
+-          free(hp);
++          arena_free(hp);
+           delete desc_copy;
+           delete word_copy;
+           return 0;
+         } else {
+           dp->next_homonym = hp;
+         }
+       } else {
+         upcasehomonym = true;
+       }
+     }
+     dp = dp->next;
+   }
+   if (strcmp(hp->word, dp->word) == 0) {
+     // remove hidden onlyupcase homonym
+     if (!onlyupcase) {
+       if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
+-        free(dp->astr);
++        arena_free(dp->astr);
+         dp->astr = hp->astr;
+         dp->alen = hp->alen;
+-        free(hp);
++        arena_free(hp);
+         delete desc_copy;
+         delete word_copy;
+         return 0;
+       } else {
+         dp->next_homonym = hp;
+       }
+     } else {
+       upcasehomonym = true;
+     }
+   }
+   if (!upcasehomonym) {
+     dp->next = hp;
+   } else {
+     // remove hidden onlyupcase homonym
+     if (hp->astr)
+-      free(hp->astr);
+-    free(hp);
++      arena_free(hp->astr);
++    arena_free(hp);
+   }
+ 
+   delete desc_copy;
+   delete word_copy;
+   return 0;
+ }
+ 
+ int HashMgr::add_hidden_capitalized_word(const std::string& word,
+@@ -430,17 +433,17 @@ int HashMgr::add_hidden_capitalized_word
+ 
+   // add inner capitalized forms to handle the following allcap forms:
+   // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
+   // Allcaps with suffixes: CIA's -> CIA'S
+   if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
+        ((captype == ALLCAP) && (flagslen != 0))) &&
+       !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {
+     unsigned short* flags2 =
+-        (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1));
++        (unsigned short*)arena_alloc(sizeof(unsigned short) * (flagslen + 1));
+     if (!flags2)
+       return 1;
+     if (flagslen)
+       memcpy(flags2, flags, flagslen * sizeof(unsigned short));
+     flags2[flagslen] = ONLYUPCASEFLAG;
+     if (utf8) {
+       std::string st;
+       std::vector<w_char> w;
+@@ -479,23 +482,23 @@ int HashMgr::get_clen_and_captype(const 
+ }
+ 
+ // remove word (personal dictionary function for standalone applications)
+ int HashMgr::remove(const std::string& word) {
+   struct hentry* dp = lookup(word.c_str());
+   while (dp) {
+     if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
+       unsigned short* flags =
+-          (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1));
++          (unsigned short*)arena_alloc(sizeof(unsigned short) * (dp->alen + 1));
+       if (!flags)
+         return 1;
+       for (int i = 0; i < dp->alen; i++)
+         flags[i] = dp->astr[i];
+       flags[dp->alen] = forbiddenword;
+-      free(dp->astr);
++      arena_free(dp->astr);
+       dp->astr = flags;
+       dp->alen++;
+       std::sort(flags, flags + dp->alen);
+     }
+     dp = dp->next_homonym;
+   }
+   return 0;
+ }
+@@ -533,17 +536,17 @@ int HashMgr::add_with_affix(const std::s
+   remove_forbidden_flag(word);
+   if (dp && dp->astr) {
+     int captype;
+     int wcl = get_clen_and_captype(word, &captype);
+     if (aliasf) {
+       add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype);
+     } else {
+       unsigned short* flags =
+-          (unsigned short*)malloc(dp->alen * sizeof(unsigned short));
++          (unsigned short*) arena_alloc(dp->alen * sizeof(unsigned short));
+       if (flags) {
+         memcpy((void*)flags, (void*)dp->astr,
+                dp->alen * sizeof(unsigned short));
+         add_word(word, wcl, flags, dp->alen, NULL, false, captype);
+       } else
+         return 1;
+     }
+     return add_hidden_capitalized_word(word, wcl, dp->astr,
+@@ -668,17 +671,17 @@ int HashMgr::load_tables(const char* tpa
+       if (aliasf) {
+         int index = atoi(ap.c_str());
+         al = get_aliasf(index, &flags, dict);
+         if (!al) {
+           HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n",
+                            dict->getlinenum());
+         }
+       } else {
+-        al = decode_flags(&flags, ap.c_str(), dict);
++        al = decode_flags(&flags, ap.c_str(), dict, /* arena = */ true);
+         if (al == -1) {
+           HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
+           delete dict;
+           return 6;
+         }
+         std::sort(flags, flags + al);
+       }
+     } else {
+@@ -709,47 +712,48 @@ int HashMgr::hash(const char* word) cons
+     hv = (hv << 8) | (*word++);
+   while (*word != 0) {
+     ROTATE(hv, ROTATE_LEN);
+     hv ^= (*word++);
+   }
+   return (unsigned long)hv % tablesize;
+ }
+ 
+-int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const {
++int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const {
++  auto alloc = [arena, this](int n) { return arena ? this->arena_alloc(n) : malloc(n); };
+   int len;
+   if (flags.empty()) {
+     *result = NULL;
+     return 0;
+   }
+   switch (flag_mode) {
+     case FLAG_LONG: {  // two-character flags (1x2yZz -> 1x 2y Zz)
+       len = flags.size();
+       if (len % 2 == 1)
+         HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
+                          af->getlinenum());
+       len /= 2;
+-      *result = (unsigned short*)malloc(len * sizeof(unsigned short));
++      *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+       if (!*result)
+         return -1;
+       for (int i = 0; i < len; i++) {
+         (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) +
+                        (unsigned char)flags[i * 2 + 1];
+       }
+       break;
+     }
+     case FLAG_NUM: {  // decimal numbers separated by comma (4521,23,233 -> 4521
+                       // 23 233)
+       len = 1;
+       unsigned short* dest;
+       for (size_t i = 0; i < flags.size(); ++i) {
+         if (flags[i] == ',')
+           len++;
+       }
+-      *result = (unsigned short*)malloc(len * sizeof(unsigned short));
++      *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+       if (!*result)
+         return -1;
+       dest = *result;
+       const char* src = flags.c_str();
+       for (const char* p = src; *p; p++) {
+         if (*p == ',') {
+           int i = atoi(src);
+           if (i >= DEFAULTFLAGS)
+@@ -774,26 +778,26 @@ int HashMgr::decode_flags(unsigned short
+         HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
+                          af->getlinenum());
+       break;
+     }
+     case FLAG_UNI: {  // UTF-8 characters
+       std::vector<w_char> w;
+       u8_u16(w, flags);
+       len = w.size();
+-      *result = (unsigned short*)malloc(len * sizeof(unsigned short));
++      *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+       if (!*result)
+         return -1;
+       memcpy(*result, w.data(), len * sizeof(short));
+       break;
+     }
+     default: {  // Ispell's one-character flags (erfg -> e r f g)
+       unsigned short* dest;
+       len = flags.size();
+-      *result = (unsigned short*)malloc(len * sizeof(unsigned short));
++      *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+       if (!*result)
+         return -1;
+       dest = *result;
+       for (size_t i = 0; i < flags.size(); ++i) {
+         *dest = (unsigned char)flags[i];
+         dest++;
+       }
+     }
+@@ -890,16 +894,18 @@ unsigned short HashMgr::decode_flag(cons
+     default:
+       s = *(unsigned char*)f;
+   }
+   if (s == 0)
+     HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
+   return s;
+ }
+ 
++// This function is only called by external consumers, and so using the default
++// allocator with mystrdup is correct.
+ char* HashMgr::encode_flag(unsigned short f) const {
+   if (f == 0)
+     return mystrdup("(NULL)");
+   std::string ch;
+   if (flag_mode == FLAG_LONG) {
+     ch.push_back((unsigned char)(f >> 8));
+     ch.push_back((unsigned char)(f - ((f >> 8) << 8)));
+   } else if (flag_mode == FLAG_NUM) {
+@@ -1070,42 +1076,42 @@ bool HashMgr::parse_aliasf(const std::st
+           numaliasf = 0;
+           aliasf = NULL;
+           aliasflen = NULL;
+           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+                            af->getlinenum());
+           return false;
+         }
+         aliasf =
+-            (unsigned short**)malloc(numaliasf * sizeof(unsigned short*));
++            (unsigned short**)arena_alloc(numaliasf * sizeof(unsigned short*));
+         aliasflen =
+-            (unsigned short*)malloc(numaliasf * sizeof(unsigned short));
++            (unsigned short*)arena_alloc(numaliasf * sizeof(unsigned short));
+         if (!aliasf || !aliasflen) {
+           numaliasf = 0;
+           if (aliasf)
+-            free(aliasf);
++            arena_free(aliasf);
+           if (aliasflen)
+-            free(aliasflen);
++            arena_free(aliasflen);
+           aliasf = NULL;
+           aliasflen = NULL;
+           return false;
+         }
+         np++;
+         break;
+       }
+       default:
+         break;
+     }
+     ++i;
+     start_piece = mystrsep(line, iter);
+   }
+   if (np != 2) {
+     numaliasf = 0;
+-    free(aliasf);
+-    free(aliasflen);
++    arena_free(aliasf);
++    arena_free(aliasflen);
+     aliasf = NULL;
+     aliasflen = NULL;
+     HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+                      af->getlinenum());
+     return false;
+   }
+ 
+   /* now parse the numaliasf lines to read in the remainder of the table */
+@@ -1126,33 +1132,33 @@ bool HashMgr::parse_aliasf(const std::st
+               errored = true;
+               break;
+             }
+             break;
+           }
+           case 1: {
+             std::string piece(start_piece, iter);
+             aliasflen[j] =
+-                (unsigned short)decode_flags(&(aliasf[j]), piece, af);
++                (unsigned short)decode_flags(&(aliasf[j]), piece, af, /* arena = */ true);
+             std::sort(aliasf[j], aliasf[j] + aliasflen[j]);
+             break;
+           }
+           default:
+             break;
+         }
+         ++i;
+         start_piece = mystrsep(nl, iter);
+       }
+     }
+     if (!aliasf[j]) {
+       for (int k = 0; k < j; ++k) {
+-        free(aliasf[k]);
++        arena_free(aliasf[k]);
+       }
+-      free(aliasf);
+-      free(aliasflen);
++      arena_free(aliasf);
++      arena_free(aliasflen);
+       aliasf = NULL;
+       aliasflen = NULL;
+       numaliasf = 0;
+       HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+                        af->getlinenum());
+       return false;
+     }
+   }
+@@ -1193,33 +1199,33 @@ bool HashMgr::parse_aliasm(const std::st
+       }
+       case 1: {
+         numaliasm = atoi(std::string(start_piece, iter).c_str());
+         if (numaliasm < 1) {
+           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+                            af->getlinenum());
+           return false;
+         }
+-        aliasm = (char**)malloc(numaliasm * sizeof(char*));
++        aliasm = (char**)arena_alloc(numaliasm * sizeof(char*));
+         if (!aliasm) {
+           numaliasm = 0;
+           return false;
+         }
+         np++;
+         break;
+       }
+       default:
+         break;
+     }
+     ++i;
+     start_piece = mystrsep(line, iter);
+   }
+   if (np != 2) {
+     numaliasm = 0;
+-    free(aliasm);
++    arena_free(aliasm);
+     aliasm = NULL;
+     HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+                      af->getlinenum());
+     return false;
+   }
+ 
+   /* now parse the numaliasm lines to read in the remainder of the table */
+   for (int j = 0; j < numaliasm; j++) {
+@@ -1245,32 +1251,36 @@ bool HashMgr::parse_aliasm(const std::st
+             std::string::const_iterator end = nl.end();
+             std::string chunk(start_piece, end);
+             if (complexprefixes) {
+               if (utf8)
+                 reverseword_utf(chunk);
+               else
+                 reverseword(chunk);
+             }
+-            aliasm[j] = mystrdup(chunk.c_str());
++            size_t sl = chunk.length() + 1;
++            aliasm[j] = (char*)arena_alloc(sl);
++            if (aliasm[j]) {
++              memcpy(aliasm[j], chunk.c_str(), sl);
++            }
+             break;
+           }
+           default:
+             break;
+         }
+         ++i;
+         start_piece = mystrsep(nl, iter);
+       }
+     }
+     if (!aliasm[j]) {
+       numaliasm = 0;
+       for (int k = 0; k < j; ++k) {
+-        free(aliasm[k]);
++        arena_free(aliasm[k]);
+       }
+-      free(aliasm);
++      arena_free(aliasm);
+       aliasm = NULL;
+       HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+                        af->getlinenum());
+       return false;
+     }
+   }
+   return true;
+ }
+@@ -1379,8 +1389,27 @@ bool HashMgr::parse_reptable(const std::
+   }
+   return true;
+ }
+ 
+ // return replacing table
+ const std::vector<replentry>& HashMgr::get_reptable() const {
+   return reptable;
+ }
++
++void* HashMgr::arena_alloc(int num_bytes) {
++  static const int MIN_CHUNK_SIZE = 4096;
++  if (arena.empty() || (current_chunk_size - current_chunk_offset < num_bytes)) {
++    current_chunk_size = std::max(MIN_CHUNK_SIZE, num_bytes);
++    arena.push_back(std::make_unique<uint8_t[]>(current_chunk_size));
++    current_chunk_offset = 0;
++  }
++
++  uint8_t* ptr = &arena.back()[current_chunk_offset];
++  current_chunk_offset += num_bytes;
++  outstanding_arena_allocations++;
++  return ptr;
++}
++
++void HashMgr::arena_free(void* ptr) {
++  --outstanding_arena_allocations;
++  assert(outstanding_arena_allocations >= 0);
++}
+diff --git a/extensions/spellcheck/hunspell/src/hashmgr.hxx b/extensions/spellcheck/hunspell/src/hashmgr.hxx
+--- a/extensions/spellcheck/hunspell/src/hashmgr.hxx
++++ b/extensions/spellcheck/hunspell/src/hashmgr.hxx
+@@ -67,16 +67,18 @@
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  */
+ 
+ #ifndef HASHMGR_HXX_
+ #define HASHMGR_HXX_
+ 
+ #include <stdio.h>
++#include <stdint.h>
++#include <memory>
+ #include <string>
+ #include <vector>
+ 
+ #include "htypes.hxx"
+ #include "filemgr.hxx"
+ #include "w_char.hxx"
+ 
+ enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
+@@ -116,17 +118,23 @@ class HashMgr {
+ 
+   struct hentry* lookup(const char*) const;
+   int hash(const char*) const;
+   struct hentry* walk_hashtable(int& col, struct hentry* hp) const;
+ 
+   int add(const std::string& word);
+   int add_with_affix(const std::string& word, const std::string& pattern);
+   int remove(const std::string& word);
+-  int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const;
++private:
++  // Only internal consumers are allowed to arena-allocate.
++  int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const;
++public:
++  int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const {
++    return decode_flags(result, flags, af, /* arena = */ false);
++  }
+   bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const;
+   unsigned short decode_flag(const char* flag) const;
+   char* encode_flag(unsigned short flag) const;
+   int is_aliasf() const;
+   int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const;
+   int is_aliasm() const;
+   char* get_aliasm(int index) const;
+   const std::vector<replentry>& get_reptable() const;
+@@ -148,11 +156,27 @@ class HashMgr {
+                                   int wcl,
+                                   unsigned short* flags,
+                                   int al,
+                                   const std::string* dp,
+                                   int captype);
+   bool parse_aliasm(const std::string& line, FileMgr* af);
+   bool parse_reptable(const std::string& line, FileMgr* af);
+   int remove_forbidden_flag(const std::string& word);
++
++  // Our Mozilla fork uses a simple arena allocator for certain strings which
++  // persist for the lifetime of the HashMgr in order to avoid heap fragmentation.
++  // It's a simple bump-allocator, so we can't actually free() memory midway
++  // through the lifecycle, but we have a dummy free() implementation to ensure
++  // that our calls to arena_alloc() and arena_free() are balanced.
++  void* arena_alloc(int num_bytes);
++  void* arena_alloc(int num_bytes) const {
++    return const_cast<HashMgr*>(this)->arena_alloc(num_bytes);
++  }
++  void arena_free(void* ptr);
++
++  std::vector<std::unique_ptr<uint8_t[]>> arena;
++  int current_chunk_size = 0;
++  int current_chunk_offset = 0;
++  int outstanding_arena_allocations = 0;
+ };
+ 
+ #endif
diff --git a/extensions/spellcheck/hunspell/patches/bug1838113.patch b/extensions/spellcheck/hunspell/patches/bug1838113.patch
new file mode 100644
index 0000000000..61ec2af701
--- /dev/null
+++ b/extensions/spellcheck/hunspell/patches/bug1838113.patch
@@ -0,0 +1,20 @@
+diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx
+index 48e58ff4b2677..39a54d38023c8 100644
+--- a/extensions/spellcheck/hunspell/src/csutil.cxx
++++ b/extensions/spellcheck/hunspell/src/csutil.cxx
+@@ -108,6 +108,7 @@ static struct unicode_info2* utf_tbl = NULL;
+ static int utf_tbl_count =
+     0;  // utf_tbl can be used by multiple Hunspell instances
+ 
++#ifndef MOZILLA_CLIENT
+ void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mode)
+ {
+ #if defined(_WIN32) && defined(_MSC_VER)
+@@ -127,6 +128,7 @@ void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mod
+ #endif
+   stream.open(path, mode);
+ }
++#endif
+ 
+ std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {
+   dest.clear();
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
commit	6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree	a68f146d7fa01f0134297619fbe7e33db084e0aa /extensions/spellcheck/hunspell/patches
parent	Initial commit. (diff)
download	thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip