1 files changed, 190 insertions, 0 deletions
diff --git a/src/grep/src/searchutils.c b/src/grep/src/searchutils.c
new file mode 100644
index 0000000..8058511
--- /dev/null
+++ b/src/grep/src/searchutils.c
@@ -0,0 +1,190 @@
+/* searchutils.c - helper subroutines for grep's matchers.
+   Copyright 1992, 1998, 2000, 2007, 2009-2021 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+#include <config.h>
+
+#define SEARCH_INLINE _GL_EXTERN_INLINE
+#define SYSTEM_INLINE _GL_EXTERN_INLINE
+#include "search.h"
+
+/* For each byte B, sbwordchar[B] is true if B is a single-byte
+   character that is a word constituent, and is false otherwise.  */
+static bool sbwordchar[NCHAR];
+
+/* Whether -w considers WC to be a word constituent.  */
+static bool
+wordchar (wint_t wc)
+{
+  return wc == L'_' || iswalnum (wc);
+}
+
+void
+wordinit (void)
+{
+  for (int i = 0; i < NCHAR; i++)
+    sbwordchar[i] = wordchar (localeinfo.sbctowc[i]);
+}
+
+kwset_t
+kwsinit (bool mb_trans)
+{
+  char *trans = NULL;
+
+  if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
+    {
+      trans = xmalloc (NCHAR);
+      /* If I is a single-byte character that becomes a different
+         single-byte character when uppercased, set trans[I]
+         to that character.  Otherwise, set trans[I] to I.  */
+      for (int i = 0; i < NCHAR; i++)
+        trans[i] = toupper (i);
+    }
+
+  return kwsalloc (trans);
+}
+
+/* In the buffer *MB_START, return the number of bytes needed to go
+   back from CUR to the previous boundary, where a "boundary" is the
+   start of a multibyte character or is an error-encoding byte.  The
+   buffer ends at END (i.e., one past the address of the buffer's last
+   byte).  If CUR is already at a boundary, return 0.  If CUR is no
+   larger than *MB_START, return CUR - *MB_START without modifying
+   *MB_START or *MBCLEN.
+
+   When returning zero, set *MB_START to CUR.  When returning a
+   positive value, set *MB_START to the next boundary after CUR,
+   or to END if there is no such boundary, and set *MBCLEN to the
+   length of the preceding character.  */
+ptrdiff_t
+mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
+           char const *end)
+{
+  const char *p = *mb_start;
+  const char *p0 = p;
+  size_t clen;
+
+  if (cur <= p)
+    return cur - p;
+
+  if (localeinfo.using_utf8)
+    {
+      p = cur;
+      clen = 1;
+
+      if (cur < end && (*cur & 0xc0) == 0x80)
+        for (int i = 1; i <= 3; i++)
+          if ((cur[-i] & 0xc0) != 0x80)
+            {
+              mbstate_t mbs = { 0 };
+              clen = mb_clen (cur - i, end - (cur - i), &mbs);
+              if (i < clen && clen < (size_t) -2)
+                {
+                  p0 = cur - i;
+                  p = p0 + clen;
+                }
+              break;
+            }
+    }
+  else
+    {
+      mbstate_t mbs = { 0 };
+      do
+        {
+          clen = mb_clen (p, end - p, &mbs);
+
+          if ((size_t) -2 <= clen)
+            {
+              /* An invalid sequence, or a truncated multibyte character.
+                 Treat it as a single byte character.  */
+              clen = 1;
+              memset (&mbs, 0, sizeof mbs);
+            }
+          p0 = p;
+          p += clen;
+        }
+      while (p < cur);
+    }
+
+  *mb_start = p;
+  if (mbclen)
+    *mbclen = clen;
+  return p == cur ? 0 : cur - p0;
+}
+
+/* Examine the start of BUF (which goes to END) for word constituents.
+   If COUNTALL, examine as many as possible; otherwise, examine at most one.
+   Return the total number of bytes in the examined characters.  */
+static size_t
+wordchars_count (char const *buf, char const *end, bool countall)
+{
+  size_t n = 0;
+  mbstate_t mbs = { 0 };
+  while (n < end - buf)
+    {
+      unsigned char b = buf[n];
+      if (sbwordchar[b])
+        n++;
+      else if (localeinfo.sbclen[b] != -2)
+        break;
+      else
+        {
+          wchar_t wc = 0;
+          size_t wcbytes = mbrtowc (&wc, buf + n, end - buf - n, &mbs);
+          if (!wordchar (wc))
+            break;
+          n += wcbytes + !wcbytes;
+        }
+      if (!countall)
+        break;
+    }
+  return n;
+}
+
+/* Examine the start of BUF for the longest prefix containing just
+   word constituents.  Return the total number of bytes in the prefix.
+   The buffer ends at END.  */
+size_t
+wordchars_size (char const *buf, char const *end)
+{
+  return wordchars_count (buf, end, true);
+}
+
+/* If BUF starts with a word constituent, return the number of bytes
+   used to represent it; otherwise, return zero.  The buffer ends at END.  */
+size_t
+wordchar_next (char const *buf, char const *end)
+{
+  return wordchars_count (buf, end, false);
+}
+
+/* In the buffer BUF, return nonzero if the character whose encoding
+   contains the byte before CUR is a word constituent.  The buffer
+   ends at END.  */
+size_t
+wordchar_prev (char const *buf, char const *cur, char const *end)
+{
+  if (buf == cur)
+    return 0;
+  unsigned char b = *--cur;
+  if (! localeinfo.multibyte
+      || (localeinfo.using_utf8 && localeinfo.sbclen[b] == 1))
+    return sbwordchar[b];
+  char const *p = buf;
+  cur -= mb_goback (&p, NULL, cur, end);
+  return wordchar_next (cur, end);
+}