diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:21:29 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:21:29 +0000 |
commit | 29cd838eab01ed7110f3ccb2e8c6a35c8a31dbcc (patch) | |
tree | 63ef546b10a81d461e5cf5ed9e98a68cd7dee1aa /src/grep/src/kwsearch.c | |
parent | Initial commit. (diff) | |
download | kbuild-29cd838eab01ed7110f3ccb2e8c6a35c8a31dbcc.tar.xz kbuild-29cd838eab01ed7110f3ccb2e8c6a35c8a31dbcc.zip |
Adding upstream version 1:0.1.9998svn3589+dfsg.upstream/1%0.1.9998svn3589+dfsg
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/grep/src/kwsearch.c')
-rw-r--r-- | src/grep/src/kwsearch.c | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/src/grep/src/kwsearch.c b/src/grep/src/kwsearch.c new file mode 100644 index 0000000..ea18ce1 --- /dev/null +++ b/src/grep/src/kwsearch.c @@ -0,0 +1,240 @@ +/* kwsearch.c - searching subroutines using kwset for grep. + Copyright 1992, 1998, 2000, 2007, 2009-2021 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* Written August 1992 by Mike Haertel. */ + +#include <config.h> +#include "search.h" + +/* A compiled -F pattern list. */ + +struct kwsearch +{ + /* The kwset for this pattern list. */ + kwset_t kwset; + + /* The number of user-specified patterns. This is less than + 'kwswords (kwset)' when some extra one-character words have been + appended, one for each troublesome character that will require a + DFA search. */ + ptrdiff_t words; + + /* The user's pattern and its size in bytes. */ + char *pattern; + size_t size; + + /* The user's pattern compiled as a regular expression, + or null if it has not been compiled. */ + void *re; +}; + +/* Compile the -F style PATTERN, containing SIZE bytes that are + followed by '\n'. Return a description of the compiled pattern. */ + +void * +Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) +{ + kwset_t kwset; + char *buf = NULL; + size_t bufalloc = 0; + + kwset = kwsinit (true); + + char const *p = pattern; + do + { + char const *sep = rawmemchr (p, '\n'); + ptrdiff_t len = sep - p; + + if (match_lines) + { + if (eolbyte == '\n' && pattern < p) + p--; + else + { + if (bufalloc < len + 2) + { + free (buf); + bufalloc = len + 2; + buf = x2realloc (NULL, &bufalloc); + buf[0] = eolbyte; + } + memcpy (buf + 1, p, len); + buf[len + 1] = eolbyte; + p = buf; + } + len += 2; + } + kwsincr (kwset, p, len); + + p = sep + 1; + } + while (p <= pattern + size); + + free (buf); + + ptrdiff_t words = kwswords (kwset); + kwsprep (kwset); + + struct kwsearch *kwsearch = xmalloc (sizeof *kwsearch); + kwsearch->kwset = kwset; + kwsearch->words = words; + kwsearch->pattern = pattern; + kwsearch->size = size; + kwsearch->re = NULL; + return kwsearch; +} + +/* Use the compiled pattern VCP to search the buffer BUF of size SIZE. + If found, return the offset of the first match and store its + size into *MATCH_SIZE. If not found, return SIZE_MAX. + If START_PTR is nonnull, start searching there. */ +size_t +Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + char const *start_ptr) +{ + char const *beg, *end, *mb_start; + ptrdiff_t len; + char eol = eolbyte; + struct kwsearch *kwsearch = vcp; + kwset_t kwset = kwsearch->kwset; + bool mb_check = localeinfo.multibyte & !localeinfo.using_utf8 & !match_lines; + bool longest = (mb_check | !!start_ptr | match_words) & !match_lines; + + for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) + { + struct kwsmatch kwsmatch; + ptrdiff_t offset = kwsexec (kwset, beg - match_lines, + buf + size - beg + match_lines, &kwsmatch, + longest); + if (offset < 0) + break; + len = kwsmatch.size - 2 * match_lines; + + size_t mbclen = 0; + if (mb_check + && mb_goback (&mb_start, &mbclen, beg + offset, buf + size) != 0) + { + /* We have matched a single byte that is not at the beginning of a + multibyte character. mb_goback has advanced MB_START past that + multibyte character. Now, we want to position BEG so that the + next kwsexec search starts there. Thus, to compensate for the + for-loop's BEG++, above, subtract one here. This code is + unusually hard to reach, and exceptionally, let's show how to + trigger it here: + + printf '\203AA\n'|LC_ALL=ja_JP.SHIFT_JIS src/grep -F A + + That assumes the named locale is installed. + Note that your system's shift-JIS locale may have a different + name, possibly including "sjis". */ + beg = mb_start - 1; + continue; + } + beg += offset; + if (!!start_ptr & !match_words) + goto success_in_beg_and_len; + if (match_lines) + { + len += start_ptr == NULL; + goto success_in_beg_and_len; + } + if (! match_words) + goto success; + + /* We need a preceding mb_start pointer. Use the beginning of line + if there is a preceding newline. */ + if (mbclen == 0) + { + char const *nl = memrchr (mb_start, eol, beg - mb_start); + if (nl) + mb_start = nl + 1; + } + + /* Succeed if neither the preceding nor the following character is a + word constituent. If the preceding is not, yet the following + character IS a word constituent, keep trying with shorter matches. */ + if (mbclen > 0 + ? ! wordchar_next (beg - mbclen, buf + size) + : ! wordchar_prev (mb_start, beg, buf + size)) + for (;;) + { + if (! wordchar_next (beg + len, buf + size)) + { + if (start_ptr) + goto success_in_beg_and_len; + else + goto success; + } + if (!start_ptr && !localeinfo.multibyte) + { + if (! kwsearch->re) + { + fgrep_to_grep_pattern (&kwsearch->pattern, &kwsearch->size); + kwsearch->re = GEAcompile (kwsearch->pattern, + kwsearch->size, + RE_SYNTAX_GREP, !!start_ptr); + } + if (beg + len < buf + size) + { + end = rawmemchr (beg + len, eol); + end++; + } + else + end = buf + size; + + if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL) + != (size_t) -1) + goto success_match_words; + beg = end - 1; + break; + } + if (!len) + break; + + struct kwsmatch shorter_match; + if (kwsexec (kwset, beg, --len, &shorter_match, true) != 0) + break; + len = shorter_match.size; + } + + /* No word match was found at BEG. Skip past word constituents, + since they cannot precede the next match and not skipping + them could make things much slower. */ + beg += wordchars_size (beg, buf + size); + mb_start = beg; + } + + return -1; + + success: + if (beg + len < buf + size) + { + end = rawmemchr (beg + len, eol); + end++; + } + else + end = buf + size; + success_match_words: + beg = memrchr (buf, eol, beg - buf); + beg = beg ? beg + 1 : buf; + len = end - beg; + success_in_beg_and_len:; + *match_size = len; + return beg - buf; +} |