Adding upstream version 1.23.0.upstream/1.23.0 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 19:44:05 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 19:44:05 +0000
commit: d318611dd6f23fcfedd50e9b9e24620b102ba96a (patch)
tree: 8b9eef82ca40fdd5a8deeabf07572074c236095d /src/libs/libbib
parent: Initial commit. (diff)
download: groff-d318611dd6f23fcfedd50e9b9e24620b102ba96a.tar.xz
groff-d318611dd6f23fcfedd50e9b9e24620b102ba96a.zip
6 files changed, 1488 insertions, 0 deletions
diff --git a/src/libs/libbib/common.cpp b/src/libs/libbib/common.cpp
new file mode 100644
index 0000000..05accd8
--- /dev/null
+++ b/src/libs/libbib/common.cpp
@@ -0,0 +1,37 @@
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+unsigned hash(const char *s, int len)
+{
+#if 0
+  unsigned h = 0, g;
+  while (*s != '\0') {
+    h <<= 4;
+    h += *s++;
+    if ((g = h & 0xf0000000) != 0) {
+      h ^= g >> 24;
+      h ^= g;
+    }
+  }
+#endif
+  unsigned h = 0;
+  while (--len >= 0)
+    h = *s++ + 65587*h;
+  return h;
+}
+
diff --git a/src/libs/libbib/index.cpp b/src/libs/libbib/index.cpp
new file mode 100644
index 0000000..aebcbbf
--- /dev/null
+++ b/src/libs/libbib/index.cpp
@@ -0,0 +1,688 @@
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include "lib.h"
+
+#include <stdlib.h>
+#include <errno.h>
+
+#include "posix.h"
+#include "cset.h"
+#include "cmap.h"
+#include "errarg.h"
+#include "error.h"
+
+#include "refid.h"
+#include "search.h"
+#include "index.h"
+#include "defs.h"
+
+#include "nonposix.h"
+
+// Interface to mmap.
+extern "C" {
+  void *mapread(int fd, int len);
+  int unmap(void *, int len);
+}
+
+#if 0
+const 
+#endif
+int minus_one = -1;
+
+bool do_verify = false;
+
+struct word_list;
+
+class index_search_item : public search_item {
+  search_item *out_of_date_files;
+  index_header header;
+  char *buffer;
+  void *map_addr;
+  int map_len;
+  tag *tags;
+  int *table;
+  int *lists;
+  char *pool;
+  char *key_buffer;
+  char *filename_buffer;
+  int filename_buflen;
+  char **common_words_table;
+  int common_words_table_size;
+  const char *ignore_fields;
+  time_t mtime;
+
+  const char *get_invalidity_reason();
+  const int *search1(const char **pp, const char *end);
+  const int *search(const char *ptr, int length, int **temp_listp);
+  const char *munge_filename(const char *);
+  void read_common_words_file();
+  void add_out_of_date_file(int fd, const char *filename, int fid);
+public:
+  index_search_item(const char *, int);
+  ~index_search_item();
+  const char *check_header(index_header *, unsigned);
+  bool load(int fd);
+  search_item_iterator *make_search_item_iterator(const char *);
+  bool is_valid();
+  void check_files();
+  int next_filename_id() const;
+  friend class index_search_item_iterator;
+};
+
+class index_search_item_iterator : public search_item_iterator {
+  index_search_item *indx;
+  search_item_iterator *out_of_date_files_iter;
+  search_item *next_out_of_date_file;
+  const int *found_list;
+  int *temp_list;
+  char *buf;
+  int buflen;
+  linear_searcher searcher;
+  char *query;
+  int get_tag(int tagno, const linear_searcher &, const char **, int *,
+	      reference_id *);
+public:
+  index_search_item_iterator(index_search_item *, const char *);
+  ~index_search_item_iterator();
+  int next(const linear_searcher &, const char **, int *, reference_id *);
+};
+
+
+index_search_item::index_search_item(const char *filename, int fid)
+: search_item(filename, fid), out_of_date_files(0), buffer(0), map_addr(0),
+  map_len(0), key_buffer(0), filename_buffer(0), filename_buflen(0),
+  common_words_table(0)
+{
+}
+
+index_search_item::~index_search_item()
+{
+  if (buffer)
+    free(buffer);
+  if (map_addr) {
+    if (unmap(map_addr, map_len) < 0)
+      error("unmap: %1", strerror(errno));
+  }
+  while (out_of_date_files) {
+    search_item *tem = out_of_date_files;
+    out_of_date_files = out_of_date_files->next;
+    delete tem;
+  }
+  delete[] filename_buffer;
+  delete[] key_buffer;
+  if (common_words_table) {
+    for (int i = 0; i < common_words_table_size; i++)
+      delete[] common_words_table[i];
+    delete[] common_words_table;
+  }
+}
+
+class file_closer {
+  int *fdp;
+public:
+  file_closer(int &fd) : fdp(&fd) { }
+  ~file_closer() { close(*fdp); }
+};
+
+// Tell the compiler that a variable is intentionally unused.
+inline void unused(void *) { }
+
+// Validate the data reported in the header so that we don't overread on
+// the heap in the load() member function.  Return null pointer if no
+// problems are detected.
+const char *index_search_item::check_header(index_header *file_header,
+					    unsigned file_size)
+{
+  if (file_header->tags_size < 0)
+    return "tag list length negative";
+  if (file_header->lists_size < 0)
+    return "reference list length negative";
+  // The table and string pool sizes will not be zero, even in an empty
+  // index.
+  if (file_header->table_size < 1)
+    return "table size nonpositive";
+  if (file_header->strings_size < 1)
+    return "string pool size nonpositive";
+  size_t sz = (file_header->tags_size * sizeof(tag)
+	       + file_header->lists_size * sizeof(int)
+	       + file_header->table_size * sizeof(int)
+	       + file_header->strings_size
+	       + sizeof(file_header));
+  if (sz != file_size)
+    return("size mismatch between header and data");
+  unsigned size_remaining = file_size;
+  unsigned chunk_size = file_header->tags_size * sizeof(tag);
+  if (chunk_size > size_remaining)
+    return "claimed tag list length exceeds file size";
+  size_remaining -= chunk_size;
+  chunk_size = file_header->lists_size * sizeof(int);
+  if (chunk_size > size_remaining)
+    return "claimed reference list length exceeds file size";
+  size_remaining -= chunk_size;
+  chunk_size = file_header->table_size * sizeof(int);
+  if (chunk_size > size_remaining)
+    return "claimed table size exceeds file size";
+  size_remaining -= chunk_size;
+  chunk_size = file_header->strings_size;
+  if (chunk_size > size_remaining)
+    return "claimed string pool size exceeds file size";
+  return 0;
+}
+
+bool index_search_item::load(int fd)
+{
+  file_closer fd_closer(fd);	// close fd on return
+  unused(&fd_closer);
+  struct stat sb;
+  if (fstat(fd, &sb) < 0) {
+    error("can't fstat index '%1': %2", name, strerror(errno));
+    return false;
+  }
+  if (!S_ISREG(sb.st_mode)) {
+    error("index '%1' is not a regular file", name);
+    return false;
+  }
+  mtime = sb.st_mtime;
+  unsigned size = unsigned(sb.st_size); // widening conversion
+  if (size == 0) {
+    error("index '%1' is an empty file", name);
+    return false;
+  }
+  char *addr;
+  map_addr = mapread(fd, size);
+  if (map_addr) {
+    addr = (char *)map_addr;
+    map_len = size;
+  }
+  else {
+    addr = buffer = (char *)malloc(size);
+    if (buffer == 0) {
+      error("can't allocate memory to process index '%1'", name);
+      return false;
+    }
+    char *ptr = buffer;
+    int bytes_to_read = size;
+    while (bytes_to_read > 0) {
+      int nread = read(fd, ptr, bytes_to_read);
+      if (nread == 0) {
+	error("unexpected end-of-file while reading index '%1'", name);
+	return false;
+      }
+      if (nread < 0) {
+	error("read error on index '%1': %2", name, strerror(errno));
+	return false;
+      }
+      bytes_to_read -= nread;
+      ptr += nread;
+    }
+  }
+  header = *(index_header *)addr;
+  if (header.magic != INDEX_MAGIC) {
+    error("'%1' is not an index file: wrong magic number", name);
+    return false;
+  }
+  if (header.version != INDEX_VERSION) {
+    error("version number in index '%1' is wrong: was %2, should be %3",
+	  name, header.version, INDEX_VERSION);
+    return false;
+  }
+  const char *problem = check_header(&header, size);
+  if (problem != 0) {
+    if (do_verify)
+      error("corrupt header in index file '%1': %2", name, problem);
+    else
+      error("corrupt header in index file '%1'", name);
+    return false;
+  }
+  tags = (tag *)(addr + sizeof(header));
+  lists = (int *)(tags + header.tags_size);
+  table = (int *)(lists + header.lists_size);
+  pool = (char *)(table + header.table_size);
+  ignore_fields = strchr(strchr(pool, '\0') + 1, '\0') + 1;
+  key_buffer = new char[header.truncate];
+  read_common_words_file();
+  return true;
+}
+
+const char *index_search_item::get_invalidity_reason()
+{
+  if (tags == 0)
+    return "not loaded";
+  if ((header.lists_size > 0) && (lists[header.lists_size - 1] >= 0))
+    return "last list element not negative";
+  int i;
+  for (i = 0; i < header.table_size; i++) {
+    int li = table[i];
+    if (li >= header.lists_size)
+      return "bad list index";
+    if (li >= 0) {
+      for (int *ptr = lists + li; *ptr >= 0; ptr++) {
+	if (*ptr >= header.tags_size)
+	  return "bad tag index";
+	if (*ptr >= ptr[1] && ptr[1] >= 0)
+	  return "list not ordered";
+      }
+    }
+  }
+  for (i = 0; i < header.tags_size; i++) {
+    if (tags[i].filename_index >= header.strings_size)
+      return "bad index in tags";
+    if (tags[i].length < 0)
+      return "bad length in tags";
+    if (tags[i].start < 0)
+      return "bad start in tags";
+  }
+  if (pool[header.strings_size - 1] != '\0')
+    return "last character in string pool is not null";
+  return 0;
+}
+
+bool index_search_item::is_valid()
+{
+  const char *reason = get_invalidity_reason();
+  if (!reason)
+    return true;
+  error("'%1' is bad: %2", name, reason);
+  return false;
+}
+
+int index_search_item::next_filename_id() const
+{
+  return filename_id + header.strings_size + 1;
+}
+
+search_item_iterator *index_search_item::make_search_item_iterator(
+  const char *query)
+{
+  return new index_search_item_iterator(this, query);
+}
+
+search_item *make_index_search_item(const char *filename, int fid)
+{
+  char *index_filename = new char[strlen(filename) + sizeof(INDEX_SUFFIX)];
+  strcpy(index_filename, filename);
+  strcat(index_filename, INDEX_SUFFIX);
+  int fd = open(index_filename, O_RDONLY | O_BINARY);
+  if (fd < 0)
+    return 0;
+  index_search_item *item = new index_search_item(index_filename, fid);
+  delete[] index_filename;
+  if (!item->load(fd)) {
+    close(fd);
+    delete item;
+    return 0;
+  }
+  else if (do_verify && !item->is_valid()) {
+    delete item;
+    return 0;
+  }
+  else {
+    item->check_files();
+    return item;
+  }
+}
+
+
+index_search_item_iterator::index_search_item_iterator(index_search_item *ind,
+						       const char *q)
+: indx(ind), out_of_date_files_iter(0), next_out_of_date_file(0), temp_list(0),
+  buf(0), buflen(0),
+  searcher(q, strlen(q), ind->ignore_fields, ind->header.truncate),
+  query(strsave(q))
+{
+  found_list = indx->search(q, strlen(q), &temp_list);
+  if (!found_list) {
+    found_list = &minus_one;
+    warning("all keys would have been discarded in constructing index '%1'",
+	    indx->name);
+  }
+}
+
+index_search_item_iterator::~index_search_item_iterator()
+{
+  delete[] temp_list;
+  delete[] buf;
+  delete[] query;
+  delete out_of_date_files_iter;
+}
+
+int index_search_item_iterator::next(const linear_searcher &,
+				     const char **pp, int *lenp,
+				     reference_id *ridp)
+{
+  if (found_list) {
+    for (;;) {
+      int tagno = *found_list;
+      if (tagno == -1)
+	break;
+      found_list++;
+      if (get_tag(tagno, searcher, pp, lenp, ridp))
+	return 1;
+    }
+    found_list = 0;
+    next_out_of_date_file = indx->out_of_date_files;
+  }
+  while (next_out_of_date_file) {
+    if (out_of_date_files_iter == 0)
+      out_of_date_files_iter
+	= next_out_of_date_file->make_search_item_iterator(query);
+    if (out_of_date_files_iter->next(searcher, pp, lenp, ridp))
+      return 1;
+    delete out_of_date_files_iter;
+    out_of_date_files_iter = 0;
+    next_out_of_date_file = next_out_of_date_file->next;
+  }
+  return 0;
+}
+
+int index_search_item_iterator::get_tag(int tagno,
+					const linear_searcher &searchr,
+					const char **pp, int *lenp,
+					reference_id *ridp)
+{
+  if (tagno < 0 || tagno >= indx->header.tags_size) {
+    error("bad tag number");
+    return 0;
+  }
+  tag *tp = indx->tags + tagno;
+  const char *filename = indx->munge_filename(indx->pool + tp->filename_index);
+  int fd = open(filename, O_RDONLY | O_BINARY);
+  if (fd < 0) {
+    error("can't open '%1': %2", filename, strerror(errno));
+    return 0;
+  }
+  struct stat sb;
+  if (fstat(fd, &sb) < 0) {
+    error("can't fstat: %1", strerror(errno));
+    close(fd);
+    return 0;
+  }
+  time_t mtime = sb.st_mtime;
+  if (mtime > indx->mtime) {
+    indx->add_out_of_date_file(fd, filename,
+			       indx->filename_id + tp->filename_index);
+    return 0;
+  }
+  int res = 0;
+  FILE *fp = fdopen(fd, FOPEN_RB);
+  if (!fp) {
+    error("fdopen failed");
+    close(fd);
+    return 0;
+  }
+  if (tp->start != 0 && fseek(fp, long(tp->start), 0) < 0)
+    error("can't seek on '%1': %2", filename, strerror(errno));
+  else {
+    int length = tp->length;
+    int err = 0;
+    if (length == 0) {
+      if (fstat(fileno(fp), &sb) < 0) {
+	error("can't stat '%1': %2", filename, strerror(errno));
+	err = 1;
+      }
+      else if (!S_ISREG(sb.st_mode)) {
+	error("'%1' is not a regular file", filename);
+	err = 1;
+      }
+      else
+	length = int(sb.st_size);
+    }
+    if (!err) {
+      if (length + 2 > buflen) {
+	delete[] buf;
+	buflen = length + 2;
+	buf = new char[buflen];
+      }
+      if (fread(buf + 1, 1, length, fp) != (size_t)length)
+	error("fread on '%1' failed: %2", filename, strerror(errno));
+      else {
+	buf[0] = '\n';
+	// Remove the CR characters from CRLF pairs.
+	int sidx = 1, didx = 1;
+	for ( ; sidx < length + 1; sidx++, didx++)
+	  {
+	    if (buf[sidx] == '\r')
+	      {
+		if (buf[++sidx] != '\n')
+		  buf[didx++] = '\r';
+		else
+		  length--;
+	      }
+	    if (sidx != didx)
+	      buf[didx] = buf[sidx];
+	  }
+	buf[length + 1] = '\n';
+	res = searchr.search(buf + 1, buf + 2 + length, pp, lenp);
+	if (res && ridp)
+	  *ridp = reference_id(indx->filename_id + tp->filename_index,
+			       tp->start);
+      }
+    }
+  }
+  fclose(fp);
+  return res;
+}
+
+const char *index_search_item::munge_filename(const char *filename)
+{
+  if (IS_ABSOLUTE(filename))
+    return filename;
+  const char *cwd = pool;
+  int need_slash = (cwd[0] != 0
+		    && strchr(DIR_SEPS, strchr(cwd, '\0')[-1]) == 0);
+  int len = strlen(cwd) + strlen(filename) + need_slash + 1;
+  if (len > filename_buflen) {
+    delete[] filename_buffer;
+    filename_buflen = len;
+    filename_buffer = new char[len];
+  }
+  strcpy(filename_buffer, cwd);
+  if (need_slash)
+    strcat(filename_buffer, "/");
+  strcat(filename_buffer, filename);
+  return filename_buffer;
+}
+
+const int *index_search_item::search1(const char **pp, const char *end)
+{
+  while (*pp < end && !csalnum(**pp))
+    *pp += 1;
+  if (*pp >= end)
+    return 0;
+  const char *start = *pp;
+  while (*pp < end && csalnum(**pp))
+    *pp += 1;
+  int len = *pp - start;
+  if (len < header.shortest)
+    return 0;
+  if (len > header.truncate)
+    len = header.truncate;
+  int is_number = 1;
+  for (int i = 0; i < len; i++)
+    if (csdigit(start[i]))
+      key_buffer[i] = start[i];
+    else {
+      key_buffer[i] = cmlower(start[i]);
+      is_number = 0;
+    }
+  if (is_number && !(len == 4 && start[0] == '1' && start[1] == '9'))
+    return 0;
+  unsigned hc = hash(key_buffer, len);
+  if (common_words_table) {
+    for (int h = hc % common_words_table_size;
+	 common_words_table[h];
+	 --h) {
+      if (strlen(common_words_table[h]) == (size_t)len
+	  && memcmp(common_words_table[h], key_buffer, len) == 0)
+	return 0;
+      if (h == 0)
+	h = common_words_table_size;
+    }
+  }
+  int li = table[int(hc % header.table_size)];
+  return li < 0 ? &minus_one : lists + li;
+}
+
+static void merge(int *result, const int *s1, const int *s2)
+{
+  for (; *s1 >= 0; s1++) {
+    while (*s2 >= 0 && *s2 < *s1)
+      s2++;
+    if (*s2 == *s1)
+      *result++ = *s2;
+  }
+  *result++ = -1;
+}
+
+const int *index_search_item::search(const char *ptr, int length,
+				     int **temp_listp)
+{
+  const char *end = ptr + length;
+  if (*temp_listp) {
+    delete[] *temp_listp;
+    *temp_listp = 0;
+  }
+  const int *first_list = 0;
+  while (ptr < end && (first_list = search1(&ptr, end)) == 0)
+    ;
+  if (!first_list)
+    return 0;
+  if (*first_list < 0)
+    return first_list;
+  const int *second_list = 0;
+  while (ptr < end && (second_list = search1(&ptr, end)) == 0)
+    ;
+  if (!second_list)
+    return first_list;
+  if (*second_list < 0)
+    return second_list;
+  const int *p;
+  for (p = first_list; *p >= 0; p++)
+    ;
+  int len = p - first_list;
+  for (p = second_list; *p >= 0; p++)
+    ;
+  if (p - second_list < len)
+    len = p - second_list;
+  int *matches = new int[len + 1];
+  merge(matches, first_list, second_list);
+  while (ptr < end) {
+    const int *list = search1(&ptr, end);
+    if (list != 0) {
+      if (*list < 0) {
+	delete[] matches;
+	return list;
+      }
+      merge(matches, matches, list);
+      if (*matches < 0) {
+	delete[] matches;
+	return &minus_one;
+      }
+    }
+  }
+  *temp_listp = matches;
+  return matches;
+}
+
+void index_search_item::read_common_words_file()
+{
+  if (header.common <= 0)
+    return;
+  const char *common_words_file = munge_filename(strchr(pool, '\0') + 1);
+  errno = 0;
+  FILE *fp = fopen(common_words_file, "r");
+  if (!fp) {
+    error("can't open '%1': %2", common_words_file, strerror(errno));
+    return;
+  }
+  common_words_table_size = 2*header.common + 1;
+  while (!is_prime(common_words_table_size))
+    common_words_table_size += 2;
+  common_words_table = new char *[common_words_table_size];
+  for (int i = 0; i < common_words_table_size; i++)
+    common_words_table[i] = 0;
+  int count = 0;
+  int key_len = 0;
+  for (;;) {
+    int c = getc(fp);
+    while (c != EOF && !csalnum(c))
+      c = getc(fp);
+    if (c == EOF)
+      break;
+    do {
+      if (key_len < header.truncate)
+	key_buffer[key_len++] = cmlower(c);
+      c = getc(fp);
+    } while (c != EOF && csalnum(c));
+    if (key_len >= header.shortest) {
+      int h = hash(key_buffer, key_len) % common_words_table_size;
+      while (common_words_table[h]) {
+	if (h == 0)
+	  h = common_words_table_size;
+	--h;
+      }
+      common_words_table[h] = new char[key_len + 1];
+      memcpy(common_words_table[h], key_buffer, key_len);
+      common_words_table[h][key_len] = '\0';
+    }
+    if (++count >= header.common)
+      break;
+    key_len = 0;
+    if (c == EOF)
+      break;
+  }
+  fclose(fp);
+}
+
+void index_search_item::add_out_of_date_file(int fd, const char *filename,
+					     int fid)
+{
+  search_item **pp;
+  for (pp = &out_of_date_files; *pp; pp = &(*pp)->next)
+    if ((*pp)->is_named(filename))
+      return;
+  *pp = make_linear_search_item(fd, filename, fid);
+  warning("'%1' modified since index '%2' created", filename, name);
+}
+
+void index_search_item::check_files()
+{
+  const char *pool_end = pool + header.strings_size;
+  for (const char *ptr = strchr(ignore_fields, '\0') + 1;
+       ptr < pool_end;
+       ptr = strchr(ptr, '\0') + 1) {
+    const char *path = munge_filename(ptr);
+    struct stat sb;
+    if (stat(path, &sb) < 0)
+      error("can't stat '%1': %2", path, strerror(errno));
+    else if (sb.st_mtime > mtime) {
+      int fd = open(path, O_RDONLY | O_BINARY);
+      if (fd < 0)
+	error("can't open '%1': %2", path, strerror(errno));
+      else
+	add_out_of_date_file(fd, path, filename_id + (ptr - pool));
+    }
+  }
+}
+
+// Local Variables:
+// fill-column: 72
+// mode: C++
+// End:
+// vim: set cindent noexpandtab shiftwidth=2 textwidth=72:
diff --git a/src/libs/libbib/libbib.am b/src/libs/libbib/libbib.am
new file mode 100644
index 0000000..00e1d3b
--- /dev/null
+++ b/src/libs/libbib/libbib.am
@@ -0,0 +1,35 @@
+# Automake rules for 'libbib'
+#
+# Copyright (C) 2014-2020 Free Software Foundation, Inc.
+#
+# 'groff' is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# 'groff' is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see
+# <http://www.gnu.org/licenses/gpl-2.0.html>.
+#
+########################################################################
+
+noinst_LIBRARIES += libbib.a
+libbib_a_SOURCES = \
+  src/libs/libbib/common.cpp \
+  src/libs/libbib/index.cpp \
+  src/libs/libbib/linear.cpp \
+  src/libs/libbib/search.cpp \
+  src/libs/libbib/map.c
+src/libs/libbib/index.$(OBJEXT): defs.h
+
+
+# Local Variables:
+# mode: makefile-automake
+# fill-column: 72
+# End:
+# vim: set autoindent filetype=automake textwidth=72:
diff --git a/src/libs/libbib/linear.cpp b/src/libs/libbib/linear.cpp
new file mode 100644
index 0000000..22e11d8
--- /dev/null
+++ b/src/libs/libbib/linear.cpp
@@ -0,0 +1,506 @@
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include "lib.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "posix.h"
+#include "errarg.h"
+#include "error.h"
+#include "cset.h"
+#include "cmap.h"
+#include "nonposix.h"
+
+#include "refid.h"
+#include "search.h"
+
+class file_buffer {
+  char *buffer;
+  char *bufend;
+public:
+  file_buffer();
+  ~file_buffer();
+  int load(int fd, const char *filename);
+  const char *get_start() const;
+  const char *get_end() const;
+};
+
+typedef unsigned char uchar;
+
+static uchar map[256];
+static uchar inv_map[256][3];
+
+struct map_init {
+  map_init();
+};
+
+static map_init the_map_init;
+
+map_init::map_init()
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    map[i] = csalnum(i) ? cmlower(i) : '\0';
+  for (i = 0; i < 256; i++) {
+    if (cslower(i)) {
+      inv_map[i][0] = i;
+      inv_map[i][1] = cmupper(i);
+      inv_map[i][2] = '\0';
+    }
+    else if (csdigit(i)) {
+      inv_map[i][0] = i;
+      inv_map[i][1] = 0;
+    }
+    else
+      inv_map[i][0] = '\0';
+  }
+}
+
+
+class bmpattern {
+  char *pat;
+  int len;
+  int delta[256];
+public:
+  bmpattern(const char *pattern, int pattern_length);
+  ~bmpattern();
+  const char *search(const char *p, const char *end) const;
+  int length() const;
+};
+
+bmpattern::bmpattern(const char *pattern, int pattern_length)
+: len(pattern_length)
+{
+  pat = new char[len];
+  int i;
+  for (i = 0; i < len; i++)
+    pat[i] = map[uchar(pattern[i])];
+  for (i = 0; i < 256; i++)
+    delta[i] = len;
+  for (i = 0; i < len; i++)
+    for (const unsigned char *inv = inv_map[uchar(pat[i])]; *inv; inv++)
+      delta[*inv] = len - i - 1;
+}
+
+const char *bmpattern::search(const char *buf, const char *end) const
+{
+  int buflen = end - buf;
+  if (len > buflen)
+    return 0;
+  const char *strend;
+  if (buflen > len*4)
+    strend = end - len*4;
+  else
+    strend = buf;
+  const char *k = buf + len - 1;
+  const int *del = delta;
+  const char *pattern = pat;
+  for (;;) {
+    while (k < strend) {
+      int t = del[uchar(*k)];
+      if (!t)
+	break;
+      k += t;
+      k += del[uchar(*k)];
+      k += del[uchar(*k)];
+    }
+    while (k < end && del[uchar(*k)] != 0)
+      k++;
+    if (k == end)
+      break;
+    int j = len - 1;
+    const char *s = k;
+    for (;;) {
+      if (j == 0)
+	return s;
+      if (map[uchar(*--s)] != uchar(pattern[--j]))
+	break;
+    }
+    k++;
+  }
+  return 0;
+}
+
+bmpattern::~bmpattern()
+{
+  delete[] pat;
+}
+
+inline int bmpattern::length() const
+{
+  return len;
+}
+
+
+static const char *find_end(const char *bufend, const char *p);
+
+const char *linear_searcher::search_and_check(const bmpattern *key,
+  const char *buf, const char *bufend, const char **start) const
+{
+  assert(buf[-1] == '\n');
+  assert(bufend[-1] == '\n');
+  const char *ptr = buf;
+  for (;;) {
+    const char *found = key->search(ptr, bufend);
+    if (!found)
+      break;
+    if (check_match(buf, bufend, found, key->length(), &ptr, start))
+      return found;
+  }
+  return 0;
+}
+
+static const char *skip_field(const char *end, const char *p)
+{
+  for (;;)
+    if (*p++ == '\n') {
+      if (p == end || *p == '%')
+	break;
+      const char *q;
+      for (q = p; *q == ' ' || *q == '\t'; q++)
+	;
+      if (*q == '\n')
+	break;
+      p = q + 1;
+    }
+  return p;
+}
+
+static const char *find_end(const char *bufend, const char *p)
+{
+  for (;;)
+    if (*p++ == '\n') {
+      if (p == bufend)
+	break;
+      const char *q;
+      for (q = p; *q == ' ' || *q == '\t'; q++)
+	;
+      if (*q == '\n')
+	break;
+      p = q + 1;
+    }
+  return p;
+}
+
+
+int linear_searcher::check_match(const char *buf, const char *bufend,
+				 const char *match, int matchlen,
+				 const char **cont, const char **start) const
+{
+  *cont = match + 1;
+  // The user is required to supply only the first truncate_len characters
+  // of the key.  If truncate_len <= 0, he must supply all the key.
+  if ((truncate_len <= 0 || matchlen < truncate_len)
+      && map[uchar(match[matchlen])] != '\0')
+    return 0;
+
+  // The character before the match must not be an alphanumeric
+  // character (unless the alphanumeric character follows one or two
+  // percent characters at the beginning of the line), nor must it be
+  // a percent character at the beginning of a line, nor a percent
+  // character following a percent character at the beginning of a
+  // line.
+
+  switch (match - buf) {
+  case 0:
+    break;
+  case 1:
+    if (match[-1] == '%' || map[uchar(match[-1])] != '\0')
+      return 0;
+    break;
+  case 2:
+    if (map[uchar(match[-1])] != '\0' && match[-2] != '%')
+      return 0;
+    if (match[-1] == '%'
+	&& (match[-2] == '\n' || match[-2] == '%'))
+      return 0;
+    break;
+  default:
+    if (map[uchar(match[-1])] != '\0'
+	&& !(match[-2] == '%'
+	     && (match[-3] == '\n'
+		 || (match[-3] == '%' && match[-4] == '\n'))))
+      return 0;
+    if (match[-1] == '%'
+	&& (match[-2] == '\n'
+	    || (match[-2] == '%' && match[-3] == '\n')))
+      return 0;
+  }
+    
+  const char *p = match;
+  int had_percent = 0;
+  for (;;) {
+    if (*p == '\n') {
+      if (!had_percent && p[1] == '%') {
+	if (p[2] != '\0' && strchr(ignore_fields, p[2]) != 0) {
+	  *cont = skip_field(bufend, match + matchlen);
+	  return 0;
+	}
+	if (!start)
+	  break;
+	had_percent = 1;
+      }
+      if (p <= buf) {
+	if (start)
+	  *start = p + 1;
+	return 1;
+      }
+      const char *q;
+      for (q = p - 1; *q == ' ' || *q == '\t'; q--)
+	;
+      if (*q == '\n') {
+	if (start)
+	  *start = p + 1;
+	break;
+      }
+      p = q;
+    }
+    p--;
+  }
+  return 1;
+}
+
+file_buffer::file_buffer()
+: buffer(0), bufend(0)
+{
+}
+
+file_buffer::~file_buffer()
+{
+  delete[] buffer;
+}
+
+const char *file_buffer::get_start() const
+{
+  return buffer ? buffer + 4 : 0;
+}
+
+const char *file_buffer::get_end() const
+{
+  return bufend;
+}
+
+int file_buffer::load(int fd, const char *filename)
+{
+  struct stat sb;
+  if (fstat(fd, &sb) < 0)
+    error("can't fstat '%1': %2", filename, strerror(errno));
+  else if (!S_ISREG(sb.st_mode))
+    error("'%1' is not a regular file", filename);
+  else {
+    // We need one character extra at the beginning for an additional newline
+    // used as a sentinel.  We get 4 instead so that the read buffer will be
+    // word-aligned.  This seems to make the read slightly faster.  We also
+    // need one character at the end also for an additional newline used as a
+    // sentinel.
+    int size = int(sb.st_size);
+    buffer = new char[size + 4 + 1];
+    int nread = read(fd, buffer + 4, size);
+    if (nread < 0)
+      error("error reading '%1': %2", filename, strerror(errno));
+    else if (nread != size)
+      error("size of '%1' decreased", filename);
+    else {
+      char c;
+      nread = read(fd, &c, 1);
+      if (nread != 0)
+	error("size of '%1' increased", filename);
+      else if (memchr(buffer + 4, '\0', size < 1024 ? size : 1024) != 0)
+	error("database '%1' is a binary file", filename);
+      else {
+	close(fd);
+	buffer[3] = '\n';
+	int sidx = 4, didx = 4;
+	for ( ; sidx < size + 4; sidx++, didx++)
+	  {
+	    if (buffer[sidx] == '\r')
+	      {
+		if (buffer[++sidx] != '\n')
+		  buffer[didx++] = '\r';
+		else
+		  size--;
+	      }
+	    if (sidx != didx)
+	      buffer[didx] = buffer[sidx];
+	  }
+	bufend = buffer + 4 + size;
+	if (bufend[-1] != '\n')
+	  *bufend++ = '\n';
+	return 1;
+      }
+    }
+    delete[] buffer;
+    buffer = 0;
+  }
+  close(fd);
+  return 0;
+}
+
+linear_searcher::linear_searcher(const char *query, int query_len,
+				 const char *ign, int trunc)
+: ignore_fields(ign), truncate_len(trunc), keys(0), nkeys(0)
+{
+  const char *query_end = query + query_len;
+  int nk = 0;
+  const char *p;
+  for (p = query; p < query_end; p++)
+    if (map[uchar(*p)] != '\0'
+	&& (p[1] == '\0' || map[uchar(p[1])] == '\0'))
+      nk++;
+  if (nk == 0)
+    return;
+  keys = new bmpattern*[nk];
+  p = query;
+  for (;;) {
+    while (p < query_end && map[uchar(*p)] == '\0')
+      p++;
+    if (p == query_end)
+      break;
+    const char *start = p;
+    while (p < query_end && map[uchar(*p)] != '\0')
+      p++;
+    keys[nkeys++] = new bmpattern(start, p - start);
+  }
+  assert(nkeys <= nk);
+  if (nkeys == 0) {
+    delete[] keys;
+    keys = 0;
+  }
+}
+
+linear_searcher::~linear_searcher()
+{
+  for (int i = 0; i < nkeys; i++)
+    delete keys[i];
+  delete[] keys;
+}
+
+int linear_searcher::search(const char *buffer, const char *bufend,
+			    const char **startp, int *lengthp) const
+{
+  assert(bufend - buffer > 0);
+  assert(buffer[-1] == '\n');
+  assert(bufend[-1] == '\n');
+  if (nkeys == 0)
+    return 0;
+  for (;;) {
+    const char *refstart;
+    const char *found = search_and_check(keys[0], buffer, bufend, &refstart);
+    if (!found)
+      break;
+    const char *refend = find_end(bufend, found + keys[0]->length());
+    int i;
+    for (i = 1; i < nkeys; i++)
+      if (!search_and_check(keys[i], refstart, refend))
+	break;
+    if (i >= nkeys) {
+      *startp = refstart;
+      *lengthp = refend - refstart;
+      return 1;
+    }
+    buffer = refend;
+  }
+  return 0;
+}
+
+class linear_search_item : public search_item {
+  file_buffer fbuf;
+public:
+  linear_search_item(const char *filename, int fid);
+  ~linear_search_item();
+  int load(int fd);
+  search_item_iterator *make_search_item_iterator(const char *);
+  friend class linear_search_item_iterator;
+};
+
+class linear_search_item_iterator : public search_item_iterator {
+  linear_search_item *lsi;
+  int pos;
+public:
+  linear_search_item_iterator(linear_search_item *, const char *query);
+  ~linear_search_item_iterator();
+  int next(const linear_searcher &, const char **ptr, int *lenp,
+	   reference_id *ridp);
+};
+
+search_item *make_linear_search_item(int fd, const char *filename, int fid)
+{
+  linear_search_item *item = new linear_search_item(filename, fid);
+  if (!item->load(fd)) {
+    delete item;
+    return 0;
+  }
+  else
+    return item;
+}
+
+linear_search_item::linear_search_item(const char *filename, int fid)
+: search_item(filename, fid)
+{
+}
+
+linear_search_item::~linear_search_item()
+{
+}
+
+int linear_search_item::load(int fd)
+{
+  return fbuf.load(fd, name);
+}
+
+search_item_iterator *linear_search_item::make_search_item_iterator(
+  const char *query)
+{
+  return new linear_search_item_iterator(this, query);
+}
+
+linear_search_item_iterator::linear_search_item_iterator(
+  linear_search_item *p, const char *)
+: lsi(p), pos(0)
+{
+}
+
+linear_search_item_iterator::~linear_search_item_iterator()
+{
+}
+
+int linear_search_item_iterator::next(const linear_searcher &searcher,
+				      const char **startp, int *lengthp,
+				      reference_id *ridp)
+{
+  const char *bufstart = lsi->fbuf.get_start();
+  const char *bufend = lsi->fbuf.get_end();
+  const char *ptr = bufstart + pos;
+  if (ptr < bufend && searcher.search(ptr, bufend, startp, lengthp)) {
+    pos = *startp + *lengthp - bufstart;
+    if (ridp)
+      *ridp = reference_id(lsi->filename_id, *startp - bufstart);
+    return 1;
+  }
+  else
+    return 0;
+}
+
+// Local Variables:
+// fill-column: 72
+// mode: C++
+// End:
+// vim: set cindent noexpandtab shiftwidth=2 textwidth=72:
diff --git a/src/libs/libbib/map.c b/src/libs/libbib/map.c
new file mode 100644
index 0000000..9eae72b
--- /dev/null
+++ b/src/libs/libbib/map.c
@@ -0,0 +1,86 @@
+/* Copyright (C) 1989- 2014  Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <stdlib.h>
+
+#ifdef HAVE_MMAP
+
+#include <sys/types.h>
+#include <sys/mman.h>
+
+/* The Net-2 man pages says that a MAP_FILE flag is required. */
+#ifndef MAP_FILE
+#define MAP_FILE 0
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Prototypes */
+char *mapread(int, int);
+int unmap(char *, int);
+
+char *mapread(int fd, int nbytes)
+{
+  char *p = (char *)mmap((void *)0, (size_t)nbytes, PROT_READ,
+			 MAP_FILE|MAP_PRIVATE, fd, (off_t)0);
+  if (p == MAP_FAILED)
+    return 0;
+  /* mmap() shouldn't return 0 since MAP_FIXED wasn't specified. */
+  if (p == 0)
+    abort();
+  return p;
+}
+
+int unmap(char *p, int len)
+{
+  return munmap((void *)p, len);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#else /* not HAVE_MMAP */
+
+#include <errno.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+char *mapread(int fd, int nbytes)
+{
+  errno = ENODEV;
+  return 0;
+}
+
+int unmap(char *p, int len)
+{
+  errno = EINVAL;
+  return -1;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* not HAVE_MMAP */
diff --git a/src/libs/libbib/search.cpp b/src/libs/libbib/search.cpp
new file mode 100644
index 0000000..10867b6
--- /dev/null
+++ b/src/libs/libbib/search.cpp
@@ -0,0 +1,136 @@
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include "lib.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "posix.h"
+#include "errarg.h"
+#include "error.h"
+#include "nonposix.h"
+
+#include "refid.h"
+#include "search.h"
+
+int linear_truncate_len = 6;
+const char *linear_ignore_fields = "XYZ";
+
+search_list::search_list()
+: list(0), niterators(0), next_fid(1)
+{
+}
+
+search_list::~search_list()
+{
+  assert(niterators == 0);
+  while (list) {
+    search_item *tem = list->next;
+    delete list;
+    list = tem;
+  }
+}
+
+void search_list::add_file(const char *filename, int silent)
+{
+  search_item *p = make_index_search_item(filename, next_fid);
+  if (!p) {
+    int fd = open(filename, O_RDONLY | O_BINARY);
+    if (fd < 0) {
+      if (!silent)
+        error("can't open '%1': %2", filename, strerror(errno));
+    }
+    else
+      p = make_linear_search_item(fd, filename, next_fid);
+  }
+  if (p) {
+    search_item **pp;
+    for (pp = &list; *pp; pp = &(*pp)->next)
+      ;
+    *pp = p;
+    next_fid = p->next_filename_id();
+  }
+}
+
+int search_list::nfiles() const
+{
+  int n = 0;
+  for (search_item *ptr = list; ptr; ptr = ptr->next)
+    n++;
+  return n;
+}
+
+search_list_iterator::search_list_iterator(search_list *p, const char *q)
+: list(p), ptr(p->list), iter(0), query(strsave(q)),
+  searcher(q, strlen(q), linear_ignore_fields, linear_truncate_len)
+{
+  list->niterators += 1;
+}
+
+search_list_iterator::~search_list_iterator()
+{
+  list->niterators -= 1;
+  delete[] query;
+  delete iter;
+}
+
+int search_list_iterator::next(const char **pp, int *lenp, reference_id *ridp)
+{
+  while (ptr) {
+    if (iter == 0)
+      iter = ptr->make_search_item_iterator(query);
+    if (iter->next(searcher, pp, lenp, ridp))
+      return 1;
+    delete iter;
+    iter = 0;
+    ptr = ptr->next;
+  }
+  return 0;
+}
+
+search_item::search_item(const char *nm, int fid)
+: name(strsave(nm)), filename_id(fid), next(0)
+{
+}
+
+search_item::~search_item()
+{
+  delete[] name;
+}
+
+int search_item::is_named(const char *nm) const
+{
+  return strcmp(name, nm) == 0;
+}
+
+int search_item::next_filename_id() const
+{
+  return filename_id + 1;
+}
+
+search_item_iterator::~search_item_iterator()
+{
+}
+
+// Local Variables:
+// fill-column: 72
+// mode: C++
+// End:
+// vim: set cindent noexpandtab shiftwidth=2 textwidth=72:
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-15 19:44:05 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-15 19:44:05 +0000
commit	d318611dd6f23fcfedd50e9b9e24620b102ba96a (patch)
tree	8b9eef82ca40fdd5a8deeabf07572074c236095d /src/libs/libbib
parent	Initial commit. (diff)
download	groff-d318611dd6f23fcfedd50e9b9e24620b102ba96a.tar.xz groff-d318611dd6f23fcfedd50e9b9e24620b102ba96a.zip