summary | refs | log | tree | commit | diff | stats
path: root/lib/readtokens.c
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org>, 2024-04-27 17:39:29 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org>, 2024-04-27 17:39:29 +0000
commit: 8ffec2a3aba6f114784e11f89ef1d57a096ae540 (patch)
tree: ccebcbad06203e8241a8e7249f8e6c478a3682ea /lib/readtokens.c
parent: Initial commit. (diff)
download: coreutils-8ffec2a3aba6f114784e11f89ef1d57a096ae540.tar.xz
          coreutils-8ffec2a3aba6f114784e11f89ef1d57a096ae540.zip
Adding upstream version 8.32. [upstream/8.32] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/readtokens.c')
-rw-r--r-- lib/readtokens.c | 195
1 files changed, 195 insertions, 0 deletions
diff --git a/lib/readtokens.c b/lib/readtokens.c
new file mode 100644
index 0000000..b884d81
--- /dev/null
+++ b/lib/readtokens.c
@@ -0,0 +1,195 @@
+/* readtokens.c -- Functions for reading tokens from an input stream.
+
+ Copyright (C) 1990-1991, 1999-2004, 2006, 2009-2020 Free Software
+ Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ Written by Jim Meyering. */
+
+/* This almost supersedes xreadline stuff -- using delim="\n"
+ gives the same functionality, except that these functions
+ would never return empty lines. */
+
+#include <config.h>
+
+#include "readtokens.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include "xalloc.h"
+
+#if USE_UNLOCKED_IO
+# include "unlocked-io.h"
+#endif
+
+/* Put TOKENBUFFER into its empty state: no storage, zero capacity.  */
+
+void
+init_tokenbuffer (token_buffer *tokenbuffer)
+{
+  tokenbuffer->buffer = NULL;
+  tokenbuffer->size = 0;
+}
+
+typedef size_t word;  /* storage unit of the delimiter bitset */
+enum { bits_per_word = sizeof (word) * CHAR_BIT };  /* bits in one word */
+
+static bool  /* true iff bit N of BITSET is set */
+get_nth_bit (size_t n, word const *bitset)
+{
+  return (bitset[n / bits_per_word] & ((word) 1 << n % bits_per_word)) != 0;
+}
+
+static void  /* turn on bit N of BITSET */
+set_nth_bit (size_t n, word *bitset)
+{
+  word mask = (word) 1 << n % bits_per_word;
+  bitset[n / bits_per_word] |= mask;
+}
+
+/* Fetch the next token from STREAM, storing it in TOKENBUFFER.
+   Any of the N_DELIM bytes of DELIM ends a token; runs of leading
+   delimiters are discarded, so an empty token is never produced.
+   On success tokenbuffer->buffer holds the token followed by a '\0'
+   (which replaces the delimiter, if there was one) and the return
+   value is the token's length, excluding that '\0'.  When nothing
+   but delimiters remain before EOF, or a read error occurs before any
+   token byte is seen, return (size_t) -1 and leave TOKENBUFFER alone;
+   the caller can tell the two cases apart with ferror (STREAM).
+
+   Tokens may contain NUL bytes, and the last token of the input
+   need not be followed by a delimiter.  */
+
+size_t
+readtoken (FILE *stream,
+           const char *delim,
+           size_t n_delim,
+           token_buffer *tokenbuffer)
+{
+  word isdelim[(UCHAR_MAX + bits_per_word) / bits_per_word];
+  char *buf;
+  size_t cap;
+  size_t len = 0;
+  size_t k;
+  int c;
+
+  /* Build the membership set: one bit per possible byte value.  */
+  memset (isdelim, 0, sizeof isdelim);
+  for (k = 0; k < n_delim; k++)
+    set_nth_bit ((unsigned char) delim[k], isdelim);
+
+  /* Discard the run of delimiters that precedes the token.  */
+  do
+    c = getc (stream);
+  while (c >= 0 && get_nth_bit (c, isdelim));
+
+  /* Nothing but EOF (or an error) is left: report that before
+     TOKENBUFFER is touched in any way.  */
+  if (c < 0)
+    return -1;
+
+  buf = tokenbuffer->buffer;
+  cap = tokenbuffer->size;
+
+  for (;;)
+    {
+      /* Ensure there is room for one more byte, be it the next
+         token byte or the terminating '\0'.  */
+      if (len == cap)
+        buf = x2nrealloc (buf, &cap, sizeof *buf);
+
+      /* EOF, a read error, or a delimiter closes the token.  */
+      if (c < 0 || get_nth_bit (c, isdelim))
+        break;
+
+      buf[len++] = c;
+      c = getc (stream);
+    }
+
+  buf[len] = 0;
+
+  /* Publish the (possibly moved and enlarged) buffer to the caller.  */
+  tokenbuffer->buffer = buf;
+  tokenbuffer->size = cap;
+  return len;
+}
+
+/* Read every token from STREAM into freshly allocated strings and store
+   a NULL-terminated vector of them in *TOKENS_OUT.  If TOKEN_LENGTHS is
+   non-null, store the parallel array of token lengths there; otherwise
+   that array is freed.  PROJECTED_N_TOKENS is a sizing hint (0 means
+   "no idea").  Return the number of tokens, not counting the NULL.
+   %%% Open question: trim 'tokens' with one realloc() before returning?  */
+
+size_t
+readtokens (FILE *stream,
+            size_t projected_n_tokens,
+            const char *delim,
+            size_t n_delim,
+            char ***tokens_out,
+            size_t **token_lengths)
+{
+  token_buffer scratch;
+  char **vec;
+  size_t *lens;
+  size_t count = 0;
+  /* Slots to allocate up front: the caller's estimate plus one for the
+     trailing NULL pointer, or 64 when no estimate was supplied.  */
+  size_t capacity = projected_n_tokens ? projected_n_tokens + 1 : 64;
+
+  vec = xnmalloc (capacity, sizeof *vec);
+  lens = xnmalloc (capacity, sizeof *lens);
+  init_tokenbuffer (&scratch);
+
+  for (;;)
+    {
+      char *copy;
+      size_t len = readtoken (stream, delim, n_delim, &scratch);
+
+      /* Slot COUNT is written below even at end of input, so grow
+         both arrays together whenever it does not exist yet.  */
+      if (capacity <= count)
+        {
+          vec = x2nrealloc (vec, &capacity, sizeof *vec);
+          lens = xnrealloc (lens, capacity, sizeof *lens);
+        }
+
+      if (len == (size_t) -1)
+        break;
+
+      /* Give the token, trailing '\0' included, its own storage.  */
+      copy = xnmalloc (len + 1, sizeof *copy);
+      memcpy (copy, scratch.buffer, len + 1);
+      vec[count] = copy;
+      lens[count] = len;
+      count++;
+    }
+
+  /* Terminating entry; deliberately left out of COUNT.  */
+  vec[count] = NULL;
+  lens[count] = 0;
+
+  free (scratch.buffer);
+  *tokens_out = vec;
+  if (token_lengths == NULL)
+    free (lens);
+  else
+    *token_lengths = lens;
+  return count;
+}