diff options
Diffstat (limited to 'lib/readtokens.c')
-rw-r--r-- | lib/readtokens.c | 192 |
1 files changed, 192 insertions, 0 deletions
diff --git a/lib/readtokens.c b/lib/readtokens.c new file mode 100644 index 0000000..8c165a1 --- /dev/null +++ b/lib/readtokens.c @@ -0,0 +1,192 @@ +/* readtokens.c -- Functions for reading tokens from an input stream. + + Copyright (C) 1990-1991, 1999-2004, 2006, 2009-2023 Free Software + Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + + Written by Jim Meyering. */ + +/* This almost supersedes xreadline stuff -- using delim="\n" + gives the same functionality, except that these functions + would never return empty lines. */ + +#include <config.h> + +#include "readtokens.h" + +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "xalloc.h" + +#if USE_UNLOCKED_IO +# include "unlocked-io.h" +#endif + +/* Initialize a tokenbuffer. */ + +void +init_tokenbuffer (token_buffer *tokenbuffer) +{ + tokenbuffer->size = 0; + tokenbuffer->buffer = NULL; +} + +typedef size_t word; +enum { bits_per_word = sizeof (word) * CHAR_BIT }; + +static bool +get_nth_bit (size_t n, word const *bitset) +{ + return bitset[n / bits_per_word] >> n % bits_per_word & 1; +} + +static void +set_nth_bit (size_t n, word *bitset) +{ + size_t one = 1; + bitset[n / bits_per_word] |= one << n % bits_per_word; +} + +/* Read a token from STREAM into TOKENBUFFER. + A token is delimited by any of the N_DELIM bytes in DELIM. + Upon return, the token is in tokenbuffer->buffer and + has a trailing '\0' instead of any original delimiter. + The function value is the length of the token not including + the final '\0'. Upon EOF (i.e. on the call after the last + token is read) or error, return -1 without modifying tokenbuffer. + The EOF and error conditions may be distinguished in the caller + by testing ferror (STREAM). + + This function works properly on lines containing NUL bytes + and on files that do not end with a delimiter. */ + +size_t +readtoken (FILE *stream, + const char *delim, + size_t n_delim, + token_buffer *tokenbuffer) +{ + int c; + idx_t i; + word isdelim[(UCHAR_MAX + bits_per_word) / bits_per_word]; + + memset (isdelim, 0, sizeof isdelim); + for (i = 0; i < n_delim; i++) + { + unsigned char ch = delim[i]; + set_nth_bit (ch, isdelim); + } + + /* skip over any leading delimiters */ + for (c = getc (stream); c >= 0 && get_nth_bit (c, isdelim); c = getc (stream)) + { + /* empty */ + } + + char *p = tokenbuffer->buffer; + idx_t n = tokenbuffer->size; + i = 0; + for (;;) + { + if (c < 0 && i == 0) + return -1; + + if (i == n) + p = xpalloc (p, &n, 1, -1, sizeof *p); + + if (c < 0) + { + p[i] = 0; + break; + } + if (get_nth_bit (c, isdelim)) + { + p[i] = 0; + break; + } + p[i++] = c; + c = getc (stream); + } + + tokenbuffer->buffer = p; + tokenbuffer->size = n; + return i; +} + +/* Build a NULL-terminated array of pointers to tokens + read from STREAM. Return the number of tokens read. + All storage is obtained through calls to xmalloc-like functions. + + %%% Question: is it worth it to do a single + %%% realloc() of 'tokens' just before returning? */ + +size_t +readtokens (FILE *stream, + size_t projected_n_tokens, + const char *delim, + size_t n_delim, + char ***tokens_out, + size_t **token_lengths) +{ + token_buffer tb, *token = &tb; + char **tokens; + size_t *lengths; + idx_t sz, n_tokens; + + if (projected_n_tokens == 0) + projected_n_tokens = 64; + else + projected_n_tokens++; /* add one for trailing NULL pointer */ + + sz = projected_n_tokens; + tokens = xnmalloc (sz, sizeof *tokens); + lengths = xnmalloc (sz, sizeof *lengths); + + n_tokens = 0; + init_tokenbuffer (token); + for (;;) + { + char *tmp; + size_t token_length = readtoken (stream, delim, n_delim, token); + if (n_tokens >= sz) + { + tokens = xpalloc (tokens, &sz, 1, -1, sizeof *tokens); + lengths = xreallocarray (lengths, sz, sizeof *lengths); + } + + if (token_length == (size_t) -1) + { + /* don't increment n_tokens for NULL entry */ + tokens[n_tokens] = NULL; + lengths[n_tokens] = 0; + break; + } + tmp = xnmalloc (token_length + 1, sizeof *tmp); + lengths[n_tokens] = token_length; + tokens[n_tokens] = memcpy (tmp, token->buffer, token_length + 1); + n_tokens++; + } + + free (token->buffer); + *tokens_out = tokens; + if (token_lengths != NULL) + *token_lengths = lengths; + else + free (lengths); + return n_tokens; +} |