summaryrefslogtreecommitdiffstats
path: root/nspr/pr/src/io/prscanf.c
diff options
context:
space:
mode:
Diffstat (limited to 'nspr/pr/src/io/prscanf.c')
-rw-r--r--nspr/pr/src/io/prscanf.c630
1 files changed, 630 insertions, 0 deletions
diff --git a/nspr/pr/src/io/prscanf.c b/nspr/pr/src/io/prscanf.c
new file mode 100644
index 0000000..9923ea2
--- /dev/null
+++ b/nspr/pr/src/io/prscanf.c
@@ -0,0 +1,630 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Scan functions for NSPR types
+ *
+ * Author: Wan-Teh Chang
+ *
+ * Acknowledgment: The implementation is inspired by the source code
+ * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992.
+ */
+
+#include <limits.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+#include "prprf.h"
+#include "prdtoa.h"
+#include "prlog.h"
+#include "prerror.h"
+
+/*
+ * A function that reads a character from 'stream'.
+ * Returns the character read, or EOF if end of stream is reached.
+ */
+typedef int (*_PRGetCharFN)(void *stream);
+
+/*
+ * A function that pushes the character 'ch' back to 'stream'.
+ */
+typedef void (*_PRUngetCharFN)(void *stream, int ch);
+
+/*
+ * The size specifier for the integer and floating point number
+ * conversions in format control strings.
+ */
+typedef enum {
+ _PR_size_none, /* No size specifier is given */
+ _PR_size_h, /* The 'h' specifier, suggesting "short" */
+ _PR_size_l, /* The 'l' specifier, suggesting "long" */
+ _PR_size_L, /* The 'L' specifier, meaning a 'long double' */
+ _PR_size_ll /* The 'll' specifier, suggesting "long long" */
+} _PRSizeSpec;
+
+/*
+ * The collection of data that is passed between the scan function
+ * and its subordinate functions. The fields of this structure
+ * serve as the input or output arguments for these functions.
+ */
+typedef struct {
+ _PRGetCharFN get; /* get a character from input stream */
+ _PRUngetCharFN unget; /* unget (push back) a character */
+ void *stream; /* argument for get and unget */
+ va_list ap; /* the variable argument list */
+ int nChar; /* number of characters read from 'stream' */
+
+ PRBool assign; /* assign, or suppress assignment? */
+ int width; /* field width */
+ _PRSizeSpec sizeSpec; /* 'h', 'l', 'L', or 'll' */
+
+ PRBool converted; /* is the value actually converted? */
+} ScanfState;
+
+#define GET(state) ((state)->nChar++, (state)->get((state)->stream))
+#define UNGET(state, ch) \
+ ((state)->nChar--, (state)->unget((state)->stream, ch))
+
+/*
+ * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH,
+ * are always used together.
+ *
+ * GET_IF_WITHIN_WIDTH calls the GET macro and assigns its return
+ * value to 'ch' only if we have not exceeded the field width of
+ * 'state'. Therefore, after GET_IF_WITHIN_WIDTH, the value of
+ * 'ch' is valid only if the macro WITHIN_WIDTH evaluates to true.
+ */
+
+#define GET_IF_WITHIN_WIDTH(state, ch) \
+ if (--(state)->width >= 0) { \
+ (ch) = GET(state); \
+ }
+#define WITHIN_WIDTH(state) ((state)->width >= 0)
+
+/*
+ * _pr_strtoull:
+ * Convert a string to an unsigned 64-bit integer. The string
+ * 'str' is assumed to be a representation of the integer in
+ * base 'base'.
+ *
+ * Warning:
+ * - Only handle base 8, 10, and 16.
+ * - No overflow checking.
+ */
+
+static PRUint64
+_pr_strtoull(const char *str, char **endptr, int base)
+{
+ static const int BASE_MAX = 16;
+ static const char digits[] = "0123456789abcdef";
+ char *digitPtr;
+ PRUint64 x; /* return value */
+ PRInt64 base64;
+ const char *cPtr;
+ PRBool negative;
+ const char *digitStart;
+
+ PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16);
+ if (base < 0 || base == 1 || base > BASE_MAX) {
+ if (endptr) {
+ *endptr = (char *) str;
+ return LL_ZERO;
+ }
+ }
+
+ cPtr = str;
+ while (isspace(*cPtr)) {
+ ++cPtr;
+ }
+
+ negative = PR_FALSE;
+ if (*cPtr == '-') {
+ negative = PR_TRUE;
+ cPtr++;
+ } else if (*cPtr == '+') {
+ cPtr++;
+ }
+
+ if (base == 16) {
+ if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) {
+ cPtr += 2;
+ }
+ } else if (base == 0) {
+ if (*cPtr != '0') {
+ base = 10;
+ } else if (cPtr[1] == 'x' || cPtr[1] == 'X') {
+ base = 16;
+ cPtr += 2;
+ } else {
+ base = 8;
+ }
+ }
+ PR_ASSERT(base != 0);
+ LL_I2L(base64, base);
+ digitStart = cPtr;
+
+ /* Skip leading zeros */
+ while (*cPtr == '0') {
+ cPtr++;
+ }
+
+ LL_I2L(x, 0);
+ while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) {
+ PRUint64 d;
+
+ LL_I2L(d, (digitPtr - digits));
+ LL_MUL(x, x, base64);
+ LL_ADD(x, x, d);
+ cPtr++;
+ }
+
+ if (cPtr == digitStart) {
+ if (endptr) {
+ *endptr = (char *) str;
+ }
+ return LL_ZERO;
+ }
+
+ if (negative) {
+#ifdef HAVE_LONG_LONG
+ /* The cast to a signed type is to avoid a compiler warning */
+ x = -(PRInt64)x;
+#else
+ LL_NEG(x, x);
+#endif
+ }
+
+ if (endptr) {
+ *endptr = (char *) cPtr;
+ }
+ return x;
+}
+
+/*
+ * The maximum field width (in number of characters) that is enough
+ * (may be more than necessary) to represent a 64-bit integer or
+ * floating point number.
+ */
+#define FMAX 31
+#define DECIMAL_POINT '.'
+
+static PRStatus
+GetInt(ScanfState *state, int code)
+{
+ char buf[FMAX + 1], *p;
+ int ch = 0;
+ static const char digits[] = "0123456789abcdefABCDEF";
+ PRBool seenDigit = PR_FALSE;
+ int base;
+ int dlen;
+
+ switch (code) {
+ case 'd': case 'u':
+ base = 10;
+ break;
+ case 'i':
+ base = 0;
+ break;
+ case 'x': case 'X': case 'p':
+ base = 16;
+ break;
+ case 'o':
+ base = 8;
+ break;
+ default:
+ return PR_FAILURE;
+ }
+ if (state->width == 0 || state->width > FMAX) {
+ state->width = FMAX;
+ }
+ p = buf;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
+ *p++ = ch;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ }
+ if (WITHIN_WIDTH(state) && ch == '0') {
+ seenDigit = PR_TRUE;
+ *p++ = ch;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ if (WITHIN_WIDTH(state)
+ && (ch == 'x' || ch == 'X')
+ && (base == 0 || base == 16)) {
+ base = 16;
+ *p++ = ch;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ } else if (base == 0) {
+ base = 8;
+ }
+ }
+ if (base == 0 || base == 10) {
+ dlen = 10;
+ } else if (base == 8) {
+ dlen = 8;
+ } else {
+ PR_ASSERT(base == 16);
+ dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */
+ }
+ while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) {
+ *p++ = ch;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ seenDigit = PR_TRUE;
+ }
+ if (WITHIN_WIDTH(state)) {
+ UNGET(state, ch);
+ }
+ if (!seenDigit) {
+ return PR_FAILURE;
+ }
+ *p = '\0';
+ if (state->assign) {
+ if (code == 'd' || code == 'i') {
+ if (state->sizeSpec == _PR_size_ll) {
+ PRInt64 llval = _pr_strtoull(buf, NULL, base);
+ *va_arg(state->ap, PRInt64 *) = llval;
+ } else {
+ long lval = strtol(buf, NULL, base);
+
+ if (state->sizeSpec == _PR_size_none) {
+ *va_arg(state->ap, PRIntn *) = lval;
+ } else if (state->sizeSpec == _PR_size_h) {
+ *va_arg(state->ap, PRInt16 *) = (PRInt16)lval;
+ } else if (state->sizeSpec == _PR_size_l) {
+ *va_arg(state->ap, PRInt32 *) = lval;
+ } else {
+ return PR_FAILURE;
+ }
+ }
+ } else {
+ if (state->sizeSpec == _PR_size_ll) {
+ PRUint64 llval = _pr_strtoull(buf, NULL, base);
+ *va_arg(state->ap, PRUint64 *) = llval;
+ } else {
+ unsigned long lval = strtoul(buf, NULL, base);
+
+ if (state->sizeSpec == _PR_size_none) {
+ *va_arg(state->ap, PRUintn *) = lval;
+ } else if (state->sizeSpec == _PR_size_h) {
+ *va_arg(state->ap, PRUint16 *) = (PRUint16)lval;
+ } else if (state->sizeSpec == _PR_size_l) {
+ *va_arg(state->ap, PRUint32 *) = lval;
+ } else {
+ return PR_FAILURE;
+ }
+ }
+ }
+ state->converted = PR_TRUE;
+ }
+ return PR_SUCCESS;
+}
+
+static PRStatus
+GetFloat(ScanfState *state)
+{
+ char buf[FMAX + 1], *p;
+ int ch = 0;
+ PRBool seenDigit = PR_FALSE;
+
+ if (state->width == 0 || state->width > FMAX) {
+ state->width = FMAX;
+ }
+ p = buf;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
+ *p++ = ch;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ }
+ while (WITHIN_WIDTH(state) && isdigit(ch)) {
+ *p++ = ch;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ seenDigit = PR_TRUE;
+ }
+ if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) {
+ *p++ = ch;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ while (WITHIN_WIDTH(state) && isdigit(ch)) {
+ *p++ = ch;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ seenDigit = PR_TRUE;
+ }
+ }
+
+ /*
+ * This is not robust. For example, "1.2e+" would confuse
+ * the code below to read 'e' and '+', only to realize that
+ * it should have stopped at "1.2". But we can't push back
+ * more than one character, so there is nothing I can do.
+ */
+
+ /* Parse exponent */
+ if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) {
+ *p++ = ch;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
+ *p++ = ch;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ }
+ while (WITHIN_WIDTH(state) && isdigit(ch)) {
+ *p++ = ch;
+ GET_IF_WITHIN_WIDTH(state, ch);
+ }
+ }
+ if (WITHIN_WIDTH(state)) {
+ UNGET(state, ch);
+ }
+ if (!seenDigit) {
+ return PR_FAILURE;
+ }
+ *p = '\0';
+ if (state->assign) {
+ PRFloat64 dval = PR_strtod(buf, NULL);
+
+ state->converted = PR_TRUE;
+ if (state->sizeSpec == _PR_size_l) {
+ *va_arg(state->ap, PRFloat64 *) = dval;
+ } else if (state->sizeSpec == _PR_size_L) {
+ *va_arg(state->ap, long double *) = dval;
+ } else {
+ *va_arg(state->ap, float *) = (float) dval;
+ }
+ }
+ return PR_SUCCESS;
+}
+
+/*
+ * Convert, and return the end of the conversion spec.
+ * Return NULL on error.
+ */
+
+static const char *
+Convert(ScanfState *state, const char *fmt)
+{
+ const char *cPtr;
+ int ch;
+ char *cArg = NULL;
+
+ state->converted = PR_FALSE;
+ cPtr = fmt;
+ if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') {
+ do {
+ ch = GET(state);
+ } while (isspace(ch));
+ UNGET(state, ch);
+ }
+ switch (*cPtr) {
+ case 'c':
+ if (state->assign) {
+ cArg = va_arg(state->ap, char *);
+ }
+ if (state->width == 0) {
+ state->width = 1;
+ }
+ for (; state->width > 0; state->width--) {
+ ch = GET(state);
+ if (ch == EOF) {
+ return NULL;
+ }
+ if (state->assign) {
+ *cArg++ = ch;
+ }
+ }
+ if (state->assign) {
+ state->converted = PR_TRUE;
+ }
+ break;
+ case 'p':
+ case 'd': case 'i': case 'o':
+ case 'u': case 'x': case 'X':
+ if (GetInt(state, *cPtr) == PR_FAILURE) {
+ return NULL;
+ }
+ break;
+ case 'e': case 'E': case 'f':
+ case 'g': case 'G':
+ if (GetFloat(state) == PR_FAILURE) {
+ return NULL;
+ }
+ break;
+ case 'n':
+ /* do not consume any input */
+ if (state->assign) {
+ switch (state->sizeSpec) {
+ case _PR_size_none:
+ *va_arg(state->ap, PRIntn *) = state->nChar;
+ break;
+ case _PR_size_h:
+ *va_arg(state->ap, PRInt16 *) = state->nChar;
+ break;
+ case _PR_size_l:
+ *va_arg(state->ap, PRInt32 *) = state->nChar;
+ break;
+ case _PR_size_ll:
+ LL_I2L(*va_arg(state->ap, PRInt64 *), state->nChar);
+ break;
+ default:
+ PR_ASSERT(0);
+ }
+ }
+ break;
+ case 's':
+ if (state->width == 0) {
+ state->width = INT_MAX;
+ }
+ if (state->assign) {
+ cArg = va_arg(state->ap, char *);
+ }
+ for (; state->width > 0; state->width--) {
+ ch = GET(state);
+ if ((ch == EOF) || isspace(ch)) {
+ UNGET(state, ch);
+ break;
+ }
+ if (state->assign) {
+ *cArg++ = ch;
+ }
+ }
+ if (state->assign) {
+ *cArg = '\0';
+ state->converted = PR_TRUE;
+ }
+ break;
+ case '%':
+ ch = GET(state);
+ if (ch != '%') {
+ UNGET(state, ch);
+ return NULL;
+ }
+ break;
+ case '[':
+ {
+ PRBool complement = PR_FALSE;
+ const char *closeBracket;
+ size_t n;
+
+ if (*++cPtr == '^') {
+ complement = PR_TRUE;
+ cPtr++;
+ }
+ closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']');
+ if (closeBracket == NULL) {
+ return NULL;
+ }
+ n = closeBracket - cPtr;
+ if (state->width == 0) {
+ state->width = INT_MAX;
+ }
+ if (state->assign) {
+ cArg = va_arg(state->ap, char *);
+ }
+ for (; state->width > 0; state->width--) {
+ ch = GET(state);
+ if ((ch == EOF)
+ || (!complement && !memchr(cPtr, ch, n))
+ || (complement && memchr(cPtr, ch, n))) {
+ UNGET(state, ch);
+ break;
+ }
+ if (state->assign) {
+ *cArg++ = ch;
+ }
+ }
+ if (state->assign) {
+ *cArg = '\0';
+ state->converted = PR_TRUE;
+ }
+ cPtr = closeBracket;
+ }
+ break;
+ default:
+ return NULL;
+ }
+ return cPtr;
+}
+
+static PRInt32
+DoScanf(ScanfState *state, const char *fmt)
+{
+ PRInt32 nConverted = 0;
+ const char *cPtr;
+ int ch;
+
+ state->nChar = 0;
+ cPtr = fmt;
+ while (1) {
+ if (isspace(*cPtr)) {
+ /* white space: skip */
+ do {
+ cPtr++;
+ } while (isspace(*cPtr));
+ do {
+ ch = GET(state);
+ } while (isspace(ch));
+ UNGET(state, ch);
+ } else if (*cPtr == '%') {
+ /* format spec: convert */
+ cPtr++;
+ state->assign = PR_TRUE;
+ if (*cPtr == '*') {
+ cPtr++;
+ state->assign = PR_FALSE;
+ }
+ for (state->width = 0; isdigit(*cPtr); cPtr++) {
+ state->width = state->width * 10 + *cPtr - '0';
+ }
+ state->sizeSpec = _PR_size_none;
+ if (*cPtr == 'h') {
+ cPtr++;
+ state->sizeSpec = _PR_size_h;
+ } else if (*cPtr == 'l') {
+ cPtr++;
+ if (*cPtr == 'l') {
+ cPtr++;
+ state->sizeSpec = _PR_size_ll;
+ } else {
+ state->sizeSpec = _PR_size_l;
+ }
+ } else if (*cPtr == 'L') {
+ cPtr++;
+ state->sizeSpec = _PR_size_L;
+ }
+ cPtr = Convert(state, cPtr);
+ if (cPtr == NULL) {
+ return (nConverted > 0 ? nConverted : EOF);
+ }
+ if (state->converted) {
+ nConverted++;
+ }
+ cPtr++;
+ } else {
+ /* others: must match */
+ if (*cPtr == '\0') {
+ return nConverted;
+ }
+ ch = GET(state);
+ if (ch != *cPtr) {
+ UNGET(state, ch);
+ return nConverted;
+ }
+ cPtr++;
+ }
+ }
+}
+
+static int
+StringGetChar(void *stream)
+{
+ char *cPtr = *((char **) stream);
+
+ if (*cPtr == '\0') {
+ return EOF;
+ }
+ *((char **) stream) = cPtr + 1;
+ return (unsigned char) *cPtr;
+}
+
+static void
+StringUngetChar(void *stream, int ch)
+{
+ char *cPtr = *((char **) stream);
+
+ if (ch != EOF) {
+ *((char **) stream) = cPtr - 1;
+ }
+}
+
+PR_IMPLEMENT(PRInt32)
+PR_sscanf(const char *buf, const char *fmt, ...)
+{
+ PRInt32 rv;
+ ScanfState state;
+
+ state.get = &StringGetChar;
+ state.unget = &StringUngetChar;
+ state.stream = (void *) &buf;
+ va_start(state.ap, fmt);
+ rv = DoScanf(&state, fmt);
+ va_end(state.ap);
+ return rv;
+}