/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* * Scan functions for NSPR types * * Author: Wan-Teh Chang * * Acknowledgment: The implementation is inspired by the source code * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992. */ #include #include #include #include #include "prprf.h" #include "prdtoa.h" #include "prlog.h" #include "prerror.h" /* * A function that reads a character from 'stream'. * Returns the character read, or EOF if end of stream is reached. */ typedef int (*_PRGetCharFN)(void *stream); /* * A function that pushes the character 'ch' back to 'stream'. */ typedef void (*_PRUngetCharFN)(void *stream, int ch); /* * The size specifier for the integer and floating point number * conversions in format control strings. */ typedef enum { _PR_size_none, /* No size specifier is given */ _PR_size_h, /* The 'h' specifier, suggesting "short" */ _PR_size_l, /* The 'l' specifier, suggesting "long" */ _PR_size_L, /* The 'L' specifier, meaning a 'long double' */ _PR_size_ll /* The 'll' specifier, suggesting "long long" */ } _PRSizeSpec; /* * The collection of data that is passed between the scan function * and its subordinate functions. The fields of this structure * serve as the input or output arguments for these functions. */ typedef struct { _PRGetCharFN get; /* get a character from input stream */ _PRUngetCharFN unget; /* unget (push back) a character */ void *stream; /* argument for get and unget */ va_list ap; /* the variable argument list */ int nChar; /* number of characters read from 'stream' */ PRBool assign; /* assign, or suppress assignment? */ int width; /* field width */ _PRSizeSpec sizeSpec; /* 'h', 'l', 'L', or 'll' */ PRBool converted; /* is the value actually converted? */ } ScanfState; #define GET(state) ((state)->nChar++, (state)->get((state)->stream)) #define UNGET(state, ch) \ ((state)->nChar--, (state)->unget((state)->stream, ch)) /* * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH, * are always used together. * * GET_IF_WITHIN_WIDTH calls the GET macro and assigns its return * value to 'ch' only if we have not exceeded the field width of * 'state'. Therefore, after GET_IF_WITHIN_WIDTH, the value of * 'ch' is valid only if the macro WITHIN_WIDTH evaluates to true. */ #define GET_IF_WITHIN_WIDTH(state, ch) \ if (--(state)->width >= 0) { \ (ch) = GET(state); \ } #define WITHIN_WIDTH(state) ((state)->width >= 0) /* * _pr_strtoull: * Convert a string to an unsigned 64-bit integer. The string * 'str' is assumed to be a representation of the integer in * base 'base'. * * Warning: * - Only handle base 8, 10, and 16. * - No overflow checking. */ static PRUint64 _pr_strtoull(const char *str, char **endptr, int base) { static const int BASE_MAX = 16; static const char digits[] = "0123456789abcdef"; char *digitPtr; PRUint64 x; /* return value */ PRInt64 base64; const char *cPtr; PRBool negative; const char *digitStart; PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16); if (base < 0 || base == 1 || base > BASE_MAX) { if (endptr) { *endptr = (char *) str; return LL_ZERO; } } cPtr = str; while (isspace(*cPtr)) { ++cPtr; } negative = PR_FALSE; if (*cPtr == '-') { negative = PR_TRUE; cPtr++; } else if (*cPtr == '+') { cPtr++; } if (base == 16) { if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) { cPtr += 2; } } else if (base == 0) { if (*cPtr != '0') { base = 10; } else if (cPtr[1] == 'x' || cPtr[1] == 'X') { base = 16; cPtr += 2; } else { base = 8; } } PR_ASSERT(base != 0); LL_I2L(base64, base); digitStart = cPtr; /* Skip leading zeros */ while (*cPtr == '0') { cPtr++; } LL_I2L(x, 0); while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) { PRUint64 d; LL_I2L(d, (digitPtr - digits)); LL_MUL(x, x, base64); LL_ADD(x, x, d); cPtr++; } if (cPtr == digitStart) { if (endptr) { *endptr = (char *) str; } return LL_ZERO; } if (negative) { #ifdef HAVE_LONG_LONG /* The cast to a signed type is to avoid a compiler warning */ x = -(PRInt64)x; #else LL_NEG(x, x); #endif } if (endptr) { *endptr = (char *) cPtr; } return x; } /* * The maximum field width (in number of characters) that is enough * (may be more than necessary) to represent a 64-bit integer or * floating point number. */ #define FMAX 31 #define DECIMAL_POINT '.' static PRStatus GetInt(ScanfState *state, int code) { char buf[FMAX + 1], *p; int ch = 0; static const char digits[] = "0123456789abcdefABCDEF"; PRBool seenDigit = PR_FALSE; int base; int dlen; switch (code) { case 'd': case 'u': base = 10; break; case 'i': base = 0; break; case 'x': case 'X': case 'p': base = 16; break; case 'o': base = 8; break; default: return PR_FAILURE; } if (state->width == 0 || state->width > FMAX) { state->width = FMAX; } p = buf; GET_IF_WITHIN_WIDTH(state, ch); if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { *p++ = ch; GET_IF_WITHIN_WIDTH(state, ch); } if (WITHIN_WIDTH(state) && ch == '0') { seenDigit = PR_TRUE; *p++ = ch; GET_IF_WITHIN_WIDTH(state, ch); if (WITHIN_WIDTH(state) && (ch == 'x' || ch == 'X') && (base == 0 || base == 16)) { base = 16; *p++ = ch; GET_IF_WITHIN_WIDTH(state, ch); } else if (base == 0) { base = 8; } } if (base == 0 || base == 10) { dlen = 10; } else if (base == 8) { dlen = 8; } else { PR_ASSERT(base == 16); dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */ } while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) { *p++ = ch; GET_IF_WITHIN_WIDTH(state, ch); seenDigit = PR_TRUE; } if (WITHIN_WIDTH(state)) { UNGET(state, ch); } if (!seenDigit) { return PR_FAILURE; } *p = '\0'; if (state->assign) { if (code == 'd' || code == 'i') { if (state->sizeSpec == _PR_size_ll) { PRInt64 llval = _pr_strtoull(buf, NULL, base); *va_arg(state->ap, PRInt64 *) = llval; } else { long lval = strtol(buf, NULL, base); if (state->sizeSpec == _PR_size_none) { *va_arg(state->ap, PRIntn *) = lval; } else if (state->sizeSpec == _PR_size_h) { *va_arg(state->ap, PRInt16 *) = (PRInt16)lval; } else if (state->sizeSpec == _PR_size_l) { *va_arg(state->ap, PRInt32 *) = lval; } else { return PR_FAILURE; } } } else { if (state->sizeSpec == _PR_size_ll) { PRUint64 llval = _pr_strtoull(buf, NULL, base); *va_arg(state->ap, PRUint64 *) = llval; } else { unsigned long lval = strtoul(buf, NULL, base); if (state->sizeSpec == _PR_size_none) { *va_arg(state->ap, PRUintn *) = lval; } else if (state->sizeSpec == _PR_size_h) { *va_arg(state->ap, PRUint16 *) = (PRUint16)lval; } else if (state->sizeSpec == _PR_size_l) { *va_arg(state->ap, PRUint32 *) = lval; } else { return PR_FAILURE; } } } state->converted = PR_TRUE; } return PR_SUCCESS; } static PRStatus GetFloat(ScanfState *state) { char buf[FMAX + 1], *p; int ch = 0; PRBool seenDigit = PR_FALSE; if (state->width == 0 || state->width > FMAX) { state->width = FMAX; } p = buf; GET_IF_WITHIN_WIDTH(state, ch); if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { *p++ = ch; GET_IF_WITHIN_WIDTH(state, ch); } while (WITHIN_WIDTH(state) && isdigit(ch)) { *p++ = ch; GET_IF_WITHIN_WIDTH(state, ch); seenDigit = PR_TRUE; } if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) { *p++ = ch; GET_IF_WITHIN_WIDTH(state, ch); while (WITHIN_WIDTH(state) && isdigit(ch)) { *p++ = ch; GET_IF_WITHIN_WIDTH(state, ch); seenDigit = PR_TRUE; } } /* * This is not robust. For example, "1.2e+" would confuse * the code below to read 'e' and '+', only to realize that * it should have stopped at "1.2". But we can't push back * more than one character, so there is nothing I can do. */ /* Parse exponent */ if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) { *p++ = ch; GET_IF_WITHIN_WIDTH(state, ch); if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { *p++ = ch; GET_IF_WITHIN_WIDTH(state, ch); } while (WITHIN_WIDTH(state) && isdigit(ch)) { *p++ = ch; GET_IF_WITHIN_WIDTH(state, ch); } } if (WITHIN_WIDTH(state)) { UNGET(state, ch); } if (!seenDigit) { return PR_FAILURE; } *p = '\0'; if (state->assign) { PRFloat64 dval = PR_strtod(buf, NULL); state->converted = PR_TRUE; if (state->sizeSpec == _PR_size_l) { *va_arg(state->ap, PRFloat64 *) = dval; } else if (state->sizeSpec == _PR_size_L) { *va_arg(state->ap, long double *) = dval; } else { *va_arg(state->ap, float *) = (float) dval; } } return PR_SUCCESS; } /* * Convert, and return the end of the conversion spec. * Return NULL on error. */ static const char * Convert(ScanfState *state, const char *fmt) { const char *cPtr; int ch; char *cArg = NULL; state->converted = PR_FALSE; cPtr = fmt; if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') { do { ch = GET(state); } while (isspace(ch)); UNGET(state, ch); } switch (*cPtr) { case 'c': if (state->assign) { cArg = va_arg(state->ap, char *); } if (state->width == 0) { state->width = 1; } for (; state->width > 0; state->width--) { ch = GET(state); if (ch == EOF) { return NULL; } if (state->assign) { *cArg++ = ch; } } if (state->assign) { state->converted = PR_TRUE; } break; case 'p': case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': if (GetInt(state, *cPtr) == PR_FAILURE) { return NULL; } break; case 'e': case 'E': case 'f': case 'g': case 'G': if (GetFloat(state) == PR_FAILURE) { return NULL; } break; case 'n': /* do not consume any input */ if (state->assign) { switch (state->sizeSpec) { case _PR_size_none: *va_arg(state->ap, PRIntn *) = state->nChar; break; case _PR_size_h: *va_arg(state->ap, PRInt16 *) = state->nChar; break; case _PR_size_l: *va_arg(state->ap, PRInt32 *) = state->nChar; break; case _PR_size_ll: LL_I2L(*va_arg(state->ap, PRInt64 *), state->nChar); break; default: PR_ASSERT(0); } } break; case 's': if (state->width == 0) { state->width = INT_MAX; } if (state->assign) { cArg = va_arg(state->ap, char *); } for (; state->width > 0; state->width--) { ch = GET(state); if ((ch == EOF) || isspace(ch)) { UNGET(state, ch); break; } if (state->assign) { *cArg++ = ch; } } if (state->assign) { *cArg = '\0'; state->converted = PR_TRUE; } break; case '%': ch = GET(state); if (ch != '%') { UNGET(state, ch); return NULL; } break; case '[': { PRBool complement = PR_FALSE; const char *closeBracket; size_t n; if (*++cPtr == '^') { complement = PR_TRUE; cPtr++; } closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']'); if (closeBracket == NULL) { return NULL; } n = closeBracket - cPtr; if (state->width == 0) { state->width = INT_MAX; } if (state->assign) { cArg = va_arg(state->ap, char *); } for (; state->width > 0; state->width--) { ch = GET(state); if ((ch == EOF) || (!complement && !memchr(cPtr, ch, n)) || (complement && memchr(cPtr, ch, n))) { UNGET(state, ch); break; } if (state->assign) { *cArg++ = ch; } } if (state->assign) { *cArg = '\0'; state->converted = PR_TRUE; } cPtr = closeBracket; } break; default: return NULL; } return cPtr; } static PRInt32 DoScanf(ScanfState *state, const char *fmt) { PRInt32 nConverted = 0; const char *cPtr; int ch; state->nChar = 0; cPtr = fmt; while (1) { if (isspace(*cPtr)) { /* white space: skip */ do { cPtr++; } while (isspace(*cPtr)); do { ch = GET(state); } while (isspace(ch)); UNGET(state, ch); } else if (*cPtr == '%') { /* format spec: convert */ cPtr++; state->assign = PR_TRUE; if (*cPtr == '*') { cPtr++; state->assign = PR_FALSE; } for (state->width = 0; isdigit(*cPtr); cPtr++) { state->width = state->width * 10 + *cPtr - '0'; } state->sizeSpec = _PR_size_none; if (*cPtr == 'h') { cPtr++; state->sizeSpec = _PR_size_h; } else if (*cPtr == 'l') { cPtr++; if (*cPtr == 'l') { cPtr++; state->sizeSpec = _PR_size_ll; } else { state->sizeSpec = _PR_size_l; } } else if (*cPtr == 'L') { cPtr++; state->sizeSpec = _PR_size_L; } cPtr = Convert(state, cPtr); if (cPtr == NULL) { return (nConverted > 0 ? nConverted : EOF); } if (state->converted) { nConverted++; } cPtr++; } else { /* others: must match */ if (*cPtr == '\0') { return nConverted; } ch = GET(state); if (ch != *cPtr) { UNGET(state, ch); return nConverted; } cPtr++; } } } static int StringGetChar(void *stream) { char *cPtr = *((char **) stream); if (*cPtr == '\0') { return EOF; } *((char **) stream) = cPtr + 1; return (unsigned char) *cPtr; } static void StringUngetChar(void *stream, int ch) { char *cPtr = *((char **) stream); if (ch != EOF) { *((char **) stream) = cPtr - 1; } } PR_IMPLEMENT(PRInt32) PR_sscanf(const char *buf, const char *fmt, ...) { PRInt32 rv; ScanfState state; state.get = &StringGetChar; state.unget = &StringUngetChar; state.stream = (void *) &buf; va_start(state.ap, fmt); rv = DoScanf(&state, fmt); va_end(state.ap); return rv; }