andre@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ andre@0: /* This Source Code Form is subject to the terms of the Mozilla Public andre@0: * License, v. 2.0. If a copy of the MPL was not distributed with this andre@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ andre@0: andre@0: /* andre@0: * Scan functions for NSPR types andre@0: * andre@0: * Author: Wan-Teh Chang andre@0: * andre@0: * Acknowledgment: The implementation is inspired by the source code andre@0: * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992. andre@0: */ andre@0: andre@0: #include andre@0: #include andre@0: #include andre@0: #include andre@0: #include "prprf.h" andre@0: #include "prdtoa.h" andre@0: #include "prlog.h" andre@0: #include "prerror.h" andre@0: andre@0: /* andre@0: * A function that reads a character from 'stream'. andre@0: * Returns the character read, or EOF if end of stream is reached. andre@0: */ andre@0: typedef int (*_PRGetCharFN)(void *stream); andre@0: andre@0: /* andre@0: * A function that pushes the character 'ch' back to 'stream'. andre@0: */ andre@0: typedef void (*_PRUngetCharFN)(void *stream, int ch); andre@0: andre@0: /* andre@0: * The size specifier for the integer and floating point number andre@0: * conversions in format control strings. andre@0: */ andre@0: typedef enum { andre@0: _PR_size_none, /* No size specifier is given */ andre@0: _PR_size_h, /* The 'h' specifier, suggesting "short" */ andre@0: _PR_size_l, /* The 'l' specifier, suggesting "long" */ andre@0: _PR_size_L, /* The 'L' specifier, meaning a 'long double' */ andre@0: _PR_size_ll /* The 'll' specifier, suggesting "long long" */ andre@0: } _PRSizeSpec; andre@0: andre@0: /* andre@0: * The collection of data that is passed between the scan function andre@0: * and its subordinate functions. The fields of this structure andre@0: * serve as the input or output arguments for these functions. andre@0: */ andre@0: typedef struct { andre@0: _PRGetCharFN get; /* get a character from input stream */ andre@0: _PRUngetCharFN unget; /* unget (push back) a character */ andre@0: void *stream; /* argument for get and unget */ andre@0: va_list ap; /* the variable argument list */ andre@0: int nChar; /* number of characters read from 'stream' */ andre@0: andre@0: PRBool assign; /* assign, or suppress assignment? */ andre@0: int width; /* field width */ andre@0: _PRSizeSpec sizeSpec; /* 'h', 'l', 'L', or 'll' */ andre@0: andre@0: PRBool converted; /* is the value actually converted? */ andre@0: } ScanfState; andre@0: andre@0: #define GET(state) ((state)->nChar++, (state)->get((state)->stream)) andre@0: #define UNGET(state, ch) \ andre@0: ((state)->nChar--, (state)->unget((state)->stream, ch)) andre@0: andre@0: /* andre@0: * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH, andre@0: * are always used together. andre@0: * andre@0: * GET_IF_WITHIN_WIDTH calls the GET macro and assigns its return andre@0: * value to 'ch' only if we have not exceeded the field width of andre@0: * 'state'. Therefore, after GET_IF_WITHIN_WIDTH, the value of andre@0: * 'ch' is valid only if the macro WITHIN_WIDTH evaluates to true. andre@0: */ andre@0: andre@0: #define GET_IF_WITHIN_WIDTH(state, ch) \ andre@0: if (--(state)->width >= 0) { \ andre@0: (ch) = GET(state); \ andre@0: } andre@0: #define WITHIN_WIDTH(state) ((state)->width >= 0) andre@0: andre@0: /* andre@0: * _pr_strtoull: andre@0: * Convert a string to an unsigned 64-bit integer. The string andre@0: * 'str' is assumed to be a representation of the integer in andre@0: * base 'base'. andre@0: * andre@0: * Warning: andre@0: * - Only handle base 8, 10, and 16. andre@0: * - No overflow checking. andre@0: */ andre@0: andre@0: static PRUint64 andre@0: _pr_strtoull(const char *str, char **endptr, int base) andre@0: { andre@0: static const int BASE_MAX = 16; andre@0: static const char digits[] = "0123456789abcdef"; andre@0: char *digitPtr; andre@0: PRUint64 x; /* return value */ andre@0: PRInt64 base64; andre@0: const char *cPtr; andre@0: PRBool negative; andre@0: const char *digitStart; andre@0: andre@0: PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16); andre@0: if (base < 0 || base == 1 || base > BASE_MAX) { andre@0: if (endptr) { andre@0: *endptr = (char *) str; andre@0: return LL_ZERO; andre@0: } andre@0: } andre@0: andre@0: cPtr = str; andre@0: while (isspace(*cPtr)) { andre@0: ++cPtr; andre@0: } andre@0: andre@0: negative = PR_FALSE; andre@0: if (*cPtr == '-') { andre@0: negative = PR_TRUE; andre@0: cPtr++; andre@0: } else if (*cPtr == '+') { andre@0: cPtr++; andre@0: } andre@0: andre@0: if (base == 16) { andre@0: if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) { andre@0: cPtr += 2; andre@0: } andre@0: } else if (base == 0) { andre@0: if (*cPtr != '0') { andre@0: base = 10; andre@0: } else if (cPtr[1] == 'x' || cPtr[1] == 'X') { andre@0: base = 16; andre@0: cPtr += 2; andre@0: } else { andre@0: base = 8; andre@0: } andre@0: } andre@0: PR_ASSERT(base != 0); andre@0: LL_I2L(base64, base); andre@0: digitStart = cPtr; andre@0: andre@0: /* Skip leading zeros */ andre@0: while (*cPtr == '0') { andre@0: cPtr++; andre@0: } andre@0: andre@0: LL_I2L(x, 0); andre@0: while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) { andre@0: PRUint64 d; andre@0: andre@0: LL_I2L(d, (digitPtr - digits)); andre@0: LL_MUL(x, x, base64); andre@0: LL_ADD(x, x, d); andre@0: cPtr++; andre@0: } andre@0: andre@0: if (cPtr == digitStart) { andre@0: if (endptr) { andre@0: *endptr = (char *) str; andre@0: } andre@0: return LL_ZERO; andre@0: } andre@0: andre@0: if (negative) { andre@0: #ifdef HAVE_LONG_LONG andre@0: /* The cast to a signed type is to avoid a compiler warning */ andre@0: x = -(PRInt64)x; andre@0: #else andre@0: LL_NEG(x, x); andre@0: #endif andre@0: } andre@0: andre@0: if (endptr) { andre@0: *endptr = (char *) cPtr; andre@0: } andre@0: return x; andre@0: } andre@0: andre@0: /* andre@0: * The maximum field width (in number of characters) that is enough andre@0: * (may be more than necessary) to represent a 64-bit integer or andre@0: * floating point number. andre@0: */ andre@0: #define FMAX 31 andre@0: #define DECIMAL_POINT '.' andre@0: andre@0: static PRStatus andre@0: GetInt(ScanfState *state, int code) andre@0: { andre@0: char buf[FMAX + 1], *p; andre@0: int ch; andre@0: static const char digits[] = "0123456789abcdefABCDEF"; andre@0: PRBool seenDigit = PR_FALSE; andre@0: int base; andre@0: int dlen; andre@0: andre@0: switch (code) { andre@0: case 'd': case 'u': andre@0: base = 10; andre@0: break; andre@0: case 'i': andre@0: base = 0; andre@0: break; andre@0: case 'x': case 'X': case 'p': andre@0: base = 16; andre@0: break; andre@0: case 'o': andre@0: base = 8; andre@0: break; andre@0: default: andre@0: return PR_FAILURE; andre@0: } andre@0: if (state->width == 0 || state->width > FMAX) { andre@0: state->width = FMAX; andre@0: } andre@0: p = buf; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { andre@0: *p++ = ch; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: } andre@0: if (WITHIN_WIDTH(state) && ch == '0') { andre@0: seenDigit = PR_TRUE; andre@0: *p++ = ch; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: if (WITHIN_WIDTH(state) andre@0: && (ch == 'x' || ch == 'X') andre@0: && (base == 0 || base == 16)) { andre@0: base = 16; andre@0: *p++ = ch; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: } else if (base == 0) { andre@0: base = 8; andre@0: } andre@0: } andre@0: if (base == 0 || base == 10) { andre@0: dlen = 10; andre@0: } else if (base == 8) { andre@0: dlen = 8; andre@0: } else { andre@0: PR_ASSERT(base == 16); andre@0: dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */ andre@0: } andre@0: while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) { andre@0: *p++ = ch; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: seenDigit = PR_TRUE; andre@0: } andre@0: if (WITHIN_WIDTH(state)) { andre@0: UNGET(state, ch); andre@0: } andre@0: if (!seenDigit) { andre@0: return PR_FAILURE; andre@0: } andre@0: *p = '\0'; andre@0: if (state->assign) { andre@0: if (code == 'd' || code == 'i') { andre@0: if (state->sizeSpec == _PR_size_ll) { andre@0: PRInt64 llval = _pr_strtoull(buf, NULL, base); andre@0: *va_arg(state->ap, PRInt64 *) = llval; andre@0: } else { andre@0: long lval = strtol(buf, NULL, base); andre@0: andre@0: if (state->sizeSpec == _PR_size_none) { andre@0: *va_arg(state->ap, PRIntn *) = lval; andre@0: } else if (state->sizeSpec == _PR_size_h) { andre@0: *va_arg(state->ap, PRInt16 *) = (PRInt16)lval; andre@0: } else if (state->sizeSpec == _PR_size_l) { andre@0: *va_arg(state->ap, PRInt32 *) = lval; andre@0: } else { andre@0: return PR_FAILURE; andre@0: } andre@0: } andre@0: } else { andre@0: if (state->sizeSpec == _PR_size_ll) { andre@0: PRUint64 llval = _pr_strtoull(buf, NULL, base); andre@0: *va_arg(state->ap, PRUint64 *) = llval; andre@0: } else { andre@0: unsigned long lval = strtoul(buf, NULL, base); andre@0: andre@0: if (state->sizeSpec == _PR_size_none) { andre@0: *va_arg(state->ap, PRUintn *) = lval; andre@0: } else if (state->sizeSpec == _PR_size_h) { andre@0: *va_arg(state->ap, PRUint16 *) = (PRUint16)lval; andre@0: } else if (state->sizeSpec == _PR_size_l) { andre@0: *va_arg(state->ap, PRUint32 *) = lval; andre@0: } else { andre@0: return PR_FAILURE; andre@0: } andre@0: } andre@0: } andre@0: state->converted = PR_TRUE; andre@0: } andre@0: return PR_SUCCESS; andre@0: } andre@0: andre@0: static PRStatus andre@0: GetFloat(ScanfState *state) andre@0: { andre@0: char buf[FMAX + 1], *p; andre@0: int ch; andre@0: PRBool seenDigit = PR_FALSE; andre@0: andre@0: if (state->width == 0 || state->width > FMAX) { andre@0: state->width = FMAX; andre@0: } andre@0: p = buf; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { andre@0: *p++ = ch; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: } andre@0: while (WITHIN_WIDTH(state) && isdigit(ch)) { andre@0: *p++ = ch; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: seenDigit = PR_TRUE; andre@0: } andre@0: if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) { andre@0: *p++ = ch; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: while (WITHIN_WIDTH(state) && isdigit(ch)) { andre@0: *p++ = ch; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: seenDigit = PR_TRUE; andre@0: } andre@0: } andre@0: andre@0: /* andre@0: * This is not robust. For example, "1.2e+" would confuse andre@0: * the code below to read 'e' and '+', only to realize that andre@0: * it should have stopped at "1.2". But we can't push back andre@0: * more than one character, so there is nothing I can do. andre@0: */ andre@0: andre@0: /* Parse exponent */ andre@0: if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) { andre@0: *p++ = ch; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { andre@0: *p++ = ch; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: } andre@0: while (WITHIN_WIDTH(state) && isdigit(ch)) { andre@0: *p++ = ch; andre@0: GET_IF_WITHIN_WIDTH(state, ch); andre@0: } andre@0: } andre@0: if (WITHIN_WIDTH(state)) { andre@0: UNGET(state, ch); andre@0: } andre@0: if (!seenDigit) { andre@0: return PR_FAILURE; andre@0: } andre@0: *p = '\0'; andre@0: if (state->assign) { andre@0: PRFloat64 dval = PR_strtod(buf, NULL); andre@0: andre@0: state->converted = PR_TRUE; andre@0: if (state->sizeSpec == _PR_size_l) { andre@0: *va_arg(state->ap, PRFloat64 *) = dval; andre@0: } else if (state->sizeSpec == _PR_size_L) { andre@0: #if defined(OSF1) || defined(IRIX) andre@0: *va_arg(state->ap, double *) = dval; andre@0: #else andre@0: *va_arg(state->ap, long double *) = dval; andre@0: #endif andre@0: } else { andre@0: *va_arg(state->ap, float *) = (float) dval; andre@0: } andre@0: } andre@0: return PR_SUCCESS; andre@0: } andre@0: andre@0: /* andre@0: * Convert, and return the end of the conversion spec. andre@0: * Return NULL on error. andre@0: */ andre@0: andre@0: static const char * andre@0: Convert(ScanfState *state, const char *fmt) andre@0: { andre@0: const char *cPtr; andre@0: int ch; andre@0: char *cArg = NULL; andre@0: andre@0: state->converted = PR_FALSE; andre@0: cPtr = fmt; andre@0: if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') { andre@0: do { andre@0: ch = GET(state); andre@0: } while (isspace(ch)); andre@0: UNGET(state, ch); andre@0: } andre@0: switch (*cPtr) { andre@0: case 'c': andre@0: if (state->assign) { andre@0: cArg = va_arg(state->ap, char *); andre@0: } andre@0: if (state->width == 0) { andre@0: state->width = 1; andre@0: } andre@0: for (; state->width > 0; state->width--) { andre@0: ch = GET(state); andre@0: if (ch == EOF) { andre@0: return NULL; andre@0: } else if (state->assign) { andre@0: *cArg++ = ch; andre@0: } andre@0: } andre@0: if (state->assign) { andre@0: state->converted = PR_TRUE; andre@0: } andre@0: break; andre@0: case 'p': andre@0: case 'd': case 'i': case 'o': andre@0: case 'u': case 'x': case 'X': andre@0: if (GetInt(state, *cPtr) == PR_FAILURE) { andre@0: return NULL; andre@0: } andre@0: break; andre@0: case 'e': case 'E': case 'f': andre@0: case 'g': case 'G': andre@0: if (GetFloat(state) == PR_FAILURE) { andre@0: return NULL; andre@0: } andre@0: break; andre@0: case 'n': andre@0: /* do not consume any input */ andre@0: if (state->assign) { andre@0: switch (state->sizeSpec) { andre@0: case _PR_size_none: andre@0: *va_arg(state->ap, PRIntn *) = state->nChar; andre@0: break; andre@0: case _PR_size_h: andre@0: *va_arg(state->ap, PRInt16 *) = state->nChar; andre@0: break; andre@0: case _PR_size_l: andre@0: *va_arg(state->ap, PRInt32 *) = state->nChar; andre@0: break; andre@0: case _PR_size_ll: andre@0: LL_I2L(*va_arg(state->ap, PRInt64 *), state->nChar); andre@0: break; andre@0: default: andre@0: PR_ASSERT(0); andre@0: } andre@0: } andre@0: break; andre@0: case 's': andre@0: if (state->width == 0) { andre@0: state->width = INT_MAX; andre@0: } andre@0: if (state->assign) { andre@0: cArg = va_arg(state->ap, char *); andre@0: } andre@0: for (; state->width > 0; state->width--) { andre@0: ch = GET(state); andre@0: if ((ch == EOF) || isspace(ch)) { andre@0: UNGET(state, ch); andre@0: break; andre@0: } andre@0: if (state->assign) { andre@0: *cArg++ = ch; andre@0: } andre@0: } andre@0: if (state->assign) { andre@0: *cArg = '\0'; andre@0: state->converted = PR_TRUE; andre@0: } andre@0: break; andre@0: case '%': andre@0: ch = GET(state); andre@0: if (ch != '%') { andre@0: UNGET(state, ch); andre@0: return NULL; andre@0: } andre@0: break; andre@0: case '[': andre@0: { andre@0: PRBool complement = PR_FALSE; andre@0: const char *closeBracket; andre@0: size_t n; andre@0: andre@0: if (*++cPtr == '^') { andre@0: complement = PR_TRUE; andre@0: cPtr++; andre@0: } andre@0: closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']'); andre@0: if (closeBracket == NULL) { andre@0: return NULL; andre@0: } andre@0: n = closeBracket - cPtr; andre@0: if (state->width == 0) { andre@0: state->width = INT_MAX; andre@0: } andre@0: if (state->assign) { andre@0: cArg = va_arg(state->ap, char *); andre@0: } andre@0: for (; state->width > 0; state->width--) { andre@0: ch = GET(state); andre@0: if ((ch == EOF) andre@0: || (!complement && !memchr(cPtr, ch, n)) andre@0: || (complement && memchr(cPtr, ch, n))) { andre@0: UNGET(state, ch); andre@0: break; andre@0: } andre@0: if (state->assign) { andre@0: *cArg++ = ch; andre@0: } andre@0: } andre@0: if (state->assign) { andre@0: *cArg = '\0'; andre@0: state->converted = PR_TRUE; andre@0: } andre@0: cPtr = closeBracket; andre@0: } andre@0: break; andre@0: default: andre@0: return NULL; andre@0: } andre@0: return cPtr; andre@0: } andre@0: andre@0: static PRInt32 andre@0: DoScanf(ScanfState *state, const char *fmt) andre@0: { andre@0: PRInt32 nConverted = 0; andre@0: const char *cPtr; andre@0: int ch; andre@0: andre@0: state->nChar = 0; andre@0: cPtr = fmt; andre@0: while (1) { andre@0: if (isspace(*cPtr)) { andre@0: /* white space: skip */ andre@0: do { andre@0: cPtr++; andre@0: } while (isspace(*cPtr)); andre@0: do { andre@0: ch = GET(state); andre@0: } while (isspace(ch)); andre@0: UNGET(state, ch); andre@0: } else if (*cPtr == '%') { andre@0: /* format spec: convert */ andre@0: cPtr++; andre@0: state->assign = PR_TRUE; andre@0: if (*cPtr == '*') { andre@0: cPtr++; andre@0: state->assign = PR_FALSE; andre@0: } andre@0: for (state->width = 0; isdigit(*cPtr); cPtr++) { andre@0: state->width = state->width * 10 + *cPtr - '0'; andre@0: } andre@0: state->sizeSpec = _PR_size_none; andre@0: if (*cPtr == 'h') { andre@0: cPtr++; andre@0: state->sizeSpec = _PR_size_h; andre@0: } else if (*cPtr == 'l') { andre@0: cPtr++; andre@0: if (*cPtr == 'l') { andre@0: cPtr++; andre@0: state->sizeSpec = _PR_size_ll; andre@0: } else { andre@0: state->sizeSpec = _PR_size_l; andre@0: } andre@0: } else if (*cPtr == 'L') { andre@0: cPtr++; andre@0: state->sizeSpec = _PR_size_L; andre@0: } andre@0: cPtr = Convert(state, cPtr); andre@0: if (cPtr == NULL) { andre@0: return (nConverted > 0 ? nConverted : EOF); andre@0: } andre@0: if (state->converted) { andre@0: nConverted++; andre@0: } andre@0: cPtr++; andre@0: } else { andre@0: /* others: must match */ andre@0: if (*cPtr == '\0') { andre@0: return nConverted; andre@0: } andre@0: ch = GET(state); andre@0: if (ch != *cPtr) { andre@0: UNGET(state, ch); andre@0: return nConverted; andre@0: } andre@0: cPtr++; andre@0: } andre@0: } andre@0: } andre@0: andre@0: static int andre@0: StringGetChar(void *stream) andre@0: { andre@0: char *cPtr = *((char **) stream); andre@0: andre@0: if (*cPtr == '\0') { andre@0: return EOF; andre@0: } else { andre@0: *((char **) stream) = cPtr + 1; andre@0: return (unsigned char) *cPtr; andre@0: } andre@0: } andre@0: andre@0: static void andre@0: StringUngetChar(void *stream, int ch) andre@0: { andre@0: char *cPtr = *((char **) stream); andre@0: andre@0: if (ch != EOF) { andre@0: *((char **) stream) = cPtr - 1; andre@0: } andre@0: } andre@0: andre@0: PR_IMPLEMENT(PRInt32) andre@0: PR_sscanf(const char *buf, const char *fmt, ...) andre@0: { andre@0: PRInt32 rv; andre@0: ScanfState state; andre@0: andre@0: state.get = &StringGetChar; andre@0: state.unget = &StringUngetChar; andre@0: state.stream = (void *) &buf; andre@0: va_start(state.ap, fmt); andre@0: rv = DoScanf(&state, fmt); andre@0: va_end(state.ap); andre@0: return rv; andre@0: }