#include <stdio.h>
#include <limits.h>
#include <ctype.h>
#include "php.h"
#include "php_variables.h"
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
#include "zend_execute.h"
#include "zend_operators.h"
#include "zend_strtod.h"
#include "php_globals.h"
#include "basic_functions.h"
#include "scanf.h"
#define SCAN_NOSKIP 0x1
#define SCAN_SUPPRESS 0x2
#define SCAN_UNSIGNED 0x4
#define SCAN_WIDTH 0x8
#define SCAN_SIGNOK 0x10
#define SCAN_NODIGITS 0x20
#define SCAN_NOZERO 0x40
#define SCAN_XOK 0x80
#define SCAN_PTOK 0x100
#define SCAN_EXPOK 0x200
#define UCHAR(x) (zend_uchar)(x)
typedef struct CharSet {
int exclude;
int nchars;
char *chars;
int nranges;
struct Range {
char start;
char end;
} *ranges;
} CharSet;
/* Forward declarations for the file-local helpers defined below. */
/* Parses a "[...]" set from `format` into `cset`; returns the position after ']'. */
static char *BuildCharSet(CharSet *cset, char *format);
/* Returns non-zero when `ch` is accepted by `cset` (honouring `exclude`). */
static int CharInSet(CharSet *cset, int ch);
/* Frees the buffers owned by `cset`. */
static void ReleaseCharSet(CharSet *cset);
/* Stores the error result in *return_value (long or NULL, depending on numVars). */
static inline void scan_set_error_return(int numVars, pval **return_value);
/*
 * BuildCharSet --
 *
 *	Parse the body of a "%[...]" conversion into `cset`.
 *
 *	`format` must point at the first character after the opening '['
 *	and the set must be terminated by ']' (the function scans until it
 *	finds one).  A leading '^' marks the set as excluding.  A ']' or '-'
 *	in first position is taken literally.  "a-b" adds an inclusive range
 *	(bounds are swapped if given in descending order); a '-' directly
 *	before the closing ']' is taken literally.
 *
 *	The buffers stored in `cset` are allocated here and must be released
 *	with ReleaseCharSet().
 *
 * Returns:
 *	Pointer to the character following the closing ']'.
 */
static char * BuildCharSet(CharSet *cset, char *format)
{
	char *cur;            /* character currently being examined */
	char *lookahead;      /* first-pass cursor, always one past `cur` */
	char rangeStart;      /* candidate lower bound for a pending range */
	int dashCount = 0;    /* upper bound on the number of ranges */
	struct Range *slot;

	memset(cset, 0, sizeof(CharSet));

	cur = format;
	if (*cur == '^') {
		cset->exclude = 1;
		format++;
		cur = format;
	}
	lookahead = format + 1;

	/*
	 * Pass 1: find the closing bracket and count '-' characters so both
	 * buffers can be sized up front.  A ']' in first position is literal.
	 */
	if (*cur == ']') {
		cur = lookahead;
		lookahead++;
	}
	while (*cur != ']') {
		dashCount += (*cur == '-');
		cur = lookahead;
		lookahead++;
	}

	cset->chars = (char *) safe_emalloc(sizeof(char), (lookahead - format - 1), 0);
	cset->ranges = (dashCount > 0)
		? (struct Range *) safe_emalloc(sizeof(struct Range), dashCount, 0)
		: NULL;

	/*
	 * Pass 2: fill in the literal characters and the ranges.  From here
	 * on `format` always points one past `cur`.
	 */
	cset->nchars = 0;
	cset->nranges = 0;

	cur = format;
	format++;
	rangeStart = *cur;
	if (*cur == ']' || *cur == '-') {
		/* Literal ']' or '-' in first position. */
		cset->chars[cset->nchars++] = *cur;
		cur = format;
		format++;
	}

	while (*cur != ']') {
		if (*format == '-') {
			/* Possible start of a range: remember it, do not add it yet. */
			rangeStart = *cur;
		} else if (*cur != '-') {
			/* Ordinary literal character. */
			cset->chars[cset->nchars++] = *cur;
		} else if (*format == ']') {
			/* Trailing '-': both the pending start and '-' are literals. */
			cset->chars[cset->nchars++] = rangeStart;
			cset->chars[cset->nchars++] = *cur;
		} else {
			/* Complete range: step onto its end character and store it ordered. */
			cur = format;
			format++;
			slot = &cset->ranges[cset->nranges];
			if (rangeStart < *cur) {
				slot->start = rangeStart;
				slot->end = *cur;
			} else {
				slot->start = *cur;
				slot->end = rangeStart;
			}
			cset->nranges++;
		}
		cur = format;
		format++;
	}

	return format;
}
/*
 * CharInSet --
 *
 *	Test whether character `c` is accepted by `cset`.
 *
 *	The character is first looked up among the individually listed
 *	characters and then among the inclusive ranges.  For an excluding
 *	set ("[^...]") the result is inverted.
 *
 * Returns:
 *	Non-zero if the character is accepted, zero otherwise.
 */
static int CharInSet(CharSet *cset, int c)
{
	const char needle = (char) c;
	int found = 0;
	int idx;

	/* Literal characters. */
	for (idx = 0; !found && idx < cset->nchars; idx++) {
		found = (cset->chars[idx] == needle);
	}

	/* Ranges are only consulted when no literal matched. */
	for (idx = 0; !found && idx < cset->nranges; idx++) {
		found = (cset->ranges[idx].start <= needle
				&& needle <= cset->ranges[idx].end);
	}

	if (cset->exclude) {
		return !found;
	}
	return found;
}
/*
 * ReleaseCharSet --
 *
 *	Free the buffers held by `cset`.  The character buffer is always
 *	freed; the range buffer is freed only when it is non-NULL.
 *	The CharSet structure itself is not freed.
 */
static void ReleaseCharSet(CharSet *cset)
{
	char *literals = (char *) cset->chars;
	char *spans = (char *) cset->ranges;

	efree(literals);
	if (spans != NULL) {
		efree(spans);
	}
}
/*
 * ValidateFormat --
 *
 *	Check a scanf-style format string before it is used for scanning.
 *
 *	The function walks every conversion specifier and verifies that
 *	  - sequential ("%d") and positional XPG ("%n$d") specifiers are
 *	    not mixed,
 *	  - positional indexes are in range,
 *	  - every "[" set is terminated,
 *	  - the conversion character is one of the supported ones,
 *	  - no variable is assigned more than once and (for sequential
 *	    formats) every variable is assigned.
 *
 * Parameters:
 *	format     NUL-terminated format string.
 *	numVars    Number of target variables supplied by the caller, or 0
 *	           when the results are to be returned instead; in that
 *	           case the count is derived from the format.
 *	totalSubs  If non-NULL, receives the number of variables/slots the
 *	           format assigns.
 *
 * Returns:
 *	SCAN_SUCCESS if the format is acceptable, otherwise
 *	SCAN_ERROR_INVALID_FORMAT after raising an E_WARNING that describes
 *	the problem.
 */
PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
{
#define STATIC_LIST_SIZE 16
	int gotXpg, gotSequential, value, i, flags;
	char *end, *ch = NULL;
	int staticAssign[STATIC_LIST_SIZE];
	int *nassign = staticAssign;	/* per-variable assignment counters */
	int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
	TSRMLS_FETCH();

	/*
	 * Use the on-stack counter array when it is large enough, otherwise
	 * allocate one counter per variable.
	 */
	if (numVars > nspace) {
		nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
		nspace = numVars;
	}
	for (i = 0; i < nspace; i++) {
		nassign[i] = 0;
	}

	xpgSize = objIndex = gotXpg = gotSequential = 0;

	while (*format != '\0') {
		ch = format++;
		flags = 0;

		/* Only conversion specifiers are of interest here. */
		if (*ch != '%') {
			continue;
		}
		ch = format++;
		if (*ch == '%') {
			/* "%%" is a literal percent sign. */
			continue;
		}
		if (*ch == '*') {
			/* Suppressed conversions take no variable and no XPG index. */
			flags |= SCAN_SUPPRESS;
			ch = format++;
			goto xpgCheckDone;
		}

		/*
		 * A digit sequence is either an XPG position ("%n$") or a field
		 * width.  The cast through UCHAR keeps bytes >= 0x80 from being
		 * passed to isdigit() as negative values.
		 */
		if (isdigit(UCHAR(*ch))) {
			value = strtoul(format-1, &end, 10);
			if (*end != '$') {
				goto notXpg;
			}
			format = end+1;
			ch = format++;
			gotXpg = 1;
			if (gotSequential) {
				goto mixedXPG;
			}
			objIndex = value - 1;
			if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
				goto badIndex;
			} else if (numVars == 0) {
				/*
				 * No explicit variables: track the highest position seen
				 * so the number of result slots can be reported later.
				 */
				if (value > SCAN_MAX_ARGS) {
					goto badIndex;
				}
				xpgSize = (xpgSize > value) ? xpgSize : value;
			}
			goto xpgCheckDone;
		}

notXpg:
		gotSequential = 1;
		if (gotXpg) {
mixedXPG:
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
			goto error;
		}

xpgCheckDone:
		/* Optional field width. */
		if (isdigit(UCHAR(*ch))) {
			value = strtoul(format-1, &format, 10);
			flags |= SCAN_WIDTH;
			ch = format++;
		}

		/* Size modifiers are accepted and skipped. */
		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
			ch = format++;
		}

		if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
			goto badIndex;
		}

		/* Validate the conversion character itself. */
		switch (*ch) {
			case 'n':
			case 'd':
			case 'D':
			case 'i':
			case 'o':
			case 'x':
			case 'X':
			case 'u':
			case 'f':
			case 'e':
			case 'E':
			case 'g':
			case 's':
				break;

			case 'c':
				break;

			case '[':
				/*
				 * Skip over the set: optional '^', optional literal ']'
				 * in first position, then everything up to the closing
				 * ']'.  Running into the end of the format is an error.
				 */
				if (*format == '\0') {
					goto badSet;
				}
				ch = format++;
				if (*ch == '^') {
					if (*format == '\0') {
						goto badSet;
					}
					ch = format++;
				}
				if (*ch == ']') {
					if (*format == '\0') {
						goto badSet;
					}
					ch = format++;
				}
				while (*ch != ']') {
					if (*format == '\0') {
						goto badSet;
					}
					ch = format++;
				}
				break;
badSet:
				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
				goto error;

			default:
				{
					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
					goto error;
				}
		}

		if (!(flags & SCAN_SUPPRESS)) {
			if (objIndex >= nspace) {
				/*
				 * Grow the counter array.  With XPG specifiers grow to
				 * the highest position seen, otherwise by one chunk.
				 * `value` remembers the old size so only the new tail
				 * is zeroed.
				 */
				value = nspace;
				if (xpgSize) {
					nspace = xpgSize;
				} else {
					nspace += STATIC_LIST_SIZE;
				}
				if (nassign == staticAssign) {
					nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
					for (i = 0; i < STATIC_LIST_SIZE; ++i) {
						nassign[i] = staticAssign[i];
					}
				} else {
					nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
				}
				for (i = value; i < nspace; i++) {
					nassign[i] = 0;
				}
			}
			nassign[objIndex]++;
			objIndex++;
		}
	}

	/*
	 * When the caller supplied no variables, the count comes from the
	 * format: the highest XPG position, or the number of sequential
	 * conversions.
	 */
	if (numVars == 0) {
		if (xpgSize) {
			numVars = xpgSize;
		} else {
			numVars = objIndex;
		}
	}
	if (totalSubs) {
		*totalSubs = numVars;
	}

	/*
	 * Every variable may be assigned at most once; for sequential
	 * formats every variable must also be assigned.
	 */
	for (i = 0; i < numVars; i++) {
		if (nassign[i] > 1) {
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
			goto error;
		} else if (!xpgSize && (nassign[i] == 0)) {
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
			goto error;
		}
	}

	if (nassign != staticAssign) {
		efree((char *)nassign);
	}
	return SCAN_SUCCESS;

badIndex:
	if (gotXpg) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
	} else {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
	}

error:
	if (nassign != staticAssign) {
		efree((char *)nassign);
	}
	return SCAN_ERROR_INVALID_FORMAT;
#undef STATIC_LIST_SIZE
}
/*
 * php_sscanf_internal --
 *
 *	Scan `string` according to `format`.
 *
 *	Two calling modes exist, selected by the number of arguments found
 *	after `varStart`:
 *	  - variables supplied (numVars > 0): each converted value is stored
 *	    in the corresponding by-reference argument and *return_value is
 *	    set to the number of conversions performed;
 *	  - no variables supplied: *return_value becomes an array that is
 *	    pre-filled with NULL for every slot the format defines and then
 *	    populated with the converted values.
 *
 * Parameters:
 *	string        NUL-terminated input to scan.
 *	format        NUL-terminated format string.
 *	argCount      Number of entries in `args`.
 *	args          Argument vector of the calling function.
 *	varStart      Index in `args` of the first target variable.
 *	return_value  Receives the result as described above.
 *
 * Returns:
 *	SCAN_SUCCESS on success (including a partial scan);
 *	SCAN_ERROR_INVALID_FORMAT if ValidateFormat() rejects the format;
 *	SCAN_ERROR_VAR_PASSED_BYVAL if a target argument is not a reference;
 *	SCAN_ERROR_EOF if the input ran out before any conversion was made;
 *	FAILURE if the result array could not be pre-filled.
 */
PHPAPI int php_sscanf_internal( char *string, char *format,
				int argCount, zval ***args,
				int varStart, pval **return_value TSRMLS_DC)
{
	int numVars, nconversions, totalVars = -1;
	int i, value, result;
	int objIndex;
	char *end, *baseString;
	zval **current;
	char op = 0;
	int base = 0;
	int underflow = 0;
	size_t width;
	long (*fn)() = NULL;
	char *ch, sch;
	int flags;
	char buf[64];	/* Temporary buffer holding the text of one number. */

	/* An out-of-range varStart means "no variables were passed". */
	if ((varStart > argCount) || (varStart < 0)){
		varStart = SCAN_MAX_ARGS + 1;
	}
	numVars = argCount - varStart;
	if (numVars < 0) {
		numVars = 0;
	}

#if 0
	zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
					string, format, numVars, varStart);
#endif

	/* Reject malformed formats before touching any output. */
	if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
		scan_set_error_return( numVars, return_value );
		return SCAN_ERROR_INVALID_FORMAT;
	}

	objIndex = numVars ? varStart : 0;

	/* Every target variable must have been passed by reference. */
	if (numVars) {
		for (i = varStart;i < argCount;i++){
			if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
				scan_set_error_return(numVars, return_value);
				return SCAN_ERROR_VAR_PASSED_BYVAL;
			}
		}
	}

	/*
	 * Array mode: create the result array with one NULL entry per slot,
	 * so slots that are never converted are still present.
	 */
	if (!numVars) {
		array_init(*return_value);
		for (i = 0; i < totalVars; i++) {
			if (add_next_index_null(*return_value) == FAILURE) {
				scan_set_error_return(0, return_value);
				return FAILURE;
			}
		}
	}

	baseString = string;

	/*
	 * Walk the format.  The format has already been validated, so only
	 * input-related failures need to be handled below.
	 */
	nconversions = 0;

	while (*format != '\0') {
		ch = format++;
		flags = 0;

		/*
		 * Whitespace in the format matches any amount of whitespace in
		 * the input.  Characters go through UCHAR so that bytes >= 0x80
		 * are never handed to <ctype.h> as negative values.
		 */
		if ( isspace(UCHAR(*ch)) ) {
			sch = *string;
			while ( isspace(UCHAR(sch)) ) {
				if (*string == '\0') {
					goto done;
				}
				string++;
				sch = *string;
			}
			continue;
		}

		if (*ch != '%') {
literal:
			/* Literal format character: must match the input exactly. */
			if (*string == '\0') {
				underflow = 1;
				goto done;
			}
			sch = *string;
			string++;
			if (*ch != sch) {
				goto done;
			}
			continue;
		}

		ch = format++;
		if (*ch == '%') {
			goto literal;
		}

		/* Assignment suppression ("%*") or XPG position ("%n$"). */
		if (*ch == '*') {
			flags |= SCAN_SUPPRESS;
			ch = format++;
		} else if ( isdigit(UCHAR(*ch))) {
			value = strtoul(format-1, &end, 10);
			if (*end == '$') {
				format = end+1;
				ch = format++;
				objIndex = varStart + value - 1;
			}
		}

		/* Optional field width; 0 means "no width given". */
		if ( isdigit(UCHAR(*ch))) {
			width = strtoul(format-1, &format, 10);
			ch = format++;
		} else {
			width = 0;
		}

		/* Size modifiers are accepted and ignored. */
		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
			ch = format++;
		}

		/* Map the conversion character to an operation. */
		switch (*ch) {
			case 'n':
				/* %n stores the number of input characters consumed so far. */
				if (!(flags & SCAN_SUPPRESS)) {
					if (numVars && objIndex >= argCount) {
						break;
					} else if (numVars) {
						current = args[objIndex++];
						zval_dtor( *current );
						ZVAL_LONG( *current, (long)(string - baseString) );
					} else {
						add_index_long(*return_value, objIndex++, string - baseString);
					}
				}
				nconversions++;
				continue;

			case 'd':
			case 'D':
				op = 'i';
				base = 10;
				fn = (long (*)())strtol;
				break;
			case 'i':
				/* Base 0: the base is derived from the input prefix below. */
				op = 'i';
				base = 0;
				fn = (long (*)())strtol;
				break;
			case 'o':
				op = 'i';
				base = 8;
				fn = (long (*)())strtol;
				break;
			case 'x':
			case 'X':
				op = 'i';
				base = 16;
				fn = (long (*)())strtol;
				break;
			case 'u':
				op = 'i';
				base = 10;
				flags |= SCAN_UNSIGNED;
				fn = (long (*)())strtoul;
				break;

			case 'f':
			case 'e':
			case 'E':
			case 'g':
				op = 'f';
				break;

			case 's':
				op = 's';
				break;

			case 'c':
				/* %c is handled by the string path with a default width of 1. */
				op = 's';
				flags |= SCAN_NOSKIP;
				if (0 == width) {
					width = 1;
				}
				break;
			case '[':
				op = '[';
				flags |= SCAN_NOSKIP;
				break;
		}

		/* Running out of input before a conversion is an underflow. */
		if (*string == '\0') {
			underflow = 1;
			goto done;
		}

		/* Skip leading whitespace unless the conversion forbids it. */
		if (!(flags & SCAN_NOSKIP)) {
			while (*string != '\0') {
				sch = *string;
				if (! isspace(UCHAR(sch)) ) {
					break;
				}
				string++;
			}
			if (*string == '\0') {
				underflow = 1;
				goto done;
			}
		}

		/* Perform the requested scanning operation. */
		switch (op) {
			case 'c':
			case 's':
				/* Scan up to `width` characters or the next whitespace. */
				if (width == 0) {
					width = (size_t) ~0;
				}
				end = string;
				while (*end != '\0') {
					sch = *end;
					if ( isspace(UCHAR(sch)) ) {
						break;
					}
					end++;
					if (--width == 0) {
						break;
					}
				}
				if (!(flags & SCAN_SUPPRESS)) {
					if (numVars && objIndex >= argCount) {
						break;
					} else if (numVars) {
						current = args[objIndex++];
						zval_dtor( *current );
						ZVAL_STRINGL( *current, string, end-string, 1);
					} else {
						add_index_stringl( *return_value, objIndex++, string, end-string, 1);
					}
				}
				string = end;
				break;

			case '[': {
				CharSet cset;

				/* Scan characters for as long as they belong to the set. */
				if (width == 0) {
					width = (size_t) ~0;
				}
				end = string;

				/* BuildCharSet also advances `format` past the closing ']'. */
				format = BuildCharSet(&cset, format);
				while (*end != '\0') {
					sch = *end;
					if (!CharInSet(&cset, (int)sch)) {
						break;
					}
					end++;
					if (--width == 0) {
						break;
					}
				}
				ReleaseCharSet(&cset);

				if (string == end) {
					/* Nothing matched: stop scanning. */
					goto done;
				}
				if (!(flags & SCAN_SUPPRESS)) {
					if (numVars && objIndex >= argCount) {
						break;
					} else if (numVars) {
						current = args[objIndex++];
						zval_dtor( *current );
						ZVAL_STRINGL( *current, string, end-string, 1);
					} else {
						add_index_stringl(*return_value, objIndex++, string, end-string, 1);
					}
				}
				string = end;
				break;
			}

			case 'i':
				/*
				 * Copy the characters that form an integer into `buf`,
				 * using the flag bits as a small state machine, then
				 * convert the buffer with strtol/strtoul.
				 */
				buf[0] = '\0';
				if ((width == 0) || (width > sizeof(buf) - 1)) {
					width = sizeof(buf) - 1;
				}

				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
				for (end = buf; width > 0; width--) {
					switch (*string) {
						case '0':
							/*
							 * A leading zero may start a hex prefix (base 16
							 * or base 0) or select octal (base 0).
							 */
							if (base == 16) {
								flags |= SCAN_XOK;
							}
							if (base == 0) {
								base = 8;
								flags |= SCAN_XOK;
							}
							if (flags & SCAN_NOZERO) {
								flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
							} else {
								flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
							}
							goto addToInt;

						case '1': case '2': case '3': case '4':
						case '5': case '6': case '7':
							if (base == 0) {
								base = 10;
							}
							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
							goto addToInt;

						case '8': case '9':
							if (base == 0) {
								base = 10;
							}
							if (base <= 8) {
								/* Not a valid octal digit. */
								break;
							}
							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
							goto addToInt;

						case 'A': case 'B': case 'C':
						case 'D': case 'E': case 'F':
						case 'a': case 'b': case 'c':
						case 'd': case 'e': case 'f':
							if (base <= 10) {
								/* Only valid as hexadecimal digits. */
								break;
							}
							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
							goto addToInt;

						case '+': case '-':
							if (flags & SCAN_SIGNOK) {
								flags &= ~SCAN_SIGNOK;
								goto addToInt;
							}
							break;

						case 'x': case 'X':
							/* Accepted only directly after a single leading '0'. */
							if ((flags & SCAN_XOK) && (end == buf+1)) {
								base = 16;
								flags &= ~SCAN_XOK;
								goto addToInt;
							}
							break;
					}

					/* The character is not part of the number. */
					break;

addToInt:
					/* Accept the character and stop at the end of input. */
					*end++ = *string++;
					if (*string == '\0') {
						break;
					}
				}

				if (flags & SCAN_NODIGITS) {
					/* No digits at all: the conversion failed. */
					if (*string == '\0') {
						underflow = 1;
					}
					goto done;
				} else if (end[-1] == 'x' || end[-1] == 'X') {
					/* "0x" without hex digits: give the 'x' back. */
					end--;
					string--;
				}

				if (!(flags & SCAN_SUPPRESS)) {
					*end = '\0';
					value = (int) (*fn)(buf, NULL, base);
					if ((flags & SCAN_UNSIGNED) && (value < 0)) {
						/*
						 * The unsigned value does not fit in a signed int:
						 * hand it back as a decimal string instead.
						 */
						sprintf(buf, "%u", (unsigned int) value);
						if (numVars && objIndex >= argCount) {
							break;
						} else if (numVars) {
							/*
							 * Release the previous value before overwriting
							 * it, as the %s and %[ paths do; converting it
							 * to a string first would allocate a buffer that
							 * ZVAL_STRING then overwrites without freeing.
							 */
							current = args[objIndex++];
							zval_dtor( *current );
							ZVAL_STRING( *current, buf, 1 );
						} else {
							add_index_string(*return_value, objIndex++, buf, 1);
						}
					} else {
						if (numVars && objIndex >= argCount) {
							break;
						} else if (numVars) {
							current = args[objIndex++];
							convert_to_long( *current );
							Z_LVAL(**current) = value;
						} else {
							add_index_long(*return_value, objIndex++, value);
						}
					}
				}
				break;

			case 'f':
				/*
				 * Copy the characters that form a floating point number
				 * into `buf`, then convert the buffer with zend_strtod.
				 */
				buf[0] = '\0';
				if ((width == 0) || (width > sizeof(buf) - 1)) {
					width = sizeof(buf) - 1;
				}
				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
				for (end = buf; width > 0; width--) {
					switch (*string) {
						case '0': case '1': case '2': case '3':
						case '4': case '5': case '6': case '7':
						case '8': case '9':
							flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
							goto addToFloat;
						case '+': case '-':
							if (flags & SCAN_SIGNOK) {
								flags &= ~SCAN_SIGNOK;
								goto addToFloat;
							}
							break;
						case '.':
							if (flags & SCAN_PTOK) {
								flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
								goto addToFloat;
							}
							break;
						case 'e': case 'E':
							/*
							 * An exponent needs at least one mantissa digit
							 * and may be followed by a sign and digits.
							 */
							if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
								flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
									| SCAN_SIGNOK | SCAN_NODIGITS;
								goto addToFloat;
							}
							break;
					}

					/* The character is not part of the number. */
					break;

addToFloat:
					/* Accept the character and stop at the end of input. */
					*end++ = *string++;
					if (*string == '\0') {
						break;
					}
				}

				if (flags & SCAN_NODIGITS) {
					if (flags & SCAN_EXPOK) {
						/* No digits at all: the conversion failed. */
						if (*string == '\0') {
							underflow = 1;
						}
						goto done;
					}

					/*
					 * Digits were seen before an exponent that has no
					 * digits of its own: give back the exponent marker
					 * and, if present, its sign.
					 */
					end--;
					string--;
					if (*end != 'e' && *end != 'E') {
						end--;
						string--;
					}
				}

				if (!(flags & SCAN_SUPPRESS)) {
					double dvalue;
					*end = '\0';
					dvalue = zend_strtod(buf, NULL);
					if (numVars && objIndex >= argCount) {
						break;
					} else if (numVars) {
						current = args[objIndex++];
						convert_to_double( *current );
						Z_DVAL_PP( current ) = dvalue;
					} else {
						add_index_double( *return_value, objIndex++, dvalue );
					}
				}
				break;
		}
		nconversions++;
	}

done:
	result = SCAN_SUCCESS;

	if (underflow && (0==nconversions)) {
		/* Input exhausted before anything was converted. */
		scan_set_error_return( numVars, return_value );
		result = SCAN_ERROR_EOF;
	} else if (numVars) {
		/* Variable mode: report the number of conversions. */
		convert_to_long( *return_value );
		Z_LVAL_PP(return_value) = nconversions;
	} else if (nconversions < totalVars) {
		/* Array mode with unconverted slots: their NULL entries are left as they are. */
	}
	return result;
}
/*
 * scan_set_error_return --
 *
 *	Store the error result in *return_value.
 *
 *	When no target variables were supplied (numVars == 0) the result is
 *	converted to NULL; otherwise it becomes the long SCAN_ERROR_EOF.
 */
static inline void scan_set_error_return(int numVars, pval **return_value)
{
	if (!numVars) {
		convert_to_null( *return_value );
		return;
	}

	Z_TYPE_PP(return_value) = IS_LONG;
	Z_LVAL_PP(return_value) = SCAN_ERROR_EOF;
}