#include "tclInt.h"
#include "tclPort.h"
#define SCAN_NOSKIP 0x1
#define SCAN_SUPPRESS 0x2
#define SCAN_UNSIGNED 0x4
#define SCAN_WIDTH 0x8
#define SCAN_SIGNOK 0x10
#define SCAN_NODIGITS 0x20
#define SCAN_NOZERO 0x40
#define SCAN_XOK 0x80
#define SCAN_PTOK 0x100
#define SCAN_EXPOK 0x200
#define SCAN_LONGER 0x400
typedef struct CharSet {
int exclude;
int nchars;
Tcl_UniChar *chars;
int nranges;
struct Range {
Tcl_UniChar start;
Tcl_UniChar end;
} *ranges;
} CharSet;
static char * BuildCharSet _ANSI_ARGS_((CharSet *cset, char *format));
static int CharInSet _ANSI_ARGS_((CharSet *cset, int ch));
static void ReleaseCharSet _ANSI_ARGS_((CharSet *cset));
static int ValidateFormat _ANSI_ARGS_((Tcl_Interp *interp, char *format,
int numVars, int *totalVars));
static char *
BuildCharSet(cset, format)
CharSet *cset;
char *format;
{
Tcl_UniChar ch, start;
int offset, nranges;
char *end;
memset(cset, 0, sizeof(CharSet));
offset = Tcl_UtfToUniChar(format, &ch);
if (ch == '^') {
cset->exclude = 1;
format += offset;
offset = Tcl_UtfToUniChar(format, &ch);
}
end = format + offset;
if (ch == ']') {
end += Tcl_UtfToUniChar(end, &ch);
}
nranges = 0;
while (ch != ']') {
if (ch == '-') {
nranges++;
}
end += Tcl_UtfToUniChar(end, &ch);
}
cset->chars = (Tcl_UniChar *) ckalloc(sizeof(Tcl_UniChar)
* (end - format - 1));
if (nranges > 0) {
cset->ranges = (struct Range *) ckalloc(sizeof(struct Range)*nranges);
} else {
cset->ranges = NULL;
}
cset->nchars = cset->nranges = 0;
format += Tcl_UtfToUniChar(format, &ch);
start = ch;
if (ch == ']' || ch == '-') {
cset->chars[cset->nchars++] = ch;
format += Tcl_UtfToUniChar(format, &ch);
}
while (ch != ']') {
if (*format == '-') {
start = ch;
} else if (ch == '-') {
if (*format == ']') {
cset->chars[cset->nchars++] = start;
cset->chars[cset->nchars++] = ch;
} else {
format += Tcl_UtfToUniChar(format, &ch);
if (start < ch) {
cset->ranges[cset->nranges].start = start;
cset->ranges[cset->nranges].end = ch;
} else {
cset->ranges[cset->nranges].start = ch;
cset->ranges[cset->nranges].end = start;
}
cset->nranges++;
}
} else {
cset->chars[cset->nchars++] = ch;
}
format += Tcl_UtfToUniChar(format, &ch);
}
return format;
}
static int
CharInSet(cset, c)
CharSet *cset;
int c;
{
Tcl_UniChar ch = (Tcl_UniChar) c;
int i, match = 0;
for (i = 0; i < cset->nchars; i++) {
if (cset->chars[i] == ch) {
match = 1;
break;
}
}
if (!match) {
for (i = 0; i < cset->nranges; i++) {
if ((cset->ranges[i].start <= ch)
&& (ch <= cset->ranges[i].end)) {
match = 1;
break;
}
}
}
return (cset->exclude ? !match : match);
}
static void
ReleaseCharSet(cset)
CharSet *cset;
{
ckfree((char *)cset->chars);
if (cset->ranges) {
ckfree((char *)cset->ranges);
}
}
static int
ValidateFormat(interp, format, numVars, totalSubs)
Tcl_Interp *interp;
char *format;
int numVars;
int *totalSubs;
{
#define STATIC_LIST_SIZE 16
int gotXpg, gotSequential, value, i, flags;
char *end;
Tcl_UniChar ch;
int staticAssign[STATIC_LIST_SIZE];
int *nassign = staticAssign;
int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
char buf[TCL_UTF_MAX+1];
if (numVars > nspace) {
nassign = (int*)ckalloc(sizeof(int) * numVars);
nspace = numVars;
}
for (i = 0; i < nspace; i++) {
nassign[i] = 0;
}
xpgSize = objIndex = gotXpg = gotSequential = 0;
while (*format != '\0') {
format += Tcl_UtfToUniChar(format, &ch);
flags = 0;
if (ch != '%') {
continue;
}
format += Tcl_UtfToUniChar(format, &ch);
if (ch == '%') {
continue;
}
if (ch == '*') {
flags |= SCAN_SUPPRESS;
format += Tcl_UtfToUniChar(format, &ch);
goto xpgCheckDone;
}
if ((ch < 0x80) && isdigit(UCHAR(ch))) {
value = strtoul(format-1, &end, 10);
if (*end != '$') {
goto notXpg;
}
format = end+1;
format += Tcl_UtfToUniChar(format, &ch);
gotXpg = 1;
if (gotSequential) {
goto mixedXPG;
}
objIndex = value - 1;
if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
goto badIndex;
} else if (numVars == 0) {
xpgSize = (xpgSize > value) ? xpgSize : value;
}
goto xpgCheckDone;
}
notXpg:
gotSequential = 1;
if (gotXpg) {
mixedXPG:
Tcl_SetResult(interp,
"cannot mix \"%\" and \"%n$\" conversion specifiers",
TCL_STATIC);
goto error;
}
xpgCheckDone:
if ((ch < 0x80) && isdigit(UCHAR(ch))) {
value = strtoul(format-1, &format, 10);
flags |= SCAN_WIDTH;
format += Tcl_UtfToUniChar(format, &ch);
}
switch (ch) {
case 'l':
case 'L':
#ifndef TCL_WIDE_INT_IS_LONG
flags |= SCAN_LONGER;
#endif
case 'h':
format += Tcl_UtfToUniChar(format, &ch);
}
if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
goto badIndex;
}
switch (ch) {
case 'c':
if (flags & SCAN_WIDTH) {
Tcl_SetResult(interp,
"field width may not be specified in %c conversion",
TCL_STATIC);
goto error;
}
case 'n':
case 's':
if (flags & SCAN_LONGER) {
invalidLonger:
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
"'l' modifier may not be specified in %", buf,
" conversion", NULL);
goto error;
}
case 'd':
case 'e':
case 'f':
case 'g':
case 'i':
case 'o':
case 'u':
case 'x':
break;
case '[':
if (flags & SCAN_LONGER) {
goto invalidLonger;
}
if (*format == '\0') {
goto badSet;
}
format += Tcl_UtfToUniChar(format, &ch);
if (ch == '^') {
if (*format == '\0') {
goto badSet;
}
format += Tcl_UtfToUniChar(format, &ch);
}
if (ch == ']') {
if (*format == '\0') {
goto badSet;
}
format += Tcl_UtfToUniChar(format, &ch);
}
while (ch != ']') {
if (*format == '\0') {
goto badSet;
}
format += Tcl_UtfToUniChar(format, &ch);
}
break;
badSet:
Tcl_SetResult(interp, "unmatched [ in format string",
TCL_STATIC);
goto error;
default:
{
char buf[TCL_UTF_MAX+1];
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
"bad scan conversion character \"", buf, "\"", NULL);
goto error;
}
}
if (!(flags & SCAN_SUPPRESS)) {
if (objIndex >= nspace) {
value = nspace;
if (xpgSize) {
nspace = xpgSize;
} else {
nspace += STATIC_LIST_SIZE;
}
if (nassign == staticAssign) {
nassign = (void *)ckalloc(nspace * sizeof(int));
for (i = 0; i < STATIC_LIST_SIZE; ++i) {
nassign[i] = staticAssign[i];
}
} else {
nassign = (void *)ckrealloc((void *)nassign,
nspace * sizeof(int));
}
for (i = value; i < nspace; i++) {
nassign[i] = 0;
}
}
nassign[objIndex]++;
objIndex++;
}
}
if (numVars == 0) {
if (xpgSize) {
numVars = xpgSize;
} else {
numVars = objIndex;
}
}
if (totalSubs) {
*totalSubs = numVars;
}
for (i = 0; i < numVars; i++) {
if (nassign[i] > 1) {
Tcl_SetResult(interp, "variable is assigned by multiple \"%n$\" conversion specifiers", TCL_STATIC);
goto error;
} else if (!xpgSize && (nassign[i] == 0)) {
Tcl_SetResult(interp, "variable is not assigned by any conversion specifiers", TCL_STATIC);
goto error;
}
}
if (nassign != staticAssign) {
ckfree((char *)nassign);
}
return TCL_OK;
badIndex:
if (gotXpg) {
Tcl_SetResult(interp, "\"%n$\" argument index out of range",
TCL_STATIC);
} else {
Tcl_SetResult(interp,
"different numbers of variable names and field specifiers",
TCL_STATIC);
}
error:
if (nassign != staticAssign) {
ckfree((char *)nassign);
}
return TCL_ERROR;
#undef STATIC_LIST_SIZE
}
int
Tcl_ScanObjCmd(dummy, interp, objc, objv)
ClientData dummy;
Tcl_Interp *interp;
int objc;
Tcl_Obj *CONST objv[];
{
char *format;
int numVars, nconversions, totalVars = -1;
int objIndex, offset, i, result, code;
long value;
char *string, *end, *baseString;
char op = 0;
int base = 0;
int underflow = 0;
size_t width;
long (*fn)() = NULL;
#ifndef TCL_WIDE_INT_IS_LONG
Tcl_WideInt (*lfn)() = NULL;
Tcl_WideInt wideValue;
#endif
Tcl_UniChar ch, sch;
Tcl_Obj **objs = NULL, *objPtr = NULL;
int flags;
char buf[513];
if (objc < 3) {
Tcl_WrongNumArgs(interp, 1, objv,
"string format ?varName varName ...?");
return TCL_ERROR;
}
format = Tcl_GetStringFromObj(objv[2], NULL);
numVars = objc-3;
if (ValidateFormat(interp, format, numVars, &totalVars) == TCL_ERROR) {
return TCL_ERROR;
}
if (totalVars > 0) {
objs = (Tcl_Obj **) ckalloc(sizeof(Tcl_Obj*) * totalVars);
for (i = 0; i < totalVars; i++) {
objs[i] = NULL;
}
}
string = Tcl_GetStringFromObj(objv[1], NULL);
baseString = string;
objIndex = 0;
nconversions = 0;
while (*format != '\0') {
format += Tcl_UtfToUniChar(format, &ch);
flags = 0;
if (Tcl_UniCharIsSpace(ch)) {
offset = Tcl_UtfToUniChar(string, &sch);
while (Tcl_UniCharIsSpace(sch)) {
if (*string == '\0') {
goto done;
}
string += offset;
offset = Tcl_UtfToUniChar(string, &sch);
}
continue;
}
if (ch != '%') {
literal:
if (*string == '\0') {
underflow = 1;
goto done;
}
string += Tcl_UtfToUniChar(string, &sch);
if (ch != sch) {
goto done;
}
continue;
}
format += Tcl_UtfToUniChar(format, &ch);
if (ch == '%') {
goto literal;
}
if (ch == '*') {
flags |= SCAN_SUPPRESS;
format += Tcl_UtfToUniChar(format, &ch);
} else if ((ch < 0x80) && isdigit(UCHAR(ch))) {
value = strtoul(format-1, &end, 10);
if (*end == '$') {
format = end+1;
format += Tcl_UtfToUniChar(format, &ch);
objIndex = (int) value - 1;
}
}
if ((ch < 0x80) && isdigit(UCHAR(ch))) {
width = strtoul(format-1, &format, 10);
format += Tcl_UtfToUniChar(format, &ch);
} else {
width = 0;
}
switch (ch) {
case 'l':
case 'L':
#ifndef TCL_WIDE_INT_IS_LONG
flags |= SCAN_LONGER;
#endif
case 'h':
format += Tcl_UtfToUniChar(format, &ch);
}
switch (ch) {
case 'n':
if (!(flags & SCAN_SUPPRESS)) {
objPtr = Tcl_NewIntObj(string - baseString);
Tcl_IncrRefCount(objPtr);
objs[objIndex++] = objPtr;
}
nconversions++;
continue;
case 'd':
op = 'i';
base = 10;
fn = (long (*)())strtol;
#ifndef TCL_WIDE_INT_IS_LONG
lfn = (Tcl_WideInt (*)())strtoll;
#endif
break;
case 'i':
op = 'i';
base = 0;
fn = (long (*)())strtol;
#ifndef TCL_WIDE_INT_IS_LONG
lfn = (Tcl_WideInt (*)())strtoll;
#endif
break;
case 'o':
op = 'i';
base = 8;
fn = (long (*)())strtoul;
#ifndef TCL_WIDE_INT_IS_LONG
lfn = (Tcl_WideInt (*)())strtoull;
#endif
break;
case 'x':
op = 'i';
base = 16;
fn = (long (*)())strtoul;
#ifndef TCL_WIDE_INT_IS_LONG
lfn = (Tcl_WideInt (*)())strtoull;
#endif
break;
case 'u':
op = 'i';
base = 10;
flags |= SCAN_UNSIGNED;
fn = (long (*)())strtoul;
#ifndef TCL_WIDE_INT_IS_LONG
lfn = (Tcl_WideInt (*)())strtoull;
#endif
break;
case 'f':
case 'e':
case 'g':
op = 'f';
break;
case 's':
op = 's';
break;
case 'c':
op = 'c';
flags |= SCAN_NOSKIP;
break;
case '[':
op = '[';
flags |= SCAN_NOSKIP;
break;
}
if (*string == '\0') {
underflow = 1;
goto done;
}
if (!(flags & SCAN_NOSKIP)) {
while (*string != '\0') {
offset = Tcl_UtfToUniChar(string, &sch);
if (!Tcl_UniCharIsSpace(sch)) {
break;
}
string += offset;
}
if (*string == '\0') {
underflow = 1;
goto done;
}
}
switch (op) {
case 's':
if (width == 0) {
width = (size_t) ~0;
}
end = string;
while (*end != '\0') {
offset = Tcl_UtfToUniChar(end, &sch);
if (Tcl_UniCharIsSpace(sch)) {
break;
}
end += offset;
if (--width == 0) {
break;
}
}
if (!(flags & SCAN_SUPPRESS)) {
objPtr = Tcl_NewStringObj(string, end-string);
Tcl_IncrRefCount(objPtr);
objs[objIndex++] = objPtr;
}
string = end;
break;
case '[': {
CharSet cset;
if (width == 0) {
width = (size_t) ~0;
}
end = string;
format = BuildCharSet(&cset, format);
while (*end != '\0') {
offset = Tcl_UtfToUniChar(end, &sch);
if (!CharInSet(&cset, (int)sch)) {
break;
}
end += offset;
if (--width == 0) {
break;
}
}
ReleaseCharSet(&cset);
if (string == end) {
goto done;
}
if (!(flags & SCAN_SUPPRESS)) {
objPtr = Tcl_NewStringObj(string, end-string);
Tcl_IncrRefCount(objPtr);
objs[objIndex++] = objPtr;
}
string = end;
break;
}
case 'c':
string += Tcl_UtfToUniChar(string, &sch);
if (!(flags & SCAN_SUPPRESS)) {
objPtr = Tcl_NewIntObj((int)sch);
Tcl_IncrRefCount(objPtr);
objs[objIndex++] = objPtr;
}
break;
case 'i':
if ((width == 0) || (width > sizeof(buf) - 1)) {
width = sizeof(buf) - 1;
}
flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
for (end = buf; width > 0; width--) {
switch (*string) {
case '0':
if (base == 0) {
base = 8;
flags |= SCAN_XOK;
}
if (base == 16) {
flags |= SCAN_XOK;
}
if (flags & SCAN_NOZERO) {
flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS
| SCAN_NOZERO);
} else {
flags &= ~(SCAN_SIGNOK | SCAN_XOK
| SCAN_NODIGITS);
}
goto addToInt;
case '1': case '2': case '3': case '4':
case '5': case '6': case '7':
if (base == 0) {
base = 10;
}
flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
goto addToInt;
case '8': case '9':
if (base == 0) {
base = 10;
}
if (base <= 8) {
break;
}
flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
goto addToInt;
case 'A': case 'B': case 'C':
case 'D': case 'E': case 'F':
case 'a': case 'b': case 'c':
case 'd': case 'e': case 'f':
if (base <= 10) {
break;
}
flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
goto addToInt;
case '+': case '-':
if (flags & SCAN_SIGNOK) {
flags &= ~SCAN_SIGNOK;
goto addToInt;
}
break;
case 'x': case 'X':
if ((flags & SCAN_XOK) && (end == buf+1)) {
base = 16;
flags &= ~SCAN_XOK;
goto addToInt;
}
break;
}
break;
addToInt:
*end++ = *string++;
if (*string == '\0') {
break;
}
}
if (flags & SCAN_NODIGITS) {
if (*string == '\0') {
underflow = 1;
}
goto done;
} else if (end[-1] == 'x' || end[-1] == 'X') {
end--;
string--;
}
if (!(flags & SCAN_SUPPRESS)) {
*end = '\0';
#ifndef TCL_WIDE_INT_IS_LONG
if (flags & SCAN_LONGER) {
wideValue = (Tcl_WideInt) (*lfn)(buf, NULL, base);
if ((flags & SCAN_UNSIGNED) && (wideValue < 0)) {
sprintf(buf, "%" TCL_LL_MODIFIER "u",
(Tcl_WideUInt)wideValue);
objPtr = Tcl_NewStringObj(buf, -1);
} else {
objPtr = Tcl_NewWideIntObj(wideValue);
}
} else {
#endif
value = (long) (*fn)(buf, NULL, base);
if ((flags & SCAN_UNSIGNED) && (value < 0)) {
sprintf(buf, "%lu", value);
objPtr = Tcl_NewStringObj(buf, -1);
} else {
if ((unsigned long) value > UINT_MAX) {
objPtr = Tcl_NewLongObj(value);
} else {
objPtr = Tcl_NewIntObj(value);
}
}
#ifndef TCL_WIDE_INT_IS_LONG
}
#endif
Tcl_IncrRefCount(objPtr);
objs[objIndex++] = objPtr;
}
break;
case 'f':
if ((width == 0) || (width > sizeof(buf) - 1)) {
width = sizeof(buf) - 1;
}
flags &= ~SCAN_LONGER;
flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
for (end = buf; width > 0; width--) {
switch (*string) {
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
case '8': case '9':
flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
goto addToFloat;
case '+': case '-':
if (flags & SCAN_SIGNOK) {
flags &= ~SCAN_SIGNOK;
goto addToFloat;
}
break;
case '.':
if (flags & SCAN_PTOK) {
flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
goto addToFloat;
}
break;
case 'e': case 'E':
if ((flags & (SCAN_NODIGITS | SCAN_EXPOK))
== SCAN_EXPOK) {
flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
| SCAN_SIGNOK | SCAN_NODIGITS;
goto addToFloat;
}
break;
}
break;
addToFloat:
*end++ = *string++;
if (*string == '\0') {
break;
}
}
if (flags & SCAN_NODIGITS) {
if (flags & SCAN_EXPOK) {
if (*string == '\0') {
underflow = 1;
}
goto done;
}
end--;
string--;
if (*end != 'e' && *end != 'E') {
end--;
string--;
}
}
if (!(flags & SCAN_SUPPRESS)) {
double dvalue;
*end = '\0';
dvalue = strtod(buf, NULL);
objPtr = Tcl_NewDoubleObj(dvalue);
Tcl_IncrRefCount(objPtr);
objs[objIndex++] = objPtr;
}
break;
}
nconversions++;
}
done:
result = 0;
code = TCL_OK;
if (numVars) {
for (i = 0; i < totalVars; i++) {
if (objs[i] != NULL) {
result++;
if (Tcl_ObjSetVar2(interp, objv[i+3], NULL,
objs[i], 0) == NULL) {
Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
"couldn't set variable \"",
Tcl_GetString(objv[i+3]), "\"", (char *) NULL);
code = TCL_ERROR;
}
Tcl_DecrRefCount(objs[i]);
}
}
} else {
objPtr = Tcl_NewObj();
for (i = 0; i < totalVars; i++) {
if (objs[i] != NULL) {
Tcl_ListObjAppendElement(NULL, objPtr, objs[i]);
Tcl_DecrRefCount(objs[i]);
} else {
Tcl_ListObjAppendElement(NULL, objPtr, Tcl_NewObj());
}
}
}
if (objs != NULL) {
ckfree((char*) objs);
}
if (code == TCL_OK) {
if (underflow && (nconversions == 0)) {
if (numVars) {
objPtr = Tcl_NewIntObj(-1);
} else {
if (objPtr) {
Tcl_SetListObj(objPtr, 0, NULL);
} else {
objPtr = Tcl_NewObj();
}
}
} else if (numVars) {
objPtr = Tcl_NewIntObj(result);
}
Tcl_SetObjResult(interp, objPtr);
}
return code;
}