// RegExpPrototype.cpp [plain text]
#include "config.h"
#include "RegExpPrototype.h"
#include "ArrayPrototype.h"
#include "BuiltinNames.h"
#include "Error.h"
#include "JSArray.h"
#include "JSCBuiltins.h"
#include "JSCJSValue.h"
#include "JSFunction.h"
#include "JSObject.h"
#include "JSString.h"
#include "JSStringBuilder.h"
#include "Lexer.h"
#include "ObjectPrototype.h"
#include "JSCInlines.h"
#include "RegExpObject.h"
#include "RegExp.h"
#include "RegExpCache.h"
#include "RegExpConstructor.h"
#include "RegExpMatchesArray.h"
#include "StringObject.h"
#include "StringRecursionChecker.h"
namespace JSC {
// Forward declarations of the host functions and accessor getters that are defined later in
// this file and installed on the prototype by RegExpPrototype::finishCreation().
static EncodedJSValue JSC_HOST_CALL regExpProtoFuncExec(ExecState*);
static EncodedJSValue JSC_HOST_CALL regExpProtoFuncCompile(ExecState*);
static EncodedJSValue JSC_HOST_CALL regExpProtoFuncToString(ExecState*);
static EncodedJSValue JSC_HOST_CALL regExpProtoGetterGlobal(ExecState*);
static EncodedJSValue JSC_HOST_CALL regExpProtoGetterIgnoreCase(ExecState*);
static EncodedJSValue JSC_HOST_CALL regExpProtoGetterMultiline(ExecState*);
static EncodedJSValue JSC_HOST_CALL regExpProtoGetterSticky(ExecState*);
static EncodedJSValue JSC_HOST_CALL regExpProtoGetterUnicode(ExecState*);
static EncodedJSValue JSC_HOST_CALL regExpProtoGetterSource(ExecState*);
static EncodedJSValue JSC_HOST_CALL regExpProtoGetterFlags(ExecState*);
// Class metadata for RegExpPrototype: the class name string is "Object", the parent ClassInfo
// is Base::s_info, and the method table is generated by CREATE_METHOD_TABLE.
// NOTE(review): the meaning of the third initializer (0) is not visible here -- confirm against ClassInfo.
const ClassInfo RegExpPrototype::s_info = { "Object", &Base::s_info, 0, CREATE_METHOD_TABLE(RegExpPrototype) };
// Constructs the prototype object by forwarding the VM and Structure to JSNonFinalObject.
// No properties are installed here; that happens in finishCreation().
RegExpPrototype::RegExpPrototype(VM& vm, Structure* structure)
: JSNonFinalObject(vm, structure)
{
}
// Second-phase initialization: runs the base-class finishCreation() and then installs every
// property of the prototype.
// NOTE(review): globalObject is not referenced directly in this body; presumably the JSC_*
// registration macros consume it -- confirm against the macro definitions.
void RegExpPrototype::finishCreation(VM& vm, JSGlobalObject* globalObject)
{
Base::finishCreation(vm);
ASSERT(inherits(info()));
// Native (C++) functions. The trailing number is presumably the function's "length"
// (compile: 2, exec: 1, toString: 0) -- confirm against the macro. exec is additionally
// tagged with RegExpExecIntrinsic.
JSC_NATIVE_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->compile, regExpProtoFuncCompile, DontEnum, 2);
JSC_NATIVE_INTRINSIC_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->exec, regExpProtoFuncExec, DontEnum, 1, RegExpExecIntrinsic);
JSC_NATIVE_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->toString, regExpProtoFuncToString, DontEnum, 0);
// Accessor properties backed by the native getters defined in this file.
JSC_NATIVE_GETTER(vm.propertyNames->global, regExpProtoGetterGlobal, DontEnum | Accessor);
JSC_NATIVE_GETTER(vm.propertyNames->ignoreCase, regExpProtoGetterIgnoreCase, DontEnum | Accessor);
JSC_NATIVE_GETTER(vm.propertyNames->multiline, regExpProtoGetterMultiline, DontEnum | Accessor);
JSC_NATIVE_GETTER(vm.propertyNames->sticky, regExpProtoGetterSticky, DontEnum | Accessor);
JSC_NATIVE_GETTER(vm.propertyNames->unicode, regExpProtoGetterUnicode, DontEnum | Accessor);
JSC_NATIVE_GETTER(vm.propertyNames->source, regExpProtoGetterSource, DontEnum | Accessor);
JSC_NATIVE_GETTER(vm.propertyNames->flags, regExpProtoGetterFlags, DontEnum | Accessor);
// Functions supplied by builtin code generators: the match/replace/search/split symbol
// methods and test.
JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->matchSymbol, regExpPrototypeMatchCodeGenerator, DontEnum);
JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->replaceSymbol, regExpPrototypeReplaceCodeGenerator, DontEnum);
JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->searchSymbol, regExpPrototypeSearchCodeGenerator, DontEnum);
JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->splitSymbol, regExpPrototypeSplitCodeGenerator, DontEnum);
JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->test, regExpPrototypeTestCodeGenerator, DontEnum);
// Keep a RegExp with an empty pattern and no flags in m_emptyRegExp.
m_emptyRegExp.set(vm, this, RegExp::create(vm, "", NoFlags));
}
// GC visiting hook: lets the base class visit its children first, then reports the
// m_emptyRegExp reference held by this prototype to the visitor.
void RegExpPrototype::visitChildren(JSCell* cell, SlotVisitor& visitor)
{
    auto* prototype = jsCast<RegExpPrototype*>(cell);
    ASSERT_GC_OBJECT_INHERITS(prototype, info());

    Base::visitChildren(prototype, visitor);
    visitor.append(&prototype->m_emptyRegExp);
}
// Host function that runs RegExpObject::test() on the receiver.
// - Throws a TypeError when `this` does not inherit from RegExpObject.
// - Converts argument 0 with toStringOrNull(); a null result ends the call with undefined
//   (NOTE(review): presumably an exception is pending in that case -- confirm).
// - Otherwise returns the boolean result of the test.
EncodedJSValue JSC_HOST_CALL regExpProtoFuncTestFast(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (!receiver.inherits(RegExpObject::info()))
        return throwVMTypeError(exec);

    JSString* subject = exec->argument(0).toStringOrNull(exec);
    if (!subject)
        return JSValue::encode(jsUndefined());

    auto* regExpObject = asRegExpObject(receiver);
    bool matched = regExpObject->test(exec, exec->lexicalGlobalObject(), subject);
    return JSValue::encode(jsBoolean(matched));
}
// Host function registered as `exec` on the prototype.
// - Throws a TypeError (with an explanatory message) when `this` does not inherit from RegExpObject.
// - Converts argument 0 with toStringOrNull(); a null result ends the call with undefined
//   (NOTE(review): presumably an exception is pending in that case -- confirm).
// - Otherwise returns whatever RegExpObject::exec() produces for that string.
EncodedJSValue JSC_HOST_CALL regExpProtoFuncExec(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (!receiver.inherits(RegExpObject::info()))
        return throwVMTypeError(exec, "Builtin RegExp exec can only be called on a RegExp object");

    JSString* subject = exec->argument(0).toStringOrNull(exec);
    if (!subject)
        return JSValue::encode(jsUndefined());

    auto* regExpObject = asRegExpObject(receiver);
    return JSValue::encode(regExpObject->exec(exec, exec->lexicalGlobalObject(), subject));
}
// Host function that matches the receiver against argument 0.
// - Throws a TypeError when `this` does not inherit from RegExpObject.
// - Converts argument 0 with toStringOrNull(); a null result ends the call with undefined
//   (NOTE(review): presumably an exception is pending in that case -- confirm).
// - A RegExp whose global() is set goes through matchGlobal(); any other goes through exec().
EncodedJSValue JSC_HOST_CALL regExpProtoFuncMatchFast(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (!receiver.inherits(RegExpObject::info()))
        return throwVMTypeError(exec);

    JSString* subject = exec->argument(0).toStringOrNull(exec);
    if (!subject)
        return JSValue::encode(jsUndefined());

    auto* regExpObject = asRegExpObject(receiver);
    auto* globalObject = exec->lexicalGlobalObject();
    if (regExpObject->regExp()->global())
        return JSValue::encode(regExpObject->matchGlobal(exec, globalObject, subject));
    return JSValue::encode(regExpObject->exec(exec, globalObject, subject));
}
// Host function registered as `compile`: replaces the RegExp held by the receiver.
// - Throws a TypeError when `this` does not inherit from RegExpObject.
// - If argument 0 is itself a RegExpObject, its RegExp is reused; supplying a non-undefined
//   flags argument in that case is a TypeError.
// - Otherwise argument 0 (or the empty string when no arguments were passed) is the pattern
//   and argument 1, when not undefined, is parsed as the flags; invalid flags are a SyntaxError.
// - An invalid resulting RegExp is a SyntaxError carrying the RegExp's own error message.
// On success the receiver's RegExp is swapped, lastIndex is reset to 0, and the receiver is returned.
EncodedJSValue JSC_HOST_CALL regExpProtoFuncCompile(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (!receiver.inherits(RegExpObject::info()))
        return throwVMTypeError(exec);

    JSValue patternArgument = exec->argument(0);
    JSValue flagsArgument = exec->argument(1);
    RegExp* compiled;

    if (!patternArgument.inherits(RegExpObject::info())) {
        // Pattern given as a (convertible-to-)string value.
        String pattern = exec->argumentCount() ? patternArgument.toString(exec)->value(exec) : emptyString();
        if (exec->hadException())
            return JSValue::encode(jsUndefined());

        RegExpFlags flags = NoFlags;
        if (!flagsArgument.isUndefined()) {
            flags = regExpFlags(flagsArgument.toString(exec)->value(exec));
            if (exec->hadException())
                return JSValue::encode(jsUndefined());
            if (flags == InvalidFlags)
                return throwVMError(exec, createSyntaxError(exec, ASCIILiteral("Invalid flags supplied to RegExp constructor.")));
        }
        compiled = RegExp::create(exec->vm(), pattern, flags);
    } else {
        // Pattern given as another RegExp object: flags may not be overridden.
        if (!flagsArgument.isUndefined())
            return throwVMTypeError(exec, ASCIILiteral("Cannot supply flags when constructing one RegExp from another."));
        compiled = asRegExpObject(patternArgument)->regExp();
    }

    if (!compiled->isValid())
        return throwVMError(exec, createSyntaxError(exec, compiled->errorMessage()));

    auto* target = asRegExpObject(receiver);
    target->setRegExp(exec->vm(), compiled);
    target->setLastIndex(exec, 0);
    return JSValue::encode(receiver);
}
// Fixed-size buffer for a flags string: up to five flag characters plus a terminating 0.
using FlagsString = std::array<char, 5 + 1>;

// Builds the flags string for `regexp` by reading its global, ignoreCase, multiline, unicode
// and sticky properties (in that order) and appending 'g', 'i', 'm', 'u', 'y' for each value
// that converts to true. If any property read leaves an exception on the VM, the buffer is
// returned immediately with only its first element set to 0; callers must check for the exception.
static inline FlagsString flagsString(ExecState* exec, JSObject* regexp)
{
    FlagsString buffer;
    buffer[0] = 0;

    VM& vm = exec->vm();

    // All five properties are read before any of them is converted to a boolean.
    JSValue global = regexp->get(exec, exec->propertyNames().global);
    if (vm.exception())
        return buffer;
    JSValue ignoreCase = regexp->get(exec, exec->propertyNames().ignoreCase);
    if (vm.exception())
        return buffer;
    JSValue multiline = regexp->get(exec, exec->propertyNames().multiline);
    if (vm.exception())
        return buffer;
    JSValue unicode = regexp->get(exec, exec->propertyNames().unicode);
    if (vm.exception())
        return buffer;
    JSValue sticky = regexp->get(exec, exec->propertyNames().sticky);
    if (vm.exception())
        return buffer;

    unsigned length = 0;
    auto appendIfSet = [&] (JSValue flagValue, char flagCharacter) {
        if (flagValue.toBoolean(exec))
            buffer[length++] = flagCharacter;
    };
    appendIfSet(global, 'g');
    appendIfSet(ignoreCase, 'i');
    appendIfSet(multiline, 'm');
    appendIfSet(unicode, 'u');
    appendIfSet(sticky, 'y');

    ASSERT(length < buffer.size());
    buffer[length] = 0;
    return buffer;
}
// Host function registered as `toString`: produces "/" + source + "/" + flags.
// - Throws a TypeError when `this` is not an object.
// - Uses StringRecursionChecker and returns its early-return value when it provides one.
// - Reads the `source` property and stringifies it, then reads `flags` and stringifies it,
//   returning undefined as soon as any of those four steps leaves an exception on the VM.
EncodedJSValue JSC_HOST_CALL regExpProtoFuncToString(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (!receiver.isObject())
        return throwVMTypeError(exec);

    JSObject* object = asObject(receiver);
    StringRecursionChecker recursionGuard(exec, object);
    if (JSValue shortCircuit = recursionGuard.earlyReturnValue())
        return JSValue::encode(shortCircuit);

    VM& vm = exec->vm();

    JSValue sourceProperty = object->get(exec, vm.propertyNames->source);
    if (vm.exception())
        return JSValue::encode(jsUndefined());
    String sourceText = sourceProperty.toString(exec)->value(exec);
    if (vm.exception())
        return JSValue::encode(jsUndefined());

    JSValue flagsProperty = object->get(exec, vm.propertyNames->flags);
    if (vm.exception())
        return JSValue::encode(jsUndefined());
    String flagsText = flagsProperty.toString(exec)->value(exec);
    if (vm.exception())
        return JSValue::encode(jsUndefined());

    return JSValue::encode(jsMakeNontrivialString(exec, '/', sourceText, '/', flagsText));
}
// Getter for the `global` accessor property.
// Returns the boolean global() state of the receiver's RegExp. When the receiver is not a
// RegExpObject: returns undefined if it inherits from RegExpPrototype, otherwise throws a TypeError.
EncodedJSValue JSC_HOST_CALL regExpProtoGetterGlobal(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (UNLIKELY(!receiver.inherits(RegExpObject::info()))) {
        if (!receiver.inherits(RegExpPrototype::info()))
            return throwVMTypeError(exec, ASCIILiteral("The RegExp.prototype.global getter can only be called on a RegExp object"));
        return JSValue::encode(jsUndefined());
    }

    bool isSet = asRegExpObject(receiver)->regExp()->global();
    return JSValue::encode(jsBoolean(isSet));
}
// Getter for the `ignoreCase` accessor property.
// Returns the boolean ignoreCase() state of the receiver's RegExp. When the receiver is not a
// RegExpObject: returns undefined if it inherits from RegExpPrototype, otherwise throws a TypeError.
EncodedJSValue JSC_HOST_CALL regExpProtoGetterIgnoreCase(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (UNLIKELY(!receiver.inherits(RegExpObject::info()))) {
        if (!receiver.inherits(RegExpPrototype::info()))
            return throwVMTypeError(exec, ASCIILiteral("The RegExp.prototype.ignoreCase getter can only be called on a RegExp object"));
        return JSValue::encode(jsUndefined());
    }

    bool isSet = asRegExpObject(receiver)->regExp()->ignoreCase();
    return JSValue::encode(jsBoolean(isSet));
}
// Getter for the `multiline` accessor property.
// Returns the boolean multiline() state of the receiver's RegExp. When the receiver is not a
// RegExpObject: returns undefined if it inherits from RegExpPrototype, otherwise throws a TypeError.
EncodedJSValue JSC_HOST_CALL regExpProtoGetterMultiline(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (UNLIKELY(!receiver.inherits(RegExpObject::info()))) {
        if (!receiver.inherits(RegExpPrototype::info()))
            return throwVMTypeError(exec, ASCIILiteral("The RegExp.prototype.multiline getter can only be called on a RegExp object"));
        return JSValue::encode(jsUndefined());
    }

    bool isSet = asRegExpObject(receiver)->regExp()->multiline();
    return JSValue::encode(jsBoolean(isSet));
}
// Getter for the `sticky` accessor property.
// Returns the boolean sticky() state of the receiver's RegExp. When the receiver is not a
// RegExpObject: returns undefined if it inherits from RegExpPrototype, otherwise throws a TypeError.
EncodedJSValue JSC_HOST_CALL regExpProtoGetterSticky(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (UNLIKELY(!receiver.inherits(RegExpObject::info()))) {
        if (!receiver.inherits(RegExpPrototype::info()))
            return throwVMTypeError(exec, ASCIILiteral("The RegExp.prototype.sticky getter can only be called on a RegExp object"));
        return JSValue::encode(jsUndefined());
    }

    bool isSet = asRegExpObject(receiver)->regExp()->sticky();
    return JSValue::encode(jsBoolean(isSet));
}
// Getter for the `unicode` accessor property.
// Returns the boolean unicode() state of the receiver's RegExp. When the receiver is not a
// RegExpObject: returns undefined if it inherits from RegExpPrototype, otherwise throws a TypeError.
EncodedJSValue JSC_HOST_CALL regExpProtoGetterUnicode(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (UNLIKELY(!receiver.inherits(RegExpObject::info()))) {
        if (!receiver.inherits(RegExpPrototype::info()))
            return throwVMTypeError(exec, ASCIILiteral("The RegExp.prototype.unicode getter can only be called on a RegExp object"));
        return JSValue::encode(jsUndefined());
    }

    bool isSet = asRegExpObject(receiver)->regExp()->unicode();
    return JSValue::encode(jsBoolean(isSet));
}
// Getter for the `flags` accessor property.
// Works on any object receiver (a non-object receiver is a TypeError): the flags string is
// assembled by flagsString() from the receiver's own flag properties. Returns undefined when
// building the string left an exception pending.
EncodedJSValue JSC_HOST_CALL regExpProtoGetterFlags(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (UNLIKELY(!receiver.isObject()))
        return throwVMTypeError(exec, ASCIILiteral("The RegExp.prototype.flags getter can only be called on an object"));

    auto flagCharacters = flagsString(exec, asObject(receiver));
    return exec->hadException()
        ? JSValue::encode(jsUndefined())
        : JSValue::encode(jsString(exec, flagCharacters.data()));
}
// Appends the escape letter(s) that stand for a line terminator character. The leading
// backslash is NOT written here; only what follows it ("n", "r", "u2028" or "u2029").
template <typename CharacterType>
static inline void appendLineTerminatorEscape(StringBuilder&, CharacterType);

// 8-bit characters: '\n' becomes "n"; anything else is written as "r".
template <>
inline void appendLineTerminatorEscape<LChar>(StringBuilder& builder, LChar lineTerminator)
{
    builder.append(lineTerminator == '\n' ? 'n' : 'r');
}

// 16-bit characters: '\n' -> "n", '\r' -> "r", U+2028 -> "u2028"; anything else is written as "u2029".
template <>
inline void appendLineTerminatorEscape<UChar>(StringBuilder& builder, UChar lineTerminator)
{
    switch (lineTerminator) {
    case '\n':
        builder.append('n');
        break;
    case '\r':
        builder.append('r');
        break;
    case 0x2028:
        builder.appendLiteral("u2028");
        break;
    default:
        builder.appendLiteral("u2029");
        break;
    }
}
// Produces the value of the `source` property for a pattern given as raw characters.
//   exec       - execution state used to allocate the resulting JS string.
//   pattern    - the pattern as a String; returned as-is when nothing needs escaping.
//   characters - the pattern's character data (LChar or UChar), `length` entries long.
// Behaviour:
//   - An empty pattern yields "(?:)".
//   - A '/' that is neither preceded by an unescaped backslash nor inside [...] is prefixed
//     with a backslash.
//   - A line terminator is replaced by its escape letters (see appendLineTerminatorEscape),
//     prefixed with a backslash unless the previous character already was an unescaped backslash.
// A first pass only detects whether any escaping is needed so the common case can return the
// original pattern without building a new string.
template <typename CharacterType>
static inline JSValue regExpProtoGetterSourceInternal(ExecState* exec, const String& pattern, const CharacterType* characters, unsigned length)
{
    if (!length)
        return jsNontrivialString(exec, ASCIILiteral("(?:)"));

    bool afterBackslash = false; // The previous character was a backslash that itself was not escaped.
    bool inCharacterClass = false; // Currently between an unescaped '[' and its closing ']'.
    bool needsEscaping = false;

    // Pass 1: stop at the first character that would have to be escaped.
    for (unsigned index = 0; index < length && !needsEscaping; ++index) {
        CharacterType current = characters[index];
        if (!afterBackslash) {
            if (inCharacterClass)
                inCharacterClass = current != ']';
            else if (current == '/')
                needsEscaping = true;
            else if (current == '[')
                inCharacterClass = true;
        }

        if (Lexer<CharacterType>::isLineTerminator(current))
            needsEscaping = true;

        afterBackslash = !afterBackslash && current == '\\';
    }

    if (!needsEscaping)
        return jsString(exec, pattern);

    // Pass 2: rebuild the pattern from the start, inserting the escapes.
    afterBackslash = false;
    inCharacterClass = false;
    StringBuilder escapedSource;
    for (unsigned index = 0; index < length; ++index) {
        CharacterType current = characters[index];
        if (!afterBackslash) {
            if (inCharacterClass)
                inCharacterClass = current != ']';
            else if (current == '/')
                escapedSource.append('\\');
            else if (current == '[')
                inCharacterClass = true;
        }

        if (!Lexer<CharacterType>::isLineTerminator(current))
            escapedSource.append(current);
        else {
            if (!afterBackslash)
                escapedSource.append('\\');
            appendLineTerminatorEscape<CharacterType>(escapedSource, current);
        }

        afterBackslash = !afterBackslash && current == '\\';
    }

    return jsString(exec, escapedSource.toString());
}
// Getter for the `source` accessor property.
// For a RegExpObject receiver, returns the (escaped) pattern computed by
// regExpProtoGetterSourceInternal(), dispatching on whether the pattern is stored as 8-bit
// or 16-bit characters. When the receiver is not a RegExpObject: returns "(?:)" if it
// inherits from RegExpPrototype, otherwise throws a TypeError.
EncodedJSValue JSC_HOST_CALL regExpProtoGetterSource(ExecState* exec)
{
    JSValue receiver = exec->thisValue();
    if (UNLIKELY(!receiver.inherits(RegExpObject::info()))) {
        if (!receiver.inherits(RegExpPrototype::info()))
            return throwVMTypeError(exec, ASCIILiteral("The RegExp.prototype.source getter can only be called on a RegExp object"));
        return JSValue::encode(jsString(exec, ASCIILiteral("(?:)")));
    }

    String pattern = asRegExpObject(receiver)->regExp()->pattern();
    unsigned patternLength = pattern.length();
    JSValue source = pattern.is8Bit()
        ? regExpProtoGetterSourceInternal(exec, pattern, pattern.characters8(), patternLength)
        : regExpProtoGetterSourceInternal(exec, pattern, pattern.characters16(), patternLength);
    return JSValue::encode(source);
}
// Host function that returns the start index of the first match of the receiver's RegExp in
// argument 0 (searching from offset 0), or -1 when there is no match.
// The receiver is cast without a type check and argument 0 is read with uncheckedArgument().
// NOTE(review): presumably the caller guarantees a RegExpObject receiver and at least one
// argument -- confirm against the call sites.
// Returns undefined if converting the argument to a string left an exception pending.
EncodedJSValue JSC_HOST_CALL regExpProtoFuncSearchFast(ExecState* exec)
{
    VM& vm = exec->vm();
    RegExp* regExp = asRegExpObject(exec->thisValue())->regExp();

    JSString* subject = exec->uncheckedArgument(0).toString(exec);
    String subjectText = subject->value(exec);
    if (vm.exception())
        return JSValue::encode(jsUndefined());

    // The match is performed through the RegExp constructor of the lexical global object.
    RegExpConstructor* constructor = exec->lexicalGlobalObject()->regExpConstructor();
    MatchResult match = constructor->performMatch(vm, regExp, subject, subjectText, 0);
    if (!match)
        return JSValue::encode(jsNumber(-1));
    return JSValue::encode(jsNumber(match.start));
}
static inline unsigned advanceStringIndex(String str, unsigned strSize, unsigned index, bool isUnicode)
{
if (!isUnicode)
return ++index;
return RegExpObject::advanceStringUnicode(str, strSize, index);
}
// Result type of the control and push callbacks handed to genericSplit(): tells the split
// loop whether to keep going or to stop immediately.
enum SplitControl {
    ContinueSplit = 0, // Keep splitting.
    AbortSplit = 1 // Stop; genericSplit() returns right away.
};
// Core split loop shared by the phases of regExpProtoFuncSplitFast().
//   regexp / input / inputSize - the RegExp to match with and the string (and its length) to split.
//   position      - in/out: start of the piece that has not been emitted yet.
//   matchPosition - in/out: offset at which the next match attempt starts.
//   regExpIsSticky / regExpIsUnicode - flags of the RegExp, supplied by the caller.
//   control - called at the top of every iteration; returning AbortSplit ends the loop.
//   push    - called as push(isDefined, start, length) for every piece to emit (the text before
//             a match, then one call per capture group); returning AbortSplit ends the loop.
// Because position and matchPosition are references, the caller can inspect (or restore) them
// after the function returns, including after an abort.
template<typename ControlFunc, typename PushFunc>
void genericSplit(
VM& vm, RegExp* regexp, const String& input, unsigned inputSize, unsigned& position,
unsigned& matchPosition, bool regExpIsSticky, bool regExpIsUnicode,
const ControlFunc& control, const PushFunc& push)
{
while (matchPosition < inputSize) {
if (control() == AbortSplit)
return;
Vector<int, 32> ovector;
int mpos = regexp->match(vm, input, matchPosition, ovector);
// A negative result is treated as "no match": a non-sticky RegExp stops here, a sticky one
// retries from the next index.
if (mpos < 0) {
if (!regExpIsSticky)
break;
matchPosition = advanceStringIndex(input, inputSize, matchPosition, regExpIsUnicode);
continue;
}
// A match that starts at or beyond the end of the input ends the loop.
if (static_cast<unsigned>(mpos) >= inputSize) {
break;
}
matchPosition = mpos;
// ovector[1] is used as the end offset of the whole match.
unsigned matchEnd = ovector[1];
// A match ending exactly at `position` would make no progress: step one index forward and retry.
if (matchEnd == position) {
matchPosition = advanceStringIndex(input, inputSize, matchPosition, regExpIsUnicode);
continue;
}
ASSERT(matchEnd);
unsigned numberOfCaptures = regexp->numSubpatterns();
// Emit the text between the previous piece's end and the start of this match.
if (push(true, position, matchPosition - position) == AbortSplit)
return;
position = matchEnd;
// Emit every capture group: ovector[i * 2] is its start and ovector[i * 2 + 1] its end;
// a negative start is reported to push() as "not defined".
for (unsigned i = 1; i <= numberOfCaptures; ++i) {
int sub = ovector[i * 2];
if (push(sub >= 0, sub, ovector[i * 2 + 1] - sub) == AbortSplit)
return;
}
// Continue matching right after the match that was just consumed.
matchPosition = position;
}
}
// Host function that splits argument 0 by the receiver's RegExp and returns the pieces
// (including capture groups) as a new array. Argument 1 is an optional limit on the number
// of elements.
//
// The work is done in up to three genericSplit() phases:
//   1. A direct phase that pushes pieces straight into the result until it reaches
//      maxSizeForDirectPath elements.
//   2. If that cap was hit, a dry run that only counts the remaining pieces, so that an
//      absurdly large result is rejected with an out-of-memory error before it is built.
//   3. A final phase that replays the remaining matches and actually pushes them.
//
// Returns undefined when an exception is pending (string conversion, array creation, limit
// conversion, or the out-of-memory error).
// The receiver is cast without a type check. NOTE(review): presumably the caller guarantees
// that `this` is a RegExpObject -- confirm against the call sites.
EncodedJSValue JSC_HOST_CALL regExpProtoFuncSplitFast(ExecState* exec)
{
    VM& vm = exec->vm();
    JSValue thisValue = exec->thisValue();
    RegExp* regexp = asRegExpObject(thisValue)->regExp();

    JSString* inputString = exec->argument(0).toString(exec);
    String input = inputString->value(exec);
    if (vm.exception())
        return JSValue::encode(jsUndefined());
    ASSERT(!input.isNull());

    JSArray* result = constructEmptyArray(exec, 0);
    if (UNLIKELY(vm.exception()))
        return JSValue::encode(jsUndefined());
    unsigned resultLength = 0;

    // An undefined limit means "no limit" (2^32 - 1); anything else goes through toUInt32().
    // That conversion can run user code and throw, so stop here rather than keep matching
    // with an exception pending.
    JSValue limitValue = exec->argument(1);
    unsigned limit = limitValue.isUndefined() ? 0xFFFFFFFFu : limitValue.toUInt32(exec);
    if (UNLIKELY(vm.exception()))
        return JSValue::encode(jsUndefined());

    unsigned inputSize = input.length();
    unsigned position = 0;

    // A limit of zero always produces an empty array.
    if (!limit)
        return JSValue::encode(result);

    // Empty input: the result is [input] unless the RegExp matches the empty string, in which
    // case the result stays empty.
    if (input.isEmpty()) {
        if (!regexp->match(vm, input, 0))
            result->putDirectIndex(exec, 0, inputString);
        return JSValue::encode(result);
    }

    unsigned matchPosition = position;
    bool regExpIsSticky = regexp->sticky();
    bool regExpIsUnicode = regexp->unicode();

    unsigned maxSizeForDirectPath = 100000;

    // Phase 1: push pieces directly, giving up once the result reaches maxSizeForDirectPath.
    genericSplit(
        vm, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode,
        [&] () -> SplitControl {
            if (resultLength >= maxSizeForDirectPath)
                return AbortSplit;
            return ContinueSplit;
        },
        [&] (bool isDefined, unsigned start, unsigned length) -> SplitControl {
            result->putDirectIndex(exec, resultLength++, isDefined ? JSRopeString::createSubstringOfResolved(vm, inputString, start, length) : jsUndefined());
            if (resultLength >= limit)
                return AbortSplit;
            return ContinueSplit;
        });

    if (resultLength >= limit)
        return JSValue::encode(result);
    if (resultLength < maxSizeForDirectPath) {
        // The loop ran to completion: the remainder of the input is the last element.
        result->putDirectIndex(exec, resultLength, JSRopeString::createSubstringOfResolved(vm, inputString, position, inputSize - position));
        return JSValue::encode(result);
    }

    // Phase 2: dry run from the current state that only counts how many more pieces there
    // would be. The positions are saved so the real run below can restart from the same point.
    unsigned savedPosition = position;
    unsigned savedMatchPosition = matchPosition;
    unsigned dryRunCount = 0;
    genericSplit(
        vm, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode,
        [&] () -> SplitControl {
            if (resultLength + dryRunCount >= MAX_STORAGE_VECTOR_LENGTH)
                return AbortSplit;
            return ContinueSplit;
        },
        [&] (bool, unsigned, unsigned) -> SplitControl {
            dryRunCount++;
            if (resultLength + dryRunCount >= limit)
                return AbortSplit;
            return ContinueSplit;
        });

    if (resultLength + dryRunCount >= MAX_STORAGE_VECTOR_LENGTH) {
        throwOutOfMemoryError(exec);
        return JSValue::encode(jsUndefined());
    }

    // Phase 3: the result is known to fit, so replay from the saved state and push for real.
    position = savedPosition;
    matchPosition = savedMatchPosition;
    genericSplit(
        vm, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode,
        [&] () -> SplitControl {
            return ContinueSplit;
        },
        [&] (bool isDefined, unsigned start, unsigned length) -> SplitControl {
            result->putDirectIndex(exec, resultLength++, isDefined ? JSRopeString::createSubstringOfResolved(vm, inputString, start, length) : jsUndefined());
            if (resultLength >= limit)
                return AbortSplit;
            return ContinueSplit;
        });

    if (resultLength >= limit)
        return JSValue::encode(result);

    // The remainder of the input is the last element.
    result->putDirectIndex(exec, resultLength, JSRopeString::createSubstringOfResolved(vm, inputString, position, inputSize - position));
    return JSValue::encode(result);
}
}