WRECGenerator.cpp   [plain text]


/*
 * Copyright (C) 2008 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include "config.h"
#include "WREC.h"

#if ENABLE(WREC)

#include "CharacterClassConstructor.h"
#include "Interpreter.h"
#include "WRECFunctors.h"
#include "WRECParser.h"
#include "pcre_internal.h"

using namespace WTF;

namespace JSC { namespace WREC {

void Generator::generateEnter()
{
#if PLATFORM(X86)
    // On x86 edi & esi are callee preserved registers.
    push(X86::edi);
    push(X86::esi);
    
#if COMPILER(MSVC)
    // Move the arguments into registers.
    peek(input, 3);
    peek(index, 4);
    peek(length, 5);
    peek(output, 6);
#else
    // On gcc the function is regparm(3), so the input, index, and length registers
    // (eax, edx, and ecx respectively) already contain the appropriate values.
    // Just load the fourth argument (output) into edi
    peek(output, 3);
#endif
#endif
}

void Generator::generateReturnSuccess()
{
    ASSERT(returnRegister != index);
    ASSERT(returnRegister != output);

    // Set return value.
    pop(returnRegister); // match begin
    store32(returnRegister, output);
    store32(index, Address(output, 4)); // match end
    
    // Restore callee save registers.
#if PLATFORM(X86)
    pop(X86::esi);
    pop(X86::edi);
#endif
    ret();
}

void Generator::generateSaveIndex()
{
    push(index);
}

void Generator::generateIncrementIndex(Jump* failure)
{
    peek(index);
    if (failure)
        *failure = branch32(Equal, length, index);
    add32(Imm32(1), index);
    poke(index);
}

void Generator::generateLoadCharacter(JumpList& failures)
{
    failures.append(branch32(Equal, length, index));
    load16(BaseIndex(input, index, TimesTwo), character);
}

// For the sake of end-of-line assertions, we treat one-past-the-end as if it
// were part of the input string.
void Generator::generateJumpIfNotEndOfInput(Label target)
{
    branch32(LessThanOrEqual, index, length, target);
}

void Generator::generateReturnFailure()
{
    pop();
    move(Imm32(-1), returnRegister);

#if PLATFORM(X86)
    pop(X86::esi);
    pop(X86::edi);
#endif
    ret();
}

void Generator::generateBacktrack1()
{
    sub32(Imm32(1), index);
}

void Generator::generateBacktrackBackreference(unsigned subpatternId)
{
    sub32(Address(output, (2 * subpatternId + 1) * sizeof(int)), index);
    add32(Address(output, (2 * subpatternId) * sizeof(int)), index);
}

void Generator::generateBackreferenceQuantifier(JumpList& failures, Quantifier::Type quantifierType, unsigned subpatternId, unsigned min, unsigned max)
{
    GenerateBackreferenceFunctor functor(subpatternId);

    load32(Address(output, (2 * subpatternId) * sizeof(int)), character);
    Jump skipIfEmpty = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), character);

    ASSERT(quantifierType == Quantifier::Greedy || quantifierType == Quantifier::NonGreedy);
    if (quantifierType == Quantifier::Greedy)
        generateGreedyQuantifier(failures, functor, min, max);
    else
        generateNonGreedyQuantifier(failures, functor, min, max);

    skipIfEmpty.link(this);
}

void Generator::generateNonGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max)
{
    JumpList atomFailedList;
    JumpList alternativeFailedList;

    // (0) Setup: Save, then init repeatCount.
    push(repeatCount);
    move(Imm32(0), repeatCount);
    Jump start = jump();

    // (4) Quantifier failed: No more atom reading possible.
    Label quantifierFailed(this);
    pop(repeatCount);
    failures.append(jump()); 

    // (3) Alternative failed: If we can, read another atom, then fall through to (2) to try again.
    Label alternativeFailed(this);
    pop(index);
    if (max != Quantifier::Infinity)
        branch32(Equal, repeatCount, Imm32(max), quantifierFailed);

    // (1) Read an atom.
    if (min)
        start.link(this);
    Label readAtom(this);
    functor.generateAtom(this, atomFailedList);
    atomFailedList.linkTo(quantifierFailed, this);
    add32(Imm32(1), repeatCount);
    
    // (2) Keep reading if we're under the minimum.
    if (min > 1)
        branch32(LessThan, repeatCount, Imm32(min), readAtom);

    // (3) Test the rest of the alternative.
    if (!min)
        start.link(this);
    push(index);
    m_parser.parseAlternative(alternativeFailedList);
    alternativeFailedList.linkTo(alternativeFailed, this);

    pop();
    pop(repeatCount);
}

void Generator::generateGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max)
{
    if (!max)
        return;

    JumpList doneReadingAtomsList;
    JumpList alternativeFailedList;

    // (0) Setup: Save, then init repeatCount.
    push(repeatCount);
    move(Imm32(0), repeatCount);

    // (1) Greedily read as many copies of the atom as possible, then jump to (2).
    Label readAtom(this);
    functor.generateAtom(this, doneReadingAtomsList);
    add32(Imm32(1), repeatCount);
    if (max == Quantifier::Infinity)
        jump(readAtom);
    else if (max == 1)
        doneReadingAtomsList.append(jump());
    else {
        branch32(NotEqual, repeatCount, Imm32(max), readAtom);
        doneReadingAtomsList.append(jump());
    }

    // (5) Quantifier failed: No more backtracking possible.
    Label quantifierFailed(this);
    pop(repeatCount);
    failures.append(jump()); 

    // (4) Alternative failed: Backtrack, then fall through to (2) to try again.
    Label alternativeFailed(this);
    pop(index);
    functor.backtrack(this);
    sub32(Imm32(1), repeatCount);

    // (2) Verify that we have enough atoms.
    doneReadingAtomsList.link(this);
    branch32(LessThan, repeatCount, Imm32(min), quantifierFailed);

    // (3) Test the rest of the alternative.
    push(index);
    m_parser.parseAlternative(alternativeFailedList);
    alternativeFailedList.linkTo(alternativeFailed, this);

    pop();
    pop(repeatCount);
}

void Generator::generatePatternCharacterSequence(JumpList& failures, int* sequence, size_t count)
{
    for (size_t i = 0; i < count;) {
        if (i < count - 1) {
            if (generatePatternCharacterPair(failures, sequence[i], sequence[i + 1])) {
                i += 2;
                continue;
            }
        }

        generatePatternCharacter(failures, sequence[i]);
        ++i;
    }
}

bool Generator::generatePatternCharacterPair(JumpList& failures, int ch1, int ch2)
{
    if (m_parser.ignoreCase()) {
        // Non-trivial case folding requires more than one test, so we can't
        // test as a pair with an adjacent character.
        if (!isASCII(ch1) && Unicode::toLower(ch1) != Unicode::toUpper(ch1))
            return false;
        if (!isASCII(ch2) && Unicode::toLower(ch2) != Unicode::toUpper(ch2))
            return false;
    }

    // Optimistically consume 2 characters.
    add32(Imm32(2), index);
    failures.append(branch32(GreaterThan, index, length));

    // Load the characters we just consumed, offset -2 characters from index.
    load32(BaseIndex(input, index, TimesTwo, -2 * 2), character);

    if (m_parser.ignoreCase()) {
        // Convert ASCII alphabet characters to upper case before testing for
        // equality. (ASCII non-alphabet characters don't require upper-casing
        // because they have no uppercase equivalents. Unicode characters don't
        // require upper-casing because we only handle Unicode characters whose
        // upper and lower cases are equal.)
        int ch1Mask = 0;
        if (isASCIIAlpha(ch1)) {
            ch1 |= 32;
            ch1Mask = 32;
        }

        int ch2Mask = 0;
        if (isASCIIAlpha(ch2)) {
            ch2 |= 32;
            ch2Mask = 32;
        }

        int mask = ch1Mask | (ch2Mask << 16);
        if (mask)
            or32(Imm32(mask), character);
    }
    int pair = ch1 | (ch2 << 16);

    failures.append(branch32(NotEqual, character, Imm32(pair)));
    return true;
}

void Generator::generatePatternCharacter(JumpList& failures, int ch)
{
    generateLoadCharacter(failures);

    // used for unicode case insensitive
    bool hasUpper = false;
    Jump isUpper;
    
    // if case insensitive match
    if (m_parser.ignoreCase()) {
        UChar lower, upper;
        
        // check for ascii case sensitive characters
        if (isASCIIAlpha(ch)) {
            or32(Imm32(32), character);
            ch |= 32;
        } else if (!isASCII(ch) && ((lower = Unicode::toLower(ch)) != (upper = Unicode::toUpper(ch)))) {
            // handle unicode case sentitive characters - branch to success on upper
            isUpper = branch32(Equal, character, Imm32(upper));
            hasUpper = true;
            ch = lower;
        }
    }
    
    // checks for ch, or lower case version of ch, if insensitive
    failures.append(branch32(NotEqual, character, Imm32((unsigned short)ch)));

    if (m_parser.ignoreCase() && hasUpper) {
        // for unicode case insensitive matches, branch here if upper matches.
        isUpper.link(this);
    }
    
    // on success consume the char
    add32(Imm32(1), index);
}

void Generator::generateCharacterClassInvertedRange(JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
{
    do {
        // pick which range we're going to generate
        int which = count >> 1;
        char lo = ranges[which].begin;
        char hi = ranges[which].end;
        
        // check if there are any ranges or matches below lo.  If not, just jl to failure -
        // if there is anything else to check, check that first, if it falls through jmp to failure.
        if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
            Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
            
            // generate code for all ranges before this one
            if (which)
                generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount);
            
            while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
                ++*matchIndex;
            }
            failures.append(jump());

            loOrAbove.link(this);
        } else if (which) {
            Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

            generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount);
            failures.append(jump());

            loOrAbove.link(this);
        } else
            failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));

        while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
            ++*matchIndex;

        matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
        // fall through to here, the value is above hi.

        // shuffle along & loop around if there are any more matches to handle.
        unsigned next = which + 1;
        ranges += next;
        count -= next;
    } while (count);
}

void Generator::generateCharacterClassInverted(JumpList& matchDest, const CharacterClass& charClass)
{
    Jump unicodeFail;
    if (charClass.numMatchesUnicode || charClass.numRangesUnicode) {
        Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
    
        if (charClass.numMatchesUnicode) {
            for (unsigned i = 0; i < charClass.numMatchesUnicode; ++i) {
                UChar ch = charClass.matchesUnicode[i];
                matchDest.append(branch32(Equal, character, Imm32(ch)));
            }
        }
        
        if (charClass.numRangesUnicode) {
            for (unsigned i = 0; i < charClass.numRangesUnicode; ++i) {
                UChar lo = charClass.rangesUnicode[i].begin;
                UChar hi = charClass.rangesUnicode[i].end;
                
                Jump below = branch32(LessThan, character, Imm32(lo));
                matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
                below.link(this);
            }
        }

        unicodeFail = jump();
        isAscii.link(this);
    }

    if (charClass.numRanges) {
        unsigned matchIndex = 0;
        JumpList failures; 
        generateCharacterClassInvertedRange(failures, matchDest, charClass.ranges, charClass.numRanges, &matchIndex, charClass.matches, charClass.numMatches);
        while (matchIndex < charClass.numMatches)
            matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass.matches[matchIndex++])));

        failures.link(this);
    } else if (charClass.numMatches) {
        // optimization: gather 'a','A' etc back together, can mask & test once.
        Vector<char> matchesAZaz;

        for (unsigned i = 0; i < charClass.numMatches; ++i) {
            char ch = charClass.matches[i];
            if (m_parser.ignoreCase()) {
                if (isASCIILower(ch)) {
                    matchesAZaz.append(ch);
                    continue;
                }
                if (isASCIIUpper(ch))
                    continue;
            }
            matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
        }

        if (unsigned countAZaz = matchesAZaz.size()) {
            or32(Imm32(32), character);
            for (unsigned i = 0; i < countAZaz; ++i)
                matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
        }
    }

    if (charClass.numMatchesUnicode || charClass.numRangesUnicode)
        unicodeFail.link(this);
}

void Generator::generateCharacterClass(JumpList& failures, const CharacterClass& charClass, bool invert)
{
    generateLoadCharacter(failures);

    if (invert)
        generateCharacterClassInverted(failures, charClass);
    else {
        JumpList successes;
        generateCharacterClassInverted(successes, charClass);
        failures.append(jump());
        successes.link(this);
    }
    
    add32(Imm32(1), index);
}

void Generator::generateParenthesesAssertion(JumpList& failures)
{
    JumpList disjunctionFailed;

    push(index);
    m_parser.parseDisjunction(disjunctionFailed);
    Jump success = jump();

    disjunctionFailed.link(this);
    pop(index);
    failures.append(jump());

    success.link(this);
    pop(index);
}

void Generator::generateParenthesesInvertedAssertion(JumpList& failures)
{
    JumpList disjunctionFailed;

    push(index);
    m_parser.parseDisjunction(disjunctionFailed);

    // If the disjunction succeeded, the inverted assertion failed.
    pop(index);
    failures.append(jump());

    // If the disjunction failed, the inverted assertion succeeded.
    disjunctionFailed.link(this);
    pop(index);
}

void Generator::generateParenthesesNonGreedy(JumpList& failures, Label start, Jump success, Jump fail)
{
    jump(start);
    success.link(this);
    failures.append(fail);
}

Generator::Jump Generator::generateParenthesesResetTrampoline(JumpList& newFailures, unsigned subpatternIdBefore, unsigned subpatternIdAfter)
{
    Jump skip = jump();
    newFailures.link(this);
    for (unsigned i = subpatternIdBefore + 1; i <= subpatternIdAfter; ++i) {
        store32(Imm32(-1), Address(output, (2 * i) * sizeof(int)));
        store32(Imm32(-1), Address(output, (2 * i + 1) * sizeof(int)));
    }
    
    Jump newFailJump = jump();
    skip.link(this);
    
    return newFailJump;
}

void Generator::generateAssertionBOL(JumpList& failures)
{
    if (m_parser.multiline()) {
        JumpList previousIsNewline;

        // begin of input == success
        previousIsNewline.append(branch32(Equal, index, Imm32(0)));

        // now check prev char against newline characters.
        load16(BaseIndex(input, index, TimesTwo, -2), character);
        generateCharacterClassInverted(previousIsNewline, CharacterClass::newline());

        failures.append(jump());

        previousIsNewline.link(this);
    } else
        failures.append(branch32(NotEqual, index, Imm32(0)));
}

void Generator::generateAssertionEOL(JumpList& failures)
{
    if (m_parser.multiline()) {
        JumpList nextIsNewline;

        generateLoadCharacter(nextIsNewline); // end of input == success
        generateCharacterClassInverted(nextIsNewline, CharacterClass::newline());
        failures.append(jump());
        nextIsNewline.link(this);
    } else {
        failures.append(branch32(NotEqual, length, index));
    }
}

void Generator::generateAssertionWordBoundary(JumpList& failures, bool invert)
{
    JumpList wordBoundary;
    JumpList notWordBoundary;

    // (1) Check if the previous value was a word char

    // (1.1) check for begin of input
    Jump atBegin = branch32(Equal, index, Imm32(0));
    // (1.2) load the last char, and chck if is word character
    load16(BaseIndex(input, index, TimesTwo, -2), character);
    JumpList previousIsWord;
    generateCharacterClassInverted(previousIsWord, CharacterClass::wordchar());
    // (1.3) if we get here, previous is not a word char
    atBegin.link(this);

    // (2) Handle situation where previous was NOT a \w

    generateLoadCharacter(notWordBoundary);
    generateCharacterClassInverted(wordBoundary, CharacterClass::wordchar());
    // (2.1) If we get here, neither chars are word chars
    notWordBoundary.append(jump());

    // (3) Handle situation where previous was a \w

    // (3.0) link success in first match to here
    previousIsWord.link(this);
    generateLoadCharacter(wordBoundary);
    generateCharacterClassInverted(notWordBoundary, CharacterClass::wordchar());
    // (3.1) If we get here, this is an end of a word, within the input.
    
    // (4) Link everything up
    
    if (invert) {
        // handle the fall through case
        wordBoundary.append(jump());
    
        // looking for non word boundaries, so link boundary fails to here.
        notWordBoundary.link(this);

        failures.append(wordBoundary);
    } else {
        // looking for word boundaries, so link successes here.
        wordBoundary.link(this);
        
        failures.append(notWordBoundary);
    }
}

void Generator::generateBackreference(JumpList& failures, unsigned subpatternId)
{
    push(index);
    push(repeatCount);

    // get the start pos of the backref into repeatCount (multipurpose!)
    load32(Address(output, (2 * subpatternId) * sizeof(int)), repeatCount);

    Jump skipIncrement = jump();
    Label topOfLoop(this);

    add32(Imm32(1), index);
    add32(Imm32(1), repeatCount);
    skipIncrement.link(this);

    // check if we're at the end of backref (if we are, success!)
    Jump endOfBackRef = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), repeatCount);

    load16(BaseIndex(input, repeatCount, MacroAssembler::TimesTwo), character);

    // check if we've run out of input (this would be a can o'fail)
    Jump endOfInput = branch32(Equal, length, index);

    branch16(Equal, BaseIndex(input, index, TimesTwo), character, topOfLoop);

    endOfInput.link(this);

    // Failure
    pop(repeatCount);
    pop(index);
    failures.append(jump());

    // Success
    endOfBackRef.link(this);
    pop(repeatCount);
    pop();
}

void Generator::terminateAlternative(JumpList& successes, JumpList& failures)
{
    successes.append(jump());
    
    failures.link(this);
    peek(index);
}

void Generator::terminateDisjunction(JumpList& successes)
{
    successes.link(this);
}

} } // namespace JSC::WREC

#endif // ENABLE(WREC)