search.cpp   [plain text]


/*
**********************************************************************
*   Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
**********************************************************************
*   Date        Name        Description
*  03/22/2000   helena      Creation.
**********************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION

#include "unicode/brkiter.h"
#include "unicode/schriter.h"
#include "unicode/search.h"
#include "usrchimp.h"
#include "cmemory.h"

// public constructors and destructors -----------------------------------
U_NAMESPACE_BEGIN

SearchIterator::SearchIterator(const SearchIterator &other)
    : UObject(other)
{   
    m_breakiterator_            = other.m_breakiterator_;
    m_text_                     = other.m_text_;
    m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));   
    m_search_->breakIter        = other.m_search_->breakIter;
    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
    m_search_->isOverlap        = other.m_search_->isOverlap;
    m_search_->elementComparisonType = other.m_search_->elementComparisonType;
    m_search_->matchedIndex     = other.m_search_->matchedIndex;
    m_search_->matchedLength    = other.m_search_->matchedLength;
    m_search_->text             = other.m_search_->text;
    m_search_->textLength       = other.m_search_->textLength;
}

SearchIterator::~SearchIterator()
{
    if (m_search_ != NULL) {
        uprv_free(m_search_);
    }
}

// public get and set methods ----------------------------------------

void SearchIterator::setAttribute(USearchAttribute       attribute,
                                  USearchAttributeValue  value,
                                  UErrorCode            &status)
{
    if (U_SUCCESS(status)) {
        switch (attribute)
        {
        case USEARCH_OVERLAP :
            m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
            break;
        case USEARCH_CANONICAL_MATCH :
            m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
            break;
        case USEARCH_ELEMENT_COMPARISON :
            if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
                m_search_->elementComparisonType = (int16_t)value;
            } else {
                m_search_->elementComparisonType = 0;
            }
            break;
        default:
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
    }
    if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
}

USearchAttributeValue SearchIterator::getAttribute(
                                          USearchAttribute  attribute) const
{
    switch (attribute) {
    case USEARCH_OVERLAP :
        return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
    case USEARCH_CANONICAL_MATCH :
        return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : 
                                                                USEARCH_OFF);
    case USEARCH_ELEMENT_COMPARISON :
        {
            int16_t value = m_search_->elementComparisonType;
            if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
                return (USearchAttributeValue)value;
            } else {
                return USEARCH_STANDARD_ELEMENT_COMPARISON;
            }
        }
    default :
        return USEARCH_DEFAULT;
    }
}
    
int32_t SearchIterator::getMatchedStart() const
{
    return m_search_->matchedIndex;
}

int32_t SearchIterator::getMatchedLength() const
{
    return m_search_->matchedLength;
}
    
void SearchIterator::getMatchedText(UnicodeString &result) const
{
    int32_t matchedindex  = m_search_->matchedIndex;
    int32_t     matchedlength = m_search_->matchedLength;
    if (matchedindex != USEARCH_DONE && matchedlength != 0) {
        result.setTo(m_search_->text + matchedindex, matchedlength); 
    }
    else {
        result.remove();
    }
}
    
void SearchIterator::setBreakIterator(BreakIterator *breakiter, 
                                      UErrorCode &status)
{
    if (U_SUCCESS(status)) {
#if 0
        m_search_->breakIter = NULL;
        // the c++ breakiterator may not make use of ubreakiterator.
        // so we'll have to keep track of it ourselves.
#else
        // Well, gee... the Constructors that take a BreakIterator
        // all cast the BreakIterator to a UBreakIterator and
        // pass it to the corresponding usearch_openFromXXX
        // routine, so there's no reason not to do this.
        //
        // Besides, a UBreakIterator is a BreakIterator, so
        // any subclass of BreakIterator should work fine here...
        m_search_->breakIter = (UBreakIterator *) breakiter;
#endif
        
        m_breakiterator_ = breakiter;
    }
}
    
const BreakIterator * SearchIterator::getBreakIterator(void) const
{
    return m_breakiterator_;
}

void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        if (text.length() == 0) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        else {
            m_text_        = text;
            m_search_->text = m_text_.getBuffer();
            m_search_->textLength = m_text_.length();
        }
    }
}

void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        text.getText(m_text_);
        setText(m_text_, status);
    }
}
    
const UnicodeString & SearchIterator::getText(void) const
{
    return m_text_;
}

// operator overloading ----------------------------------------------

UBool SearchIterator::operator==(const SearchIterator &that) const
{
    if (this == &that) {
        return TRUE;
    }
    return (m_breakiterator_            == that.m_breakiterator_ &&
            m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
            m_search_->isOverlap        == that.m_search_->isOverlap &&
            m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
            m_search_->matchedIndex     == that.m_search_->matchedIndex &&
            m_search_->matchedLength    == that.m_search_->matchedLength &&
            m_search_->textLength       == that.m_search_->textLength &&
            getOffset() == that.getOffset() &&
            (uprv_memcmp(m_search_->text, that.m_search_->text, 
                              m_search_->textLength * sizeof(UChar)) == 0));
}

// public methods ----------------------------------------------------

int32_t SearchIterator::first(UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }
    setOffset(0, status);
    return handleNext(0, status);
}

int32_t SearchIterator::following(int32_t position, 
                                      UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }
    setOffset(position, status);
    return handleNext(position, status);
}
    
int32_t SearchIterator::last(UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }
    setOffset(m_search_->textLength, status);
    return handlePrev(m_search_->textLength, status);
}

int32_t SearchIterator::preceding(int32_t position, 
                                      UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }
    setOffset(position, status);
    return handlePrev(position, status);
}

int32_t SearchIterator::next(UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        int32_t offset = getOffset();
        int32_t matchindex  = m_search_->matchedIndex;
        int32_t     matchlength = m_search_->matchedLength;
        m_search_->reset = FALSE;
        if (m_search_->isForwardSearching == TRUE) {
            int32_t textlength = m_search_->textLength;
            if (offset == textlength || matchindex == textlength || 
                (matchindex != USEARCH_DONE && 
                matchindex + matchlength >= textlength)) {
                // not enough characters to match
                setMatchNotFound();
                return USEARCH_DONE; 
            }
        }
        else {
            // switching direction. 
            // if matchedIndex == USEARCH_DONE, it means that either a 
            // setOffset has been called or that previous ran off the text
            // string. the iterator would have been set to offset 0 if a 
            // match is not found.
            m_search_->isForwardSearching = TRUE;
            if (m_search_->matchedIndex != USEARCH_DONE) {
                // there's no need to set the collation element iterator
                // the next call to next will set the offset.
                return matchindex;
            }
        }

        if (matchlength > 0) {
            // if matchlength is 0 we are at the start of the iteration
            if (m_search_->isOverlap) {
                offset ++;
            }
            else {
                offset += matchlength;
            }
        }
        return handleNext(offset, status);
    }
    return USEARCH_DONE;
}

int32_t SearchIterator::previous(UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        int32_t offset;
        if (m_search_->reset) {
            offset                       = m_search_->textLength;
            m_search_->isForwardSearching = FALSE;
            m_search_->reset              = FALSE;
            setOffset(offset, status);
        }
        else {
            offset = getOffset();
        }
        
        int32_t matchindex = m_search_->matchedIndex;
        if (m_search_->isForwardSearching == TRUE) {
            // switching direction. 
            // if matchedIndex == USEARCH_DONE, it means that either a 
            // setOffset has been called or that next ran off the text
            // string. the iterator would have been set to offset textLength if 
            // a match is not found.
            m_search_->isForwardSearching = FALSE;
            if (matchindex != USEARCH_DONE) {
                return matchindex;
            }
        }
        else {
            if (offset == 0 || matchindex == 0) {
                // not enough characters to match
                setMatchNotFound();
                return USEARCH_DONE; 
            }
        }

        if (matchindex != USEARCH_DONE) {
            if (m_search_->isOverlap) {
                matchindex += m_search_->matchedLength - 2;
            }

            return handlePrev(matchindex, status); 
        }

        return handlePrev(offset, status);
    }

    return USEARCH_DONE;
}

void SearchIterator::reset()
{
    UErrorCode status = U_ZERO_ERROR;
    setMatchNotFound();
    setOffset(0, status);
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
    m_search_->elementComparisonType = 0;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
}

// protected constructors and destructors -----------------------------

SearchIterator::SearchIterator()
{
    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    m_search_->breakIter          = NULL;
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
    m_search_->elementComparisonType = 0;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
    m_search_->matchedIndex       = USEARCH_DONE;
    m_search_->matchedLength      = 0;
    m_search_->text               = NULL;
    m_search_->textLength         = 0;
    m_breakiterator_              = NULL;
}

SearchIterator::SearchIterator(const UnicodeString &text, 
                                     BreakIterator *breakiter) :
                                     m_breakiterator_(breakiter),
                                     m_text_(text)
{
    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    m_search_->breakIter          = NULL;
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
    m_search_->elementComparisonType = 0;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
    m_search_->matchedIndex       = USEARCH_DONE;
    m_search_->matchedLength      = 0;
    m_search_->text               = m_text_.getBuffer();
    m_search_->textLength         = text.length();
}

SearchIterator::SearchIterator(CharacterIterator &text, 
                               BreakIterator     *breakiter) :
                               m_breakiterator_(breakiter)
{
    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    m_search_->breakIter          = NULL;
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
    m_search_->elementComparisonType = 0;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
    m_search_->matchedIndex       = USEARCH_DONE;
    m_search_->matchedLength      = 0;
    text.getText(m_text_);
    m_search_->text               = m_text_.getBuffer();
    m_search_->textLength         = m_text_.length();
    m_breakiterator_             = breakiter;
}

// protected methods ------------------------------------------------------

SearchIterator & SearchIterator::operator=(const SearchIterator &that)
{
    if (this != &that) {
        m_breakiterator_            = that.m_breakiterator_;
        m_text_                     = that.m_text_;
        m_search_->breakIter        = that.m_search_->breakIter;
        m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
        m_search_->isOverlap        = that.m_search_->isOverlap;
        m_search_->elementComparisonType = that.m_search_->elementComparisonType;
        m_search_->matchedIndex     = that.m_search_->matchedIndex;
        m_search_->matchedLength    = that.m_search_->matchedLength;
        m_search_->text             = that.m_search_->text;
        m_search_->textLength       = that.m_search_->textLength;
    }
    return *this;
}

void SearchIterator::setMatchLength(int32_t length)
{
    m_search_->matchedLength = length;
}

void SearchIterator::setMatchStart(int32_t position)
{
    m_search_->matchedIndex = position;
}

void SearchIterator::setMatchNotFound() 
{
    setMatchStart(USEARCH_DONE);
    setMatchLength(0);
    UErrorCode status = U_ZERO_ERROR;
    // by default no errors should be returned here since offsets are within 
    // range.
    if (m_search_->isForwardSearching) {
        setOffset(m_search_->textLength, status);
    }
    else {
        setOffset(0, status);
    }
}


U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_COLLATION */