coleitr.cpp [plain text]

/*
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*/

/*
* File coleitr.cpp
*
* 
*
* Created by: Helena Shih
*
* Modification History:
*
*  Date      Name        Description
*
*  6/23/97   helena      Adding comments to make code more readable.
* 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
* 12/10/99   aliu        Ported Thai collation support from Java.
* 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
* 02/19/01   swquek      Removed CollationElementsIterator() since it is 
*                        private constructor and no calls are made to it
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/coleitr.h"
#include "unicode/ustring.h"
#include "ucol_imp.h"
#include "cmemory.h"


/* Constants --------------------------------------------------------------- */

U_NAMESPACE_BEGIN

const char CollationElementIterator::fgClassID=0;

/* synwee : public can't remove */
int32_t const CollationElementIterator::NULLORDER = 0xffffffff;

/* CollationElementIterator public constructor/destructor ------------------ */

CollationElementIterator::CollationElementIterator(
                                         const CollationElementIterator& other) 
                                         : UObject(other), isDataOwned_(TRUE)
{
  UErrorCode status = U_ZERO_ERROR;
  m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, 
                              &status);

  *this = other;
}

CollationElementIterator::~CollationElementIterator()
{
  if (isDataOwned_) {
    ucol_closeElements(m_data_);
  }
}

/* CollationElementIterator public methods --------------------------------- */

int32_t CollationElementIterator::getOffset() const
{
  return ucol_getOffset(m_data_);
}

/**
* Get the ordering priority of the next character in the string.
* @return the next character's ordering. Returns NULLORDER if an error has 
*         occured or if the end of string has been reached
*/
int32_t CollationElementIterator::next(UErrorCode& status)
{
  return ucol_next(m_data_, &status);
}

UBool CollationElementIterator::operator!=(
                                  const CollationElementIterator& other) const
{
  return !(*this == other);
}

UBool CollationElementIterator::operator==(
                                    const CollationElementIterator& that) const
{
    if (this == &that) {
        return TRUE;
    }
  
    if (m_data_ == that.m_data_) {
        return TRUE;
    }

    // option comparison
    if (!(m_data_->reset_ == that.m_data_->reset_ 
          && m_data_->iteratordata_.coll == that.m_data_->iteratordata_.coll))
    {
        return FALSE;
    }

    // the constructor and setText always sets a length
    // and we only compare the string not the contents of the normalization
    // buffer
    int thislength = m_data_->iteratordata_.endp -
                     m_data_->iteratordata_.string;
    int thatlength = that.m_data_->iteratordata_.endp -
                     that.m_data_->iteratordata_.string;
    
    if (thislength != thatlength) {
        return FALSE;
    }

    if (uprv_memcmp(m_data_->iteratordata_.string, 
                    that.m_data_->iteratordata_.string, 
                    thislength * U_SIZEOF_UCHAR) != 0) {
        return FALSE;
    }
    if (getOffset() != that.getOffset()) {
        return FALSE;
    }

    // checking normalization buffer
    if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
        if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
            return FALSE;
        }
        // both are in the normalization buffer
        if (m_data_->iteratordata_.pos 
            - m_data_->iteratordata_.writableBuffer 
            != that.m_data_->iteratordata_.pos 
            - that.m_data_->iteratordata_.writableBuffer) {
            // not in the same position in the normalization buffer
            return FALSE;
        }
    }
    else if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
        return FALSE;
    }
    // checking ce position
    return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
            == (that.m_data_->iteratordata_.CEpos 
                                        - that.m_data_->iteratordata_.CEs);
}

/**
* Get the ordering priority of the previous collation element in the string.
* @param status the error code status.
* @return the previous element's ordering. Returns NULLORDER if an error has 
*         occured or if the start of string has been reached.
*/
int32_t CollationElementIterator::previous(UErrorCode& status)
{
  return ucol_previous(m_data_, &status);
}

/**
* Resets the cursor to the beginning of the string.
*/
void CollationElementIterator::reset()
{
  ucol_reset(m_data_);
}

void CollationElementIterator::setOffset(int32_t newOffset, 
                                         UErrorCode& status)
{
  ucol_setOffset(m_data_, newOffset, &status);
}

/**
* Sets the source to the new source string.
*/
void CollationElementIterator::setText(const UnicodeString& source,
                                       UErrorCode& status)
{
  if (U_FAILURE(status)) {
    return;
  }

  int32_t length = source.length();
  UChar *string = NULL;
  if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
    uprv_free(m_data_->iteratordata_.string);
  }
  m_data_->isWritable = TRUE;
  if (length > 0) {
    string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    /* test for NULL */
    if (string == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    u_memcpy(string, source.getBuffer(), length);
  }
  else {
    string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    /* test for NULL */
    if (string == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    *string = 0;
  }
  uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, 
                   &m_data_->iteratordata_);

  m_data_->reset_   = TRUE;
}

// Sets the source to the new character iterator.
void CollationElementIterator::setText(CharacterIterator& source, 
                                       UErrorCode& status)
{
  if (U_FAILURE(status)) 
    return;
    
  int32_t length = source.getLength();
  UChar *buffer = NULL;

  if (length == 0) {
    buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    /* test for NULL */
    if (buffer == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    *buffer = 0;
  }
  else {
      buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
      /* test for NULL */
      if (buffer == NULL) {
          status = U_MEMORY_ALLOCATION_ERROR;
          return;
      }
      /* 
      Using this constructor will prevent buffer from being removed when
      string gets removed
      */
      UnicodeString string;
      source.getText(string);
      u_memcpy(buffer, string.getBuffer(), length);
  }
  
  if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
      uprv_free(m_data_->iteratordata_.string);
  }
  m_data_->isWritable = TRUE;
  uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, 
                   &m_data_->iteratordata_);
  m_data_->reset_   = TRUE;
}

int32_t CollationElementIterator::strengthOrder(int32_t order) const
{
  UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
  // Mask off the unwanted differences.
  if (s == UCOL_PRIMARY) {
      order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
  }
  else if (s == UCOL_SECONDARY) {
      order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
  }
    
  return order;
}

/* CollationElementIterator private constructors/destructors --------------- */

/** 
* This is the "real" constructor for this class; it constructs an iterator
* over the source text using the specified collator
*/
CollationElementIterator::CollationElementIterator(
                                               const UnicodeString& sourceText,
                                               const RuleBasedCollator* order,
                                               UErrorCode& status)
                                               : isDataOwned_(TRUE)
{
  if (U_FAILURE(status)) {
      return;
  }
 
  int32_t length = sourceText.length();
  UChar *string = NULL;
  
  if (length > 0) {
      string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
      /* test for NULL */
      if (string == NULL) {
          status = U_MEMORY_ALLOCATION_ERROR;
          return;
      }
      /* 
      Using this constructor will prevent buffer from being removed when
      string gets removed
      */
      u_memcpy(string, sourceText.getBuffer(), length);
  }
  else {
      string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
      /* test for NULL */
      if (string == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
      }
      *string = 0;
  }
  m_data_ = ucol_openElements(order->ucollator, string, length, &status);
  
  /* Test for buffer overflows */
  if (U_FAILURE(status)) {
    return;
  }
  m_data_->isWritable = TRUE;
}

/** 
* This is the "real" constructor for this class; it constructs an iterator over 
* the source text using the specified collator
*/
CollationElementIterator::CollationElementIterator(
                                           const CharacterIterator& sourceText,
                                           const RuleBasedCollator* order,
                                           UErrorCode& status)
                                           : isDataOwned_(TRUE)
{
  if (U_FAILURE(status))
    return;
    
  // **** should I just drop this test? ****
  /*
  if ( sourceText.endIndex() != 0 )
  {
    // A CollationElementIterator is really a two-layered beast.
    // Internally it uses a Normalizer to munge the source text into a form 
    // where all "composed" Unicode characters (such as �) are split into a 
    // normal character and a combining accent character.  
    // Afterward, CollationElementIterator does its own processing to handle
    // expanding and contracting collation sequences, ignorables, and so on.
    
    Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
                               ? Normalizer::NO_OP : order->getDecomposition();
      
    text = new Normalizer(sourceText, decomp);
    if (text == NULL)
      status = U_MEMORY_ALLOCATION_ERROR;    
  }
  */
  int32_t length = sourceText.getLength();
  UChar *buffer;
  if (length > 0) {
      buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
      /* test for NULL */
      if (buffer == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
      }
      /* 
      Using this constructor will prevent buffer from being removed when
      string gets removed
      */
      UnicodeString string(buffer, length, length);
      ((CharacterIterator &)sourceText).getText(string);
      const UChar *temp = string.getBuffer();
      u_memcpy(buffer, temp, length);
  }
  else {
      buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
      /* test for NULL */
      if (buffer == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
      }
      *buffer = 0;
  }
  m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);

  /* Test for buffer overflows */
  if (U_FAILURE(status)) {
    return;
  }
  m_data_->isWritable = TRUE;
}

/* CollationElementIterator protected methods ----------------------------- */

const CollationElementIterator& CollationElementIterator::operator=(
                                         const CollationElementIterator& other)
{
  if (this != &other)
  {
      UCollationElements *ucolelem      = this->m_data_;
      UCollationElements *otherucolelem = other.m_data_;
      collIterate        *coliter       = &(ucolelem->iteratordata_);
      collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
      int                length         = 0;
      
      // checking only UCOL_ITER_HASLEN is not enough here as we may be in 
      // the normalization buffer
      length = othercoliter->endp - othercoliter->string;
                                    
      ucolelem->reset_         = otherucolelem->reset_;
      ucolelem->isWritable     = TRUE;
    
      /* create a duplicate of string */
      if (length > 0) {
          coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
          if(coliter->string != NULL) {
            uprv_memcpy(coliter->string, othercoliter->string,
                        length * U_SIZEOF_UCHAR);
          } else { // Error: couldn't allocate memory. No copying should be done
            length = 0;
          }
      }
      else {
          coliter->string = NULL;
      }

      /* start and end of string */
      coliter->endp = coliter->string + length;

      /* handle writable buffer here */
      
      if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
          uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1;
          if (wlength < coliter->writableBufSize) {
              uprv_memcpy(coliter->stackWritableBuffer, 
                        othercoliter->stackWritableBuffer, 
                        othercoliter->writableBufSize * U_SIZEOF_UCHAR);
          }
          else {
              if (coliter->writableBuffer != coliter->stackWritableBuffer) {
                  uprv_free(coliter->writableBuffer);
              }
              coliter->writableBuffer = (UChar *)uprv_malloc(
                                         wlength * U_SIZEOF_UCHAR);
              if(coliter->writableBuffer != NULL) {
                uprv_memcpy(coliter->writableBuffer, 
                            othercoliter->writableBuffer,
                            wlength * U_SIZEOF_UCHAR);
                coliter->writableBufSize = wlength;
              } else { // Error: couldn't allocate memory for writableBuffer
                coliter->writableBufSize = 0;
              }
          }
      }

      /* current position */
      if (othercoliter->pos >= othercoliter->string && 
          othercoliter->pos <= othercoliter->endp) {
          coliter->pos = coliter->string + 
                        (othercoliter->pos - othercoliter->string);
      }
      else {
        coliter->pos = coliter->writableBuffer + 
                        (othercoliter->pos - othercoliter->writableBuffer);
      }

      /* CE buffer */
      uprv_memcpy(coliter->CEs, othercoliter->CEs, 
                  UCOL_EXPAND_CE_BUFFER_SIZE * sizeof(uint32_t));
      coliter->toReturn = coliter->CEs + 
                         (othercoliter->toReturn - othercoliter->CEs);
      coliter->CEpos    = coliter->CEs + 
                         (othercoliter->CEpos - othercoliter->CEs);
    
      if (othercoliter->fcdPosition != NULL) {
          coliter->fcdPosition = coliter->string + 
                                 (othercoliter->fcdPosition 
                                                    - othercoliter->string);
      }
      else {
          coliter->fcdPosition = NULL;
      }
      coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
      coliter->origFlags   = othercoliter->origFlags;
      coliter->coll = othercoliter->coll;
      this->isDataOwned_ = TRUE;
  }

  return *this;
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_COLLATION */

/* eof */