#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
#include "unicode/coleitr.h"
#include "unicode/unorm.h"
#include "unicode/udata.h"
#include "unicode/ustring.h"
#include "ucol_imp.h"
#include "bocsu.h"
#include "unormimp.h"
#include "unorm_it.h"
#include "umutex.h"
#include "cmemory.h"
#include "ucln_in.h"
#include "cstring.h"
#include "utracimp.h"
#include "putilimp.h"
#include "uassert.h"
#ifdef UCOL_DEBUG
#include <stdio.h>
#endif
U_NAMESPACE_USE
/* Shift/mask constants named for a staged table lookup; none of them is
 * referenced in the part of the file reviewed here. */
#define STAGE_1_SHIFT_ 10
#define STAGE_2_SHIFT_ 4
#define STAGE_2_MASK_AFTER_SHIFT_ 0x3F
#define STAGE_3_MASK_ 0xF
/* Low byte of a 16-bit FCD value; the FCD scanners below read it as the
 * trailing combining class. */
#define LAST_BYTE_MASK_ 0xFF
/* Shift that exposes the high byte of a 16-bit FCD value; the FCD scanners
 * below read it as the leading combining class. */
#define SECOND_LAST_BYTE_SHIFT_ 8
/* Code units below this value leave the FCD check in ucol_IGetNextCE() early. */
#define ZERO_CC_LIMIT_ 0xC0
/* Cached FCD trie index: fetched lazily in ucol_initCollator() and reset to
 * NULL by ucol_cleanup(). */
static const uint16_t *fcdTrieIndex=NULL;
/* NOTE(review): presumably the highest lead byte of non-implicit primaries;
 * not referenced in the reviewed part of the file -- confirm usage. */
static const int32_t maxRegularPrimary = 0xA0;
/* Lead-byte range handed to initImplicitConstants() by
 * uprv_uca_initImplicitConstants(). */
static const int32_t minImplicitPrimary = 0xE0;
static const int32_t maxImplicitPrimary = 0xE4;
U_CDECL_BEGIN
/*
 * Cleanup callback registered from ucol_initCollator().
 * Drops the cached FCD trie pointer so that the next collator
 * initialisation fetches it again. Always returns TRUE.
 */
static UBool U_CALLCONV
ucol_cleanup(void)
{
    const uint16_t *const noTrie = NULL;

    fcdTrieIndex = noTrie;
    return TRUE;
}
/*
 * Folding-offset callback installed on the collator's mapping trie:
 * returns the low 24 bits of the trie data word.
 */
static int32_t U_CALLCONV
_getFoldingOffset(uint32_t data) {
    const uint32_t offsetBits = data & 0xFFFFFF;
    return (int32_t)offsetBits;
}
U_CDECL_END
static
inline void IInit_collIterate(const UCollator *collator, const UChar *sourceString,
int32_t sourceLen, collIterate *s)
{
(s)->string = (s)->pos = (UChar *)(sourceString);
(s)->origFlags = 0;
(s)->flags = 0;
if (sourceLen >= 0) {
s->flags |= UCOL_ITER_HASLEN;
(s)->endp = (UChar *)sourceString+sourceLen;
}
else {
(s)->endp = NULL;
}
(s)->extendCEs = NULL;
(s)->extendCEsSize = 0;
(s)->CEpos = (s)->toReturn = (s)->CEs;
(s)->offsetBuffer = NULL;
(s)->offsetBufferSize = 0;
(s)->offsetReturn = (s)->offsetStore = NULL;
(s)->offsetRepeatCount = (s)->offsetRepeatValue = 0;
(s)->writableBuffer = (s)->stackWritableBuffer;
(s)->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE;
(s)->coll = (collator);
(s)->fcdPosition = 0;
if(collator->normalizationMode == UCOL_ON) {
(s)->flags |= UCOL_ITER_NORM;
}
if(collator->hiraganaQ == UCOL_ON && collator->strength >= UCOL_QUATERNARY) {
(s)->flags |= UCOL_HIRAGANA_Q;
}
(s)->iterator = NULL;
}
/*
 * Exported entry point for initialising a collIterate; forwards its
 * arguments unchanged to the file-local inline IInit_collIterate().
 */
U_CAPI void U_EXPORT2
uprv_init_collIterate(const UCollator *collator, const UChar *sourceString,
                      int32_t sourceLen, collIterate *s)
{
    IInit_collIterate(collator, sourceString, sourceLen, s);
}
/*
 * Records the current position of a collIterate into backup so that
 * loadState() can return to it later.
 *
 * When a character iterator is attached and it cannot report a state
 * (UITER_NO_STATE), the iterator is stepped backwards one unit at a time
 * until it can; the number of steps is stored in backup->iteratorMove and
 * the iterator is then moved forward again by that amount.
 */
static
inline void backupState(const collIterate *data, collIterateState *backup)
{
    /* Plain field snapshot. */
    backup->fcdPosition   = data->fcdPosition;
    backup->flags         = data->flags;
    backup->origFlags     = data->origFlags;
    backup->pos           = data->pos;
    backup->bufferaddress = data->writableBuffer;
    backup->buffersize    = data->writableBufSize;
    backup->iteratorMove  = 0;
    backup->iteratorIndex = 0;

    if (data->iterator == NULL) {
        return;
    }

    backup->iteratorIndex = data->iterator->getState(data->iterator);
    if (backup->iteratorIndex != UITER_NO_STATE) {
        return;
    }

    /* Walk back until the iterator is able to report a restorable state. */
    for (;;) {
        backup->iteratorIndex = data->iterator->getState(data->iterator);
        if (backup->iteratorIndex != UITER_NO_STATE) {
            break;
        }
        backup->iteratorMove++;
        data->iterator->move(data->iterator, -1, UITER_CURRENT);
    }
    /* Undo the backward walk so the iterator ends where it started. */
    data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
}
/*
 * Restores a collIterate to a position previously saved by backupState().
 *
 * data     - iterator to rewind
 * backup   - snapshot to restore from
 * forwards - TRUE when iterating forwards; selects how a position inside the
 *            normalization buffer is re-based if the buffer has been replaced
 *            since the snapshot was taken
 */
static
inline void loadState(collIterate *data, const collIterateState *backup,
                      UBool forwards)
{
    UErrorCode status = U_ZERO_ERROR;

    data->flags     = backup->flags;
    data->origFlags = backup->origFlags;

    if (data->iterator != NULL) {
        data->iterator->setState(data->iterator, backup->iteratorIndex, &status);
        if (backup->iteratorMove != 0) {
            /* Re-apply the offset from the state that could be saved. */
            data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
        }
    }

    data->pos = backup->pos;

    if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
        /* Only restored when the snapshot was taken outside the normalization buffer. */
        data->fcdPosition = backup->fcdPosition;
        return;
    }

    if (data->writableBuffer == backup->bufferaddress) {
        /* Same buffer as at backup time: the saved pointer is still valid. */
        return;
    }

    /* The buffer was replaced: translate the saved pointer into the new one. */
    if (forwards) {
        /* Keep the same distance from the start of the buffer. */
        data->pos = data->writableBuffer +
                    (data->pos - backup->bufferaddress);
    }
    else {
        /* Keep the same distance from the end of the buffer. */
        uint32_t temp = backup->buffersize -
                        (data->pos - backup->bufferaddress);
        data->pos = data->writableBuffer + (data->writableBufSize - temp);
    }
}
/*
 * Reports whether the iterator has reached the end of its input.
 *
 * Covers the iterator-backed source, the length-delimited string and the
 * NUL-terminated string; for a NUL inside the normalization buffer the
 * answer depends on where the original input (origFlags / fcdPosition) stands.
 */
static
inline UBool collIter_eos(collIterate *s) {
    if (s->flags & UCOL_USE_ITERATOR) {
        return !(s->iterator->hasNext(s->iterator));
    }

    if (s->flags & UCOL_ITER_HASLEN) {
        /* Length-delimited text: compare against the end pointer only. */
        return (s->pos == s->endp);
    }

    /* NUL-terminated from here on. */
    if (*s->pos != 0) {
        return FALSE;
    }
    if ((s->flags & UCOL_ITER_INNORMBUF) == 0) {
        /* NUL in the source string itself. */
        return TRUE;
    }

    /* NUL at the end of the normalization buffer: look at the original input. */
    if (s->origFlags & UCOL_USE_ITERATOR) {
        return !(s->iterator->hasNext(s->iterator));
    }
    if ((s->origFlags & UCOL_ITER_HASLEN) == 0) {
        return (*s->fcdPosition == 0);
    }
    return s->fcdPosition == s->endp;
}
/*
 * Reports whether the iterator is at the beginning of its input.
 *
 * If either the current or the saved flags mark an iterator-backed source,
 * the iterator decides. Otherwise the position must be at or before the
 * start of the string, or sit just after a NUL in the normalization buffer
 * while fcdPosition is NULL.
 */
static
inline UBool collIter_bos(collIterate *source) {
    if ((source->flags & UCOL_USE_ITERATOR) || (source->origFlags & UCOL_USE_ITERATOR)) {
        return !source->iterator->hasPrevious(source->iterator);
    }

    if (source->pos <= source->string) {
        return TRUE;
    }

    if ((source->flags & UCOL_ITER_INNORMBUF) &&
        *(source->pos - 1) == 0 &&
        source->fcdPosition == NULL) {
        return TRUE;
    }

    return FALSE;
}
/*
 * Releases the normalization buffer if it is not the iterator's embedded
 * stack buffer. The writableBuffer pointer itself is left untouched.
 */
static
inline void freeHeapWritableBuffer(collIterate *data)
{
    if (data->writableBuffer == data->stackWritableBuffer) {
        return;  /* embedded storage: nothing to release */
    }
    uprv_free(data->writableBuffer);
}
/*
 * Builds a collator from a binary image.
 *
 * bin     - binary collation image (starts with a UCATableHeader)
 * length  - size of the image; compared as unsigned against the size of a
 *           header plus option set to decide whether the image carries data
 * base    - collator whose UCA/UCD versions must match the image; may be NULL
 * fillIn  - storage to initialise, or NULL to let ucol_initCollator() allocate
 * status  - in/out error code
 *
 * Returns the initialised collator, or NULL with *status set to
 * U_COLLATOR_VERSION_MISMATCH (version check failed),
 * U_USELESS_COLLATOR_ERROR (options-only image without a base) or whatever
 * failure the helpers reported. The image is never owned by the result
 * (freeImageOnClose is FALSE).
 */
static UCollator*
ucol_initFromBinary(const uint8_t *bin, int32_t length,
                    const UCollator *base,
                    UCollator *fillIn,
                    UErrorCode *status)
{
    UCollator *result = fillIn;

    if (U_FAILURE(*status)) {
        return NULL;
    }

    uprv_uca_initImplicitConstants(status);

    UCATableHeader *colData = (UCATableHeader *)bin;

    /* The image must agree with the base collator's UCA and UCD versions ... */
    if (base) {
        if (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
            uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersionInfo)) != 0) {
            *status = U_COLLATOR_VERSION_MISMATCH;
            return NULL;
        }
    }
    /* ... and with the builder version this code was compiled for. */
    if (colData->version[0] != UCOL_BUILDER_VERSION) {
        *status = U_COLLATOR_VERSION_MISMATCH;
        return NULL;
    }

    if ((uint32_t)length > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
        /* Larger than header + options: the image has its own tables. */
        result = ucol_initCollator((const UCATableHeader *)bin, result, base, status);
        if (U_FAILURE(*status)) {
            return NULL;
        }
        result->hasRealData = TRUE;
    }
    else if (base) {
        /* Options-only image: take the tables from the base, the options from the image. */
        result = ucol_initCollator(base->image, result, base, status);
        ucol_setOptionsFromHeader(result, (UColOptionSet *)(bin+((const UCATableHeader *)bin)->options), status);
        if (U_FAILURE(*status)) {
            return NULL;
        }
        result->hasRealData = FALSE;
    }
    else {
        *status = U_USELESS_COLLATOR_ERROR;
        return NULL;
    }

    result->freeImageOnClose = FALSE;

    /* No locale or rule information is attached by this function. */
    result->actualLocale = NULL;
    result->validLocale = NULL;
    result->requestedLocale = NULL;
    result->rules = NULL;
    result->rulesLength = 0;
    result->freeRulesOnClose = FALSE;
    result->ucaRules = NULL;
    return result;
}
/*
 * Opens a collator from a binary image. Thin wrapper around
 * ucol_initFromBinary() that passes no fill-in storage, so the collator
 * object is allocated by the callee.
 */
U_CAPI UCollator* U_EXPORT2
ucol_openBinary(const uint8_t *bin, int32_t length,
                const UCollator *base,
                UErrorCode *status)
{
    UCollator *const noFillIn = NULL;
    return ucol_initFromBinary(bin, length, base, noFillIn, status);
}
/*
 * Writes the binary form of a collator into buffer.
 *
 * coll     - collator to serialise
 * buffer   - destination; only written when the image fits in capacity
 * capacity - size of buffer in bytes; negative values are rejected with
 *            U_ILLEGAL_ARGUMENT_ERROR
 * status   - in/out error code; U_BUFFER_OVERFLOW_ERROR when capacity is too small
 *
 * Returns the number of bytes required (also on overflow), or 0 when the
 * call was rejected up front. A collator with real data is copied verbatim;
 * otherwise a minimal header followed by the option set is produced.
 */
U_CAPI int32_t U_EXPORT2
ucol_cloneBinary(const UCollator *coll,
                 uint8_t *buffer, int32_t capacity,
                 UErrorCode *status)
{
    int32_t length = 0;

    if (U_FAILURE(*status)) {
        return length;
    }
    if (capacity < 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return length;
    }

    const UBool realData = (coll->hasRealData == TRUE);
    if (realData) {
        length = coll->image->size;
    } else {
        length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
    }

    if (length > capacity) {
        /* Still report the required size to the caller. */
        *status = U_BUFFER_OVERFLOW_ERROR;
        return length;
    }

    if (realData) {
        uprv_memcpy(buffer, coll->image, length);
        return length;
    }

    /* Options-only collator: build a header with just the fields set below. */
    uprv_memset(buffer, 0, length);
    UCATableHeader *header = (UCATableHeader *)buffer;
    header->size = length;
    header->options = sizeof(UCATableHeader);
    header->expansion = header->options + sizeof(UColOptionSet);
    header->magic = UCOL_HEADER_MAGIC;
    header->isBigEndian = U_IS_BIG_ENDIAN;
    header->charSetFamily = U_CHARSET_FAMILY;
    uprv_memcpy(header->version, coll->image->version, sizeof(UVersionInfo));
    uprv_memcpy(header->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
    uprv_memcpy(header->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
    uprv_memcpy(header->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
    header->jamoSpecial = coll->image->jamoSpecial;
    /* The option set follows the padded header. */
    uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));

    return length;
}
/*
 * Clones a collator, placing the copy in caller-supplied storage when it is
 * large enough and on the heap otherwise.
 *
 * coll        - collator to clone; must not be NULL
 * stackBuffer - optional caller storage for the clone; may be NULL
 * pBufferSize - size of stackBuffer in bytes; required when stackBuffer is
 *               non-NULL. If stackBuffer is non-NULL and *pBufferSize <= 0 the
 *               call only preflights: the required size is stored in
 *               *pBufferSize and NULL is returned.
 * status      - in/out error code. Set to U_SAFECLONE_ALLOCATED_WARNING when
 *               the clone had to be heap-allocated, U_ILLEGAL_ARGUMENT_ERROR
 *               for bad arguments, U_MEMORY_ALLOCATION_ERROR on allocation failure.
 *
 * Returns the clone, or NULL on preflight or failure. Rules that the source
 * owns (freeRulesOnClose) are copied into the same memory block, directly
 * after the UCollator struct. The binary image is duplicated only when the
 * source owns its image; otherwise the image pointer is shared.
 *
 * On every failure path the memory allocated by this function is released
 * again before returning.
 */
U_CAPI UCollator* U_EXPORT2
ucol_safeClone(const UCollator *coll, void *stackBuffer, int32_t * pBufferSize, UErrorCode *status)
{
    UCollator * localCollator;
    int32_t bufferSizeNeeded = (int32_t)sizeof(UCollator);
    char *stackBufferChars = (char *)stackBuffer;
    int32_t imageSize = 0;
    int32_t rulesSize = 0;
    int32_t rulesPadding = 0;
    uint8_t *image;
    UChar *rules;
    UBool colAllocated = FALSE;
    UBool imageAllocated = FALSE;

    if (status == NULL || U_FAILURE(*status)){
        return NULL;
    }
    if ((stackBuffer && !pBufferSize) || !coll){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /* Owned rules travel with the clone, stored right behind the struct. */
    if (coll->rules && coll->freeRulesOnClose) {
        rulesSize = (int32_t)(coll->rulesLength + 1)*sizeof(UChar);
        rulesPadding = (int32_t)(bufferSizeNeeded % sizeof(UChar));
        bufferSizeNeeded += rulesSize + rulesPadding;
    }

    /* Preflight request: report the size needed and do nothing else. */
    if (stackBuffer && *pBufferSize <= 0){
        *pBufferSize = bufferSizeNeeded;
        return NULL;
    }

    /* Skip forward to an aligned address inside the caller's buffer. */
    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
        if (*pBufferSize > offsetUp) {
            *pBufferSize -= offsetUp;
            stackBufferChars += offsetUp;
        }
        else {
            /* Too small to align: force heap allocation, but keep the size
             * positive so this is not mistaken for a preflight. */
            *pBufferSize = 1;
        }
    }
    stackBuffer = (void *)stackBufferChars;

    if (stackBuffer == NULL || *pBufferSize < bufferSizeNeeded) {
        stackBufferChars = (char *)uprv_malloc(bufferSizeNeeded);
        if (stackBufferChars == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        colAllocated = TRUE;
        if (U_SUCCESS(*status)) {
            *status = U_SAFECLONE_ALLOCATED_WARNING;
        }
    }
    localCollator = (UCollator *)stackBufferChars;
    rules = (UChar *)(stackBufferChars + sizeof(UCollator) + rulesPadding);

    {
        /* Preflight the image size; the overflow status is expected and discarded. */
        UErrorCode tempStatus = U_ZERO_ERROR;
        imageSize = ucol_cloneBinary(coll, NULL, 0, &tempStatus);
    }

    if (coll->freeImageOnClose) {
        image = (uint8_t *)uprv_malloc(imageSize);
        if (image == NULL) {
            /* Do not leak the collator block allocated above. */
            if (colAllocated) {
                uprv_free(stackBufferChars);
            }
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        ucol_cloneBinary(coll, image, imageSize, status);
        imageAllocated = TRUE;
    }
    else {
        image = (uint8_t *)coll->image;
    }

    localCollator = ucol_initFromBinary(image, imageSize, coll->UCA, localCollator, status);
    if (U_FAILURE(*status)) {
        /* The callee never frees fill-in storage, so release what was allocated here. */
        if (imageAllocated) {
            uprv_free(image);
        }
        if (colAllocated) {
            uprv_free(stackBufferChars);
        }
        return NULL;
    }

    if (coll->rules) {
        if (coll->freeRulesOnClose) {
            localCollator->rules = u_strcpy(rules, coll->rules);
        }
        else {
            localCollator->rules = coll->rules;
        }
        /* Copied rules live inside the clone's own block, so they are never freed separately. */
        localCollator->freeRulesOnClose = FALSE;
        localCollator->rulesLength = coll->rulesLength;
    }

    /* Carry over every attribute value from the source collator. */
    for (int32_t i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
        ucol_setAttribute(localCollator, (UColAttribute)i, ucol_getAttribute(coll, (UColAttribute)i, status), status);
    }

    localCollator->actualLocale = NULL;
    localCollator->validLocale = NULL;
    localCollator->requestedLocale = NULL;
    localCollator->ucaRules = coll->ucaRules;
    localCollator->freeOnClose = colAllocated;
    localCollator->freeImageOnClose = imageAllocated;
    return localCollator;
}
/*
 * Closes a collator and releases what it owns.
 *
 * Locale strings and the Latin-1 CE table are freed whenever they are set.
 * Options, rules and image are freed only when the matching free...OnClose
 * flag is set. The UCollator struct itself is freed only when freeOnClose
 * is set. A NULL collator is accepted and ignored.
 */
U_CAPI void U_EXPORT2
ucol_close(UCollator *coll)
{
    UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
    UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
    if (coll != NULL) {
        /* Freed whenever present. */
        if (coll->validLocale) {
            uprv_free(coll->validLocale);
        }
        if (coll->actualLocale) {
            uprv_free(coll->actualLocale);
        }
        if (coll->requestedLocale) {
            uprv_free(coll->requestedLocale);
        }
        if (coll->latinOneCEs) {
            uprv_free(coll->latinOneCEs);
        }

        /* Freed only when this collator is flagged as the owner. */
        if (coll->freeOptionsOnClose && coll->options != NULL) {
            uprv_free(coll->options);
        }
        if (coll->freeRulesOnClose && coll->rules != NULL) {
            uprv_free((UChar *)coll->rules);
        }
        if (coll->freeImageOnClose && coll->image != NULL) {
            uprv_free((UCATableHeader *)coll->image);
        }

        UTRACE_DATA1(UTRACE_INFO, "coll->freeOnClose: %d", coll->freeOnClose);
        /* The struct itself is freed last, and only when flagged. */
        if (coll->freeOnClose) {
            uprv_free(coll);
        }
    }
    UTRACE_EXIT();
}
/*
 * Returns a newly allocated copy of the collator's binary data.
 *
 * coll   - collator to serialise
 * length - receives the size of the returned block in bytes (it is written
 *          before the allocation is attempted)
 * status - in/out error code; U_MEMORY_ALLOCATION_ERROR when allocation fails
 *
 * Returns a block obtained from uprv_malloc(), or NULL on failure. A collator
 * with real data is copied verbatim; otherwise a minimal header followed by
 * the option set is produced.
 */
U_CFUNC uint8_t* U_EXPORT2
ucol_cloneRuleData(const UCollator *coll, int32_t *length, UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return NULL;
    }

    const UBool realData = (coll->hasRealData == TRUE);
    if (realData) {
        *length = coll->image->size;
    } else {
        *length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
    }

    uint8_t *result = (uint8_t *)uprv_malloc(*length);
    if (result == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    if (realData) {
        uprv_memcpy(result, coll->image, *length);
        return result;
    }

    /* Options-only collator: build a header with just the fields set below. */
    uprv_memset(result, 0, *length);
    UCATableHeader *header = (UCATableHeader *)result;
    header->size = *length;
    header->options = sizeof(UCATableHeader);
    header->expansion = header->options + sizeof(UColOptionSet);
    header->magic = UCOL_HEADER_MAGIC;
    header->isBigEndian = U_IS_BIG_ENDIAN;
    header->charSetFamily = U_CHARSET_FAMILY;
    uprv_memcpy(header->version, coll->image->version, sizeof(UVersionInfo));
    uprv_memcpy(header->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
    uprv_memcpy(header->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
    uprv_memcpy(header->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
    header->jamoSpecial = coll->image->jamoSpecial;
    /* The option set follows the padded header. */
    uprv_memcpy(result+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));

    return result;
}
/*
 * Loads all collation attributes of a collator from an option set.
 *
 * result - collator to update
 * opts   - option set to read; the collator keeps a pointer to it
 *          (result->options) after the call
 * status - in/out error code; the function does nothing if it already
 *          indicates failure
 *
 * Every attribute copied from opts is marked as being at its default value,
 * including alternateHandling, matching what ucol_initCollator() does for
 * the same set of attributes.
 */
void ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCode *status) {
    if(U_FAILURE(*status)) {
        return;
    }

    result->caseFirst = (UColAttributeValue)opts->caseFirst;
    result->caseLevel = (UColAttributeValue)opts->caseLevel;
    result->frenchCollation = (UColAttributeValue)opts->frenchCollation;
    result->normalizationMode = (UColAttributeValue)opts->normalizationMode;
    result->strength = (UColAttributeValue)opts->strength;
    result->variableTopValue = opts->variableTopValue;
    result->alternateHandling = (UColAttributeValue)opts->alternateHandling;
    result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ;
    result->numericCollation = (UColAttributeValue)opts->numericCollation;

    result->caseFirstisDefault = TRUE;
    result->caseLevelisDefault = TRUE;
    result->frenchCollationisDefault = TRUE;
    result->normalizationModeisDefault = TRUE;
    result->strengthisDefault = TRUE;
    result->variableTopValueisDefault = TRUE;
    /* Previously omitted: alternateHandling is loaded above like the others. */
    result->alternateHandlingisDefault = TRUE;
    result->hiraganaQisDefault = TRUE;
    result->numericCollationisDefault = TRUE;

    /* Recompute derived state from the attribute values just loaded. */
    ucol_updateInternalState(result, status);

    result->options = opts;
}
/*
 * Tests the collator's contrEndCP bit table for code unit c.
 *
 * Code units below coll->minContrEndCP are always FALSE. Code units beyond
 * the directly indexed part of the table are TRUE when they are trail
 * surrogates; otherwise they are folded into the table with
 * UCOL_UNSAFECP_TABLE_MASK plus an offset of 256.
 */
static
inline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) {
    if (c < coll->minContrEndCP) {
        return FALSE;
    }

    int32_t bitIndex = c;
    if (bitIndex >= UCOL_UNSAFECP_TABLE_SIZE*8) {
        if (U16_IS_TRAIL(c)) {
            return TRUE;
        }
        bitIndex = (bitIndex & UCOL_UNSAFECP_TABLE_MASK) + 256;
    }

    /* One bit per entry: byte = index / 8, bit = index % 8. */
    const uint8_t tableByte = coll->contrEndCP[bitIndex >> 3];
    const int32_t bitInByte = bitIndex & 7;
    return (((tableByte >> bitInByte) & 1) != 0);
}
/*
 * Returns the combining class of c, looking it up only when c is at or above
 * U+0300 and flagged by ucol_unsafeCP(), or when c lies above U+FFFF.
 * All other code points yield 0 without a lookup.
 */
static
inline uint8_t i_getCombiningClass(UChar32 c, const UCollator *coll) {
    if ((c >= 0x300 && ucol_unsafeCP(c, coll)) || c > 0xFFFF) {
        return u_getCombiningClass(c);
    }
    return 0;
}
/*
 * Initialises a UCollator on top of a binary collation image.
 *
 * image  - header of the binary image; all table pointers set below are
 *          computed as byte offsets from this address
 * fillIn - storage to initialise, or NULL to allocate a UCollator here
 * UCA    - collator stored as result->UCA
 * status - in/out error code
 *
 * Returns NULL when *status already indicates failure, when image is NULL
 * (status is left untouched in that case) or when allocation fails.
 * If unserialising the mapping trie fails, a collator allocated here is freed
 * and NULL is returned, but caller-provided fillIn storage is returned as-is;
 * callers must therefore check *status rather than the return value.
 */
UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, const UCollator *UCA, UErrorCode *status) {
UChar c;
UCollator *result = fillIn;
if(U_FAILURE(*status) || image == NULL) {
return NULL;
}
if(result == NULL) {
result = (UCollator *)uprv_malloc(sizeof(UCollator));
if(result == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return result;
}
/* Allocated here, so ucol_close() must free the struct. */
result->freeOnClose = TRUE;
} else {
/* Caller-owned storage: never freed by this module. */
result->freeOnClose = FALSE;
}
/* Fetch the FCD trie once and register the hook that resets the cached pointer. */
if (fcdTrieIndex == NULL) {
fcdTrieIndex = unorm_getFCDTrie(status);
ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
}
result->image = image;
result->mapping.getFoldingOffset = _getFoldingOffset;
/* The serialized trie spans from mappingPosition up to endExpansionCE. */
const uint8_t *mapping = (uint8_t*)result->image+result->image->mappingPosition;
utrie_unserialize(&result->mapping, mapping, result->image->endExpansionCE - result->image->mappingPosition, status);
if(U_FAILURE(*status)) {
/* Only release what was allocated above; fillIn storage is handed back untouched. */
if(result->freeOnClose == TRUE) {
uprv_free(result);
result = NULL;
}
return result;
}
result->latinOneMapping = UTRIE_GET32_LATIN1(&result->mapping);
/* Table pointers: each header field is a byte offset from the image start. */
result->contractionCEs = (uint32_t*)((uint8_t*)result->image+result->image->contractionCEs);
result->contractionIndex = (UChar*)((uint8_t*)result->image+result->image->contractionIndex);
result->expansion = (uint32_t*)((uint8_t*)result->image+result->image->expansion);
result->options = (UColOptionSet*)((uint8_t*)result->image+result->image->options);
/* The options live inside the image, so they are not freed separately. */
result->freeOptionsOnClose = FALSE;
/* Attribute values come straight from the image's option set ... */
result->caseFirst = (UColAttributeValue)result->options->caseFirst;
result->caseLevel = (UColAttributeValue)result->options->caseLevel;
result->frenchCollation = (UColAttributeValue)result->options->frenchCollation;
result->normalizationMode = (UColAttributeValue)result->options->normalizationMode;
result->strength = (UColAttributeValue)result->options->strength;
result->variableTopValue = result->options->variableTopValue;
result->alternateHandling = (UColAttributeValue)result->options->alternateHandling;
result->hiraganaQ = (UColAttributeValue)result->options->hiraganaQ;
result->numericCollation = (UColAttributeValue)result->options->numericCollation;
/* ... and are all flagged as defaults. */
result->caseFirstisDefault = TRUE;
result->caseLevelisDefault = TRUE;
result->frenchCollationisDefault = TRUE;
result->normalizationModeisDefault = TRUE;
result->strengthisDefault = TRUE;
result->variableTopValueisDefault = TRUE;
result->alternateHandlingisDefault = TRUE;
result->hiraganaQisDefault = TRUE;
result->numericCollationisDefault = TRUE;
result->rules = NULL;
result->rulesLength = 0;
result->freeRulesOnClose = FALSE;
/* Only the first two version bytes are taken from the image. */
result->dataVersion[0] = result->image->version[0];
result->dataVersion[1] = result->image->version[1];
result->dataVersion[2] = 0;
result->dataVersion[3] = 0;
result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP;
/* minUnsafeCP is zeroed before the scan (NOTE(review): presumably because
 * ucol_unsafeCP() compares against it -- its definition is not in view).
 * The scan finds the lowest code unit below 0x300 that is flagged, or 0x300 if none. */
result->minUnsafeCP = 0;
for (c=0; c<0x300; c++) { if (ucol_unsafeCP(c, result)) break;
}
result->minUnsafeCP = c;
result->contrEndCP = (uint8_t *)result->image + result->image->contrEndCP;
/* Same scan for the contraction-end table; ucol_contractionEndCP() compares
 * against minContrEndCP, so it must be 0 while scanning. */
result->minContrEndCP = 0;
for (c=0; c<0x300; c++) { if (ucol_contractionEndCP(c, result)) break;
}
result->minContrEndCP = c;
result->endExpansionCE = (uint32_t*)((uint8_t*)result->image +
result->image->endExpansionCE);
/* Points at the last entry of the endExpansionCE table. */
result->lastEndExpansionCE = result->endExpansionCE +
result->image->endExpansionCECount - 1;
result->expansionCESize = (uint8_t*)result->image +
result->image->expansionCESize;
/* The Latin-1 table pointer starts out NULL; nothing is built here. */
result->latinOneCEs = NULL;
result->latinOneRegenTable = FALSE;
result->latinOneFailed = FALSE;
result->UCA = UCA;
ucol_updateInternalState(result, status);
result->ucaRules = NULL;
result->actualLocale = NULL;
result->validLocale = NULL;
result->requestedLocale = NULL;
/* Callers such as ucol_initFromBinary() overwrite these flags as appropriate. */
result->hasRealData = FALSE; result->freeImageOnClose = FALSE;
return result;
}
/* Constants for the raw <-> code point remapping used for implicit CEs. */
static const UChar32
/* Added by swapCJK() to every code point outside the compacted CJK ranges. */
NON_CJK_OFFSET = 0x110000,
/* Upper bound for raw values: used to size the 4-byte form in
 * initImplicitConstants() and to range-check uprv_uca_getRawFromImplicit(). */
UCOL_MAX_INPUT = 0x220001;
/* Layout of implicit primary weights. All values start at 0 and are filled
 * in by initImplicitConstants(). */
static int32_t
final3Multiplier = 0,
final4Multiplier = 0,
final3Count = 0,
final4Count = 0,
medialCount = 0,
min3Primary = 0,
min4Primary = 0,
max4Primary = 0,
minTrail = 0,
maxTrail = 0,
max3Trail = 0,
max4Trail = 0,
min4Boundary = 0;
/* Code point ranges that swapCJK() treats specially; each LIMIT is one past
 * the last code point of its range. */
static const UChar32
CJK_BASE = 0x4E00,
CJK_LIMIT = 0x9FFF+1,
CJK_COMPAT_USED_BASE = 0xFA0E,
CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
CJK_A_BASE = 0x3400,
CJK_A_LIMIT = 0x4DBF+1,
CJK_B_BASE = 0x20000,
CJK_B_LIMIT = 0x2A6DF+1;
/*
 * Remaps a code point so that the CJK ranges come first:
 *   [CJK_BASE, CJK_LIMIT)                       -> starts at 0
 *   [CJK_COMPAT_USED_BASE, CJK_COMPAT_USED_LIMIT) -> follows the range above
 *   [CJK_A_BASE, CJK_A_LIMIT)                   -> follows the two ranges above
 *   [CJK_B_BASE, CJK_B_LIMIT)                   -> unchanged
 *   everything else                             -> shifted up by NON_CJK_OFFSET
 */
static UChar32 swapCJK(UChar32 i) {
    const UChar32 unifiedCount = CJK_LIMIT - CJK_BASE;
    const UChar32 compatCount  = CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE;

    if (i < CJK_BASE) {
        /* Below the main block only Extension A is compacted. */
        if (i >= CJK_A_BASE && i < CJK_A_LIMIT) {
            return i - CJK_A_BASE + unifiedCount + compatCount;
        }
        return i + NON_CJK_OFFSET;
    }

    if (i < CJK_LIMIT) {
        return i - CJK_BASE;
    }
    if (i >= CJK_COMPAT_USED_BASE && i < CJK_COMPAT_USED_LIMIT) {
        return i - CJK_COMPAT_USED_BASE + unifiedCount;
    }
    if (i >= CJK_B_BASE && i < CJK_B_LIMIT) {
        return i;
    }
    return i + NON_CJK_OFFSET;
}
/*
 * Converts a code point to its raw implicit value: the swapCJK() remapping
 * plus one.
 */
U_CAPI UChar32 U_EXPORT2
uprv_uca_getRawFromCodePoint(UChar32 i) {
    const UChar32 remapped = swapCJK(i);
    return remapped + 1;
}
/*
 * Converts a raw implicit value back into a code point.
 *
 * One is subtracted first, then the value is classified by range and the
 * matching offset is removed. Returns -1 for values that fall between the
 * compacted ranges and CJK_B_BASE.
 */
U_CAPI UChar32 U_EXPORT2
uprv_uca_getCodePointFromRaw(UChar32 i) {
    const UChar32 unifiedCount = CJK_LIMIT - CJK_BASE;
    const UChar32 compatCount  = CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE;

    i--;

    if (i >= NON_CJK_OFFSET) {
        /* Shifted non-CJK code point. */
        return i - NON_CJK_OFFSET;
    }
    if (i >= CJK_B_BASE) {
        /* Stored unchanged. */
        return i;
    }
    if (i >= CJK_A_LIMIT + unifiedCount + compatCount) {
        return -1;
    }

    /* Compacted ranges, in the order swapCJK() lays them out. */
    if (i < unifiedCount) {
        return i + CJK_BASE;
    }
    if (i < unifiedCount + compatCount) {
        return i + CJK_COMPAT_USED_BASE - unifiedCount;
    }
    return i + CJK_A_BASE - unifiedCount - compatCount;
}
/*
 * Encodes a raw implicit value as a 32-bit implicit primary weight.
 *
 * Values below min4Boundary use the three-byte form (lowest byte zero),
 * all others the four-byte form. The layout constants must have been set by
 * initImplicitConstants() beforehand; they start out as 0, in which case the
 * divisions below are by zero.
 *
 * The bytes are composed in unsigned arithmetic: the lead byte is
 * min3Primary/min4Primary plus an offset, and shifting such a value left by
 * 24 as a signed 32-bit integer overflows once it reaches 0x80 (the lead
 * bytes configured in this file start at 0xE0).
 */
U_CAPI uint32_t U_EXPORT2
uprv_uca_getImplicitFromRaw(UChar32 cp) {
    int32_t last0 = cp - min4Boundary;
    if (last0 < 0) {
        /* Three-byte form: lead byte, medial byte, spaced-out final byte. */
        int32_t last1 = cp / final3Count;
        last0 = cp % final3Count;

        int32_t last2 = last1 / medialCount;
        last1 %= medialCount;

        last0 = minTrail + last0*final3Multiplier;  /* leave gaps between final bytes */
        last1 = minTrail + last1;
        last2 = min3Primary + last2;
        return ((uint32_t)last2 << 24) + ((uint32_t)last1 << 16) + ((uint32_t)last0 << 8);
    } else {
        /* Four-byte form: lead byte, two medial bytes, spaced-out final byte. */
        int32_t last1 = last0 / final4Count;
        last0 %= final4Count;

        int32_t last2 = last1 / medialCount;
        last1 %= medialCount;

        int32_t last3 = last2 / medialCount;
        last2 %= medialCount;

        last0 = minTrail + last0*final4Multiplier;  /* leave gaps between final bytes */
        last1 = minTrail + last1;
        last2 = minTrail + last2;
        last3 = min4Primary + last3;
        return ((uint32_t)last3 << 24) + ((uint32_t)last2 << 16) + ((uint32_t)last1 << 8) + (uint32_t)last0;
    }
}
/*
 * Computes the implicit primary weight for a code point: remap with
 * swapCJK(), add one to obtain the raw value, then encode it.
 */
static uint32_t U_EXPORT2
uprv_uca_getImplicitPrimary(UChar32 cp) {
    const UChar32 raw = swapCJK(cp) + 1;
    return uprv_uca_getImplicitFromRaw(raw);
}
/*
 * Decodes a 32-bit implicit primary weight back into its raw value.
 *
 * Returns -1 when any byte lies outside the range allowed for its position,
 * when the final byte is not on a multiplier boundary, or when the decoded
 * value is negative or exceeds UCOL_MAX_INPUT. Relies on the layout constants
 * set by initImplicitConstants().
 */
U_CAPI UChar32 U_EXPORT2
uprv_uca_getRawFromImplicit(uint32_t implicit) {
    /* Split into bytes, most significant first. */
    UChar32 lead   = (implicit >> 24) & 0xFF;
    UChar32 second = (implicit >> 16) & 0xFF;
    UChar32 third  = (implicit >> 8) & 0xFF;
    UChar32 fourth = implicit & 0xFF;
    UChar32 result;

    if (lead < min3Primary || lead > max4Primary
        || second < minTrail || second > maxTrail) {
        return -1;
    }
    second -= minTrail;

    if (lead < min4Primary) {
        /* Three-byte form: the fourth byte must be zero. */
        if (third < minTrail || third > max3Trail || fourth != 0) {
            return -1;
        }
        third -= minTrail;
        if (third % final3Multiplier != 0) {
            return -1;
        }
        lead -= min3Primary;
        third /= final3Multiplier;
        result = ((lead * medialCount) + second) * final3Count + third;
    } else {
        /* Four-byte form. */
        if (third < minTrail || third > maxTrail
            || fourth < minTrail || fourth > max4Trail) {
            return -1;
        }
        third -= minTrail;
        fourth -= minTrail;
        if (fourth % final4Multiplier != 0) {
            return -1;
        }
        fourth /= final4Multiplier;
        lead -= min4Primary;
        result = (((lead * medialCount) + second) * medialCount + third) * final4Count + fourth + min4Boundary;
    }

    if (result < 0 || result > UCOL_MAX_INPUT) {
        return -1;
    }
    return result;
}
/*
 * Computes 1 + (a - 1) / b using integer division, i.e. a / b rounded up
 * for positive a and b.
 */
static inline int32_t divideAndRoundUp(int a, int b) {
    const int wholeSteps = (a - 1) / b;
    return wholeSteps + 1;
}
/*
 * Computes the file-level layout constants for implicit primary weights.
 *
 * minPrimary, maxPrimary  - inclusive range of lead bytes (0..0xFF, min < max)
 * minTrailIn, maxTrailIn  - inclusive range of trail bytes (0..0xFF, min < max)
 * gap3                    - number of unused values left between consecutive
 *                           final bytes of the three-byte form
 * primaries3count         - number of lead bytes reserved for the three-byte
 *                           form; at least one lead byte must remain for the
 *                           four-byte form
 * status                  - set to U_ILLEGAL_ARGUMENT_ERROR when the arguments
 *                           are out of range or leave no usable layout
 *
 * Argument combinations that would lead to a division by zero further down
 * (negative or oversized gap3, no lead byte left for the four-byte form, a
 * non-positive per-final-byte requirement) are rejected instead of evaluated.
 */
static void initImplicitConstants(int minPrimary, int maxPrimary,
                                  int minTrailIn, int maxTrailIn,
                                  int gap3, int primaries3count,
                                  UErrorCode *status) {
    if ((minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF)
        || (minTrailIn < 0 || minTrailIn >= maxTrailIn || maxTrailIn > 0xFF)
        || (primaries3count < 1)
        /* final3Multiplier must be positive and final3Count at least 1 */
        || (gap3 < 0 || gap3 > maxTrailIn - minTrailIn)
        /* primaries4count must be at least 1 */
        || (primaries3count >= maxPrimary - minPrimary + 1))
    {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    minTrail = minTrailIn;
    maxTrail = maxTrailIn;

    min3Primary = minPrimary;
    max4Primary = maxPrimary;

    /* Three-byte form: final bytes are spaced final3Multiplier apart. */
    final3Multiplier = gap3 + 1;
    final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
    max3Trail = minTrail + (final3Count - 1) * final3Multiplier;

    /* Medial bytes use the whole trail range. */
    medialCount = (maxTrail - minTrail + 1);

    int32_t threeByteCount = medialCount * final3Count;
    int32_t primariesAvailable = maxPrimary - minPrimary + 1;
    int32_t primaries4count = primariesAvailable - primaries3count;

    int32_t min3ByteCoverage = primaries3count * threeByteCount;
    min4Primary = minPrimary + primaries3count;
    min4Boundary = min3ByteCoverage;

    /* Four-byte form: spread what remains over the lead bytes that are left. */
    int32_t totalNeeded = UCOL_MAX_INPUT - min4Boundary;
    int32_t neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
    int32_t neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
    if (neededPerFinalByte < 1) {
        /* Would divide by zero (or a negative count) below. */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    int32_t gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
    if (gap4 < 1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    final4Multiplier = gap4 + 1;
    final4Count = neededPerFinalByte;
    max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
}
/*
 * Sets up the implicit-weight layout with this file's fixed configuration:
 * lead bytes minImplicitPrimary..maxImplicitPrimary, trail bytes 0x04..0xFE,
 * a gap of 1 in the three-byte form and one lead byte for three-byte weights.
 */
U_CAPI void U_EXPORT2
uprv_uca_initImplicitConstants(UErrorCode *status) {
    const int minTrailByte = 0x04;
    const int maxTrailByte = 0xFE;
    const int threeByteGap = 1;
    const int threeByteLeadCount = 1;

    initImplicitConstants(minImplicitPrimary, maxImplicitPrimary,
                          minTrailByte, maxTrailByte,
                          threeByteGap, threeByteLeadCount, status);
}
/*
 * Decomposes the text segment [pos - 1, fcdPosition) into the iterator's
 * writable buffer and switches the iterator to read from that buffer.
 *
 * If the buffer is too small it is grown once and the decomposition is
 * repeated. On an out-of-memory condition or a decomposition failure the
 * function returns without switching buffers (position and flags are left
 * unchanged).
 */
static
void collIterNormalize(collIterate *collationSource)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar *segmentStart = collationSource->pos - 1;
    UChar *segmentLimit = collationSource->fcdPosition;
    int32_t normLen;

    /* First attempt, using whatever buffer is currently attached. */
    normLen = unorm_decompose(collationSource->writableBuffer,
                              (int32_t)collationSource->writableBufSize,
                              segmentStart, (int32_t)(segmentLimit - segmentStart),
                              FALSE, 0,
                              &status);

    const UBool needsMoreRoom =
        (status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING);
    if (needsMoreRoom) {
        /* Grow to hold the result plus one extra unit, then try again. */
        if (!u_growBufferFromStatic(collationSource->stackWritableBuffer,
                                    &collationSource->writableBuffer,
                                    (int32_t *)&collationSource->writableBufSize, normLen + 1,
                                    0)) {
#ifdef UCOL_DEBUG
            fprintf(stderr, "collIterNormalize(), out of memory\n");
#endif
            return;
        }
        status = U_ZERO_ERROR;
        normLen = unorm_decompose(collationSource->writableBuffer,
                                  (int32_t)collationSource->writableBufSize,
                                  segmentStart, (int32_t)(segmentLimit - segmentStart),
                                  FALSE, 0,
                                  &status);
    }

    if (U_FAILURE(status)) {
#ifdef UCOL_DEBUG
        fprintf(stderr, "collIterNormalize(), unorm_decompose() failed, status = %s\n", u_errorName(status));
#endif
        return;
    }

    /* Remember that the buffer is no longer the embedded one. */
    if (collationSource->writableBuffer != collationSource->stackWritableBuffer) {
        collationSource->flags |= UCOL_ITER_ALLOCATED;
    }

    /* Continue reading from the start of the normalized text; the flags in
     * effect before the switch are kept in origFlags. */
    collationSource->pos = collationSource->writableBuffer;
    collationSource->origFlags = collationSource->flags;
    collationSource->flags |= UCOL_ITER_INNORMBUF;
    collationSource->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
}
/*
 * Forward FCD check starting at the code unit just read (pos - 1).
 *
 * Scans forward over following characters whose leading combining class is
 * non-zero and stores the position after the scanned run in fcdPosition.
 * Returns TRUE when some character's leading combining class is lower than
 * the trailing combining class of the character before it, i.e. when the
 * caller needs to normalize the segment.
 */
static
inline UBool collIterFCD(collIterate *collationSource) {
UChar c, c2;
const UChar *srcP, *endP;
uint8_t leadingCC;
uint8_t prevTrailingCC = 0;
uint16_t fcd;
UBool needNormalize = FALSE;
/* pos has already been advanced past the character under test. */
srcP = collationSource->pos-1;
/* endP == NULL means "no end pointer": the scan then relies on reaching a
 * code unit whose leading combining class is 0. */
if (collationSource->flags & UCOL_ITER_HASLEN) {
endP = collationSource->endp;
} else {
endP = NULL;
}
c = *srcP++;
fcd = unorm_getFCD16(fcdTrieIndex, c);
if (fcd != 0) {
if (U16_IS_LEAD(c)) {
/* Complete the surrogate pair if a trail unit follows; a lone lead counts as 0. */
if ((endP == NULL || srcP != endP) && U16_IS_TRAIL(c2=*srcP)) {
++srcP;
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c2);
} else {
fcd = 0;
}
}
/* Low byte of the FCD value: trailing combining class. */
prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
if (prevTrailingCC != 0) {
/* Walk forward while the following characters have a non-zero leading class. */
while (endP == NULL || srcP != endP)
{
const UChar *savedSrcP = srcP;
c = *srcP++;
fcd = unorm_getFCD16(fcdTrieIndex, c);
if (fcd != 0 && U16_IS_LEAD(c)) {
if ((endP == NULL || srcP != endP) && U16_IS_TRAIL(c2=*srcP)) {
++srcP;
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c2);
} else {
fcd = 0;
}
}
/* High byte of the FCD value: leading combining class. */
leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
if (leadingCC == 0) {
/* This character is not part of the run: back up over it (it may have been a pair). */
srcP = savedSrcP; break;
}
/* Combining classes out of order: the segment needs normalizing. */
if (leadingCC < prevTrailingCC) {
needNormalize = TRUE;
}
prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
}
}
}
/* Everything before this position has now been checked. */
collationSource->fcdPosition = (UChar *)srcP;
return needNormalize;
}
/* Forward declarations; the definitions are outside the reviewed part of the
 * file. getImplicit() is called by ucol_IGetNextCE() when the lookup yields
 * UCOL_NOT_FOUND. */
static uint32_t getImplicit(UChar32 cp, collIterate *collationSource);
static uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource);
/*
 * Returns the next collation element (CE) of the iterator, or
 * UCOL_NO_MORE_CES at the end of the input.
 *
 * Order of work:
 *   1. Hand out CEs that are still pending in the CE buffer.
 *   2. Fetch the next code unit from the current source (plain string,
 *      length-delimited string, character iterator or normalization buffer),
 *      running the FCD check / normalization when UCOL_ITER_NORM is set.
 *   3. Look the code unit up in the collator's tables (falling back to
 *      coll->UCA for units above 0xFF) and finally to getImplicit().
 */
static
inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
uint32_t order = 0;
/* Pending CEs from an earlier call are returned first. */
if (collationSource->CEpos > collationSource->toReturn) {
order = *(collationSource->toReturn++);
if(collationSource->CEpos == collationSource->toReturn) {
/* Buffer drained: rewind both cursors to its start. */
collationSource->CEpos = collationSource->toReturn = collationSource->extendCEs ? collationSource->extendCEs : collationSource->CEs;
}
return order;
}
UChar ch = 0;
collationSource->offsetReturn = NULL;
/* The loop restarts (via continue or by falling off the end) whenever the
 * source switches between the original text and the normalization buffer. */
for (;;)
{
/* Fast path: none of the special-mode flags is set, so the text is read
 * as a plain NUL-terminated string. */
if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)
{
ch = *collationSource->pos++;
if (ch != 0) {
break;
}
else {
return UCOL_NO_MORE_CES;
}
}
if (collationSource->flags & UCOL_ITER_HASLEN) {
/* Length-delimited string. */
if (collationSource->pos >= collationSource->endp) {
return UCOL_NO_MORE_CES;
}
ch = *collationSource->pos++;
}
else if(collationSource->flags & UCOL_USE_ITERATOR) {
/* Character-iterator source. */
UChar32 iterCh = collationSource->iterator->next(collationSource->iterator);
if(iterCh == U_SENTINEL) {
return UCOL_NO_MORE_CES;
}
ch = (UChar)iterCh;
}
else
{
/* NUL-terminated: either the original string or the normalization buffer. */
ch = *collationSource->pos++;
if (ch == 0) {
if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
/* End of the original string: step back onto the NUL. */
collationSource->pos--;
return UCOL_NO_MORE_CES;
}
else
{
/* A NUL as the very first unit of the normalization buffer is processed as a character. */
if (collationSource->pos == collationSource->writableBuffer+1) {
break;
}
/* End of the normalization buffer: resume in the original text at
 * fcdPosition with the flags saved in origFlags. */
collationSource->pos = collationSource->fcdPosition;
collationSource->flags = collationSource->origFlags;
continue;
}
}
}
if(collationSource->flags&UCOL_HIRAGANA_Q) {
/* Code units 0x3040..0x3096 and 0x309D..0x309F set UCOL_WAS_HIRAGANA;
 * 0x3099..0x309C keep it only if it is already set; everything else clears it. */
if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) ||
((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {
collationSource->flags |= UCOL_WAS_HIRAGANA;
} else {
collationSource->flags &= ~UCOL_WAS_HIRAGANA;
}
}
/* No normalization requested: use the code unit as it is. */
if ((collationSource->flags & UCOL_ITER_NORM) == 0) {
break;
}
/* An earlier FCD check already covered this position. */
if (collationSource->fcdPosition >= collationSource->pos) {
break;
}
/* Code units below ZERO_CC_LIMIT_ skip the FCD check. */
if (ch < ZERO_CC_LIMIT_ ) {
break;
}
/* NOTE(review): NFC_ZERO_CC_BLOCK_LIMIT_ is defined outside this file section;
 * below it the decision is made by peeking at the following code unit. */
if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) {
/* Last code unit of a length-delimited string: nothing can follow. */
break;
}
if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {
break;
}
}
/* Full FCD check; normalize the segment if it reports a problem. */
if (collIterFCD(collationSource)) {
collIterNormalize(collationSource);
}
if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
/* Not switched to the normalization buffer: process ch itself. */
break;
}
/* Otherwise loop again to read the first unit of the normalization buffer. */
}
if (ch <= 0xFF) {
/* Latin-1 range: looked up in latinOneMapping only, without a UCA fallback. */
order = coll->latinOneMapping[ch];
if (order > UCOL_NOT_FOUND) {
order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);
}
}
else
{
order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
/* Values above UCOL_NOT_FOUND are special CEs that need further resolution. */
if(order > UCOL_NOT_FOUND) {
order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);
}
/* Nothing in this collator's own table: try the UCA collator. */
if(order == UCOL_NOT_FOUND && coll->UCA) {
order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
if(order > UCOL_NOT_FOUND) {
order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);
}
}
}
/* Still not found: fall back to getImplicit(). */
if(order == UCOL_NOT_FOUND) {
order = getImplicit(ch, collationSource);
}
return order;
}
/*
 * Exported wrapper around the file-local inline ucol_IGetNextCE(); returns
 * the next collation element or UCOL_NO_MORE_CES.
 */
U_CAPI uint32_t U_EXPORT2
ucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
    const uint32_t nextOrder = ucol_IGetNextCE(coll, collationSource, status);
    return nextOrder;
}
/*
 * Backward-iteration counterpart of collIterNormalize().
 *
 * Normalizes (NFD) the segment that ends at data->pos and starts just after
 * fcdPosition (or at the start of the string when fcdPosition is NULL),
 * places the result at the END of the writable buffer with a NUL directly in
 * front of it, records source offsets for the normalized units in the offset
 * buffer, and switches the iterator to read backwards from the buffer end.
 *
 * If any allocation fails, the function returns without switching to the
 * normalization buffer (position and mode flags are left as they were).
 * This now also covers the offset buffer: previously a failed malloc was
 * written through, and a failed realloc fell through to writes that assumed
 * the enlarged buffer.
 */
static
void collPrevIterNormalize(collIterate *data)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar *pEnd = data->pos;
    UChar *pStart;
    uint32_t normLen;
    UChar *pStartNorm;

    if (data->fcdPosition == NULL) {
        pStart = data->string;
    }
    else {
        pStart = data->fcdPosition + 1;
    }

    /* Preflight with capacity 0 to learn the normalized length. */
    normLen = unorm_normalize(pStart, (pEnd - pStart) + 1, UNORM_NFD, 0,
                              data->writableBuffer, 0, &status);

    if (data->writableBufSize <= normLen) {
        freeHeapWritableBuffer(data);
        data->writableBuffer = (UChar *)uprv_malloc((normLen + 1) *
                                                    sizeof(UChar));
        if(data->writableBuffer == NULL) {
            data->writableBufSize = 0;
            return;
        }
        data->flags |= UCOL_ITER_ALLOCATED;
        /* One extra unit for the NUL placed in front of the text. */
        data->writableBufSize = normLen + 1;
    }
    status = U_ZERO_ERROR;

    /* The text goes to the end of the buffer, preceded by a NUL. */
    pStartNorm = data->writableBuffer + (data->writableBufSize - normLen);
    *(pStartNorm - 1) = 0;
    unorm_normalize(pStart, (pEnd - pStart) + 1, UNORM_NFD, 0, pStartNorm,
                    normLen, &status);

    if (data->offsetBuffer == NULL) {
        int32_t len = normLen >= UCOL_EXPAND_CE_BUFFER_SIZE ? normLen + 1 : UCOL_EXPAND_CE_BUFFER_SIZE;

        data->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * len);
        if (data->offsetBuffer == NULL) {
            /* Out of memory: leave the offset buffer empty and do not switch. */
            data->offsetBufferSize = 0;
            data->offsetStore = NULL;
            return;
        }
        data->offsetBufferSize = len;
        data->offsetStore = data->offsetBuffer;
    } else if(data->offsetBufferSize < (int32_t) normLen) {
        int32_t storeIX = data->offsetStore - data->offsetBuffer;
        int32_t *tob = (int32_t *) uprv_realloc(data->offsetBuffer, sizeof(int32_t) * (normLen + 1));

        if (tob == NULL) {
            /* The old buffer stays valid but is too small for the writes below. */
            return;
        }
        data->offsetBuffer = tob;
        data->offsetStore = &data->offsetBuffer[storeIX];
        data->offsetBufferSize = normLen + 1;
    }

    /*
     * Offsets written, in order: the base character's offset (only when
     * fcdPosition is set), the first mark's offset, then trailCount copies
     * of the offset just past the normalized segment.
     */
    int32_t firstMarkOffset = 0;
    int32_t trailOffset = data->pos - data->string + 1;
    int32_t trailCount = normLen - 1;

    if (data->fcdPosition != NULL) {
        int32_t baseOffset = data->fcdPosition - data->string;
        UChar baseChar = *data->fcdPosition;

        firstMarkOffset = baseOffset + 1;

        /* If the base character maps to a contraction CE, the first mark gets
         * the trailing offset as well. */
        if (baseChar >= 0x100) {
            uint32_t baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->mapping, baseChar);

            if (baseOrder == UCOL_NOT_FOUND && data->coll->UCA) {
                baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->UCA->mapping, baseChar);
            }

            if (baseOrder > UCOL_NOT_FOUND && getCETag(baseOrder) == CONTRACTION_TAG) {
                firstMarkOffset = trailOffset;
            }
        }

        *(data->offsetStore++) = baseOffset;
    }

    *(data->offsetStore++) = firstMarkOffset;

    for (int32_t i = 0; i < trailCount; i += 1) {
        *(data->offsetStore++) = trailOffset;
    }

    data->offsetRepeatValue = trailOffset;

    data->offsetReturn = data->offsetStore - 1;
    if (data->offsetReturn == data->offsetBuffer) {
        data->offsetStore = data->offsetBuffer;
    }

    /* Read backwards from the end of the buffer; the flags in effect before
     * the switch are kept in origFlags. */
    data->pos = data->writableBuffer + data->writableBufSize;
    data->origFlags = data->flags;
    data->flags |= UCOL_ITER_INNORMBUF;
    data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
}
/*
 * Backward FCD check starting at the code unit at data->pos.
 *
 * Scans backwards over preceding characters whose trailing combining class is
 * non-zero. fcdPosition is set to the character where the scan stopped, or
 * to NULL when the scan ran into the start of the string. Returns TRUE when
 * some character's leading combining class is lower than the trailing
 * combining class of the character before it, i.e. when the caller needs to
 * normalize the segment.
 */
static
inline UBool collPrevIterFCD(collIterate *data)
{
const UChar *src, *start;
UChar c, c2;
uint8_t leadingCC;
uint8_t trailingCC = 0;
uint16_t fcd;
UBool result = FALSE;
start = data->string;
/* Start one past pos so that the pre-decrement below reads the unit at pos. */
src = data->pos + 1;
c = *--src;
if (!U16_IS_SURROGATE(c)) {
fcd = unorm_getFCD16(fcdTrieIndex, c);
} else if (U16_IS_TRAIL(c) && start < src && U16_IS_LEAD(c2 = *(src - 1))) {
/* Trail surrogate preceded by its lead: look the pair up as one character. */
--src;
fcd = unorm_getFCD16(fcdTrieIndex, c2);
if (fcd != 0) {
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c);
}
} else {
/* Unpaired surrogate: counts as 0. */
fcd = 0;
}
/* High byte of the FCD value: leading combining class. */
leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
if (leadingCC != 0) {
/* Walk backwards until a character with trailing combining class 0 is found. */
for (;;)
{
if (start == src) {
/* Reached the start of the string without finding one. */
data->fcdPosition = NULL;
return result;
}
c = *--src;
if (!U16_IS_SURROGATE(c)) {
fcd = unorm_getFCD16(fcdTrieIndex, c);
} else if (U16_IS_TRAIL(c) && start < src && U16_IS_LEAD(c2 = *(src - 1))) {
--src;
fcd = unorm_getFCD16(fcdTrieIndex, c2);
if (fcd != 0) {
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c);
}
} else {
fcd = 0;
}
/* Low byte of the FCD value: trailing combining class. */
trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
if (trailingCC == 0) {
break;
}
/* Combining classes out of order: the segment needs normalizing. */
if (leadingCC < trailingCC) {
result = TRUE;
}
leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
}
}
/* Remember where the scan stopped. */
data->fcdPosition = (UChar *)src;
return result;
}
/**
 * Returns the code unit at `offset` relative to the current position without
 * changing the position.
 *
 * Reads through source->pos when it is set; otherwise uses source->iterator,
 * moving it and then moving it back. Returns (UChar)U_SENTINEL when neither
 * a pointer nor an iterator is available.
 */
inline static
UChar peekCharacter(collIterate *source, int32_t offset) {
    if (source->pos != NULL) {
        return source->pos[offset];
    }

    if (source->iterator == NULL) {
        return (UChar)U_SENTINEL;
    }

    if (offset == 0) {
        return (UChar)source->iterator->current(source->iterator);
    }

    /* Move to the requested unit, read it (which advances by one), then undo
       both movements. */
    source->iterator->move(source->iterator, offset, UITER_CURRENT);
    UChar peeked = (UChar)source->iterator->next(source->iterator);
    source->iterator->move(source->iterator, -offset-1, UITER_CURRENT);
    return peeked;
}
/**
 * Tells whether backward iteration has reached the beginning of the text.
 *
 * With no position pointer but an iterator, asks the iterator. Otherwise the
 * start is reached when pos equals the string start, or when iterating in the
 * normalization buffer, the unit before pos is NUL and fcdPosition is NULL.
 */
static
inline UBool isAtStartPrevIterate(collIterate *data) {
    if (data->pos == NULL && data->iterator != NULL) {
        return !data->iterator->hasPrevious(data->iterator);
    }

    if (data->pos == data->string) {
        return TRUE;
    }

    if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
        return FALSE;
    }

    return *(data->pos - 1) == 0 && data->fcdPosition == NULL;
}
/**
 * Moves the iteration state back by one code unit.
 *
 * The character iterator is stepped back only while UCOL_USE_ITERATOR is set
 * in the flags; the position pointer is decremented whenever it is non-NULL.
 */
static
inline void goBackOne(collIterate *data) {
    const UBool iteratorActive =
        (UBool)(data->iterator != NULL && (data->flags & UCOL_USE_ITERATOR) != 0);

    if (iteratorActive) {
        data->iterator->previous(data->iterator);
    }

    if (data->pos != NULL) {
        --data->pos;
    }
}
/*
 * Returns the collation element (CE) that precedes the current position of
 * `data`, updating the iterator state for backward iteration.
 *
 * Steps, in order:
 *  1. step the offset bookkeeping (offsetReturn / offsetRepeatCount) back;
 *  2. if CEs are still buffered (CEs or extendCEs), hand out the previous one;
 *  3. otherwise fetch the previous code unit (length-delimited string,
 *     character iterator, or NUL-delimited side buffer), run the backward FCD
 *     check and normalization when required, and map the unit to a CE.
 *
 * @param coll   collator whose mappings are consulted
 * @param data   iterator state, modified in place
 * @param status passed through to ucol_prv_getSpecialPrevCE
 * @return the previous CE, or UCOL_NO_MORE_CES when the start of the text is
 *         reached
 */
static
inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
UErrorCode *status)
{
uint32_t result = (uint32_t)UCOL_NULLORDER;
/* Offset bookkeeping: consume one pending repeat, or move offsetReturn back
   one slot; reaching the first slot resets the offset buffer. */
if (data->offsetReturn != NULL) {
if (data->offsetRepeatCount > 0) {
data->offsetRepeatCount -= 1;
} else {
if (data->offsetReturn == data->offsetBuffer) {
data->offsetReturn = NULL;
data->offsetStore = data->offsetBuffer;
} else {
data->offsetReturn -= 1;
}
}
}
/* CEs produced earlier are still buffered: return them last-to-first. */
if ((data->extendCEs && data->toReturn > data->extendCEs) ||
(!data->extendCEs && data->toReturn > data->CEs))
{
data->toReturn -= 1;
result = *(data->toReturn);
/* Buffer drained: let CEpos restart from the beginning of the buffer. */
if (data->CEs == data->toReturn || data->extendCEs == data->toReturn) {
data->CEpos = data->toReturn;
}
}
else {
UChar ch = 0;
/* Fetch the previous code unit; loops again after leaving the side buffer
   or after normalizing a segment into it. */
for (;;) {
if (data->flags & UCOL_ITER_HASLEN) {
/* Length-delimited string: stop at its first code unit. */
if (data->pos <= data->string) {
return UCOL_NO_MORE_CES;
}
data->pos --;
ch = *data->pos;
}
else if (data->flags & UCOL_USE_ITERATOR) {
UChar32 iterCh = data->iterator->previous(data->iterator);
if(iterCh == U_SENTINEL) {
return UCOL_NO_MORE_CES;
} else {
ch = (UChar)iterCh;
}
}
else {
/* Neither HASLEN nor USE_ITERATOR: a NUL marks the front of the buffer
   being read. */
data->pos --;
ch = *data->pos;
if (ch == 0) {
/* Restore the flags saved when the buffer was entered. */
data->flags = data->origFlags;
data->offsetRepeatValue = 0;
if (data->fcdPosition == NULL) {
data->pos = data->string;
return UCOL_NO_MORE_CES;
}
else {
/* Resume in the original text just after fcdPosition. */
data->pos = data->fcdPosition + 1;
}
continue;
}
}
/* Track whether the unit lies in U+3040..U+309F when UCOL_HIRAGANA_Q is set. */
if(data->flags&UCOL_HIRAGANA_Q) {
if(ch>=0x3040 && ch<=0x309f) {
data->flags |= UCOL_WAS_HIRAGANA;
} else {
data->flags &= ~UCOL_WAS_HIRAGANA;
}
}
/* No FCD check needed: low code unit, normalization off, position already
   covered by an earlier FCD check, or at the start of the string. */
if (ch < ZERO_CC_LIMIT_ ||
(data->flags & UCOL_ITER_NORM) == 0 ||
(data->fcdPosition != NULL && data->fcdPosition <= data->pos)
|| data->string == data->pos) {
break;
}
if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
if (data->pos == data->string) {
break;
}
/* Both this unit and the preceding one are below the limit: skip the check. */
if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
break;
}
}
/* Normalize the segment when the backward FCD check asks for it. */
if (collPrevIterFCD(data)) {
collPrevIterNormalize(data);
}
/* Not in the normalization buffer: ch is usable as-is. Otherwise loop to
   read the unit from the buffer. */
if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
break;
}
}
/* Map ch to a CE. Units that can end a contraction go to the special
   handler unless we are at the start of the text. */
if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
} else {
if (ch <= 0xFF) {
result = coll->latinOneMapping[ch];
}
else {
result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
}
if (result > UCOL_NOT_FOUND) {
result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
}
/* Not found in this collator: retry as a contraction end, else consult
   the UCA mapping when one is attached. */
if (result == UCOL_NOT_FOUND) { if (!isAtStartPrevIterate(data) &&
ucol_contractionEndCP(ch, data->coll))
{
result = UCOL_CONTRACTION;
} else {
if(coll->UCA) {
result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
}
}
if (result > UCOL_NOT_FOUND) {
if(coll->UCA) {
result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
}
}
}
}
/* Still unmapped: synthesize an implicit CE. */
if(result == UCOL_NOT_FOUND) {
result = getPrevImplicit(ch, data);
}
}
return result;
}
/**
 * Exported entry point for backward CE retrieval; forwards to the inline
 * implementation ucol_IGetPrevCE with the same arguments.
 */
U_CFUNC uint32_t U_EXPORT2
ucol_getPrevCE(const UCollator *coll, collIterate *data,
               UErrorCode *status) {
    const uint32_t previousCE = ucol_IGetPrevCE(coll, data, status);
    return previousCE;
}
/**
 * Returns the first collation element produced for the single code unit `u`.
 *
 * A temporary iterator over the one-unit string {u} is initialized and asked
 * for its next CE.
 */
U_CFUNC uint32_t U_EXPORT2
ucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status) {
    collIterate singleUnitIter;

    IInit_collIterate(coll, &u, 1, &singleUnitIter);
    return ucol_IGetNextCE(coll, &singleUnitIter, status);
}
/**
 * Appends one code unit to the text held in data->writableBuffer.
 *
 * `pNull` addresses the NUL that currently terminates the buffered text. The
 * unit is stored there and a new terminator is written right after it. When
 * there is no room for the terminator the buffer is replaced by a larger heap
 * allocation holding the same contents.
 *
 * Fix: the grow path used to write `ch` and the terminator at
 * newbuffer + (new size), i.e. beyond the end of the fresh allocation. The
 * unit is now inserted at the same offset `pNull` had in the old buffer.
 *
 * @param data  iterator owning the writable buffer
 * @param pNull position of the current terminator inside the writable buffer
 * @param ch    code unit to append
 * @return pointer to the stored unit (inside the possibly new buffer), or
 *         NULL when the allocation failed; in that case the buffer is
 *         left untouched
 */
static
inline UChar * insertBufferEnd(collIterate *data, UChar *pNull, UChar ch)
{
    static const uint32_t INCSIZE = 5;
    const uint32_t oldSize = data->writableBufSize;

    /* Fast path: both ch and the new terminator fit in the current buffer. */
    if ((data->writableBuffer + oldSize) > (pNull + 1)) {
        *pNull = ch;
        *(pNull + 1) = 0;
        return pNull;
    }

    /* Remember where the terminator sat before the buffer is replaced. */
    const uint32_t insertAt = (uint32_t)(pNull - data->writableBuffer);

    /* Grow with some slack, since more units are likely to follow; always
       leave room for ch plus the terminator. */
    uint32_t newSize = oldSize + INCSIZE;
    if (newSize < insertAt + 2) {
        newSize = insertAt + 2;
    }

    UChar *newbuffer = (UChar *)uprv_malloc(sizeof(UChar) * newSize);
    if (newbuffer == NULL) {
        return NULL;
    }

    uprv_memcpy(newbuffer, data->writableBuffer, oldSize * sizeof(UChar));
    freeHeapWritableBuffer(data);
    data->writableBufSize = newSize;
    data->writableBuffer = newbuffer;

    UChar *inserted = newbuffer + insertAt;
    *inserted = ch;
    *(inserted + 1) = 0;
    return inserted;
}
/**
 * Appends `length` code units from `str` to the text held in
 * data->writableBuffer.
 *
 * `pNull` addresses the NUL that currently terminates the buffered text. The
 * units are copied there and a new terminator is written after them. When
 * they do not fit, the buffer is replaced by a heap allocation of exactly the
 * required size.
 *
 * Fixes on the grow path, which used to disagree with the fast path:
 *  - the appended text is now NUL-terminated;
 *  - the returned pointer addresses the first appended unit (as the fast
 *    path does) instead of the start of the new buffer.
 *
 * @param data   iterator owning the writable buffer
 * @param pNull  position of the current terminator inside the writable buffer
 * @param str    units to append; must not be NULL
 * @param length number of units to append
 * @return pointer to the first appended unit, or NULL when the allocation
 *         failed; in that case the buffer is left untouched
 */
static
inline UChar * insertBufferEnd(collIterate *data, UChar *pNull, UChar *str,
                               int32_t length)
{
    /* Number of units in front of the insertion point. */
    const uint32_t used = (uint32_t)(pNull - data->writableBuffer);

    /* Fast path: the units and the terminator fit in the current buffer. */
    if (data->writableBuffer + data->writableBufSize > pNull + length + 1) {
        uprv_memcpy(pNull, str, length * sizeof(UChar));
        *(pNull + length) = 0;
        return pNull;
    }

    const uint32_t newSize = used + length + 1;
    UChar *newbuffer = (UChar *)uprv_malloc(sizeof(UChar) * newSize);
    if (newbuffer == NULL) {
        return NULL;
    }

    /* Copy everything before the old buffer is released. */
    uprv_memcpy(newbuffer, data->writableBuffer, used * sizeof(UChar));
    uprv_memcpy(newbuffer + used, str, length * sizeof(UChar));
    newbuffer[used + length] = 0;

    freeHeapWritableBuffer(data);
    data->writableBufSize = newSize;
    data->writableBuffer = newbuffer;

    return newbuffer + used;
}
/**
 * Normalizes (NFD) the text segment [data->pos - 1, data->fcdPosition) into
 * the writable buffer, appending it to what the buffer already holds, and
 * switches the iterator to read from that buffer.
 *
 * When the iterator is not yet reading from the buffer, the buffer is seeded
 * with the single unit at data->pos - 2; otherwise its current
 * NUL-terminated contents are kept. The buffer is grown on the heap when the
 * normalized text does not fit. If that allocation fails the function returns
 * without changing the iteration state.
 *
 * Fix: after the buffer was reallocated, the normalized text was still
 * written through the stale pointer to the old buffer (out-of-bounds write or
 * use after free) while data->pos was set into the new one. The output
 * address is now derived from data->writableBuffer after any reallocation.
 *
 * @param data iterator state, modified in place
 */
static
inline void normalizeNextContraction(collIterate *data)
{
    const uint32_t buffersize = data->writableBufSize;
    uint32_t strsize;
    UErrorCode status = U_ZERO_ERROR;
    UChar *pStart = data->pos - 1;
    UChar *pEnd;
    uint32_t normLen;

    if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
        /* Seed the buffer with the unit preceding the segment. */
        *data->writableBuffer = *(pStart - 1);
        strsize = 1;
    }
    else {
        strsize = u_strlen(data->writableBuffer);
    }

    pEnd = data->fcdPosition;

    /* Preflight: capacity 0 only asks for the normalized length. */
    normLen = unorm_normalize(pStart, pEnd - pStart, UNORM_NFD, 0,
                              data->writableBuffer, 0, &status);

    if (buffersize <= normLen + strsize) {
        const uint32_t size = strsize + normLen + 1;
        UChar *temp = (UChar *)uprv_malloc(size * sizeof(UChar));
        if (temp == NULL) {
            /* Cannot grow: bail out rather than write past the buffer. */
            return;
        }
        uprv_memcpy(temp, data->writableBuffer, sizeof(UChar) * strsize);
        freeHeapWritableBuffer(data);
        data->writableBuffer = temp;
        data->writableBufSize = size;
        data->flags |= UCOL_ITER_ALLOCATED;
    }

    status = U_ZERO_ERROR;
    /* Always address the current buffer; it may have just been replaced. */
    UChar *pStartNorm = data->writableBuffer + strsize;
    unorm_normalize(pStart, pEnd - pStart, UNORM_NFD, 0, pStartNorm,
                    normLen + 1, &status);

    data->pos = data->writableBuffer + strsize;
    data->origFlags = data->flags;
    data->flags |= UCOL_ITER_INNORMBUF;
    data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
}
/**
 * Returns the next code unit for contraction matching, normalizing on the
 * fly when the iterator has normalization enabled, and advances the
 * iterator.
 *
 * Without UCOL_ITER_NORM and UCOL_ITER_INNORMBUF the unit is read straight
 * from the character iterator or from data->pos. Otherwise units come from
 * the normalization buffer while it has content, and further text is FCD
 * checked, normalized when required, and appended to the buffer.
 *
 * Fix: two call sites stored `insertBufferEnd(...) + 1` into data->pos and
 * then compared data->pos with NULL, which can never detect a failed
 * insertion. The result is now checked before the +1 is applied, and
 * data->pos is left unchanged when the insertion fails.
 *
 * @param data iterator state, modified in place
 * @return the next unit, or (UChar)-1 when the buffer could not be grown
 */
static
inline UChar getNextNormalizedChar(collIterate *data)
{
    UChar nextch;
    UChar ch;

    /* Plain path: no normalization involved at all. */
    if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 ) {
        if(data->flags & UCOL_USE_ITERATOR) {
            return (UChar)data->iterator->next(data->iterator);
        } else {
            return *(data->pos ++);
        }
    }

    UChar *pEndWritableBuffer = NULL;
    UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);

    /* Still inside buffered text, or inside a span already FCD checked. */
    if ((innormbuf && *data->pos != 0) ||
        (data->fcdPosition != NULL && !innormbuf &&
         data->pos < data->fcdPosition)) {
        return *(data->pos ++);
    }

    if (data->flags & UCOL_ITER_HASLEN) {
        /* Last unit of a length-delimited string. */
        if (data->pos + 1 == data->endp) {
            return *(data->pos ++);
        }
    }
    else {
        if (innormbuf) {
            if(data->fcdPosition) {
                /* Only one unit is left in the original text: append it to
                   the buffer and return it. */
                if (*(data->fcdPosition + 1) == 0 ||
                    data->fcdPosition + 1 == data->endp) {
                    UChar *inserted = insertBufferEnd(data, data->pos,
                                                      *(data->fcdPosition));
                    if (inserted == NULL) {
                        return (UChar)-1;
                    }
                    data->pos = inserted + 1;
                    return *(data->fcdPosition ++);
                }
                /* Continue reading from the original text; remember where
                   the buffered text ends. */
                pEndWritableBuffer = data->pos;
                data->pos = data->fcdPosition;
            } else if(data->origFlags & UCOL_USE_ITERATOR) {
                /* Buffer exhausted: go back to the character iterator. */
                data->flags = data->origFlags;
                data->pos = NULL;
                return (UChar)data->iterator->next(data->iterator);
            }
        }
        else {
            /* Last unit of a NUL-terminated string. */
            if (*(data->pos + 1) == 0) {
                return *(data->pos ++);
            }
        }
    }

    ch = *data->pos ++;
    nextch = *data->pos;

    /* Run the FCD check when this position has not been covered yet and
       either unit is at or above the zero-combining-class limit. */
    if ((data->fcdPosition == NULL || data->fcdPosition < data->pos) &&
        (nextch >= NFC_ZERO_CC_BLOCK_LIMIT_ ||
         ch >= NFC_ZERO_CC_BLOCK_LIMIT_)) {
        if (collIterFCD(data)) {
            normalizeNextContraction(data);
            return *(data->pos ++);
        }
        else if (innormbuf) {
            /* Segment is fine as it is: append it verbatim to the buffer. */
            int32_t length = data->fcdPosition - data->pos + 1;
            data->pos = insertBufferEnd(data, pEndWritableBuffer,
                                        data->pos - 1, length);
            if (data->pos == NULL) {
                return (UChar)-1;
            }
            return *(data->pos ++);
        }
    }

    if (innormbuf) {
        UChar *inserted = insertBufferEnd(data, pEndWritableBuffer, ch);
        if (inserted == NULL) {
            return (UChar)-1;
        }
        data->pos = inserted + 1;
    }

    return ch;
}
/**
 * Makes the iterator continue reading from a copy of `buffer`, placed in the
 * writable (normalization) buffer.
 *
 * When the iterator is already reading from the writable buffer, the unread
 * remainder at source->pos is first appended at `tempdb` (a position inside
 * `buffer`). Otherwise the current position is remembered in fcdPosition and
 * the flags are switched to buffer mode. The writable buffer is reallocated
 * when `buffer` does not fit.
 *
 * Fixes:
 *  - the length of `buffer` was measured before the remainder was appended
 *    at `tempdb`, so the capacity check and the allocation could be too
 *    small for the final copy; the length is now measured afterwards;
 *  - the recorded buffer size now matches the allocated size (length + 1).
 *
 * If the reallocation fails, writableBuffer is left NULL with size 0 and the
 * function returns without repositioning.
 *
 * @param source iterator state, modified in place
 * @param buffer NUL-terminated text to continue from
 * @param tempdb position inside `buffer` where pending buffered text is
 *               appended; the caller must provide enough room
 */
static
inline void setDiscontiguosAttribute(collIterate *source, UChar *buffer,
                                     UChar *tempdb)
{
    if (source->flags & UCOL_ITER_INNORMBUF) {
        /* Keep what is still unread in the writable buffer. */
        u_strcpy(tempdb, source->pos);
    }
    else {
        source->fcdPosition = source->pos;
        source->origFlags = source->flags;
        source->flags |= UCOL_ITER_INNORMBUF;
        source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
    }

    /* Measure only now: the copy above may have lengthened `buffer`. */
    const uint32_t length = u_strlen(buffer);

    if (length >= source->writableBufSize) {
        freeHeapWritableBuffer(source);
        source->writableBuffer =
            (UChar *)uprv_malloc((length + 1) * sizeof(UChar));
        if(source->writableBuffer == NULL) {
            source->writableBufSize = 0;
            return;
        }
        source->writableBufSize = length + 1;
    }

    u_strcpy(source->writableBuffer, buffer);
    source->pos = source->writableBuffer;
}
/*
 * Continues contraction matching across units that do not match the
 * contraction table, as long as the upcoming units have a non-zero combining
 * class. Units that were skipped are collected in a local buffer and handed
 * to setDiscontiguosAttribute so that they are read again afterwards.
 *
 * @param coll     collator providing contractionCEs / contractionIndex / image
 * @param source   iterator state, modified in place
 * @param constart start of the contraction table entry being matched
 * @return the CE of the match found; when nothing matched, the iterator is
 *         restored to its state on entry, moved back one unit, and the CE
 *         stored for `constart` is returned
 *
 * NOTE(review): writes through tempdb are not bounds-checked against
 * `buffer` (4*UCOL_MAX_BUFFER units) - confirm that inputs cannot exceed it.
 */
static
uint32_t getDiscontiguous(const UCollator *coll, collIterate *source,
const UChar *constart)
{
UChar *temppos = source->pos;
UChar buffer[4*UCOL_MAX_BUFFER];
UChar *tempdb = buffer;
const UChar *tempconstart = constart;
uint8_t tempflags = source->flags;
UBool multicontraction = FALSE;
UChar *tempbufferpos = 0;
collIterateState discState;
/* Snapshot of the iterator for the "nothing matched" exit at the bottom. */
backupState(source, &discState);
/* The buffer starts with the unit just before the current position. */
*tempdb = peekCharacter(source, -1);
tempdb++;
for (;;) {
UChar *UCharOffset;
UChar schar,
tchar;
uint32_t result;
/* Stop when the text is exhausted or the next unit has combining class 0. */
if (((source->flags & UCOL_ITER_HASLEN) && source->pos >= source->endp)
|| (peekCharacter(source, 0) == 0 &&
((source->flags & UCOL_ITER_INNORMBUF) == 0 ||
source->fcdPosition == NULL ||
source->fcdPosition == source->endp ||
*(source->fcdPosition) == 0 ||
u_getCombiningClass(*(source->fcdPosition)) == 0)) ||
u_getCombiningClass(peekCharacter(source, 0)) == 0) {
if (multicontraction) {
/* A shorter match was recorded earlier: cut the buffer back to it,
   reposition, and return the CE of that match. */
*tempbufferpos = 0;
source->pos = temppos - 1;
setDiscontiguosAttribute(source, buffer, tempdb);
return *(coll->contractionCEs +
(tempconstart - coll->contractionIndex));
}
constart = tempconstart;
break;
}
UCharOffset = (UChar *)(tempconstart + 1);
schar = getNextNormalizedChar(source);
/* Linear search; relies on the table entries being in ascending order. */
while (schar > (tchar = *UCharOffset)) {
UCharOffset++;
}
if (schar != tchar) {
/* No match: keep the unit as skipped text and try the next one. */
*tempdb = schar;
tempdb ++;
continue;
}
else {
/* A match is not accepted when the unit has the same combining class as
   the unit at offset -2; it is then treated as skipped text. */
if (u_getCombiningClass(schar) ==
u_getCombiningClass(peekCharacter(source, -2))) {
*tempdb = schar;
tempdb ++;
continue;
}
result = *(coll->contractionCEs +
(UCharOffset - coll->contractionIndex));
}
*tempdb = 0;
if (result == UCOL_NOT_FOUND) {
break;
} else if (isContraction(result)) {
/* The match leads to a longer contraction: descend into its table. */
tempconstart = (UChar *)coll->image + getContractOffset(result);
if (*(coll->contractionCEs + (constart - coll->contractionIndex))
!= UCOL_NOT_FOUND) {
/* Record the state needed to fall back to this point later. */
multicontraction = TRUE;
temppos = source->pos + 1;
tempbufferpos = buffer + u_strlen(buffer);
}
} else {
/* Complete match: requeue the skipped units and return its CE. */
setDiscontiguosAttribute(source, buffer, tempdb);
return result;
}
}
/* Nothing matched: restore the iterator and return the CE for constart. */
loadState(source, &discState, TRUE);
goBackOne(source);
source->flags = tempflags;
return *(coll->contractionCEs + (constart - coll->contractionIndex));
}
/**
 * Tells whether `cp` receives no implicit collation element: values whose
 * low 16 bits are 0xFFFE or 0xFFFF, values in U+FDD0..U+FDEF, and surrogate
 * code points U+D800..U+DFFF.
 */
static
inline UBool isNonChar(UChar32 cp) {
    if ((cp & 0xFFFE) == 0xFFFE) {
        return TRUE;
    }
    if (0xFDD0 <= cp && cp <= 0xFDEF) {
        return TRUE;
    }
    return (UBool)(0xD800 <= cp && cp <= 0xDFFF);
}
/**
 * Produces the pair of implicit CEs for `cp` during forward iteration.
 *
 * The second (continuation) CE is queued through collationSource->CEpos and
 * counted in offsetRepeatCount; the first CE is returned. Code points
 * rejected by isNonChar yield 0 and queue nothing.
 */
static
inline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) {
    if (isNonChar(cp)) {
        return 0;
    }

    const uint32_t implicitPrimary = uprv_uca_getImplicitPrimary(cp);
    const uint32_t leadCE = (implicitPrimary & UCOL_PRIMARYMASK) | 0x00000505;
    const uint32_t continuationCE = ((implicitPrimary & 0x0000FFFF)<<16) | 0x000000C0;

    *collationSource->CEpos = continuationCE;
    collationSource->CEpos++;
    collationSource->offsetRepeatCount += 1;

    return leadCE;
}
/**
 * Prepends one code unit to the text held at the end of the writable buffer
 * (the layout used for backward iteration, where a NUL precedes the text).
 *
 * `pNull` addresses that leading NUL: the unit is stored there and a new NUL
 * is written one position earlier. When there is no room in front, a buffer
 * that is INCSIZE units larger is allocated, the old contents are copied
 * behind the INCSIZE free units, and the unit is stored at the first copied
 * position.
 *
 * @return pointer to the stored unit, or NULL when the allocation failed
 */
static
inline UChar * insertBufferFront(collIterate *data, UChar *pNull, UChar ch)
{
    static const uint32_t INCSIZE = 5;

    /* Fast path: room for the unit and a NUL in front of it. */
    if (pNull > data->writableBuffer + 1) {
        pNull[0] = ch;
        pNull[-1] = 0;
        return pNull;
    }

    const uint32_t oldSize = data->writableBufSize;
    const uint32_t grownSize = oldSize + INCSIZE;

    UChar *grown = (UChar *)uprv_malloc(sizeof(UChar) * grownSize);
    if (grown == NULL) {
        return NULL;
    }

    /* Old contents go behind the newly gained free space. */
    UChar *inserted = grown + INCSIZE;
    uprv_memcpy(inserted, data->writableBuffer, oldSize * sizeof(UChar));
    inserted[0] = ch;
    inserted[-1] = 0;

    freeHeapWritableBuffer(data);
    data->writableBufSize = grownSize;
    data->writableBuffer = grown;

    return inserted;
}
/**
 * Normalizes (NFD) the text segment that ends at data->pos and places the
 * result in the writable buffer directly in front of the text the buffer
 * already holds at its end, then switches the iterator to read backwards
 * from that buffer.
 *
 * The segment starts at data->string when fcdPosition is NULL, otherwise at
 * fcdPosition + 1. Resulting buffer layout: NUL, normalized segment, existing
 * tail. data->pos is set to the first unit of the existing tail.
 *
 * Fix: when the buffer had to grow, `nulltermsize` was overwritten before
 * the copy, so the existing tail was copied from the wrong source, to the
 * wrong destination and with the wrong length (the unsigned length could
 * underflow). The tail is now copied from its old location to the slot right
 * after the normalized text.
 *
 * @param data   iterator state, modified in place
 * @param status set to U_MEMORY_ALLOCATION_ERROR when the buffer cannot be
 *               grown; also receives the status of the normalization call
 */
static
inline void normalizePrevContraction(collIterate *data, UErrorCode *status)
{
    uint32_t nulltermsize;
    UErrorCode localstatus = U_ZERO_ERROR;
    UChar *pEnd = data->pos + 1;
    UChar *pStart;
    uint32_t normLen;
    UChar *pStartNorm;

    if (data->flags & UCOL_ITER_HASLEN) {
        /* Seed the last slot of the buffer with the unit after the segment;
           it becomes the one-unit tail. */
        *(data->writableBuffer + (data->writableBufSize - 1)) = *(data->pos + 1);
        nulltermsize = data->writableBufSize - 1;
    }
    else {
        /* Scan back from the end to the NUL in front of the buffered text;
           nulltermsize ends up as the index of the first tail unit. */
        nulltermsize = data->writableBufSize;
        UChar *temp = data->writableBuffer + (nulltermsize - 1);
        while (*(temp --) != 0) {
            nulltermsize --;
        }
    }

    if (data->fcdPosition == NULL) {
        pStart = data->string;
    }
    else {
        pStart = data->fcdPosition + 1;
    }

    /* Preflight: capacity 0 only asks for the normalized length. */
    normLen = unorm_normalize(pStart, pEnd - pStart, UNORM_NFD, 0,
                              data->writableBuffer, 0, &localstatus);

    if (nulltermsize <= normLen) {
        /* Not enough room in front of the tail: build a buffer laid out as
           [NUL][normLen units][tail]. */
        const uint32_t tailLen = data->writableBufSize - nulltermsize;
        const uint32_t size = tailLen + normLen + 1;
        UChar *temp = (UChar *)uprv_malloc(size * sizeof(UChar));
        if (temp == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        uprv_memcpy(temp + normLen + 1, data->writableBuffer + nulltermsize,
                    sizeof(UChar) * tailLen);
        freeHeapWritableBuffer(data);
        data->writableBuffer = temp;
        data->writableBufSize = size;
        nulltermsize = normLen + 1;
    }

    pStartNorm = data->writableBuffer + (nulltermsize - normLen);
    *(pStartNorm - 1) = 0;
    unorm_normalize(pStart, pEnd - pStart, UNORM_NFD, 0, pStartNorm, normLen,
                    status);

    data->pos = data->writableBuffer + nulltermsize;
    data->origFlags = data->flags;
    data->flags |= UCOL_ITER_INNORMBUF;
    data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
}
/**
 * Returns the code unit that precedes the current position, normalizing on
 * the fly when the iterator has normalization enabled. The position itself
 * is not moved back; callers follow up with goBackOne().
 *
 * Without UCOL_ITER_NORM and UCOL_ITER_INNORMBUF, or while buffered text is
 * available in front of data->pos, the unit is read directly. Otherwise the
 * preceding text is FCD checked and either normalized into the buffer or
 * prepended to it one unit at a time.
 *
 * Fix: the result of insertBufferFront() was ignored. When that call had to
 * reallocate the buffer, data->pos kept pointing into the old buffer, and a
 * failed allocation went unreported. data->pos now follows the inserted unit
 * and *status is set to U_MEMORY_ALLOCATION_ERROR on failure.
 *
 * @param data   iterator state, modified in place
 * @param status receives U_MEMORY_ALLOCATION_ERROR when a buffer cannot be
 *               grown; also passed on to normalizePrevContraction
 * @return the preceding code unit
 */
static
inline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status)
{
    UChar prevch;
    UChar ch;
    UChar *start;
    UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);
    UChar *pNull = NULL;

    /* Direct read: no normalization, or buffered text is still available. */
    if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 ||
        (innormbuf && *(data->pos - 1) != 0)) {
        if(data->flags & UCOL_USE_ITERATOR) {
            data->iterator->move(data->iterator, -1, UITER_CURRENT);
            return (UChar)data->iterator->next(data->iterator);
        } else {
            return *(data->pos - 1);
        }
    }

    start = data->pos;
    if ((data->fcdPosition==NULL)||(data->flags & UCOL_ITER_HASLEN)) {
        /* Only one unit remains in front of the position. */
        if ((start - 1) == data->string) {
            return *(start - 1);
        }
        start --;
        ch = *start;
        prevch = *(start - 1);
    }
    else {
        /* Reading from the buffer; its front was reached. Continue with the
           original text at fcdPosition. */
        if (data->fcdPosition == data->string) {
            /* First unit of the string: prepend it to the buffer. */
            UChar *inserted = insertBufferFront(data, data->pos - 1,
                                                *(data->fcdPosition));
            if (inserted == NULL) {
                *status = U_MEMORY_ALLOCATION_ERROR;
            } else {
                /* The buffer may have been reallocated. */
                data->pos = inserted + 1;
            }
            data->fcdPosition = NULL;
            return *(data->pos - 1);
        }
        pNull = data->pos - 1;
        start = data->fcdPosition;
        ch = *start;
        prevch = *(start - 1);
    }

    /* FCD check when this position is not covered yet and either unit is at
       or above the zero-combining-class limit. */
    if (data->fcdPosition > start &&
        (ch >= NFC_ZERO_CC_BLOCK_LIMIT_ || prevch >= NFC_ZERO_CC_BLOCK_LIMIT_))
    {
        UChar *backuppos = data->pos;
        data->pos = start;
        if (collPrevIterFCD(data)) {
            normalizePrevContraction(data, status);
            return *(data->pos - 1);
        }
        data->pos = backuppos;
        data->fcdPosition ++;
    }

    if (innormbuf) {
        UChar *inserted = insertBufferFront(data, pNull, ch);
        if (inserted == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
        } else if (pNull != NULL) {
            /* The buffer may have been reallocated; on the fast path this
               leaves pos unchanged. */
            data->pos = inserted + 1;
        }
        data->fcdPosition --;
    }

    return ch;
}
/* Upper bound on the digit index accumulated for one number in the DIGIT_TAG
   branch below; it also sizes that branch's temporary byte buffer. */
#define UCOL_MAX_DIGITS_FOR_NUMBER 254
/*
 * Resolves a "special" collation element during forward iteration.
 *
 * The tag extracted with getCETag(CE) selects the processing. Some branches
 * return the final CE directly (queuing additional CEs through
 * source->CEpos); others compute a new CE and loop again until the value is
 * no longer special (CE <= UCOL_NOT_FOUND).
 *
 * @param coll   collator whose tables (image, contractionCEs,
 *               contractionIndex, mapping) are consulted
 * @param ch     code unit that produced CE
 * @param CE     the special CE to resolve
 * @param source iterator state; position, flags and CE buffer are modified
 * @param status set to U_INTERNAL_PROGRAM_ERROR for an unknown tag; also
 *               passed to getPrevNormalizedChar
 * @return the resolved CE
 */
uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, collIterate *source, UErrorCode *status) {
collIterateState entryState;
/* State on entry; the prefix branch rewinds to it. */
backupState(source, &entryState);
UChar32 cp = ch;
for (;;) {
const uint32_t *CEOffset = NULL;
switch(getCETag(CE)) {
case NOT_FOUND_TAG:
/* Nothing to resolve: hand the value back unchanged. */
return CE;
case SPEC_PROC_TAG:
{
/* Prefix (pre-context) matching: walk backwards from the entry position
   and match the preceding units against the table referenced by CE. */
const UChar *UCharOffset;
UChar schar, tchar;
collIterateState prefixState;
backupState(source, &prefixState);
loadState(source, &entryState, TRUE);
goBackOne(source);
for(;;) {
const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
if (collIter_bos(source)) {
/* Start of text: take the CE stored for the table start. */
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
break;
}
schar = getPrevNormalizedChar(source, status);
goBackOne(source);
/* Linear search; relies on the table entries being in ascending order. */
while(schar > (tchar = *UCharOffset)) {
UCharOffset++;
}
if (schar == tchar) {
CE = *(coll->contractionCEs +
(UCharOffset - coll->contractionIndex));
}
else
{
CE = *(coll->contractionCEs +
(ContractionStart - coll->contractionIndex));
}
if(!isPrefix(CE)) {
break;
}
}
/* Restore the position from before the prefix walk (or the entry state
   when nothing was found). */
if(CE != UCOL_NOT_FOUND) { loadState(source, &prefixState, TRUE);
if(source->origFlags & UCOL_USE_ITERATOR) {
source->flags = source->origFlags;
}
} else { loadState(source, &entryState, TRUE);
}
break;
}
case CONTRACTION_TAG:
{
/* Contraction matching: consume following units while they extend a match
   in the contraction table referenced by CE. */
collIterateState state;
backupState(source, &state);
/* firstCE: CE stored for the table start; used when matching fails. */
uint32_t firstCE = *(coll->contractionCEs + ((UChar *)coll->image+getContractOffset(CE) - coll->contractionIndex)); const UChar *UCharOffset;
UChar schar, tchar;
for (;;) {
const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
if (collIter_eos(source)) {
/* End of text: use the CE for what was matched so far, falling back to
   firstCE and the saved state when there is none. */
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
if (CE == UCOL_NOT_FOUND) {
CE = firstCE;
loadState(source, &state, TRUE);
if(source->origFlags & UCOL_USE_ITERATOR) {
source->flags = source->origFlags;
}
}
break;
}
/* The first table unit packs two bytes: low byte maxCC, high byte allSame. */
uint8_t maxCC = (uint8_t)(*(UCharOffset)&0xFF);
uint8_t allSame = (uint8_t)(*(UCharOffset++)>>8);
schar = getNextNormalizedChar(source);
while(schar > (tchar = *UCharOffset)) {
UCharOffset++;
}
if (schar == tchar) {
CE = *(coll->contractionCEs +
(UCharOffset - coll->contractionIndex));
}
else
{
/* No contiguous match. Assemble the full code point that missed (it may
   be a surrogate pair) to decide whether a discontiguous match is
   possible. */
UChar32 miss = schar;
if (source->iterator) {
UChar32 surrNextChar;
int32_t prevPos;
if(U16_IS_LEAD(schar) && source->iterator->hasNext(source->iterator)) {
prevPos = source->iterator->index;
surrNextChar = getNextNormalizedChar(source);
if (U16_IS_TRAIL(surrNextChar)) {
miss = U16_GET_SUPPLEMENTARY(schar, surrNextChar);
} else if (prevPos < source->iterator->index){
goBackOne(source);
}
}
} else if (U16_IS_LEAD(schar)) {
miss = U16_GET_SUPPLEMENTARY(schar, getNextNormalizedChar(source));
}
uint8_t sCC;
/* Discontiguous matching is ruled out by any of these conditions. */
if (miss < 0x300 ||
maxCC == 0 ||
(sCC = i_getCombiningClass(miss, coll)) == 0 ||
sCC>maxCC ||
(allSame != 0 && sCC == maxCC) ||
collIter_eos(source))
{
/* Un-read the missed unit(s) and use the CE of the table start. */
goBackOne(source); if(U_IS_SUPPLEMENTARY(miss)) {
goBackOne(source);
}
CE = *(coll->contractionCEs +
(ContractionStart - coll->contractionIndex));
} else {
/* Peek at the following unit: only a unit with non-zero combining class
   allows a discontiguous match. */
UChar tempchar;
tempchar = getNextNormalizedChar(source);
goBackOne(source);
if (i_getCombiningClass(tempchar, coll) == 0) {
goBackOne(source);
if(U_IS_SUPPLEMENTARY(miss)) {
goBackOne(source);
}
CE = *(coll->contractionCEs +
(ContractionStart - coll->contractionIndex));
} else {
CE = getDiscontiguous(coll, source, ContractionStart);
}
}
}
if(CE == UCOL_NOT_FOUND) {
/* Dead end: rewind to the last saved state and use its CE. */
loadState(source, &state, TRUE);
CE = firstCE;
break;
}
if(!isContraction(CE)) {
break;
}
/* CE leads to a longer contraction. If the table just matched has a CE of
   its own, remember it and the matching position as the new fallback. */
uint32_t tempCE = *(coll->contractionCEs + (ContractionStart - coll->contractionIndex));
if(tempCE != UCOL_NOT_FOUND) {
firstCE = tempCE;
goBackOne(source);
backupState(source, &state);
getNextNormalizedChar(source);
}
} break;
} case LONG_PRIMARY_TAG:
{
/* Split into a primary CE (returned) and one continuation CE (queued). */
*(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER;
CE = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON;
source->offsetRepeatCount += 1;
return CE;
}
case EXPANSION_TAG:
{
/* Expansion: the first CE is returned, the remaining ones are queued.
   A count of 0 means the list is terminated by a zero entry. */
uint32_t size;
uint32_t i;
CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE);
size = getExpansionCount(CE);
CE = *CEOffset++;
if(size != 0) {
for(i = 1; i<size; i++) {
*(source->CEpos++) = *CEOffset++;
source->offsetRepeatCount += 1;
}
} else {
while(*CEOffset != 0) {
*(source->CEpos++) = *CEOffset++;
source->offsetRepeatCount += 1;
}
}
return CE;
}
case DIGIT_TAG:
{
/* With numeric collation on, a run of digits is collected and encoded as
   one number; otherwise the CE stored in the expansion table is used. */
uint32_t i;
if (source->coll->numericCollation == UCOL_ON){
collIterateState digitState = {0,0,0,0,0,0,0,0,0};
UChar32 char32 = 0;
int32_t digVal = 0;
uint32_t digIndx = 0;
uint32_t endIndex = 0;
uint32_t trailingZeroIndex = 0;
uint8_t collateVal = 0;
UBool nonZeroValReached = FALSE;
/* Two digits are packed per byte; slots 0 and 1 are filled in after the
   loop (placeholder and length byte). */
uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3];
digVal = u_charDigitValue(cp);
digIndx++;
for(;;){
/* Leading zeros are skipped until the first non-zero digit. */
if (digVal != 0) {
nonZeroValReached = TRUE;
}
if (nonZeroValReached) {
/* Odd digIndx: this digit is the low half of a pair and completes the
   byte. Even digIndx: it is the high half (value * 10). */
if (digIndx % 2 == 1){
collateVal += (uint8_t)digVal;
if (collateVal != 0)
trailingZeroIndex = 0;
numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
collateVal = 0;
}
else{
collateVal = (uint8_t)(digVal * 10);
if (collateVal == 0)
{
if (!trailingZeroIndex)
trailingZeroIndex = (digIndx/2) + 2;
}
else
trailingZeroIndex = 0;
numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
}
digIndx++;
}
/* Fetch the next code point (possibly a surrogate pair); stop at the end
   of text, at a non-digit, or when the digit limit is exceeded. */
if (!collIter_eos(source)){
ch = getNextNormalizedChar(source);
if (U16_IS_LEAD(ch)){
if (!collIter_eos(source)) {
backupState(source, &digitState);
UChar trail = getNextNormalizedChar(source);
if(U16_IS_TRAIL(trail)) {
char32 = U16_GET_SUPPLEMENTARY(ch, trail);
} else {
loadState(source, &digitState, TRUE);
char32 = ch;
}
}
} else {
char32 = ch;
}
if ((digVal = u_charDigitValue(char32)) == -1 || digIndx > UCOL_MAX_DIGITS_FOR_NUMBER){
/* Un-read the code point that ended the number. */
if (char32 > 0xFFFF) { loadState(source, &digitState, TRUE);
}
goBackOne(source);
break;
}
} else {
break;
}
}
/* Only zeros were seen: encode a single zero. */
if (nonZeroValReached == FALSE){
digIndx = 2;
numTempBuf[2] = 6;
}
endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx/2) + 2) ;
/* Odd digIndx: re-pair the digits by shifting every byte half a pair. */
if (digIndx % 2 != 0){
for(i = 2; i < endIndex; i++){
numTempBuf[i] = (((((numTempBuf[i] - 6)/2) % 10) * 10) +
(((numTempBuf[i+1])-6)/2) / 10) * 2 + 6;
}
--digIndx;
}
/* The last byte is decremented by one. */
numTempBuf[endIndex-1] -= 1;
numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F));
/* First CE: placeholder and length byte as primary weight; the digit
   bytes follow two per continuation CE. */
CE = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | UCOL_BYTE_COMMON; i = 2; while(i < endIndex)
{
uint32_t primWeight = numTempBuf[i++] << 8;
if ( i < endIndex)
primWeight |= numTempBuf[i++];
*(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
}
} else {
/* Numeric collation off: take the CE from the expansion table and let the
   outer loop examine it. */
CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE);
CE = *CEOffset++;
break;
}
return CE;
}
case IMPLICIT_TAG:
return getImplicit(cp, source);
case CJK_IMPLICIT_TAG:
return getImplicit(cp, source);
case HANGUL_SYLLABLE_TAG:
{
/* Arithmetic decomposition of a Hangul syllable into L, V and optional T
   jamo. */
static const uint32_t
SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
static const uint32_t VCount = 21;
static const uint32_t TCount = 28;
uint32_t L = ch - SBase;
uint32_t T = L % TCount; L /= TCount;
uint32_t V = L % VCount;
L /= VCount;
L += LBase;
V += VBase;
T += TBase;
if (!source->coll->image->jamoSpecial) {
/* Jamo map directly: queue the CEs of V (and T), return the CE of L. */
*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
if (T != TBase) {
*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
}
return UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
} else { if(source->iterator != NULL && source->flags & UCOL_ITER_INNORMBUF) {
source->flags = source->origFlags; source->pos = NULL;
}
/* Jamo need special handling: write them into the writable buffer and
   let the iterator read them from there. */
source->writableBuffer[0] = (UChar)L;
source->writableBuffer[1] = (UChar)V;
if (T != TBase) {
source->writableBuffer[2] = (UChar)T;
source->writableBuffer[3] = 0;
} else {
source->writableBuffer[2] = 0;
}
source->fcdPosition = source->pos; source->pos = source->writableBuffer;
source->origFlags = source->flags;
source->flags |= UCOL_ITER_INNORMBUF;
source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
return(UCOL_IGNORABLE);
}
}
case SURROGATE_TAG:
{
/* Lead surrogate with data for supplementary code points: look up the CE
   using the trail surrogate. */
UChar trail;
collIterateState state;
backupState(source, &state);
if (collIter_eos(source) || !(U16_IS_TRAIL((trail = getNextNormalizedChar(source))))) {
/* No trail surrogate follows: restore the position and return 0. */
loadState(source, &state, TRUE);
return 0;
} else {
CE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, CE&0xFFFFFF, trail);
if(CE == UCOL_NOT_FOUND) { loadState(source, &state, TRUE);
return CE;
}
/* Combine the pair into the supplementary code point for later branches. */
cp = ((((uint32_t)ch)<<10UL)+(trail)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
}
}
break;
case LEAD_SURROGATE_TAG:
/* Lead surrogate without specific data: with a following trail surrogate
   the pair gets implicit CEs, otherwise 0 is returned. */
UChar nextChar;
if( source->flags & UCOL_USE_ITERATOR) {
if(U_IS_TRAIL(nextChar = (UChar)source->iterator->current(source->iterator))) {
cp = U16_GET_SUPPLEMENTARY(ch, nextChar);
source->iterator->next(source->iterator);
return getImplicit(cp, source);
} else {
return 0;
}
} else if((((source->flags & UCOL_ITER_HASLEN) == 0 ) || (source->pos<source->endp)) &&
U_IS_TRAIL((nextChar=*source->pos))) {
cp = U16_GET_SUPPLEMENTARY(ch, nextChar);
source->pos++;
return getImplicit(cp, source);
} else {
return 0;
}
case TRAIL_SURROGATE_TAG:
return 0;
case CHARSET_TAG:
return UCOL_NOT_FOUND;
default:
/* Unknown tag: report an internal error and end the loop with CE 0. */
*status = U_INTERNAL_PROGRAM_ERROR;
CE=0;
break;
}
/* Done once the CE is no longer a special CE. */
if (CE <= UCOL_NOT_FOUND) break;
}
return CE;
}
/**
 * Produces the pair of implicit CEs for `cp` during backward iteration.
 *
 * The first CE of the pair is queued through collationSource->CEpos (and
 * toReturn is set past it); the continuation CE is returned. Source offsets
 * for the pair are recorded in the offset buffer unless the iterator is
 * reading from the normalization buffer, in which case only
 * offsetRepeatCount is set. Code points rejected by isNonChar yield 0.
 *
 * Fix: the offset buffer allocation was not checked, so an allocation
 * failure led to a write through a NULL offsetStore. When the buffer cannot
 * be allocated, the offsets are not recorded and the CEs are still produced.
 *
 * @param cp               code point needing implicit CEs
 * @param collationSource  iterator state, modified in place
 * @return the continuation CE of the pair, or 0 for rejected code points
 */
static
inline uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource) {
    if(isNonChar(cp)) {
        return 0;
    }

    uint32_t r = uprv_uca_getImplicitPrimary(cp);

    *(collationSource->CEpos++) = (r & UCOL_PRIMARYMASK) | 0x00000505;
    collationSource->toReturn = collationSource->CEpos;

    /* Create the offset buffer on first use. */
    if (collationSource->offsetBuffer == NULL) {
        int32_t *fresh = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
        if (fresh != NULL) {
            collationSource->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
            collationSource->offsetBuffer = fresh;
            collationSource->offsetStore = fresh;
        }
    }

    if (collationSource->flags & UCOL_ITER_INNORMBUF) {
        collationSource->offsetRepeatCount = 1;
    } else if (collationSource->offsetBuffer != NULL) {
        int32_t firstOffset = (int32_t)(collationSource->pos - collationSource->string);

        *(collationSource->offsetStore++) = firstOffset;
        *(collationSource->offsetStore++) = firstOffset + 1;

        collationSource->offsetReturn = collationSource->offsetStore - 1;
        *(collationSource->offsetBuffer) = firstOffset;
        if (collationSource->offsetReturn == collationSource->offsetBuffer) {
            collationSource->offsetStore = collationSource->offsetBuffer;
        }
    }

    return ((r & 0x0000FFFF)<<16) | 0x000000C0;
}
uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
collIterate *source,
UErrorCode *status)
{
const uint32_t *CEOffset = NULL;
UChar *UCharOffset = NULL;
UChar schar;
const UChar *constart = NULL;
uint32_t size;
UChar buffer[UCOL_MAX_BUFFER];
uint32_t *endCEBuffer;
UChar *strbuffer;
int32_t noChars = 0;
int32_t CECount = 0;
for(;;)
{
switch (getCETag(CE))
{
case NOT_FOUND_TAG:
return CE;
case SPEC_PROC_TAG:
{
const UChar *UCharOffset;
UChar schar, tchar;
collIterateState prefixState;
backupState(source, &prefixState);
for(;;) {
const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
if (collIter_bos(source)) {
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
break;
}
schar = getPrevNormalizedChar(source, status);
goBackOne(source);
while(schar > (tchar = *UCharOffset)) {
UCharOffset++;
}
if (schar == tchar) {
CE = *(coll->contractionCEs +
(UCharOffset - coll->contractionIndex));
}
else
{
uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);
if(isZeroCE == 0) {
continue;
} else if(U16_IS_TRAIL(schar) || U16_IS_LEAD(schar)) {
if (!collIter_bos(source)) {
UChar lead;
if(U16_IS_LEAD(lead = getPrevNormalizedChar(source, status))) {
isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, lead);
if(getCETag(isZeroCE) == SURROGATE_TAG) {
uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, isZeroCE&0xFFFFFF, schar);
if(finalCE == 0) {
goBackOne(source);
continue;
}
}
} else {
continue;
}
} else {
continue;
}
}
CE = *(coll->contractionCEs +
(ContractionStart - coll->contractionIndex));
}
if(!isPrefix(CE)) {
break;
}
}
loadState(source, &prefixState, TRUE);
break;
}
case CONTRACTION_TAG:
schar = peekCharacter(source, 0);
constart = (UChar *)coll->image + getContractOffset(CE);
if (isAtStartPrevIterate(source)
) {
CE = *(coll->contractionCEs +
(constart - coll->contractionIndex));
break;
}
strbuffer = buffer;
UCharOffset = strbuffer + (UCOL_MAX_BUFFER - 1);
*(UCharOffset --) = 0;
noChars = 0;
while (ucol_unsafeCP(schar, coll)) {
*(UCharOffset) = schar;
noChars++;
UCharOffset --;
schar = getPrevNormalizedChar(source, status);
goBackOne(source);
if (UCharOffset + 1 == buffer) {
int32_t newsize = 0;
if(source->pos) { newsize = source->pos - source->string + 1;
} else { newsize = 4 * UCOL_MAX_BUFFER;
}
strbuffer = (UChar *)uprv_malloc(sizeof(UChar) *
(newsize + UCOL_MAX_BUFFER));
if (strbuffer == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return UCOL_NO_MORE_CES;
}
UCharOffset = strbuffer + newsize;
uprv_memcpy(UCharOffset, buffer,
UCOL_MAX_BUFFER * sizeof(UChar));
UCharOffset --;
}
if ((source->pos && (source->pos == source->string ||
((source->flags & UCOL_ITER_INNORMBUF) &&
*(source->pos - 1) == 0 && source->fcdPosition == NULL)))
|| (source->iterator && !source->iterator->hasPrevious(source->iterator))) {
break;
}
}
*(UCharOffset) = schar;
noChars++;
int32_t offsetBias;
#if 0
if (source->offsetReturn != NULL) {
source->offsetStore = source->offsetReturn - noChars;
}
if (source->flags & UCOL_ITER_INNORMBUF) {
if (source->fcdPosition == NULL) {
offsetBias = 0;
} else {
offsetBias = (int32_t)(source->fcdPosition - source->string);
}
} else {
offsetBias = (int32_t)(source->pos - source->string);
}
#else
if (source->flags & UCOL_ITER_INNORMBUF) {
#if 1
offsetBias = -1;
#else
if (source->fcdPosition == NULL) {
offsetBias = 0;
} else {
offsetBias = (int32_t)(source->fcdPosition - source->string);
}
#endif
} else {
offsetBias = (int32_t)(source->pos - source->string);
}
#endif
collIterate temp;
int32_t rawOffset;
IInit_collIterate(coll, UCharOffset, noChars, &temp);
temp.flags &= ~UCOL_ITER_NORM;
rawOffset = temp.pos - temp.string; CE = ucol_IGetNextCE(coll, &temp, status);
if (source->extendCEs) {
endCEBuffer = source->extendCEs + source->extendCEsSize;
CECount = (source->CEpos - source->extendCEs)/sizeof(uint32_t);
} else {
endCEBuffer = source->CEs + UCOL_EXPAND_CE_BUFFER_SIZE;
CECount = (source->CEpos - source->CEs)/sizeof(uint32_t);
}
if (source->offsetBuffer == NULL) {
source->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
source->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
source->offsetStore = source->offsetBuffer;
}
while (CE != UCOL_NO_MORE_CES) {
*(source->CEpos ++) = CE;
if (offsetBias >= 0) {
*(source->offsetStore ++) = rawOffset + offsetBias;
}
CECount++;
if (source->CEpos == endCEBuffer) {
if (source->extendCEs == NULL) {
source->extendCEs = (uint32_t *)uprv_malloc(sizeof(uint32_t) *
(source->extendCEsSize =UCOL_EXPAND_CE_BUFFER_SIZE + UCOL_EXPAND_CE_BUFFER_EXTEND_SIZE));
if (source->extendCEs == NULL) {
CECount = -1;
} else {
source->extendCEs = (uint32_t *)uprv_memcpy(source->extendCEs, source->CEs, UCOL_EXPAND_CE_BUFFER_SIZE * sizeof(uint32_t));
}
} else {
uint32_t *tempBufCE = (uint32_t *)uprv_realloc(source->extendCEs,
sizeof(uint32_t) * (source->extendCEsSize += UCOL_EXPAND_CE_BUFFER_EXTEND_SIZE));
if (tempBufCE == NULL) {
CECount = -1;
}
else {
source->extendCEs = tempBufCE;
}
}
if (CECount == -1) {
*status = U_MEMORY_ALLOCATION_ERROR;
source->extendCEsSize = 0;
source->CEpos = source->CEs;
freeHeapWritableBuffer(&temp);
if (strbuffer != buffer) {
uprv_free(strbuffer);
}
return (uint32_t)UCOL_NULLORDER;
}
source->CEpos = source->extendCEs + CECount;
endCEBuffer = source->extendCEs + source->extendCEsSize;
}
if (offsetBias >= 0 && source->offsetStore >= &source->offsetBuffer[source->offsetBufferSize]) {
int32_t storeIX = source->offsetStore - source->offsetBuffer;
int32_t *tob = (int32_t *) uprv_realloc(source->offsetBuffer,
sizeof(int32_t) * (source->offsetBufferSize + UCOL_EXPAND_CE_BUFFER_EXTEND_SIZE));
if (tob != NULL) {
source->offsetBuffer = tob;
source->offsetStore = &source->offsetBuffer[storeIX];
source->offsetBufferSize += UCOL_EXPAND_CE_BUFFER_EXTEND_SIZE;
} else {
*status = U_MEMORY_ALLOCATION_ERROR;
source->CEpos = source->CEs;
freeHeapWritableBuffer(&temp);
if (strbuffer != buffer) {
uprv_free(strbuffer);
}
return (uint32_t) UCOL_NULLORDER;
}
}
rawOffset = temp.pos - temp.string;
CE = ucol_IGetNextCE(coll, &temp, status);
}
if (source->offsetRepeatValue != 0) {
if (CECount > noChars) {
source->offsetRepeatCount += temp.offsetRepeatCount;
} else {
source->offsetReturn -= (noChars - CECount);
}
}
freeHeapWritableBuffer(&temp);
if (strbuffer != buffer) {
uprv_free(strbuffer);
}
if (offsetBias >= 0) {
source->offsetReturn = source->offsetStore - 1;
if (source->offsetReturn == source->offsetBuffer) {
source->offsetStore = source->offsetBuffer;
}
}
source->toReturn = source->CEpos - 1;
if (source->toReturn == source->CEs) {
source->CEpos = source->CEs;
}
return *(source->toReturn);
case LONG_PRIMARY_TAG:
{
*(source->CEpos++) = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON;
*(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER;
source->toReturn = source->CEpos - 1;
if (source->offsetBuffer == NULL) {
source->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
source->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
source->offsetStore = source->offsetBuffer;
}
if (source->flags & UCOL_ITER_INNORMBUF) {
source->offsetRepeatCount = 1;
} else {
int32_t firstOffset = (int32_t)(source->pos - source->string);
*(source->offsetStore++) = firstOffset;
*(source->offsetStore++) = firstOffset + 1;
source->offsetReturn = source->offsetStore - 1;
*(source->offsetBuffer) = firstOffset;
if (source->offsetReturn == source->offsetBuffer) {
source->offsetStore = source->offsetBuffer;
}
}
return *(source->toReturn);
}
case EXPANSION_TAG:
{
int32_t firstOffset = (int32_t)(source->pos - source->string);
if (source->offsetReturn != NULL) {
if (! (source->flags & UCOL_ITER_INNORMBUF) && source->offsetReturn == source->offsetBuffer) {
source->offsetStore = source->offsetBuffer;
}else {
firstOffset = -1;
}
}
if (source->offsetBuffer == NULL) {
source->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
source->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
source->offsetStore = source->offsetBuffer;
}
CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
size = getExpansionCount(CE);
if (size != 0) {
uint32_t count;
for (count = 0; count < size; count++) {
*(source->CEpos ++) = *CEOffset++;
if (firstOffset >= 0) {
*(source->offsetStore ++) = firstOffset + 1;
}
}
} else {
while (*CEOffset != 0) {
*(source->CEpos ++) = *CEOffset ++;
if (firstOffset >= 0) {
*(source->offsetStore ++) = firstOffset + 1;
}
}
}
if (firstOffset >= 0) {
source->offsetReturn = source->offsetStore - 1;
*(source->offsetBuffer) = firstOffset;
if (source->offsetReturn == source->offsetBuffer) {
source->offsetStore = source->offsetBuffer;
}
} else {
source->offsetRepeatCount += size - 1;
}
source->toReturn = source->CEpos - 1;
if(source->toReturn == source->CEs) {
source->CEpos = source->CEs;
}
return *(source->toReturn);
}
case DIGIT_TAG:
{
uint32_t i;
if (source->coll->numericCollation == UCOL_ON){
uint32_t digIndx = 0;
uint32_t endIndex = 0;
uint32_t leadingZeroIndex = 0;
uint32_t trailingZeroCount = 0;
uint8_t collateVal = 0;
UBool nonZeroValReached = FALSE;
uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2];
uint32_t ceLimit = 0;
UChar initial_ch = ch;
collIterateState initialState = {0,0,0,0,0,0,0,0,0};
backupState(source, &initialState);
for(;;) {
collIterateState state = {0,0,0,0,0,0,0,0,0};
UChar32 char32 = 0;
int32_t digVal = 0;
if (U16_IS_TRAIL (ch)) {
if (!collIter_bos(source)){
UChar lead = getPrevNormalizedChar(source, status);
if(U16_IS_LEAD(lead)) {
char32 = U16_GET_SUPPLEMENTARY(lead,ch);
goBackOne(source);
} else {
char32 = ch;
}
} else {
char32 = ch;
}
} else {
char32 = ch;
}
digVal = u_charDigitValue(char32);
for(;;) {
if (digVal != 0)
nonZeroValReached = TRUE;
if (nonZeroValReached) {
if ((digIndx + trailingZeroCount) % 2 == 1) {
collateVal += (uint8_t)(digVal * 10);
if (collateVal != 0)
leadingZeroIndex = 0;
if ( digIndx < UCOL_MAX_DIGITS_FOR_NUMBER ) {
numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
}
collateVal = 0;
} else {
collateVal = (uint8_t)digVal;
if (collateVal == 0) {
if (!leadingZeroIndex)
leadingZeroIndex = (digIndx/2) + 2;
} else
leadingZeroIndex = 0;
}
++digIndx;
} else
++trailingZeroCount;
if (!collIter_bos(source)) {
ch = getPrevNormalizedChar(source, status);
if (U16_IS_TRAIL(ch)) {
backupState(source, &state);
if (!collIter_bos(source)) {
goBackOne(source);
UChar lead = getPrevNormalizedChar(source, status);
if(U16_IS_LEAD(lead)) {
char32 = U16_GET_SUPPLEMENTARY(lead,ch);
} else {
loadState(source, &state, FALSE);
char32 = ch;
}
}
} else
char32 = ch;
if ((digVal = u_charDigitValue(char32)) == -1 || (ceLimit > 0 && (digIndx + trailingZeroCount) >= ceLimit)) {
if (char32 > 0xFFFF) { loadState(source, &state, FALSE);
}
break;
}
goBackOne(source);
}else
break;
}
if (digIndx + trailingZeroCount <= UCOL_MAX_DIGITS_FOR_NUMBER) {
break;
}
ceLimit = (digIndx + trailingZeroCount) % UCOL_MAX_DIGITS_FOR_NUMBER;
if ( ceLimit == 0 ) {
ceLimit = UCOL_MAX_DIGITS_FOR_NUMBER;
}
ch = initial_ch;
loadState(source, &initialState, FALSE);
digIndx = endIndex = leadingZeroIndex = trailingZeroCount = 0;
collateVal = 0;
nonZeroValReached = FALSE;
}
if (! nonZeroValReached) {
digIndx = 2;
trailingZeroCount = 0;
numTempBuf[2] = 6;
}
if ((digIndx + trailingZeroCount) % 2 != 0) {
numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6;
digIndx += 1; }
if (trailingZeroCount % 2 != 0) {
digIndx += 1; trailingZeroCount -= 1;
}
endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2) + 2) ;
numTempBuf[2] -= 1;
numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
uint32_t exponent = (digIndx+trailingZeroCount)/2;
if (leadingZeroIndex)
exponent -= ((digIndx/2) + 2 - leadingZeroIndex);
numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F));
*(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | UCOL_BYTE_COMMON; i = endIndex - 1; while(i >= 2) {
uint32_t primWeight = numTempBuf[i--] << 8;
if ( i >= 2)
primWeight |= numTempBuf[i--];
*(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
}
source->toReturn = source->CEpos -1;
return *(source->toReturn);
} else {
CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
CE = *(CEOffset++);
break;
}
}
case HANGUL_SYLLABLE_TAG:
{
static const uint32_t
SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
static const uint32_t VCount = 21;
static const uint32_t TCount = 28;
uint32_t L = ch - SBase;
uint32_t T = L % TCount;
L /= TCount;
uint32_t V = L % VCount;
L /= VCount;
L += LBase;
V += VBase;
T += TBase;
if (source->offsetBuffer == NULL) {
source->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
source->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
source->offsetStore = source->offsetBuffer;
}
int32_t firstOffset = (int32_t)(source->pos - source->string);
*(source->offsetStore++) = firstOffset;
if (!source->coll->image->jamoSpecial) {
*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
*(source->offsetStore++) = firstOffset + 1;
if (T != TBase) {
*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
*(source->offsetStore++) = firstOffset + 1;
}
source->toReturn = source->CEpos - 1;
source->offsetReturn = source->offsetStore - 1;
if (source->offsetReturn == source->offsetBuffer) {
source->offsetStore = source->offsetBuffer;
}
return *(source->toReturn);
} else {
UChar *tempbuffer = source->writableBuffer +
(source->writableBufSize - 1);
*(tempbuffer) = 0;
if (T != TBase) {
*(tempbuffer - 1) = (UChar)T;
*(tempbuffer - 2) = (UChar)V;
*(tempbuffer - 3) = (UChar)L;
*(tempbuffer - 4) = 0;
} else {
*(tempbuffer - 1) = (UChar)V;
*(tempbuffer - 2) = (UChar)L;
*(tempbuffer - 3) = 0;
}
if (source->pos == source->string) {
source->fcdPosition = NULL;
} else {
source->fcdPosition = source->pos-1;
}
source->pos = tempbuffer;
source->origFlags = source->flags;
source->flags |= UCOL_ITER_INNORMBUF;
source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
return(UCOL_IGNORABLE);
}
}
case IMPLICIT_TAG:
#if 0
if (source->offsetBuffer == NULL) {
source->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
source->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
source->offsetStore = source->offsetBuffer;
}
if (source->flags & UCOL_ITER_INNORMBUF) {
source->offsetRepeatCount = 1;
} else {
int32_t firstOffset = (int32_t)(source->pos - source->string);
*(source->offsetStore++) = firstOffset;
*(source->offsetStore++) = firstOffset + 1;
source->offsetReturn = source->offsetStore - 1;
if (source->offsetReturn == source->offsetBuffer) {
source->offsetStore = source->offsetBuffer;
}
}
#endif
return getPrevImplicit(ch, source);
case CJK_IMPLICIT_TAG:
return getPrevImplicit(ch, source);
case SURROGATE_TAG:
return 0;
case LEAD_SURROGATE_TAG:
return 0;
case TRAIL_SURROGATE_TAG:
{
UChar32 cp = 0;
UChar prevChar;
UChar *prev;
if (isAtStartPrevIterate(source)) {
return 0;
}
if (source->pos != source->writableBuffer) {
prev = source->pos - 1;
} else {
prev = source->fcdPosition;
}
prevChar = *prev;
if (U16_IS_LEAD(prevChar)) {
cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
source->pos = prev;
} else {
return 0;
}
return getPrevImplicit(cp, source);
}
case CHARSET_TAG:
return UCOL_NOT_FOUND;
default:
*status = U_INTERNAL_PROGRAM_ERROR;
CE=0;
break;
}
if (CE <= UCOL_NOT_FOUND) {
break;
}
}
return CE;
}
/*
 * Grows one of the per-level byte buffers to newSize bytes.
 *
 * secondaries  in/out: current write position inside the buffer; on success it
 *              is re-pointed to the same offset inside the new buffer.
 * secStart     start of the buffer as it is now.
 * second       the caller's original (non-heap) buffer; when secStart equals it
 *              a fresh heap block is allocated and the bytes written so far are
 *              copied, otherwise the existing heap block is reallocated.
 * secSize      out: receives newSize on success.
 * status       set to U_MEMORY_ALLOCATION_ERROR when the allocation fails.
 *
 * Returns the start of the new buffer. On failure returns NULL when the buffer
 * was still the original one, or secStart (unchanged, still owned by the
 * caller) when a reallocation failed; *secondaries and *secSize are left alone.
 */
static
uint8_t *reallocateBuffer(uint8_t **secondaries, uint8_t *secStart, uint8_t *second, uint32_t *secSize, uint32_t newSize, UErrorCode *status) {
#ifdef UCOL_DEBUG
    fprintf(stderr, ".");
#endif
    /* Bytes already written; the write position is restored from this. */
    uint32_t used = *secondaries-secStart;
    uint8_t *grown = NULL;

    if(secStart!=second) {
        /* Already on the heap: realloc carries the contents over. */
        grown=(uint8_t*)uprv_realloc(secStart, newSize);
        if(grown==NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            /* Hand back the old block so the caller does not lose it. */
            return secStart;
        }
    } else {
        /* First growth: move off the caller's original buffer. */
        grown=(uint8_t*)uprv_malloc(newSize);
        if(grown==NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        uprv_memcpy(grown, secStart, *secondaries-secStart);
    }

    *secSize=newSize;
    *secondaries=grown+used;
    return grown;
}
/*
 * Reverses, in place, the run of TYPE elements between the pointers start and
 * end (both inclusive) by swapping elements from the outside in.
 *
 * start and end must be modifiable pointer lvalues: the macro advances start
 * and moves end back until they meet or cross, so their values after the
 * expansion no longer delimit the run. Each argument is evaluated several
 * times; do not pass expressions with side effects.
 */
#define uprv_ucol_reverse_buffer(TYPE, start, end) { \
TYPE tempA; \
while((start)<(end)) { \
tempA = *(start); \
*(start)++ = *(end); \
*(end)-- = tempA; \
} \
}
/*
 * Merges two zero-terminated sort keys level by level into dest.
 *
 * Within a key, bytes >= 2 are level content, a byte 1 ends a level and a
 * byte 0 ends the key. For every level that both keys have, the output holds
 * the level from src1, a byte 2, then the level from src2; levels are joined
 * by a byte 1. Whatever is left of the key with more levels is appended.
 *
 * src1, src2              the sort keys; must not be NULL.
 * src1Length, src2Length  key lengths including the terminating 0, or a
 *                         negative value to have the length computed from the
 *                         terminating 0. A positive length must index a 0 as
 *                         its last byte.
 * dest, destCapacity      output buffer and its size in bytes.
 *
 * Returns 0 (after writing a single 0 to dest when possible) for illegal
 * arguments. If destCapacity is smaller than src1Length+src2Length nothing is
 * written and src1Length+src2Length is returned as the capacity to provide.
 * Otherwise returns the number of bytes written, including the terminating 0.
 *
 * The merged key can need src1Length+src2Length bytes: the first of the two
 * input terminators is replaced by a 02 separator rather than dropped.
 * The capacity is therefore checked against the full sum; checking against
 * sum-1 allowed a one-byte write past the end of dest.
 */
U_CAPI int32_t U_EXPORT2
ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
                   const uint8_t *src2, int32_t src2Length,
                   uint8_t *dest, int32_t destCapacity) {
    int32_t destLength;
    uint8_t b;
    uint8_t *p;

    /* check arguments */
    /* NOTE(review): a length of -2 passes this check and is then handled like
     * -1 below; confirm whether that leniency is intended. */
    if( src1==NULL || src1Length<-2 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) ||
        src2==NULL || src2Length<-2 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) ||
        destCapacity<0 || (destCapacity>0 && dest==NULL)
    ) {
        /* error: try to leave an empty key behind and report length 0 */
        if(dest!=NULL && destCapacity>0) {
            *dest=0;
        }
        return 0;
    }

    /* compute missing lengths (including the terminating 0) */
    if(src1Length<0) {
        src1Length=(int32_t)uprv_strlen((const char *)src1)+1;
    }
    if(src2Length<0) {
        src2Length=(int32_t)uprv_strlen((const char *)src2)+1;
    }

    /* upper bound of the merged length; never write unless it fits */
    destLength=src1Length+src2Length;
    if(destLength>destCapacity) {
        /* the merged sort key might not fit into the destination */
        return destLength;
    }

    /* merge the levels that both keys have */
    p=dest;
    while(*src1!=0 && *src2!=0) {
        /* copy one level from src1, stopping at its 00 or 01 */
        while((b=*src1)>=2) {
            ++src1;
            *p++=b;
        }

        /* 02 separates the two halves of a merged level */
        *p++=2;

        /* copy the same level from src2, stopping at its 00 or 01 */
        while((b=*src2)>=2) {
            ++src2;
            *p++=b;
        }

        /* both keys continue with another level: emit one 01 for the pair */
        if(*src1==1 && *src2==1) {
            ++src1;
            ++src2;
            *p++=1;
        }
    }

    /*
     * At least one key is exhausted. If src1 still has content then src2 is
     * the finished one, so the remainder to append comes from src1.
     */
    if(*src1!=0) {
        src2=src1;
    }
    /* append the remainder including its terminating 0 */
    while((*p++=*src2++)!=0) {
    }

    /* report what was really written, which can be less than the bound */
    return (int32_t)(p-dest);
}
/*
 * Produces the sort key for source into result by delegating to the
 * collator's sortKeyGen function, with tracing around the call.
 *
 * A NULL source skips the generator entirely and yields a length of 0.
 * Any error code set by the generator is reported to the trace exit only;
 * it is not returned to the caller.
 *
 * Returns the value produced by sortKeyGen (0 when source is NULL).
 */
U_CAPI int32_t U_EXPORT2
ucol_getSortKey(const UCollator *coll,
                const UChar *source,
                int32_t sourceLength,
                uint8_t *result,
                int32_t resultLength)
{
    UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
        /* For a NUL-terminated source, trace its real length instead of -1. */
        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source,
                     ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength));
    }

    UErrorCode status = U_ZERO_ERROR;
    int32_t keySize = (source == NULL)
        ? 0
        : coll->sortKeyGen(coll, source, sourceLength, &result, resultLength, FALSE, &status);

    UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
    UTRACE_EXIT_STATUS(status);
    return keySize;
}
/*
 * Generates a sort key through the collator's sortKeyGen function without
 * supplying a buffer: *pResult is cleared first, the capacity passed is 0 and
 * the allocation flag is TRUE.
 *
 * pResult     out: cleared to NULL before the call; whatever sortKeyGen stores
 *             through it is what the caller receives.
 * pErrorCode  passed straight through to sortKeyGen.
 *
 * Returns the value returned by sortKeyGen.
 * NOTE(review): presumably the caller owns and must free *pResult afterwards;
 * confirm against the sortKeyGen implementation.
 */
U_CFUNC int32_t
ucol_getSortKeyWithAllocation(const UCollator *coll,
                              const UChar *source, int32_t sourceLength,
                              uint8_t **pResult,
                              UErrorCode *pErrorCode) {
    *pResult = NULL;
    int32_t keyLength = coll->sortKeyGen(coll, source, sourceLength, pResult, 0, TRUE, pErrorCode);
    return keyLength;
}
#define UCOL_FSEC_BUF_SIZE 256
int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t currentSize, UColAttributeValue strength, int32_t len) {
UErrorCode status = U_ZERO_ERROR;
uint8_t compareSec = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);
uint8_t compareTer = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);
uint8_t compareQuad = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);
UBool compareIdent = (strength == UCOL_IDENTICAL);
UBool doCase = (coll->caseLevel == UCOL_ON);
UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);
UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0);
uint8_t fSecsBuff[UCOL_FSEC_BUF_SIZE];
uint8_t *fSecs = fSecsBuff;
uint32_t fSecsLen = 0, fSecsMaxLen = UCOL_FSEC_BUF_SIZE;
uint8_t *frenchStartPtr = NULL, *frenchEndPtr = NULL;
uint32_t variableTopValue = coll->variableTopValue;
uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1);
if(doHiragana) {
UCOL_COMMON_BOT4++;
}
uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4);
uint32_t order = UCOL_NO_MORE_CES;
uint8_t primary1 = 0;
uint8_t primary2 = 0;
uint8_t secondary = 0;
uint8_t tertiary = 0;
int32_t caseShift = 0;
uint32_t c2 = 0, c3 = 0, c4 = 0;
uint8_t caseSwitch = coll->caseSwitch;
uint8_t tertiaryMask = coll->tertiaryMask;
uint8_t tertiaryCommon = coll->tertiaryCommon;
UBool wasShifted = FALSE;
UBool notIsContinuation = FALSE;
uint8_t leadPrimary = 0;
for(;;) {
order = ucol_IGetNextCE(coll, s, &status);
if(order == UCOL_NO_MORE_CES) {
break;
}
if(order == 0) {
continue;
}
notIsContinuation = !isContinuation(order);
if(notIsContinuation) {
tertiary = (uint8_t)((order & UCOL_BYTE_SIZE_MASK));
} else {
tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
}
secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
primary1 = (uint8_t)(order >> 8);
if(shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
|| (!notIsContinuation && wasShifted))
|| (wasShifted && primary1 == 0)) {
if(primary1 == 0) {
continue;
}
if(compareQuad == 0) {
if(c4 > 0) {
currentSize += (c2/UCOL_BOT_COUNT4)+1;
c4 = 0;
}
currentSize++;
if(primary2 != 0) {
currentSize++;
}
}
wasShifted = TRUE;
} else {
wasShifted = FALSE;
if(primary1 != UCOL_IGNORABLE) {
if(notIsContinuation) {
if(leadPrimary == primary1) {
currentSize++;
} else {
if(leadPrimary != 0) {
currentSize++;
}
if(primary2 == UCOL_IGNORABLE) {
currentSize++;
leadPrimary = 0;
}
else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
(primary1 > maxRegularPrimary && primary1 < minImplicitPrimary))
{
leadPrimary = 0;
currentSize+=2;
}
else {
leadPrimary = primary1;
currentSize+=2;
}
}
} else {
currentSize++;
if(primary2 != UCOL_IGNORABLE) {
currentSize++;
}
}
}
if(secondary > compareSec) {
if(!isFrenchSec){
if (secondary == UCOL_COMMON2 && notIsContinuation) {
c2++;
} else {
if(c2 > 0) {
if (secondary > UCOL_COMMON2) { currentSize += (c2/(uint32_t)UCOL_TOP_COUNT2)+1;
} else {
currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+1;
}
c2 = 0;
}
currentSize++;
}
} else {
fSecs[fSecsLen++] = secondary;
if(fSecsLen == fSecsMaxLen) {
uint8_t *fSecsTemp;
if(fSecs == fSecsBuff) {
fSecsTemp = (uint8_t *)uprv_malloc(2*fSecsLen);
} else {
fSecsTemp = (uint8_t *)uprv_realloc(fSecs, 2*fSecsLen);
}
if(fSecsTemp == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
fSecs = fSecsTemp;
fSecsMaxLen *= 2;
}
if(notIsContinuation) {
if (frenchStartPtr != NULL) {
uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
frenchStartPtr = NULL;
}
} else {
if (frenchStartPtr == NULL) {
frenchStartPtr = fSecs+fSecsLen-2;
}
frenchEndPtr = fSecs+fSecsLen-1;
}
}
}
if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) {
if (caseShift == 0) {
currentSize++;
caseShift = UCOL_CASE_SHIFT_START;
}
if((tertiary&0x3F) > 0 && notIsContinuation) {
caseShift--;
if((tertiary &0xC0) != 0) {
if (caseShift == 0) {
currentSize++;
caseShift = UCOL_CASE_SHIFT_START;
}
caseShift--;
}
}
} else {
if(notIsContinuation) {
tertiary ^= caseSwitch;
}
}
tertiary &= tertiaryMask;
if(tertiary > compareTer) {
if (tertiary == tertiaryCommon && notIsContinuation) {
c3++;
} else {
if(c3 > 0) {
if((tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL)
|| (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST)) {
currentSize += (c3/(uint32_t)coll->tertiaryTopCount)+1;
} else {
currentSize += (c3/(uint32_t)coll->tertiaryBottomCount)+1;
}
c3 = 0;
}
currentSize++;
}
}
if((compareQuad==0) && notIsContinuation) {
if(s->flags & UCOL_WAS_HIRAGANA) { if(c4>0) { currentSize += (c4/UCOL_BOT_COUNT4)+1;
c4 = 0;
}
currentSize++; } else { c4++;
}
}
}
}
if(!isFrenchSec){
if(c2 > 0) {
currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint32_t)UCOL_BOT_COUNT2 != 0)?1:0);
}
} else {
uint32_t i = 0;
if(frenchStartPtr != NULL) {
uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
}
for(i = 0; i<fSecsLen; i++) {
secondary = *(fSecs+fSecsLen-i-1);
if (secondary == UCOL_COMMON2) {
++c2;
} else {
if(c2 > 0) {
if (secondary > UCOL_COMMON2) { currentSize += (c2/(uint32_t)UCOL_TOP_COUNT2)+((c2%(uint32_t)UCOL_TOP_COUNT2 != 0)?1:0);
} else {
currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint32_t)UCOL_BOT_COUNT2 != 0)?1:0);
}
c2 = 0;
}
currentSize++;
}
}
if(c2 > 0) {
currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint32_t)UCOL_BOT_COUNT2 != 0)?1:0);
}
if(fSecs != fSecsBuff) {
uprv_free(fSecs);
}
}
if(c3 > 0) {
currentSize += (c3/(uint32_t)coll->tertiaryBottomCount) + ((c3%(uint32_t)coll->tertiaryBottomCount != 0)?1:0);
}
if(c4 > 0 && compareQuad == 0) {
currentSize += (c4/(uint32_t)UCOL_BOT_COUNT4)+((c4%(uint32_t)UCOL_BOT_COUNT4 != 0)?1:0);
}
if(compareIdent) {
currentSize += u_lengthOfIdenticalLevelRun(s->string, len);
}
return currentSize;
}
/*
 * Opens a new case byte when the current one has no positions left.
 *
 * If caseShift is 0, stores UCOL_CASE_BYTE_START at *cases, advances *cases
 * past it and resets caseShift to UCOL_CASE_SHIFT_START. Does nothing when
 * caseShift is non-zero.
 */
static
inline void doCaseShift(uint8_t **cases, uint32_t &caseShift) {
    if (caseShift != 0) {
        return;
    }
    uint8_t *slot = *cases;
    *slot = UCOL_CASE_BYTE_START;
    *cases = slot + 1;
    caseShift = UCOL_CASE_SHIFT_START;
}
/*
 * Counts one byte and stores it only if there is room.
 *
 * size is always incremented. value is written at primaries, and primaries is
 * advanced, only while primaries is still below limit; past the limit the byte
 * is counted but dropped.
 */
static
inline void addWithIncrement(uint8_t *&primaries, uint8_t *limit, uint32_t &size, const uint8_t value) {
    ++size;
    if(primaries >= limit) {
        return;
    }
    *primaries = value;
    ++primaries;
}
/*
 * Appends the French (backwards) secondary level to the output.
 *
 * A UCOL_LEVELTERMINATOR is emitted first, then the *secsize bytes that end
 * just before secondaries are read from last to first. Runs of UCOL_COMMON2
 * are replaced by count bytes: counted down from UCOL_COMMON_TOP2 when the
 * byte ending the run is greater than UCOL_COMMON2, counted up from
 * UCOL_COMMON_BOT2 otherwise (including a run that reaches the end of input).
 *
 * primaries/primEnd  write position and limit; bytes beyond the limit are
 *                    counted but not stored (see addWithIncrement).
 * secsize            in: number of secondary bytes to read;
 *                    out: number of bytes produced, not counting the leading
 *                    level terminator.
 * frenchStartPtr/frenchEndPtr  if frenchStartPtr is not NULL, that inclusive
 *                    range is reversed in place before packing.
 *
 * Returns the advanced write position.
 * Because the range is reversed in place and *secsize is overwritten, a second
 * call with the same arguments does not repeat the first.
 */
static
inline uint8_t *packFrench(uint8_t *primaries, uint8_t *primEnd, uint8_t *secondaries, uint32_t *secsize, uint8_t *frenchStartPtr, uint8_t *frenchEndPtr) {
    int32_t count2 = 0;
    uint32_t size = 0;
    /* The terminator is counted into a throwaway so it stays out of size. */
    uint32_t terminatorCount = 0;

    addWithIncrement(primaries, primEnd, terminatorCount, UCOL_LEVELTERMINATOR);

    if(frenchStartPtr != NULL) {
        uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
    }

    /* Walk the collected secondaries from the end towards the start. */
    const uint8_t *stop = secondaries - *secsize;
    const uint8_t *cursor = secondaries;
    while(cursor != stop) {
        const uint8_t secondary = *--cursor;

        if (secondary == UCOL_COMMON2) {
            ++count2;
            continue;
        }

        if (count2 > 0) {
            /* Flush the pending run of common weights. */
            if (secondary > UCOL_COMMON2) {
                while (count2 > UCOL_TOP_COUNT2) {
                    addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2));
                    count2 -= (uint32_t)UCOL_TOP_COUNT2;
                }
                addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_TOP2 - (count2-1)));
            } else {
                while (count2 > UCOL_BOT_COUNT2) {
                    addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
                    count2 -= (uint32_t)UCOL_BOT_COUNT2;
                }
                addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
            }
            count2 = 0;
        }
        addWithIncrement(primaries, primEnd, size, secondary);
    }

    /* A run that lasts until the end of input is packed from the bottom. */
    if (count2 > 0) {
        while (count2 > UCOL_BOT_COUNT2) {
            addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
            count2 -= (uint32_t)UCOL_BOT_COUNT2;
        }
        addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
    }

    *secsize = size;
    return primaries;
}
#define DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY 0
U_CFUNC int32_t U_CALLCONV
ucol_calcSortKey(const UCollator *coll,
const UChar *source,
int32_t sourceLength,
uint8_t **result,
uint32_t resultLength,
UBool allocateSKBuffer,
UErrorCode *status)
{
uint32_t i = 0;
uint8_t prim[UCOL_PRIMARY_MAX_BUFFER], second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER], caseB[UCOL_CASE_MAX_BUFFER], quad[UCOL_QUAD_MAX_BUFFER];
uint8_t *primaries = *result, *secondaries = second, *tertiaries = tert, *cases = caseB, *quads = quad;
if(U_FAILURE(*status)) {
return 0;
}
if(primaries == NULL && allocateSKBuffer == TRUE) {
primaries = *result = prim;
resultLength = UCOL_PRIMARY_MAX_BUFFER;
}
uint32_t secSize = UCOL_SECONDARY_MAX_BUFFER, terSize = UCOL_TERTIARY_MAX_BUFFER,
caseSize = UCOL_CASE_MAX_BUFFER, quadSize = UCOL_QUAD_MAX_BUFFER;
uint32_t sortKeySize = 1;
UChar normBuffer[UCOL_NORMALIZATION_MAX_BUFFER];
UChar *normSource = normBuffer;
int32_t normSourceLen = UCOL_NORMALIZATION_MAX_BUFFER;
int32_t len = (sourceLength == -1 ? u_strlen(source) : sourceLength);
UColAttributeValue strength = coll->strength;
uint8_t compareSec = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);
uint8_t compareTer = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);
uint8_t compareQuad = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);
UBool compareIdent = (strength == UCOL_IDENTICAL);
UBool doCase = (coll->caseLevel == UCOL_ON);
UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0);
UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);
uint32_t variableTopValue = coll->variableTopValue;
uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1);
uint8_t UCOL_HIRAGANA_QUAD = 0;
if(doHiragana) {
UCOL_HIRAGANA_QUAD=UCOL_COMMON_BOT4++;
}
uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4);
uint8_t *frenchStartPtr = NULL;
uint8_t *frenchEndPtr = NULL;
uint32_t caseShift = 0;
sortKeySize += ((compareSec?0:1) + (compareTer?0:1) + (doCase?1:0) + (compareQuad?0:1) + (compareIdent?1:0));
UNormalizationMode normMode;
if(compareIdent) {
normMode = UNORM_NFD;
} else if(coll->normalizationMode != UCOL_OFF) {
normMode = UNORM_FCD;
} else {
normMode = UNORM_NONE;
}
if(normMode != UNORM_NONE && UNORM_YES != unorm_quickCheck(source, len, normMode, status)) {
len = unorm_internalNormalize(normSource, normSourceLen,
source, len,
normMode, FALSE,
status);
if(*status == U_BUFFER_OVERFLOW_ERROR) {
normSourceLen = len;
normSource = (UChar *)uprv_malloc(len*U_SIZEOF_UCHAR);
if(normSource == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
*status = U_ZERO_ERROR;
len = unorm_internalNormalize(normSource, normSourceLen,
source, len,
normMode, FALSE,
status);
}
if(U_FAILURE(*status)) {
return 0;
}
source = normSource;
}
collIterate s;
IInit_collIterate(coll, (UChar *)source, len, &s);
if(source == normSource) {
s.flags &= ~UCOL_ITER_NORM;
}
if(resultLength == 0 || primaries == NULL) {
int32_t keyLen = ucol_getSortKeySize(coll, &s, sortKeySize, strength, len);
if(normSource != normBuffer) {
uprv_free(normSource);
}
return keyLen;
}
uint8_t *primarySafeEnd = primaries + resultLength - 1;
if(strength > UCOL_PRIMARY) {
primarySafeEnd--;
}
uint32_t minBufferSize = UCOL_MAX_BUFFER;
uint8_t *primStart = primaries;
uint8_t *secStart = secondaries;
uint8_t *terStart = tertiaries;
uint8_t *caseStart = cases;
uint8_t *quadStart = quads;
uint32_t order = 0;
uint8_t primary1 = 0;
uint8_t primary2 = 0;
uint8_t secondary = 0;
uint8_t tertiary = 0;
uint8_t caseSwitch = coll->caseSwitch;
uint8_t tertiaryMask = coll->tertiaryMask;
int8_t tertiaryAddition = coll->tertiaryAddition;
uint8_t tertiaryTop = coll->tertiaryTop;
uint8_t tertiaryBottom = coll->tertiaryBottom;
uint8_t tertiaryCommon = coll->tertiaryCommon;
uint8_t caseBits = 0;
UBool finished = FALSE;
UBool wasShifted = FALSE;
UBool notIsContinuation = FALSE;
uint32_t prevBuffSize = 0;
uint32_t count2 = 0, count3 = 0, count4 = 0;
uint8_t leadPrimary = 0;
for(;;) {
for(i=prevBuffSize; i<minBufferSize; ++i) {
order = ucol_IGetNextCE(coll, &s, status);
if(order == UCOL_NO_MORE_CES) {
finished = TRUE;
break;
}
if(order == 0) {
continue;
}
notIsContinuation = !isContinuation(order);
if(notIsContinuation) {
tertiary = (uint8_t)(order & UCOL_BYTE_SIZE_MASK);
} else {
tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
}
secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
primary1 = (uint8_t)(order >> 8);
if(shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
|| (!notIsContinuation && wasShifted))
|| (wasShifted && primary1 == 0))
{
if(primary1 == 0) {
continue;
}
if(compareQuad == 0) {
if(count4 > 0) {
while (count4 > UCOL_BOT_COUNT4) {
*quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
count4 -= UCOL_BOT_COUNT4;
}
*quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1));
count4 = 0;
}
if(primary1 != 0) {
*quads++ = primary1;
}
if(primary2 != 0) {
*quads++ = primary2;
}
}
wasShifted = TRUE;
} else {
wasShifted = FALSE;
if(primary1 != UCOL_IGNORABLE) {
if(notIsContinuation) {
if(leadPrimary == primary1) {
*primaries++ = primary2;
} else {
if(leadPrimary != 0) {
*primaries++ = (uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN);
}
if(primary2 == UCOL_IGNORABLE) {
*primaries++ = primary1;
leadPrimary = 0;
} else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
(primary1 > maxRegularPrimary && primary1 < minImplicitPrimary)) {
leadPrimary = 0;
*primaries++ = primary1;
if(primaries <= primarySafeEnd) {
*primaries++ = primary2;
}
} else {
*primaries++ = leadPrimary = primary1;
if(primaries <= primarySafeEnd) {
*primaries++ = primary2;
}
}
}
} else {
*primaries++ = primary1;
if((primary2 != UCOL_IGNORABLE) && (primaries <= primarySafeEnd)) {
*primaries++ = primary2;
}
}
}
if(secondary > compareSec) {
if(!isFrenchSec) {
if (secondary == UCOL_COMMON2 && notIsContinuation) {
++count2;
} else {
if (count2 > 0) {
if (secondary > UCOL_COMMON2) { while (count2 > UCOL_TOP_COUNT2) {
*secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2);
count2 -= (uint32_t)UCOL_TOP_COUNT2;
}
*secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - (count2-1));
} else {
while (count2 > UCOL_BOT_COUNT2) {
*secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
count2 -= (uint32_t)UCOL_BOT_COUNT2;
}
*secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1));
}
count2 = 0;
}
*secondaries++ = secondary;
}
} else {
*secondaries++ = secondary;
if(notIsContinuation) {
if (frenchStartPtr != NULL) {
uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
frenchStartPtr = NULL;
}
} else {
if (frenchStartPtr == NULL) {
frenchStartPtr = secondaries - 2;
}
frenchEndPtr = secondaries-1;
}
}
}
if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) {
doCaseShift(&cases, caseShift);
if(notIsContinuation) {
caseBits = (uint8_t)(tertiary & 0xC0);
if(tertiary != 0) {
if(coll->caseFirst == UCOL_UPPER_FIRST) {
if((caseBits & 0xC0) == 0) {
*(cases-1) |= 1 << (--caseShift);
} else {
*(cases-1) |= 0 << (--caseShift);
doCaseShift(&cases, caseShift);
*(cases-1) |= ((caseBits>>6)&1) << (--caseShift);
}
} else {
if((caseBits & 0xC0) == 0) {
*(cases-1) |= 0 << (--caseShift);
} else {
*(cases-1) |= 1 << (--caseShift);
doCaseShift(&cases, caseShift);
*(cases-1) |= ((caseBits>>7)&1) << (--caseShift);
}
}
}
}
} else {
if(notIsContinuation) {
tertiary ^= caseSwitch;
}
}
tertiary &= tertiaryMask;
if(tertiary > compareTer) {
if (tertiary == tertiaryCommon && notIsContinuation) {
++count3;
} else {
if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) {
tertiary += tertiaryAddition;
} else if(tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) {
tertiary -= tertiaryAddition;
}
if (count3 > 0) {
if ((tertiary > tertiaryCommon)) {
while (count3 > coll->tertiaryTopCount) {
*tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount);
count3 -= (uint32_t)coll->tertiaryTopCount;
}
*tertiaries++ = (uint8_t)(tertiaryTop - (count3-1));
} else {
while (count3 > coll->tertiaryBottomCount) {
*tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount);
count3 -= (uint32_t)coll->tertiaryBottomCount;
}
*tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1));
}
count3 = 0;
}
*tertiaries++ = tertiary;
}
}
if((compareQuad==0) && notIsContinuation) {
if(s.flags & UCOL_WAS_HIRAGANA) { if(count4>0) { while (count4 > UCOL_BOT_COUNT4) {
*quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
count4 -= UCOL_BOT_COUNT4;
}
*quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1));
count4 = 0;
}
*quads++ = UCOL_HIRAGANA_QUAD; } else { count4++;
}
}
}
if(primaries > primarySafeEnd) {
if(allocateSKBuffer == FALSE) {
IInit_collIterate(coll, (UChar *)source, len, &s);
if(source == normSource) {
s.flags &= ~UCOL_ITER_NORM;
}
sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, strength, len);
*status = U_BUFFER_OVERFLOW_ERROR;
finished = TRUE;
break;
} else {
int32_t sks = sortKeySize+(primaries - primStart)+(secondaries - secStart)+(tertiaries - terStart)+(cases-caseStart)+(quads-quadStart);
primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sks, status);
if(U_SUCCESS(*status)) {
*result = primStart;
primarySafeEnd = primStart + resultLength - 1;
if(strength > UCOL_PRIMARY) {
primarySafeEnd--;
}
} else {
sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
finished = TRUE;
break;
}
}
}
}
if(finished) {
break;
} else {
prevBuffSize = minBufferSize;
uint32_t frenchStartOffset = 0, frenchEndOffset = 0;
if (frenchStartPtr != NULL) {
frenchStartOffset = frenchStartPtr - secStart;
frenchEndOffset = frenchEndPtr - secStart;
}
secStart = reallocateBuffer(&secondaries, secStart, second, &secSize, 2*secSize, status);
terStart = reallocateBuffer(&tertiaries, terStart, tert, &terSize, 2*terSize, status);
caseStart = reallocateBuffer(&cases, caseStart, caseB, &caseSize, 2*caseSize, status);
quadStart = reallocateBuffer(&quads, quadStart, quad, &quadSize, 2*quadSize, status);
if(U_FAILURE(*status)) {
sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
break;
}
if (frenchStartPtr != NULL) {
frenchStartPtr = secStart + frenchStartOffset;
frenchEndPtr = secStart + frenchEndOffset;
}
minBufferSize *= 2;
}
}
if(U_SUCCESS(*status)) {
sortKeySize += (primaries - primStart);
if(compareSec == 0) {
if (count2 > 0) {
while (count2 > UCOL_BOT_COUNT2) {
*secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
count2 -= (uint32_t)UCOL_BOT_COUNT2;
}
*secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1));
}
uint32_t secsize = secondaries-secStart;
if(!isFrenchSec) { sortKeySize += secsize;
if(sortKeySize <= resultLength) {
*(primaries++) = UCOL_LEVELTERMINATOR;
uprv_memcpy(primaries, secStart, secsize);
primaries += secsize;
} else {
if(allocateSKBuffer == TRUE) {
primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
if(U_SUCCESS(*status)) {
*result = primStart;
*(primaries++) = UCOL_LEVELTERMINATOR;
uprv_memcpy(primaries, secStart, secsize);
primaries += secsize;
}
else {
sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
goto cleanup;
}
} else {
*status = U_BUFFER_OVERFLOW_ERROR;
}
}
} else { uint8_t *newPrim = packFrench(primaries, primStart+resultLength, secondaries, &secsize, frenchStartPtr, frenchEndPtr);
sortKeySize += secsize;
if(sortKeySize <= resultLength) { primaries = newPrim; } else { if(allocateSKBuffer == TRUE) {
primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
if(U_SUCCESS(*status)) {
primaries = packFrench(primaries, primStart+resultLength, secondaries, &secsize, frenchStartPtr, frenchEndPtr);
}
else {
sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
goto cleanup;
}
} else {
*status = U_BUFFER_OVERFLOW_ERROR;
}
}
}
}
if(doCase) {
uint32_t casesize = cases - caseStart;
sortKeySize += casesize;
if(sortKeySize <= resultLength) {
*(primaries++) = UCOL_LEVELTERMINATOR;
uprv_memcpy(primaries, caseStart, casesize);
primaries += casesize;
} else {
if(allocateSKBuffer == TRUE) {
primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
if(U_SUCCESS(*status)) {
*result = primStart;
*(primaries++) = UCOL_LEVELTERMINATOR;
uprv_memcpy(primaries, caseStart, casesize);
}
else {
sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
goto cleanup;
}
} else {
*status = U_BUFFER_OVERFLOW_ERROR;
}
}
}
if(compareTer == 0) {
if (count3 > 0) {
if (coll->tertiaryCommon != UCOL_COMMON_BOT3) {
while (count3 >= coll->tertiaryTopCount) {
*tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount);
count3 -= (uint32_t)coll->tertiaryTopCount;
}
*tertiaries++ = (uint8_t)(tertiaryTop - count3);
} else {
while (count3 > coll->tertiaryBottomCount) {
*tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount);
count3 -= (uint32_t)coll->tertiaryBottomCount;
}
*tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1));
}
}
uint32_t tersize = tertiaries - terStart;
sortKeySize += tersize;
if(sortKeySize <= resultLength) {
*(primaries++) = UCOL_LEVELTERMINATOR;
uprv_memcpy(primaries, terStart, tersize);
primaries += tersize;
} else {
if(allocateSKBuffer == TRUE) {
primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
if(U_SUCCESS(*status)) {
*result = primStart;
*(primaries++) = UCOL_LEVELTERMINATOR;
uprv_memcpy(primaries, terStart, tersize);
}
else {
sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
goto cleanup;
}
} else {
*status = U_BUFFER_OVERFLOW_ERROR;
}
}
if(compareQuad == 0) {
if(count4 > 0) {
while (count4 > UCOL_BOT_COUNT4) {
*quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
count4 -= UCOL_BOT_COUNT4;
}
*quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1));
}
uint32_t quadsize = quads - quadStart;
sortKeySize += quadsize;
if(sortKeySize <= resultLength) {
*(primaries++) = UCOL_LEVELTERMINATOR;
uprv_memcpy(primaries, quadStart, quadsize);
primaries += quadsize;
} else {
if(allocateSKBuffer == TRUE) {
primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
if(U_SUCCESS(*status)) {
*result = primStart;
*(primaries++) = UCOL_LEVELTERMINATOR;
uprv_memcpy(primaries, quadStart, quadsize);
}
else {
sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
goto cleanup;
}
} else {
*status = U_BUFFER_OVERFLOW_ERROR;
}
}
}
if(compareIdent) {
sortKeySize += u_lengthOfIdenticalLevelRun(s.string, len);
if(sortKeySize <= resultLength) {
*(primaries++) = UCOL_LEVELTERMINATOR;
primaries += u_writeIdenticalLevelRun(s.string, len, primaries);
} else {
if(allocateSKBuffer == TRUE) {
primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, sortKeySize, status);
if(U_SUCCESS(*status)) {
*result = primStart;
*(primaries++) = UCOL_LEVELTERMINATOR;
u_writeIdenticalLevelRun(s.string, len, primaries);
}
else {
sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
goto cleanup;
}
} else {
*status = U_BUFFER_OVERFLOW_ERROR;
}
}
}
}
*(primaries++) = '\0';
}
if(allocateSKBuffer == TRUE) {
*result = (uint8_t*)uprv_malloc(sortKeySize);
if (*result == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
goto cleanup;
}
uprv_memcpy(*result, primStart, sortKeySize);
if(primStart != prim) {
uprv_free(primStart);
}
}
cleanup:
if (allocateSKBuffer == FALSE && resultLength > 0 && U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) {
**result = 0;
}
if(terStart != tert) {
uprv_free(terStart);
uprv_free(secStart);
uprv_free(caseStart);
uprv_free(quadStart);
}
freeOffsetBuffer(&s);
if(normSource != normBuffer) {
uprv_free(normSource);
}
return sortKeySize;
}
/**
 * Sort key generator that emits exactly three levels: primary, secondary
 * and tertiary weights, separated by UCOL_LEVELTERMINATOR and ended by a
 * single 0 byte.
 *
 * Primary bytes are written straight into the result buffer while secondary
 * and tertiary bytes are collected in side buffers and appended once all
 * collation elements have been consumed.
 *
 * @param coll             collator supplying the weights and tertiary settings
 * @param source           text to generate the key for
 * @param sourceLength     length of source, as passed on to the normalizer
 *                         and the collation iterator
 * @param result           in/out: buffer receiving the key. When
 *                         allocateSKBuffer is TRUE and the function succeeds,
 *                         *result is replaced by a uprv_malloc'ed copy of the key.
 * @param resultLength     capacity of *result in bytes
 * @param allocateSKBuffer TRUE if this function may (re)allocate the key buffer
 * @param status           in/out error code. U_BUFFER_OVERFLOW_ERROR is set
 *                         when a caller-owned buffer is too small.
 * @return the size of the sort key in bytes; 0 if the function bails out
 *         before iterating; DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY when a buffer
 *         could not be grown.
 */
U_CFUNC int32_t U_CALLCONV
ucol_calcSortKeySimpleTertiary(const UCollator *coll,
                               const UChar *source,
                               int32_t sourceLength,
                               uint8_t **result,
                               uint32_t resultLength,
                               UBool allocateSKBuffer,
                               UErrorCode *status)
{
    U_ALIGN_CODE(16);
    uint32_t i = 0;
    /* Stack buffers used until the data outgrows them. */
    uint8_t prim[UCOL_PRIMARY_MAX_BUFFER], second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER];
    uint8_t *primaries = *result, *secondaries = second, *tertiaries = tert;
    if(U_FAILURE(*status)) {
        return 0;
    }
    if(primaries == NULL && allocateSKBuffer == TRUE) {
        primaries = *result = prim;
        resultLength = UCOL_PRIMARY_MAX_BUFFER;
    }
    uint32_t secSize = UCOL_SECONDARY_MAX_BUFFER, terSize = UCOL_TERTIARY_MAX_BUFFER;
    /* Two level separators plus the terminating 0 are always present. */
    uint32_t sortKeySize = 3;
    UChar normBuffer[UCOL_NORMALIZATION_MAX_BUFFER];
    UChar *normSource = normBuffer;
    int32_t normSourceLen = UCOL_NORMALIZATION_MAX_BUFFER;
    int32_t len = sourceLength;

    /* Normalize the whole input up front if the quick check does not say YES. */
    if(coll->normalizationMode != UCOL_OFF && UNORM_YES != unorm_quickCheck(source, len, UNORM_FCD, status)) {
        len = unorm_internalNormalize(normSource, normSourceLen,
                                      source, len,
                                      UNORM_FCD, FALSE,
                                      status);
        if(*status == U_BUFFER_OVERFLOW_ERROR) {
            /* len now holds the required output capacity, not the input length. */
            normSourceLen = len;
            normSource = (UChar *)uprv_malloc(len*U_SIZEOF_UCHAR);
            if(normSource == NULL) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                return 0;
            }
            *status = U_ZERO_ERROR;
            /* Retry with the ORIGINAL input length; passing the preflighted
             * output length here would mis-size the source. */
            len = unorm_internalNormalize(normSource, normSourceLen,
                                          source, sourceLength,
                                          UNORM_FCD, FALSE,
                                          status);
            if(U_FAILURE(*status)) {
                uprv_free(normSource);
                normSource = normBuffer;
            }
        }
        if(U_FAILURE(*status)) {
            return 0;
        }
        source = normSource;
    }

    collIterate s;
    IInit_collIterate(coll, (UChar *)source, len, &s);
    if(source == normSource) {
        /* Text is already normalized; the iterator must not do it again. */
        s.flags &= ~UCOL_ITER_NORM;
    }

    /* No usable output buffer: only compute the size. */
    if(resultLength == 0 || primaries == NULL) {
        int32_t t = ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len);
        if(normSource != normBuffer) {
            uprv_free(normSource);
        }
        return t;
    }

    /* One CE can add two primary bytes, hence the margin of 2. */
    uint8_t *primarySafeEnd = primaries + resultLength - 2;
    uint32_t minBufferSize = UCOL_MAX_BUFFER;
    uint8_t *primStart = primaries;
    uint8_t *secStart = secondaries;
    uint8_t *terStart = tertiaries;
    uint32_t order = 0;
    uint8_t primary1 = 0;
    uint8_t primary2 = 0;
    uint8_t secondary = 0;
    uint8_t tertiary = 0;
    uint8_t caseSwitch = coll->caseSwitch;
    uint8_t tertiaryMask = coll->tertiaryMask;
    int8_t tertiaryAddition = coll->tertiaryAddition;
    uint8_t tertiaryTop = coll->tertiaryTop;
    uint8_t tertiaryBottom = coll->tertiaryBottom;
    uint8_t tertiaryCommon = coll->tertiaryCommon;
    uint32_t prevBuffSize = 0;
    UBool finished = FALSE;
    UBool notIsContinuation = FALSE;
    /* Run lengths of pending "common" secondary / tertiary weights. */
    uint32_t count2 = 0, count3 = 0;
    uint8_t leadPrimary = 0;

    for(;;) {
        for(i=prevBuffSize; i<minBufferSize; ++i) {
            order = ucol_IGetNextCE(coll, &s, status);
            if(order == 0) {
                continue;
            }
            if(order == UCOL_NO_MORE_CES) {
                finished = TRUE;
                break;
            }
            notIsContinuation = !isContinuation(order);
            if(notIsContinuation) {
                tertiary = (uint8_t)((order & tertiaryMask));
            } else {
                tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
            }
            secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
            primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
            primary1 = (uint8_t)(order >> 8);

            /* Primary level, with lead-byte compression. */
            if(primary1 != UCOL_IGNORABLE) {
                if(notIsContinuation) {
                    if(leadPrimary == primary1) {
                        /* Same lead byte as the previous CE: emit only the second byte. */
                        *primaries++ = primary2;
                    } else {
                        if(leadPrimary != 0) {
                            /* Close the previous compressed run. */
                            *primaries++ = (uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN);
                        }
                        if(primary2 == UCOL_IGNORABLE) {
                            /* Single-byte primary. */
                            *primaries++ = primary1;
                            leadPrimary = 0;
                        } else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
                                  (primary1 > maxRegularPrimary && primary1 < minImplicitPrimary)) {
                            /* Lead byte outside the range that gets compressed. */
                            leadPrimary = 0;
                            *primaries++ = primary1;
                            *primaries++ = primary2;
                        } else {
                            /* Start a new compressed run. */
                            *primaries++ = leadPrimary = primary1;
                            *primaries++ = primary2;
                        }
                    }
                } else {
                    /* Continuation CE: copied without compression. */
                    *primaries++ = primary1;
                    if(primary2 != UCOL_IGNORABLE) {
                        *primaries++ = primary2;
                    }
                }
            }

            /* Secondary level, with run-length compression of UCOL_COMMON2. */
            if(secondary > 0) {
                if (secondary == UCOL_COMMON2 && notIsContinuation) {
                    ++count2;
                } else {
                    if (count2 > 0) {
                        if (secondary > UCOL_COMMON2) {
                            while (count2 > UCOL_TOP_COUNT2) {
                                *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2);
                                count2 -= (uint32_t)UCOL_TOP_COUNT2;
                            }
                            *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - (count2-1));
                        } else {
                            while (count2 > UCOL_BOT_COUNT2) {
                                *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
                                count2 -= (uint32_t)UCOL_BOT_COUNT2;
                            }
                            *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1));
                        }
                        count2 = 0;
                    }
                    *secondaries++ = secondary;
                }
            }

            if(notIsContinuation) {
                tertiary ^= caseSwitch;
            }

            /* Tertiary level, with run-length compression of tertiaryCommon. */
            if(tertiary > 0) {
                if (tertiary == tertiaryCommon && notIsContinuation) {
                    ++count3;
                } else {
                    if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) {
                        tertiary += tertiaryAddition;
                    } else if (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) {
                        tertiary -= tertiaryAddition;
                    }
                    if (count3 > 0) {
                        if ((tertiary > tertiaryCommon)) {
                            while (count3 > coll->tertiaryTopCount) {
                                *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount);
                                count3 -= (uint32_t)coll->tertiaryTopCount;
                            }
                            *tertiaries++ = (uint8_t)(tertiaryTop - (count3-1));
                        } else {
                            while (count3 > coll->tertiaryBottomCount) {
                                *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount);
                                count3 -= (uint32_t)coll->tertiaryBottomCount;
                            }
                            *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1));
                        }
                        count3 = 0;
                    }
                    *tertiaries++ = tertiary;
                }
            }

            /* The primary (result) buffer is about to run out. */
            if(primaries > primarySafeEnd) {
                if(allocateSKBuffer == FALSE) {
                    /* Cannot grow a caller-owned buffer: restart the iterator
                     * and only report the required size. */
                    IInit_collIterate(coll, (UChar *)source, len, &s);
                    if(source == normSource) {
                        s.flags &= ~UCOL_ITER_NORM;
                    }
                    sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len);
                    *status = U_BUFFER_OVERFLOW_ERROR;
                    finished = TRUE;
                    break;
                } else {
                    int32_t sks = sortKeySize+(primaries - primStart)+(secondaries - secStart)+(tertiaries - terStart);
                    primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sks, status);
                    if(U_SUCCESS(*status)) {
                        *result = primStart;
                        primarySafeEnd = primStart + resultLength - 2;
                    } else {
                        sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
                        finished = TRUE;
                        break;
                    }
                }
            }
        }
        if(finished) {
            break;
        } else {
            /* Processed minBufferSize CEs: double the side buffers and go on. */
            prevBuffSize = minBufferSize;
            secStart = reallocateBuffer(&secondaries, secStart, second, &secSize, 2*secSize, status);
            terStart = reallocateBuffer(&tertiaries, terStart, tert, &terSize, 2*terSize, status);
            minBufferSize *= 2;
            if(U_FAILURE(*status)) {
                sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
                break;
            }
        }
    }

    if(U_SUCCESS(*status)) {
        sortKeySize += (primaries - primStart);

        /* Flush the trailing run of common secondaries. */
        if (count2 > 0) {
            while (count2 > UCOL_BOT_COUNT2) {
                *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
                count2 -= (uint32_t)UCOL_BOT_COUNT2;
            }
            *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1));
        }
        uint32_t secsize = secondaries-secStart;
        sortKeySize += secsize;
        if(sortKeySize <= resultLength) {
            *(primaries++) = UCOL_LEVELTERMINATOR;
            uprv_memcpy(primaries, secStart, secsize);
            primaries += secsize;
        } else {
            if(allocateSKBuffer == TRUE) {
                primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
                if(U_SUCCESS(*status)) {
                    *result = primStart;
                    *(primaries++) = UCOL_LEVELTERMINATOR;
                    uprv_memcpy(primaries, secStart, secsize);
                    /* Advance past the copied level so the tertiary level
                     * does not overwrite it. */
                    primaries += secsize;
                }
                else {
                    sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
                    goto cleanup;
                }
            } else {
                *status = U_BUFFER_OVERFLOW_ERROR;
            }
        }

        /* Flush the trailing run of common tertiaries. */
        if (count3 > 0) {
            if (coll->tertiaryCommon != UCOL_COMMON3_NORMAL) {
                while (count3 >= coll->tertiaryTopCount) {
                    *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCount);
                    count3 -= (uint32_t)coll->tertiaryTopCount;
                }
                *tertiaries++ = (uint8_t)(tertiaryTop - count3);
            } else {
                while (count3 > coll->tertiaryBottomCount) {
                    *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBottomCount);
                    count3 -= (uint32_t)coll->tertiaryBottomCount;
                }
                *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1));
            }
        }
        uint32_t tersize = tertiaries - terStart;
        sortKeySize += tersize;
        if(sortKeySize <= resultLength) {
            *(primaries++) = UCOL_LEVELTERMINATOR;
            uprv_memcpy(primaries, terStart, tersize);
            primaries += tersize;
        } else {
            if(allocateSKBuffer == TRUE) {
                primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sortKeySize, status);
                if(U_SUCCESS(*status)) {
                    *result = primStart;
                    *(primaries++) = UCOL_LEVELTERMINATOR;
                    uprv_memcpy(primaries, terStart, tersize);
                    /* Advance so the final 0 lands after the tertiary bytes. */
                    primaries += tersize;
                }
                else {
                    sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;
                    goto cleanup;
                }
            } else {
                /* Caller-owned buffer is too small: this is an overflow, not
                 * an allocation failure; sortKeySize holds the needed size. */
                *status = U_BUFFER_OVERFLOW_ERROR;
            }
        }
        *(primaries++) = '\0';
    }

    if(allocateSKBuffer == TRUE) {
        /* Hand back an exactly-sized heap copy of the key. */
        *result = (uint8_t*)uprv_malloc(sortKeySize);
        if (*result == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        uprv_memcpy(*result, primStart, sortKeySize);
        if(primStart != prim) {
            uprv_free(primStart);
        }
    }

cleanup:
    /* On a hard failure with a caller-owned buffer, leave an empty key behind. */
    if (allocateSKBuffer == FALSE && resultLength > 0 && U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) {
        **result = 0;
    }
    /* The side buffers are grown together, so one check covers both. */
    if(terStart != tert) {
        uprv_free(terStart);
        uprv_free(secStart);
    }
    freeOffsetBuffer(&s);
    if(normSource != normBuffer) {
        uprv_free(normSource);
    }
    return sortKeySize;
}
/*
 * Reports whether collation element CE counts as "shifted" with respect to
 * the variable-top limit LVT (LVT == 0 disables the limit test), and keeps
 * *wasShifted up to date for the next call.
 *
 * A CE is shifted when:
 *  - the previous CE was shifted and this CE has a zero lead primary byte, or
 *  - LVT is non-zero and either
 *      - CE is a continuation following a shifted CE, or
 *      - CE is a non-continuation whose upper 16 bits are <= LVT and whose
 *        lead primary byte is non-zero.
 *
 * *wasShifted is set to TRUE only for shifted CEs with a non-zero lead
 * primary byte, left untouched for shifted CEs with a zero lead byte, and
 * reset to FALSE for every CE that is not shifted.
 */
static inline
UBool isShiftedCE(uint32_t CE, uint32_t LVT, UBool *wasShifted) {
    const uint8_t leadByte = (uint8_t)((CE >> 24) & 0xFF);
    UBool shifted;

    if(*wasShifted && leadByte == 0) {
        shifted = TRUE;
    } else if(LVT == 0) {
        shifted = FALSE;
    } else if(isContinuation(CE)) {
        shifted = *wasShifted;
    } else {
        shifted = (UBool)((CE & 0xFFFF0000) <= LVT && leadByte > 0);
    }

    if(!shifted) {
        *wasShifted = FALSE;
        return FALSE;
    }
    if(leadByte != 0) {
        *wasShifted = TRUE;
    }
    return TRUE;
}
/*
 * Appends the byte that closes a partial-sort-key level to dest at index i
 * and advances i: UCOL_LEVELTERMINATOR while more levels follow
 * (level < maxLevel), otherwise 0.
 */
static inline
void terminatePSKLevel(int32_t level, int32_t maxLevel, int32_t &i, uint8_t *dest) {
    const uint8_t closingByte = (uint8_t)((level < maxLevel) ? UCOL_LEVELTERMINATOR : 0);
    dest[i] = closingByte;
    ++i;
}
/* Level identifiers used by ucol_nextSortKeyPart; the current level is kept
 * in the three low bits of state[1] (see UCOL_PSK_LEVEL_SHIFT/MASK). */
enum {
UCOL_PSK_PRIMARY = 0,
UCOL_PSK_SECONDARY = 1,
UCOL_PSK_CASE = 2,
UCOL_PSK_TERTIARY = 3,
UCOL_PSK_QUATERNARY = 4,
/* No output is produced for this level; ucol_nextSortKeyPart moves straight on to IDENTICAL. */
UCOL_PSK_QUIN = 5,
UCOL_PSK_IDENTICAL = 6,
/* End of key: ucol_nextSortKeyPart only zero-fills the rest of the buffer. */
UCOL_PSK_NULL = 7,
UCOL_PSK_LIMIT
};
/* Bit layout of state[1] in ucol_nextSortKeyPart. For every field, shift
 * state[1] right by *_SHIFT and AND the result with *_MASK. */
enum {
/* Bits 0-2: current level (one of the UCOL_PSK_* level values). */
UCOL_PSK_LEVEL_SHIFT = 0,
UCOL_PSK_LEVEL_MASK = 7,
/* Bit 3: set when the first byte of a two-byte weight was written but the
 * second one did not fit; while on the French secondary level it records
 * instead whether the saved iterator state was 0. */
UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT = 3,
UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1,
/* Bit 4: whether the last collation element was shifted. */
UCOL_PSK_WAS_SHIFTED_SHIFT = 4,
UCOL_PSK_WAS_SHIFTED_MASK = 1,
/* Bits 5-6: bytes of a buffered French secondary sequence already written. */
UCOL_PSK_USED_FRENCH_SHIFT = 5,
UCOL_PSK_USED_FRENCH_MASK = 3,
/* Bits 7-8: bytes of the current identical-level (bocsu) unit already written. */
UCOL_PSK_BOCSU_BYTES_SHIFT = 7,
UCOL_PSK_BOCSU_BYTES_MASK = 3,
/* Bits 9-27: elements consumed since the saved iterator state. */
UCOL_PSK_CONSUMED_CES_SHIFT = 9,
UCOL_PSK_CONSUMED_CES_MASK = 0x7FFFF
};
/* Distance between a collIterate's CEpos and toReturn pointers (presumably
 * the number of buffered CEs not yet handed out -- confirm against the
 * iterator code). The expansion is fully parenthesized so the macro can be
 * combined with other operators safely. */
#define uprv_numAvailableExpCEs(s) ((s).CEpos - (s).toReturn)
/**
 * Writes the next piece (at most count bytes) of an uncompressed sort key
 * for the text behind iter into dest, resuming from the position recorded
 * in state.
 *
 * state[0] holds a saved UCharIterator state; state[1] packs the current
 * level and several small counters (see the UCOL_PSK_*_SHIFT / *_MASK
 * constants). Both are rewritten before returning through saveState.
 *
 * @param coll   collator to use
 * @param iter   iterator over the text
 * @param state  in/out two-word continuation state
 * @param dest   output buffer, required when count > 0
 * @param count  capacity of dest
 * @param status in/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for bad
 *               arguments, U_INTERNAL_PROGRAM_ERROR when the saved state
 *               cannot be replayed or names an unknown level, and
 *               U_INDEX_OUTOFBOUNDS_ERROR when a counter does not fit its
 *               bit field in state[1].
 * @return number of key bytes written (i); 0 on error or when count is 0.
 *         Once the key is finished the rest of dest is zero-filled, but
 *         those bytes are not included in the return value.
 */
U_CAPI int32_t U_EXPORT2
ucol_nextSortKeyPart(const UCollator *coll,
UCharIterator *iter,
uint32_t state[2],
uint8_t *dest, int32_t count,
UErrorCode *status)
{
if(status==NULL || U_FAILURE(*status)) {
return 0;
}
UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
if( coll==NULL || iter==NULL ||
state==NULL ||
count<0 || (count>0 && dest==NULL)
) {
*status=U_ILLEGAL_ARGUMENT_ERROR;
UTRACE_EXIT_STATUS(status);
return 0;
}
UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
coll, iter, state[0], state[1], dest, count);
/* Nothing can be written into a zero-length buffer. */
if(count==0) {
UTRACE_EXIT_VALUE(0);
return 0;
}
/* Unpack the continuation state. */
uint32_t iterState = state[0];
UBool wasShifted = ((state[1] >> UCOL_PSK_WAS_SHIFTED_SHIFT) & UCOL_PSK_WAS_SHIFTED_MASK)?TRUE:FALSE;
int32_t level= (state[1] >> UCOL_PSK_LEVEL_SHIFT) & UCOL_PSK_LEVEL_MASK;
int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK;
int32_t usedFrench = (state[1] >> UCOL_PSK_USED_FRENCH_SHIFT) & UCOL_PSK_USED_FRENCH_MASK;
int32_t bocsuBytesUsed = (state[1] >> UCOL_PSK_BOCSU_BYTES_SHIFT) & UCOL_PSK_BOCSU_BYTES_MASK;
int32_t cces = (state[1] >> UCOL_PSK_CONSUMED_CES_SHIFT) & UCOL_PSK_CONSUMED_CES_MASK;
int32_t strength = ucol_getAttribute(coll, UCOL_STRENGTH, status);
/* Highest level that will be produced; terminatePSKLevel uses it to choose
 * between a level separator and the final 0. */
int32_t maxLevel = 0;
if(strength < UCOL_TERTIARY) {
if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {
maxLevel = UCOL_PSK_CASE;
} else {
maxLevel = strength;
}
} else {
if(strength == UCOL_TERTIARY) {
maxLevel = UCOL_PSK_TERTIARY;
} else if(strength == UCOL_QUATERNARY) {
maxLevel = UCOL_PSK_QUATERNARY;
} else { maxLevel = UCOL_IDENTICAL;
}
}
/* Quaternary byte written for CEs flagged UCOL_WAS_HIRAGANA: 0xFE when the
 * Hiragana quaternary attribute is on, otherwise 0xFF like everything else. */
uint8_t UCOL_HIRAGANA_QUAD =
(ucol_getAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, status) == UCOL_ON)?0xFE:0xFF;
/* Variable top moved into the upper 16 bits; 0 unless alternate handling is shifted. */
uint32_t LVT = (coll->alternateHandling == UCOL_SHIFTED)?(coll->variableTopValue<<16):0;
UBool doingFrench = (ucol_getAttribute(coll, UCOL_FRENCH_COLLATION, status) == UCOL_ON);
UBool notIsContinuation = FALSE;
uint32_t CE = UCOL_NO_MORE_CES;
/* The collation iterator has no string of its own; it reads through the UCharIterator. */
collIterate s;
IInit_collIterate(coll, NULL, -1, &s);
s.iterator = iter;
s.flags |= UCOL_USE_ITERATOR;
UBool doingIdenticalFromStart = FALSE;
UAlignedMemory stackNormIter[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
UNormIterator *normIter = NULL;
/* Below the identical level with normalization on, read through an FCD
 * normalizing iterator; when resuming on the identical level use NFD. */
if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON && level < UCOL_PSK_IDENTICAL) {
normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
s.iterator = unorm_setIter(normIter, iter, UNORM_FCD, status);
s.flags &= ~UCOL_ITER_NORM;
if(U_FAILURE(*status)) {
UTRACE_EXIT_STATUS(*status);
return 0;
}
} else if(level == UCOL_PSK_IDENTICAL) {
normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
s.flags &= ~UCOL_ITER_NORM;
if(U_FAILURE(*status)) {
UTRACE_EXIT_STATUS(*status);
return 0;
}
doingIdenticalFromStart = TRUE;
}
uint32_t newState = 0;
/* Position the iterator: a saved state of 0 means the start of the text, or
 * the end for a French secondary pass that is not finished (it runs backwards). */
if(iterState == 0) {
if(level == UCOL_PSK_SECONDARY && doingFrench && !byteCountOrFrenchDone) {
s.iterator->move(s.iterator, 0, UITER_LIMIT);
} else {
s.iterator->move(s.iterator, 0, UITER_START);
}
} else {
s.iterator->setState(s.iterator, iterState, status);
if(U_FAILURE(*status)) {
UTRACE_EXIT_STATUS(*status);
return 0;
}
}
/* canUpdateState: TRUE while it is acceptable to take a fresh iterator
 * state snapshot (no buffered expansion CEs are pending). */
UBool canUpdateState = TRUE;
/* Replay the elements that were already consumed after the saved state. */
int32_t counter = cces;
if(level < UCOL_PSK_IDENTICAL) {
while(counter-->0) {
if(level == UCOL_PSK_SECONDARY && doingFrench) {
CE = ucol_IGetPrevCE(coll, &s, status);
} else {
CE = ucol_IGetNextCE(coll, &s, status);
}
if(CE==UCOL_NO_MORE_CES) {
/* The saved count claims more CEs than the text provides. */
*status=U_INTERNAL_PROGRAM_ERROR;
UTRACE_EXIT_STATUS(*status);
return 0;
}
if(uprv_numAvailableExpCEs(s)) {
canUpdateState = FALSE;
}
}
} else {
/* On the identical level the counter counts code points, not CEs. */
while(counter-->0) {
uiter_next32(s.iterator);
}
}
UBool wasDoingPrimary = FALSE;
/* i: bytes written to dest so far; j: scratch index. */
int32_t i = 0;
int32_t j = 0;
/* Each case produces one level and then falls through into the next one. */
switch(level) {
case UCOL_PSK_PRIMARY:
wasDoingPrimary = TRUE;
for(;;) {
if(i==count) {
goto saveState;
}
/* Do not snapshot while half of a two-byte primary is still owed. */
if(canUpdateState && byteCountOrFrenchDone == 0) {
newState = s.iterator->getState(s.iterator);
if(newState != UITER_NO_STATE) {
iterState = newState;
cces = 0;
}
}
CE = ucol_IGetNextCE(coll, &s, status);
cces++;
if(CE==UCOL_NO_MORE_CES) {
/* Level finished: close it and restart from the beginning of the text. */
terminatePSKLevel(level, maxLevel, i, dest);
byteCountOrFrenchDone=0;
s.iterator->move(s.iterator, 0, UITER_START);
cces = 0;
level = UCOL_PSK_SECONDARY;
break;
}
if(!isShiftedCE(CE, LVT, &wasShifted)) {
CE >>= UCOL_PRIMARYORDERSHIFT;
if(CE != 0) {
if(byteCountOrFrenchDone == 0) {
dest[i++]=(uint8_t)(CE >> 8);
} else {
/* The first byte went out in the previous call. */
byteCountOrFrenchDone = 0;
}
if((CE &=0xff)!=0) {
if(i==count) {
/* No room for the second byte: remember that and un-count this CE
 * so it is fetched again on the next call. */
byteCountOrFrenchDone = 1;
cces--;
goto saveState;
}
dest[i++]=(uint8_t)CE;
}
}
}
if(uprv_numAvailableExpCEs(s)) {
canUpdateState = FALSE;
} else {
canUpdateState = TRUE;
}
}
/* fall through */
case UCOL_PSK_SECONDARY:
if(strength >= UCOL_SECONDARY) {
if(!doingFrench) {
for(;;) {
if(i == count) {
goto saveState;
}
if(canUpdateState) {
newState = s.iterator->getState(s.iterator);
if(newState != UITER_NO_STATE) {
iterState = newState;
cces = 0;
}
}
CE = ucol_IGetNextCE(coll, &s, status);
cces++;
if(CE==UCOL_NO_MORE_CES) {
terminatePSKLevel(level, maxLevel, i, dest);
byteCountOrFrenchDone = 0;
s.iterator->move(s.iterator, 0, UITER_START);
cces = 0;
level = UCOL_PSK_CASE;
break;
}
if(!isShiftedCE(CE, LVT, &wasShifted)) {
/* After the shift the low byte is the secondary weight. */
CE >>= 8;
if(CE != 0) {
dest[i++]=(uint8_t)CE;
}
}
if(uprv_numAvailableExpCEs(s)) {
canUpdateState = FALSE;
} else {
canUpdateState = TRUE;
}
}
/* French secondary: walk the text backwards. Secondaries of continuation
 * CEs are buffered and written in reverse once their non-continuation CE
 * is reached. */
} else { uint8_t frenchBuff[UCOL_MAX_BUFFER];
int32_t frenchIndex = 0;
if(wasDoingPrimary) {
/* Coming straight from the primary pass: start from the end of the text. */
s.iterator->move(s.iterator, 0, UITER_LIMIT);
cces = 0;
}
for(;;) {
if(i == count) {
goto saveState;
}
if(canUpdateState) {
newState = s.iterator->getState(s.iterator);
if(newState != UITER_NO_STATE) {
iterState = newState;
cces = 0;
}
}
CE = ucol_IGetPrevCE(coll, &s, status);
cces++;
if(CE==UCOL_NO_MORE_CES) {
terminatePSKLevel(level, maxLevel, i, dest);
byteCountOrFrenchDone = 0;
s.iterator->move(s.iterator, 0, UITER_START);
level = UCOL_PSK_CASE;
break;
}
if(isContinuation(CE)) { CE >>= 8;
frenchBuff[frenchIndex++] = (uint8_t)CE;
} else if(!isShiftedCE(CE, LVT, &wasShifted)) {
CE >>= 8;
if(!frenchIndex) {
if(CE != 0) {
dest[i++]=(uint8_t)CE;
}
} else {
frenchBuff[frenchIndex++] = (uint8_t)CE;
/* Skip what an earlier call already wrote from this sequence. */
frenchIndex -= usedFrench;
usedFrench = 0;
while(i < count && frenchIndex) {
dest[i++] = frenchBuff[--frenchIndex];
usedFrench++;
}
}
}
if(uprv_numAvailableExpCEs(s)) {
canUpdateState = FALSE;
} else {
canUpdateState = TRUE;
}
}
}
} else {
level = UCOL_PSK_CASE;
}
/* fall through */
case UCOL_PSK_CASE:
if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {
/* Case bits are packed into caseByte from the top down; caseShift is the
 * next free bit position. */
uint32_t caseShift = UCOL_CASE_SHIFT_START;
uint8_t caseByte = UCOL_CASE_BYTE_START;
uint8_t caseBits = 0;
for(;;) {
if(i == count) {
goto saveState;
}
if(canUpdateState) {
newState = s.iterator->getState(s.iterator);
if(newState != UITER_NO_STATE) {
iterState = newState;
cces = 0;
}
}
CE = ucol_IGetNextCE(coll, &s, status);
cces++;
if(CE==UCOL_NO_MORE_CES) {
/* Flush a partially filled case byte. */
if(caseShift != UCOL_CASE_SHIFT_START) {
dest[i++] = caseByte;
}
cces = 0;
/* The terminator is only written if there is still room for it. */
if(i < count) {
terminatePSKLevel(level, maxLevel, i, dest);
s.iterator->move(s.iterator, 0, UITER_START);
level = UCOL_PSK_TERTIARY;
} else {
canUpdateState = FALSE;
}
break;
}
if(!isShiftedCE(CE, LVT, &wasShifted)) {
/* Continuations never contribute; CEs with a zero primary contribute
 * only when the strength is above primary. */
if(!isContinuation(CE) && ((CE & UCOL_PRIMARYMASK) != 0 || strength > UCOL_PRIMARY)) {
CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
caseBits = (uint8_t)(CE & 0xC0);
if(CE != 0) {
if(coll->caseFirst == UCOL_UPPER_FIRST) {
if((caseBits & 0xC0) == 0) {
caseByte |= 1 << (--caseShift);
} else {
caseByte |= 0 << (--caseShift);
if(caseShift == 0) {
dest[i++] = caseByte;
caseShift = UCOL_CASE_SHIFT_START;
caseByte = UCOL_CASE_BYTE_START;
}
caseByte |= ((caseBits>>6)&1) << (--caseShift);
}
} else {
if((caseBits & 0xC0) == 0) {
caseByte |= 0 << (--caseShift);
} else {
caseByte |= 1 << (--caseShift);
if(caseShift == 0) {
dest[i++] = caseByte;
caseShift = UCOL_CASE_SHIFT_START;
caseByte = UCOL_CASE_BYTE_START;
}
caseByte |= ((caseBits>>7)&1) << (--caseShift);
}
}
}
}
}
if(uprv_numAvailableExpCEs(s)) {
canUpdateState = FALSE;
} else {
canUpdateState = TRUE;
}
}
} else {
level = UCOL_PSK_TERTIARY;
}
/* fall through */
case UCOL_PSK_TERTIARY:
if(strength >= UCOL_TERTIARY) {
for(;;) {
if(i == count) {
goto saveState;
}
if(canUpdateState) {
newState = s.iterator->getState(s.iterator);
if(newState != UITER_NO_STATE) {
iterState = newState;
cces = 0;
}
}
CE = ucol_IGetNextCE(coll, &s, status);
cces++;
if(CE==UCOL_NO_MORE_CES) {
terminatePSKLevel(level, maxLevel, i, dest);
byteCountOrFrenchDone = 0;
s.iterator->move(s.iterator, 0, UITER_START);
cces = 0;
level = UCOL_PSK_QUATERNARY;
break;
}
if(!isShiftedCE(CE, LVT, &wasShifted)) {
notIsContinuation = !isContinuation(CE);
if(notIsContinuation) {
CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
CE ^= coll->caseSwitch;
CE &= coll->tertiaryMask;
} else {
CE = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));
}
if(CE != 0) {
dest[i++]=(uint8_t)CE;
}
}
if(uprv_numAvailableExpCEs(s)) {
canUpdateState = FALSE;
} else {
canUpdateState = TRUE;
}
}
} else {
/* Strength below tertiary: no further levels are produced. */
level = UCOL_PSK_NULL;
}
/* fall through */
case UCOL_PSK_QUATERNARY:
if(strength >= UCOL_QUATERNARY) {
for(;;) {
if(i == count) {
goto saveState;
}
if(canUpdateState) {
newState = s.iterator->getState(s.iterator);
if(newState != UITER_NO_STATE) {
iterState = newState;
cces = 0;
}
}
CE = ucol_IGetNextCE(coll, &s, status);
cces++;
if(CE==UCOL_NO_MORE_CES) {
terminatePSKLevel(level, maxLevel, i, dest);
byteCountOrFrenchDone = 0;
s.iterator->move(s.iterator, 0, UITER_START);
cces = 0;
level = UCOL_PSK_QUIN;
break;
}
if(CE==0)
continue;
/* Shifted CEs contribute their primary bytes on this level; every other
 * non-continuation CE contributes one filler byte. */
if(isShiftedCE(CE, LVT, &wasShifted)) {
CE >>= 16;
if(CE != 0) {
if(byteCountOrFrenchDone == 0) {
dest[i++]=(uint8_t)(CE >> 8);
} else {
byteCountOrFrenchDone = 0;
}
if((CE &=0xff)!=0) {
if(i==count) {
byteCountOrFrenchDone = 1;
goto saveState;
}
dest[i++]=(uint8_t)CE;
}
}
} else {
notIsContinuation = !isContinuation(CE);
if(notIsContinuation) {
if(s.flags & UCOL_WAS_HIRAGANA) { dest[i++] = UCOL_HIRAGANA_QUAD;
} else {
dest[i++] = 0xFF;
}
}
}
if(uprv_numAvailableExpCEs(s)) {
canUpdateState = FALSE;
} else {
canUpdateState = TRUE;
}
}
} else {
level = UCOL_PSK_NULL;
}
/* fall through */
case UCOL_PSK_QUIN:
/* Nothing is written for this level. */
level = UCOL_PSK_IDENTICAL;
/* fall through */
case UCOL_PSK_IDENTICAL:
if(strength >= UCOL_IDENTICAL) {
UChar32 first, second;
int32_t bocsuBytesWritten = 0;
/* The identical level reads the text through an NFD iterator: create one
 * if none exists yet, or re-target the existing one after rewinding iter. */
if(normIter == NULL) {
normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
} else if(!doingIdenticalFromStart) {
iter->move(iter, 0, UITER_START);
s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
}
if(U_FAILURE(*status)) {
UTRACE_EXIT_STATUS(*status);
return 0;
}
/* Each code point is encoded relative to its predecessor; fetch the
 * predecessor, or use 0 at the start of the text. */
first = uiter_previous32(s.iterator);
if(first == U_SENTINEL) {
first = 0;
} else {
uiter_next32(s.iterator);
}
j = 0;
for(;;) {
if(i == count) {
/* Out of room in the middle of an encoded unit: record how far we got. */
if(j+1 < bocsuBytesWritten) {
bocsuBytesUsed = j+1;
}
goto saveState;
}
newState = s.iterator->getState(s.iterator);
if(newState != UITER_NO_STATE) {
iterState = newState;
cces = 0;
}
uint8_t buff[4];
second = uiter_next32(s.iterator);
cces++;
if(second == U_SENTINEL) {
terminatePSKLevel(level, maxLevel, i, dest);
level = UCOL_PSK_NULL;
break;
}
bocsuBytesWritten = u_writeIdenticalLevelRunTwoChars(first, second, buff);
first = second;
j = 0;
/* Skip the bytes of this unit that an earlier call already wrote. */
if(bocsuBytesUsed != 0) {
while(bocsuBytesUsed-->0) {
j++;
}
}
while(i < count && j < bocsuBytesWritten) {
dest[i++] = buff[j++];
}
}
} else {
level = UCOL_PSK_NULL;
}
/* fall through */
case UCOL_PSK_NULL:
/* Key complete: zero-fill the remainder of dest (not counted in the return value). */
j = i;
while(j<count) {
dest[j++]=0;
}
break;
default:
*status = U_INTERNAL_PROGRAM_ERROR;
UTRACE_EXIT_STATUS(*status);
return 0;
}
saveState:
/* Keep the older snapshot when the current element is only half written,
 * when expansion CEs are pending, or when the iterator cannot report a state. */
if(byteCountOrFrenchDone
|| canUpdateState == FALSE
|| (newState = s.iterator->getState(s.iterator)) == UITER_NO_STATE)
{
state[0] = iterState;
} else {
state[0] = s.iterator->getState(s.iterator);
cces = 0;
}
/* Pack state[1]; any counter that does not fit its bit field is reported as an error. */
if((bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) != bocsuBytesUsed) {
*status = U_INDEX_OUTOFBOUNDS_ERROR;
}
state[1] = (bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) << UCOL_PSK_BOCSU_BYTES_SHIFT;
state[1] |= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT);
/* On the French secondary level this bit records whether the saved iterator state is 0. */
if(level == UCOL_PSK_SECONDARY && doingFrench) {
state[1] |= (((state[0] == 0) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);
} else {
state[1] |= ((byteCountOrFrenchDone & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);
}
if(wasShifted) {
state[1] |= 1 << UCOL_PSK_WAS_SHIFTED_SHIFT;
}
if((cces & UCOL_PSK_CONSUMED_CES_MASK) != cces) {
*status = U_INDEX_OUTOFBOUNDS_ERROR;
}
state[1] |= ((cces & UCOL_PSK_CONSUMED_CES_MASK) << UCOL_PSK_CONSUMED_CES_SHIFT);
if((usedFrench & UCOL_PSK_USED_FRENCH_MASK) != usedFrench) {
*status = U_INDEX_OUTOFBOUNDS_ERROR;
}
state[1] |= ((usedFrench & UCOL_PSK_USED_FRENCH_MASK) << UCOL_PSK_USED_FRENCH_SHIFT);
/* Release the normalizing iterator and the iterator's offset buffer. */
if(normIter != NULL) {
unorm_closeIter(normIter);
}
freeOffsetBuffer(&s);
UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
dest,i, state[0], state[1]);
UTRACE_EXIT_VALUE(i);
return i;
}
/**
 * Builds a bound for a sort key: the first noOfLevels levels of source,
 * followed by bytes that depend on boundType and a terminating 0.
 *
 * @param source       sort key to derive the bound from; must not be NULL
 * @param sourceLength length of source
 * @param boundType    UCOL_BOUND_LOWER (no extra byte), UCOL_BOUND_UPPER
 *                     (one byte, 2) or UCOL_BOUND_UPPER_LONG (two bytes,
 *                     0xFF 0xFF). The numeric value of the enum is used as
 *                     the number of extra bytes.
 * @param noOfLevels   number of levels to keep
 * @param result       buffer for the bound; may be NULL to query the size
 * @param resultLength capacity of result
 * @param status       in/out error code. U_ILLEGAL_ARGUMENT_ERROR for a NULL
 *                     source or unknown boundType;
 *                     U_SORT_KEY_TOO_SHORT_WARNING if source ends before
 *                     noOfLevels level separators were seen.
 * @return number of bytes written including the terminating 0, or, when
 *         result is NULL or too small, the capacity required.
 */
U_CAPI int32_t U_EXPORT2
ucol_getBound(const uint8_t *source,
              int32_t sourceLength,
              UColBoundMode boundType,
              uint32_t noOfLevels,
              uint8_t *result,
              int32_t resultLength,
              UErrorCode *status)
{
    if(status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    if(source == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* Scan forward (starting at index 1) until enough level separators
     * have been passed or the key ends. */
    int32_t sourceIndex = 0;
    do {
        sourceIndex++;
        if(source[sourceIndex] == UCOL_LEVELTERMINATOR) {
            noOfLevels--;
        }
    } while (noOfLevels > 0
             && (source[sourceIndex] != 0 || sourceIndex < sourceLength));

    if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
        && noOfLevels > 0) {
        *status = U_SORT_KEY_TOO_SHORT_WARNING;
    }

    /* Copied prefix + boundType extra bytes + the terminating 0. The
     * terminator must be part of the capacity check, otherwise a buffer of
     * exactly sourceIndex+boundType bytes is overrun by one byte. */
    const int32_t requiredLength = sourceIndex+boundType+1;
    if(result != NULL && resultLength >= requiredLength) {
        uprv_memcpy(result, source, sourceIndex);
        switch(boundType) {
        case UCOL_BOUND_LOWER:
            /* The lower bound is just the terminated prefix. */
            break;
        case UCOL_BOUND_UPPER:
            result[sourceIndex++] = 2;
            break;
        case UCOL_BOUND_UPPER_LONG:
            result[sourceIndex++] = 0xFF;
            result[sourceIndex++] = 0xFF;
            break;
        default:
            *status = U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        result[sourceIndex++] = 0;
        return sourceIndex;
    } else {
        return requiredLength;
    }
}
/*
 * Merges the weights of one collation element into the packed Latin-1 table
 * entry of ch. coll->latinOneCEs is addressed as three consecutive sections
 * of latinOneTableLen words each: primaries at [ch], secondaries at
 * [latinOneTableLen+ch] and tertiaries at [2*latinOneTableLen+ch].
 *
 * Each non-zero weight byte is OR-ed in at the bit offset given by the
 * matching *Shift value, which is then lowered by 8. If a second primary
 * byte arrives when *primShift is already negative, all three entries are
 * set to UCOL_BAIL_OUT_CE and the function returns. For a non-continuation
 * CE with French collation on, the secondary is inserted at the top byte
 * after shifting the existing secondaries down, which reverses their order.
 */
static inline void
ucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE,
                      int32_t *primShift, int32_t *secShift, int32_t *terShift)
{
    const UBool firstCE = !isContinuation(CE);

    /* The tertiary weight is derived from the low byte before any shifting. */
    uint8_t terWeight;
    if(firstCE) {
        terWeight = (uint8_t)((CE & coll->tertiaryMask));
        terWeight ^= coll->caseSwitch;
    } else {
        terWeight = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));
        terWeight &= UCOL_REMOVE_CASE;
    }
    const uint8_t secWeight = (uint8_t)((CE >> 8) & UCOL_BYTE_SIZE_MASK);
    const uint8_t primLow = (uint8_t)((CE >> 16) & UCOL_BYTE_SIZE_MASK);
    const uint8_t primHigh = (uint8_t)(CE >> 24);

    const int32_t secIndex = coll->latinOneTableLen+ch;
    const int32_t terIndex = 2*coll->latinOneTableLen+ch;

    if(primHigh != 0) {
        coll->latinOneCEs[ch] |= (primHigh << *primShift);
        *primShift -= 8;
    }

    if(primLow != 0) {
        if(*primShift < 0) {
            /* No room left in the primary word: mark the entry as unusable. */
            coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;
            coll->latinOneCEs[secIndex] = UCOL_BAIL_OUT_CE;
            coll->latinOneCEs[terIndex] = UCOL_BAIL_OUT_CE;
            return;
        }
        coll->latinOneCEs[ch] |= (primLow << *primShift);
        *primShift -= 8;
    }

    if(secWeight != 0) {
        if(firstCE && coll->frenchCollation == UCOL_ON) {
            /* Reverse order: push earlier secondaries down, insert on top. */
            coll->latinOneCEs[secIndex] >>= 8;
            coll->latinOneCEs[secIndex] |= (secWeight << 24);
        } else {
            coll->latinOneCEs[secIndex] |= (secWeight << *secShift);
        }
        *secShift -= 8;
    }

    if(terWeight != 0) {
        coll->latinOneCEs[terIndex] |= (terWeight << *terShift);
        *terShift -= 8;
    }
}
/*
 * Replaces coll->latinOneCEs with a freshly allocated, zero-filled table of
 * three sections of `size` words each, copying the leading
 * min(size, old length) words of every old section into the corresponding
 * new section. The old table is freed and latinOneTableLen becomes size.
 *
 * Returns TRUE on success. On allocation failure sets *status to
 * U_MEMORY_ALLOCATION_ERROR, flags coll->latinOneFailed and returns FALSE,
 * leaving the old table in place.
 */
static inline UBool
ucol_resizeLatinOneTable(UCollator *coll, int32_t size, UErrorCode *status) {
    uint32_t *grown = (uint32_t *)uprv_malloc(size*sizeof(uint32_t)*3);
    if(grown == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        coll->latinOneFailed = TRUE;
        return FALSE;
    }

    const int32_t oldLen = coll->latinOneTableLen;
    const int32_t keptEntries = (size < oldLen) ? size : oldLen;
    int32_t sizeToCopy = keptEntries*sizeof(uint32_t);

    uprv_memset(grown, 0, size*sizeof(uint32_t)*3);
    /* One pass per section: primary, secondary, tertiary. */
    for(int32_t section = 0; section < 3; ++section) {
        uprv_memcpy(grown + section*size, coll->latinOneCEs + section*oldLen, sizeToCopy);
    }

    coll->latinOneTableLen = size;
    uprv_free(coll->latinOneCEs);
    coll->latinOneCEs = grown;
    return TRUE;
}
/**
 * Builds (or rebuilds) coll->latinOneCEs.
 *
 * The table consists of three consecutive sections of coll->latinOneTableLen
 * entries each; ucol_addLatinOneEntry() writes into section 0, 1 and 2 at the
 * same index. Indices 0..UCOL_ENDOFLATINONERANGE correspond to code units;
 * indices from UCOL_ENDOFLATINONERANGE+1 upward are handed out to contraction
 * results, growing the table on demand and trimming it at the end.
 *
 * @param coll   collator whose table is (re)built
 * @param status receives U_MEMORY_ALLOCATION_ERROR on allocation failure and
 *               U_UNSUPPORTED_ERROR for a contraction CE whose bits
 *               0x00FFF000 are already in use
 * @return TRUE on success, FALSE otherwise. Every exit through
 *         cleanup_after_failure also sets coll->latinOneFailed.
 */
static UBool
ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
UBool result = TRUE;
// First use: allocate the table with the default per-section length.
if(coll->latinOneCEs == NULL) {
coll->latinOneCEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*UCOL_LATINONETABLELEN*3);
if(coll->latinOneCEs == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return FALSE;
}
coll->latinOneTableLen = UCOL_LATINONETABLELEN;
}
UChar ch = 0;
// Element iterator reused below to expand single characters into CEs.
UCollationElements *it = ucol_openElements(coll, &ch, 1, status);
if (U_FAILURE(*status)) {
return FALSE;
}
uprv_memset(coll->latinOneCEs, 0, sizeof(uint32_t)*coll->latinOneTableLen*3);
// Per-entry bit positions at which the next primary/secondary/tertiary byte is packed.
int32_t primShift = 24, secShift = 24, terShift = 24;
uint32_t CE = 0;
// Next free table index for contraction results.
int32_t contractionOffset = UCOL_ENDOFLATINONERANGE+1;
for(ch = 0; ch <= UCOL_ENDOFLATINONERANGE; ch++) {
primShift = 24; secShift = 24; terShift = 24;
// Look up the raw CE: direct array below 0x100, otherwise the trie with a
// fallback to the UCA trie when this collator has no mapping.
if(ch < 0x100) {
CE = coll->latinOneMapping[ch];
} else {
CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
if(CE == UCOL_NOT_FOUND && coll->UCA) {
CE = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
}
}
if(CE < UCOL_NOT_FOUND) {
// Ordinary CE: pack it directly.
ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
} else {
// Special CE: handling depends on its tag.
switch (getCETag(CE)) {
case EXPANSION_TAG:
case DIGIT_TAG:
// Let the element iterator produce the CE sequence for this one character
// and pack each CE; if any level runs out of room, mark the entry as bail-out.
ucol_setText(it, &ch, 1, status);
while((int32_t)(CE = ucol_next(it, status)) != UCOL_NULLORDER) {
if(primShift < 0 || secShift < 0 || terShift < 0) {
coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;
coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
break;
}
ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
}
break;
case CONTRACTION_TAG:
{
// Bits 0x00FFF000 of the CE are reused below to store the table index of
// the contraction results, so they must be free.
if((CE & 0x00FFF000) != 0) {
*status = U_UNSUPPORTED_ERROR;
goto cleanup_after_failure;
}
const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE);
CE |= (contractionOffset & 0xFFF) << 12;
// The annotated contraction CE is stored in all three sections for ch.
coll->latinOneCEs[ch] = CE;
coll->latinOneCEs[coll->latinOneTableLen+ch] = CE;
coll->latinOneCEs[2*coll->latinOneTableLen+ch] = CE;
// Walk the contraction's code-unit list (terminated by 0xFFFF); each position
// gets one table slot at contractionOffset.
do {
CE = *(coll->contractionCEs +
(UCharOffset - coll->contractionIndex));
if(CE > UCOL_NOT_FOUND && getCETag(CE) == EXPANSION_TAG) {
uint32_t size;
uint32_t i;
uint32_t *CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE);
size = getExpansionCount(CE);
if(size != 0) {
// Counted expansion: exactly `size` CEs.
for(i = 0; i<size; i++) {
if(primShift < 0 || secShift < 0 || terShift < 0) {
coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
break;
}
ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift);
}
} else {
// Uncounted expansion: CEs run until a zero entry.
while(*CEOffset != 0) {
if(primShift < 0 || secShift < 0 || terShift < 0) {
coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
break;
}
ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift);
}
}
contractionOffset++;
} else if(CE < UCOL_NOT_FOUND) {
// Plain CE as contraction result.
ucol_addLatinOneEntry(coll, (UChar)contractionOffset++, CE, &primShift, &secShift, &terShift);
} else {
// Any other special CE (or UCOL_NOT_FOUND) is not representable here.
coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
contractionOffset++;
}
UCharOffset++;
primShift = 24; secShift = 24; terShift = 24;
// Table full: double it before the next slot is written.
if(contractionOffset == coll->latinOneTableLen) { if(!ucol_resizeLatinOneTable(coll, 2*coll->latinOneTableLen, status)) {
goto cleanup_after_failure;
}
}
} while(*UCharOffset != 0xFFFF);
}
break;;
case SPEC_PROC_TAG:
{
// Only U+00B7 is accepted with this tag; any other character aborts the build.
if (ch==0xb7) {
ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
}
else {
goto cleanup_after_failure;
}
}
break;
default:
// Any other tag makes the table unusable for this collator.
goto cleanup_after_failure;
}
}
}
// Trim the table down to the slots actually used.
if(contractionOffset < coll->latinOneTableLen) {
if(!ucol_resizeLatinOneTable(coll, contractionOffset, status)) {
goto cleanup_after_failure;
}
}
ucol_closeElements(it);
return result;
cleanup_after_failure:
coll->latinOneFailed = TRUE;
ucol_closeElements(it);
return FALSE;
}
/**
 * Recomputes the collator fields that are derived from its attributes:
 * case switch, tertiary mask/common/top/bottom values and counts, the
 * sort-key generator, and whether the Latin-1 table may be used.
 *
 * Does nothing when *status already indicates failure.
 *
 * @param coll   collator to update
 * @param status in/out error code; a U_UNSUPPORTED_ERROR produced while
 *               building the Latin-1 table is reset to U_ZERO_ERROR
 */
void ucol_updateInternalState(UCollator *coll, UErrorCode *status) {
    if(U_FAILURE(*status)) {
        return;
    }

    const UBool upperFirst = (UBool)(coll->caseFirst == UCOL_UPPER_FIRST);
    if(upperFirst) {
        coll->caseSwitch = UCOL_CASE_SWITCH;
    } else {
        coll->caseSwitch = UCOL_NO_CASE_SWITCH;
    }

    if(coll->caseLevel != UCOL_ON && coll->caseFirst != UCOL_OFF) {
        // Case bits stay inside the tertiary weight.
        coll->tertiaryMask = UCOL_KEEP_CASE;
        coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_ON;
        if(upperFirst) {
            coll->tertiaryCommon = UCOL_COMMON3_UPPERFIRST;
            coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_UPPER;
            coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_UPPER;
        } else {
            coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
            coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_LOWER;
            coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_LOWER;
        }
    } else {
        // Case level on, or case-first off: case bits are masked out of the tertiary.
        coll->tertiaryMask = UCOL_REMOVE_CASE;
        coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
        coll->tertiaryAddition = (int8_t)UCOL_FLAG_BIT_MASK_CASE_SW_OFF;
        coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_OFF;
        coll->tertiaryBottom = UCOL_COMMON_BOT3;
    }

    uint8_t commonRange = (uint8_t)(coll->tertiaryTop - UCOL_COMMON_BOT3-1);
    coll->tertiaryTopCount = (uint8_t)(UCOL_PROPORTION3*commonRange);
    coll->tertiaryBottomCount = (uint8_t)(commonRange - coll->tertiaryTopCount);

    // Pick the specialised sort-key generator only for the plain tertiary configuration.
    if(coll->caseLevel != UCOL_OFF || coll->strength != UCOL_TERTIARY
        || coll->frenchCollation != UCOL_OFF || coll->alternateHandling != UCOL_NON_IGNORABLE)
    {
        coll->sortKeyGen = ucol_calcSortKey;
    } else {
        coll->sortKeyGen = ucol_calcSortKeySimpleTertiary;
    }

    // Decide whether the Latin-1 table can be used with the current attributes.
    if(coll->caseLevel != UCOL_OFF || coll->strength > UCOL_TERTIARY || coll->numericCollation != UCOL_OFF
        || coll->alternateHandling != UCOL_NON_IGNORABLE || coll->latinOneFailed)
    {
        coll->latinOneUse = FALSE;
        return;
    }

    if(coll->latinOneCEs != NULL && !coll->latinOneRegenTable) {
        // Existing table is still valid.
        coll->latinOneUse = TRUE;
        return;
    }

    // No table yet, or a rebuild was requested.
    if(ucol_setUpLatinOne(coll, status)) {
        coll->latinOneUse = TRUE;
    } else {
        coll->latinOneUse = FALSE;
    }
    if(*status == U_UNSUPPORTED_ERROR) {
        *status = U_ZERO_ERROR;
    }
}
/**
 * Sets the collator's variable top to the primary weight of the collation
 * element produced by `varTop`.
 *
 * `varTop` must be consumed entirely by a single call to ucol_IGetNextCE,
 * and the element that follows must not be a continuation with a non-zero
 * primary.
 *
 * @param coll   collator to modify; NULL returns 0 without touching status
 * @param varTop text defining the variable top; NULL is rejected
 * @param len    length of varTop, or -1 if NUL-terminated; 0 is rejected
 * @param status U_ILLEGAL_ARGUMENT_ERROR for NULL/empty text,
 *               U_CE_NOT_FOUND_ERROR if the text is not consumed by one CE,
 *               U_PRIMARY_TOO_LONG_ERROR if the primary continues into a
 *               continuation CE
 * @return the primary-masked CE on success, 0 on any failure
 */
U_CAPI uint32_t U_EXPORT2
ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) {
    if(U_FAILURE(*status) || coll == NULL) {
        return 0;
    }
    // Reject a missing string before u_strlen or the iterator dereference it.
    if(varTop == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(len == -1) {
        len = u_strlen(varTop);
    }
    if(len == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    collIterate s;
    IInit_collIterate(coll, varTop, len, &s);

    uint32_t CE = ucol_IGetNextCE(coll, &s, status);

    // The whole input has to map to this one CE.
    if(s.pos != s.endp || CE == UCOL_NO_MORE_CES) {
        // Error exits get the same iterator cleanup as the success path.
        freeOffsetBuffer(&s);
        *status = U_CE_NOT_FOUND_ERROR;
        return 0;
    }

    uint32_t nextCE = ucol_IGetNextCE(coll, &s, status);

    // A continuation carrying primary bits means the primary does not fit.
    if(isContinuation(nextCE) && (nextCE & UCOL_PRIMARYMASK) != 0) {
        freeOffsetBuffer(&s);
        *status = U_PRIMARY_TOO_LONG_ERROR;
        return 0;
    }

    if(coll->variableTopValue != (CE & UCOL_PRIMARYMASK)>>16) {
        coll->variableTopValueisDefault = FALSE;
        coll->variableTopValue = (CE & UCOL_PRIMARYMASK)>>16;
    }

    freeOffsetBuffer(&s);
    return CE & UCOL_PRIMARYMASK;
}
/**
 * Returns the collator's variable top, shifted back into the upper 16 bits.
 *
 * @param coll   collator to query
 * @param status in: must not indicate failure
 * @return coll->variableTopValue << 16, or 0 if *status is a failure or
 *         coll is NULL
 */
U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) {
    if(U_SUCCESS(*status) && coll != NULL) {
        return coll->variableTopValue<<16;
    }
    return 0;
}
/**
 * Sets the variable top from a 32-bit value; only the bits selected by
 * UCOL_PRIMARYMASK are used.
 *
 * If the resulting value differs from the current one, it is stored and the
 * "is default" marker is cleared. Nothing happens when *status is a failure
 * or coll is NULL.
 *
 * @param coll   collator to modify
 * @param varTop value whose primary part becomes the variable top
 * @param status in: must not indicate failure
 */
U_CAPI void U_EXPORT2
ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) {
    if(U_FAILURE(*status) || coll == NULL) {
        return;
    }

    const uint32_t requested = (varTop & UCOL_PRIMARYMASK)>>16;
    if(coll->variableTopValue == requested) {
        return;
    }
    coll->variableTopValueisDefault = FALSE;
    coll->variableTopValue = requested;
}
/**
 * Sets one collation attribute and refreshes the collator's derived state.
 *
 * For every attribute, UCOL_DEFAULT restores the value from coll->options
 * and marks the attribute as default; any other accepted value is stored
 * and the attribute is marked as non-default.
 *
 * @param coll   collator to modify; NULL is ignored
 * @param attr   attribute to set
 * @param value  new value
 * @param status U_ILLEGAL_ARGUMENT_ERROR if attr is unknown or value is not
 *               accepted for attr; nothing is done if it already indicates
 *               failure on entry
 */
U_CAPI void U_EXPORT2
ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) {
    if(U_FAILURE(*status) || coll == NULL) {
        return;
    }
    // Remembered so that a change can invalidate the Latin-1 table below.
    UColAttributeValue oldFrench = coll->frenchCollation;
    UColAttributeValue oldCaseFirst = coll->caseFirst;
    switch(attr) {
    case UCOL_NUMERIC_COLLATION:
        if(value == UCOL_ON) {
            coll->numericCollation = UCOL_ON;
            coll->numericCollationisDefault = FALSE;
        } else if (value == UCOL_OFF) {
            coll->numericCollation = UCOL_OFF;
            coll->numericCollationisDefault = FALSE;
        } else if (value == UCOL_DEFAULT) {
            coll->numericCollationisDefault = TRUE;
            coll->numericCollation = (UColAttributeValue)coll->options->numericCollation;
        } else {
            *status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        break;
    case UCOL_HIRAGANA_QUATERNARY_MODE:
        if(value == UCOL_ON) {
            coll->hiraganaQ = UCOL_ON;
            coll->hiraganaQisDefault = FALSE;
        } else if (value == UCOL_OFF) {
            coll->hiraganaQ = UCOL_OFF;
            coll->hiraganaQisDefault = FALSE;
        } else if (value == UCOL_DEFAULT) {
            coll->hiraganaQisDefault = TRUE;
            coll->hiraganaQ = (UColAttributeValue)coll->options->hiraganaQ;
        } else {
            *status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        break;
    case UCOL_FRENCH_COLLATION:
        if(value == UCOL_ON) {
            coll->frenchCollation = UCOL_ON;
            coll->frenchCollationisDefault = FALSE;
        } else if (value == UCOL_OFF) {
            coll->frenchCollation = UCOL_OFF;
            coll->frenchCollationisDefault = FALSE;
        } else if (value == UCOL_DEFAULT) {
            coll->frenchCollationisDefault = TRUE;
            coll->frenchCollation = (UColAttributeValue)coll->options->frenchCollation;
        } else {
            *status = U_ILLEGAL_ARGUMENT_ERROR ;
        }
        break;
    case UCOL_ALTERNATE_HANDLING:
        if(value == UCOL_SHIFTED) {
            coll->alternateHandling = UCOL_SHIFTED;
            coll->alternateHandlingisDefault = FALSE;
        } else if (value == UCOL_NON_IGNORABLE) {
            coll->alternateHandling = UCOL_NON_IGNORABLE;
            coll->alternateHandlingisDefault = FALSE;
        } else if (value == UCOL_DEFAULT) {
            coll->alternateHandlingisDefault = TRUE;
            coll->alternateHandling = (UColAttributeValue)coll->options->alternateHandling ;
        } else {
            *status = U_ILLEGAL_ARGUMENT_ERROR ;
        }
        break;
    case UCOL_CASE_FIRST:
        if(value == UCOL_LOWER_FIRST) {
            coll->caseFirst = UCOL_LOWER_FIRST;
            coll->caseFirstisDefault = FALSE;
        } else if (value == UCOL_UPPER_FIRST) {
            coll->caseFirst = UCOL_UPPER_FIRST;
            coll->caseFirstisDefault = FALSE;
        } else if (value == UCOL_OFF) {
            coll->caseFirst = UCOL_OFF;
            coll->caseFirstisDefault = FALSE;
        } else if (value == UCOL_DEFAULT) {
            coll->caseFirst = (UColAttributeValue)coll->options->caseFirst;
            coll->caseFirstisDefault = TRUE;
        } else {
            *status = U_ILLEGAL_ARGUMENT_ERROR ;
        }
        break;
    case UCOL_CASE_LEVEL:
        if(value == UCOL_ON) {
            coll->caseLevel = UCOL_ON;
            coll->caseLevelisDefault = FALSE;
        } else if (value == UCOL_OFF) {
            coll->caseLevel = UCOL_OFF;
            coll->caseLevelisDefault = FALSE;
        } else if (value == UCOL_DEFAULT) {
            coll->caseLevel = (UColAttributeValue)coll->options->caseLevel;
            coll->caseLevelisDefault = TRUE;
        } else {
            *status = U_ILLEGAL_ARGUMENT_ERROR ;
        }
        break;
    case UCOL_NORMALIZATION_MODE:
        if(value == UCOL_ON) {
            coll->normalizationMode = UCOL_ON;
            coll->normalizationModeisDefault = FALSE;
        } else if (value == UCOL_OFF) {
            coll->normalizationMode = UCOL_OFF;
            coll->normalizationModeisDefault = FALSE;
        } else if (value == UCOL_DEFAULT) {
            coll->normalizationModeisDefault = TRUE;
            coll->normalizationMode = (UColAttributeValue)coll->options->normalizationMode;
        } else {
            *status = U_ILLEGAL_ARGUMENT_ERROR ;
        }
        break;
    case UCOL_STRENGTH:
        if (value == UCOL_DEFAULT) {
            coll->strengthisDefault = TRUE;
            coll->strength = (UColAttributeValue)coll->options->strength;
        } else if (value <= UCOL_IDENTICAL) {
            coll->strengthisDefault = FALSE;
            coll->strength = value;
        } else {
            *status = U_ILLEGAL_ARGUMENT_ERROR ;
        }
        break;
    case UCOL_ATTRIBUTE_COUNT:
    default:
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        break;
    }

    // A French or case-first change makes the existing Latin-1 table stale.
    // The request is only raised here, never cleared unconditionally: if the
    // table cannot be rebuilt right now because other attributes disable its
    // use, the request must survive until ucol_updateInternalState() can act
    // on it, otherwise a later call would re-enable the stale table.
    if(oldFrench != coll->frenchCollation || oldCaseFirst != coll->caseFirst) {
        coll->latinOneRegenTable = TRUE;
    }
    ucol_updateInternalState(coll, status);
    // The table is in use again, so it is current and the request is satisfied.
    if(coll->latinOneUse) {
        coll->latinOneRegenTable = FALSE;
    }
}
/**
 * Returns the current value of a collation attribute.
 *
 * @param coll   collator to query
 * @param attr   attribute to read
 * @param status set to U_ILLEGAL_ARGUMENT_ERROR for an unknown attribute
 * @return the attribute value; UCOL_DEFAULT if *status is a failure on
 *         entry, coll is NULL, or attr is unknown
 */
U_CAPI UColAttributeValue U_EXPORT2
ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) {
    UColAttributeValue current = UCOL_DEFAULT;

    if(U_FAILURE(*status) || coll == NULL) {
        return current;
    }

    switch(attr) {
    case UCOL_NUMERIC_COLLATION:
        current = coll->numericCollation;
        break;
    case UCOL_HIRAGANA_QUATERNARY_MODE:
        current = coll->hiraganaQ;
        break;
    case UCOL_FRENCH_COLLATION:
        current = coll->frenchCollation;
        break;
    case UCOL_ALTERNATE_HANDLING:
        current = coll->alternateHandling;
        break;
    case UCOL_CASE_FIRST:
        current = coll->caseFirst;
        break;
    case UCOL_CASE_LEVEL:
        current = coll->caseLevel;
        break;
    case UCOL_NORMALIZATION_MODE:
        current = coll->normalizationMode;
        break;
    case UCOL_STRENGTH:
        current = coll->strength;
        break;
    case UCOL_ATTRIBUTE_COUNT:
    default:
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        break;
    }
    return current;
}
/**
 * Convenience wrapper: sets UCOL_STRENGTH through ucol_setAttribute().
 * Any error reported by ucol_setAttribute() is discarded.
 *
 * @param coll     collator to modify
 * @param strength new strength
 */
U_CAPI void U_EXPORT2
ucol_setStrength(UCollator *coll, UCollationStrength strength)
{
    UErrorCode discarded = U_ZERO_ERROR;
    ucol_setAttribute(coll, UCOL_STRENGTH, strength, &discarded);
}
/**
 * Convenience wrapper: reads UCOL_STRENGTH through ucol_getAttribute().
 * Any error reported by ucol_getAttribute() is discarded.
 *
 * @param coll collator to query
 * @return whatever ucol_getAttribute() returns for UCOL_STRENGTH
 */
U_CAPI UCollationStrength U_EXPORT2
ucol_getStrength(const UCollator *coll)
{
    UErrorCode discarded = U_ZERO_ERROR;
    UCollationStrength current = ucol_getAttribute(coll, UCOL_STRENGTH, &discarded);
    return current;
}
/**
 * Fills `versionInfo` with the collator's version.
 *
 * Bytes 0 and 1 are the high and low byte of a 16-bit value combining
 * UCOL_RUNTIME_VERSION (shifted left 11) and image->version[0] (shifted
 * left 6). Byte 2 is image->version[1]. Byte 3 is the first byte of the
 * UCA image's UCAVersion, or 0 when the collator has no UCA.
 *
 * @param coll        collator to query (dereferenced unconditionally)
 * @param versionInfo receives the four version bytes
 */
U_CAPI void U_EXPORT2
ucol_getVersion(const UCollator* coll,
                UVersionInfo versionInfo)
{
    const uint8_t runtimePart = UCOL_RUNTIME_VERSION;
    const uint8_t builderPart = coll->image->version[0];
    const uint8_t charsetPart = 0;
    const uint16_t packed = (uint16_t)((runtimePart<<11) | (builderPart<<6) | (charsetPart));

    versionInfo[0] = (uint8_t)(packed>>8);
    versionInfo[1] = (uint8_t)packed;
    versionInfo[2] = coll->image->version[1];
    versionInfo[3] = 0;
    if(coll->UCA) {
        versionInfo[3] = coll->UCA->image->UCAVersion[0];
    }
}
/**
 * Reports whether code unit `u` has its own mapping in this collator.
 *
 * Below 0x100 the CE comes from latinOneMapping and is considered untailored
 * when it equals the UCA's entry; otherwise it comes from the collator's
 * trie. For a contraction CE, the CE stored for the contraction's first
 * position is examined instead.
 *
 * @param coll   collator to query
 * @param u      code unit to look up
 * @param status in: must not indicate failure
 * @return TRUE if the resulting CE is not UCOL_NOT_FOUND; FALSE otherwise,
 *         and also when *status is a failure, coll is NULL, or coll is its
 *         own UCA
 */
U_CAPI UBool U_EXPORT2
ucol_isTailored(const UCollator *coll, const UChar u, UErrorCode *status) {
    if(U_FAILURE(*status) || coll == NULL || coll == coll->UCA) {
        return FALSE;
    }

    uint32_t CE = UCOL_NOT_FOUND;
    if(u >= 0x100) {
        CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, u);
    } else {
        CE = coll->latinOneMapping[u];
        if(coll->UCA && CE == coll->UCA->latinOneMapping[u]) {
            return FALSE;
        }
    }

    if(isContraction(CE)) {
        const UChar *firstUnit = (UChar *)coll->image+getContractOffset(CE);
        CE = *(coll->contractionCEs + (firstUnit- coll->contractionIndex));
    }

    return (UBool)(CE != UCOL_NOT_FOUND);
}
static
UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBool normalize, UErrorCode *status)
{
UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
int32_t comparison;
int32_t sLen = 0;
UChar *sBuf = NULL;
int32_t tLen = 0;
UChar *tBuf = NULL;
UBool freeSBuf = FALSE, freeTBuf = FALSE;
if (sColl->flags & UCOL_USE_ITERATOR) {
UNormIterator *sNIt = NULL, *tNIt = NULL;
sNIt = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);
tNIt = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);
sColl->iterator->move(sColl->iterator, 0, UITER_START);
tColl->iterator->move(tColl->iterator, 0, UITER_START);
UCharIterator *sIt = unorm_setIter(sNIt, sColl->iterator, UNORM_NFD, status);
UCharIterator *tIt = unorm_setIter(tNIt, tColl->iterator, UNORM_NFD, status);
comparison = u_strCompareIter(sIt, tIt, TRUE);
unorm_closeIter(sNIt);
unorm_closeIter(tNIt);
} else {
sLen = (sColl->flags & UCOL_ITER_HASLEN) ? sColl->endp - sColl->string : -1;
sBuf = sColl->string;
tLen = (tColl->flags & UCOL_ITER_HASLEN) ? tColl->endp - tColl->string : -1;
tBuf = tColl->string;
if (normalize) {
*status = U_ZERO_ERROR;
if (unorm_quickCheck(sBuf, sLen, UNORM_NFD, status) != UNORM_YES) {
sLen = unorm_decompose(sColl->writableBuffer, (int32_t)sColl->writableBufSize,
sBuf, sLen,
FALSE, 0,
status);
if(*status == U_BUFFER_OVERFLOW_ERROR) {
if(!u_growBufferFromStatic(sColl->stackWritableBuffer,
&sColl->writableBuffer,
(int32_t *)&sColl->writableBufSize, sLen,
0)
)
{
*status = U_MEMORY_ALLOCATION_ERROR;
return UCOL_LESS;
}
*status = U_ZERO_ERROR;
sLen = unorm_decompose(sColl->writableBuffer, (int32_t)sColl->writableBufSize,
sBuf, sLen,
FALSE, 0,
status);
}
if(freeSBuf) {
uprv_free(sBuf);
freeSBuf = FALSE;
}
sBuf = sColl->writableBuffer;
if (sBuf != sColl->stackWritableBuffer) {
sColl->flags |= UCOL_ITER_ALLOCATED;
}
}
*status = U_ZERO_ERROR;
if (unorm_quickCheck(tBuf, tLen, UNORM_NFD, status) != UNORM_YES) {
tLen = unorm_decompose(tColl->writableBuffer, (int32_t)tColl->writableBufSize,
tBuf, tLen,
FALSE, 0,
status);
if(*status == U_BUFFER_OVERFLOW_ERROR) {
if(!u_growBufferFromStatic(tColl->stackWritableBuffer,
&tColl->writableBuffer,
(int32_t *)&tColl->writableBufSize, tLen,
0)
)
{
*status = U_MEMORY_ALLOCATION_ERROR;
return UCOL_LESS;
}
*status = U_ZERO_ERROR;
tLen = unorm_decompose(tColl->writableBuffer, (int32_t)tColl->writableBufSize,
tBuf, tLen,
FALSE, 0,
status);
}
if(freeTBuf) {
uprv_free(tBuf);
freeTBuf = FALSE;
}
tBuf = tColl->writableBuffer;
if (tBuf != tColl->stackWritableBuffer) {
tColl->flags |= UCOL_ITER_ALLOCATED;
}
}
}
if (sLen == -1 && tLen == -1) {
comparison = u_strcmpCodePointOrder(sBuf, tBuf);
} else {
if (sLen == -1) {
sLen = u_strlen(sBuf);
}
if (tLen == -1) {
tLen = u_strlen(tBuf);
}
comparison = u_memcmpCodePointOrder(sBuf, tBuf, uprv_min(sLen, tLen));
if (comparison == 0) {
comparison = sLen - tLen;
}
}
}
if (comparison < 0) {
return UCOL_LESS;
} else if (comparison == 0) {
return UCOL_EQUAL;
} else {
return UCOL_GREATER;
}
}
/* Number of collation elements that fit in a ucol_CEBuf before it moves to the heap. */
#define UCOL_CEBUF_SIZE 512
/*
 * Growable buffer of collation elements. It starts out using localArray and
 * is moved to heap storage by ucol_CEBuf_Expand() when it fills up.
 */
typedef struct ucol_CEBuf {
uint32_t *buf;   /* start of the current storage (localArray or heap) */
uint32_t *endp;  /* one past the last usable slot */
uint32_t *pos;   /* next slot to write */
uint32_t localArray[UCOL_CEBUF_SIZE];  /* initial in-struct storage */
} ucol_CEBuf;
/**
 * Resets a CE buffer so that it is empty and backed by its own localArray.
 *
 * @param b buffer to initialise
 */
static
inline void UCOL_INIT_CEBUF(ucol_CEBuf *b) {
    uint32_t *const storage = b->localArray;
    b->buf = storage;
    b->pos = storage;
    b->endp = storage + UCOL_CEBUF_SIZE;
}
/**
 * Doubles the capacity of a CE buffer by moving its contents to a new heap
 * allocation.
 *
 * UCOL_ITER_ALLOCATED is set on `ci` before the allocation is attempted.
 * Previous heap storage is freed; localArray is never freed.
 *
 * @param b      buffer to grow; left unchanged if allocation fails
 * @param ci     iterator whose flags record that heap memory is in play
 * @param status set to U_MEMORY_ALLOCATION_ERROR if allocation fails
 */
static
void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci, UErrorCode *status) {
    ci->flags |= UCOL_ITER_ALLOCATED;

    const uint32_t used = (uint32_t)(b->pos - b->buf);
    const uint32_t capacity = used * 2;

    uint32_t *grown = (uint32_t *)uprv_malloc(capacity * sizeof(uint32_t));
    if(grown == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    uprv_memcpy(grown, b->buf, used * sizeof(uint32_t));
    if (b->buf != b->localArray) {
        uprv_free(b->buf);
    }
    b->buf = grown;
    b->pos = grown + used;
    b->endp = grown + capacity;
}
/**
 * Appends one collation element to a CE buffer, growing it first if full.
 * Nothing is stored when *status indicates failure (either on entry or
 * because growing failed).
 *
 * @param b      destination buffer
 * @param ce     collation element to append
 * @param ci     iterator passed through to ucol_CEBuf_Expand()
 * @param status in/out error code
 */
static
inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci, UErrorCode *status) {
    if (b->pos == b->endp) {
        ucol_CEBuf_Expand(b, ci, status);
    }
    if (U_FAILURE(*status)) {
        return;
    }
    *b->pos = ce;
    b->pos++;
}
static UCollationResult ucol_compareUsingSortKeys(collIterate *sColl,
collIterate *tColl,
UErrorCode *status)
{
uint8_t sourceKey[UCOL_MAX_BUFFER], targetKey[UCOL_MAX_BUFFER];
uint8_t *sourceKeyP = sourceKey;
uint8_t *targetKeyP = targetKey;
int32_t sourceKeyLen = UCOL_MAX_BUFFER, targetKeyLen = UCOL_MAX_BUFFER;
const UCollator *coll = sColl->coll;
UChar *source = NULL;
UChar *target = NULL;
int32_t result = UCOL_EQUAL;
UChar sStackBuf[256], tStackBuf[256];
int32_t sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(sColl->endp-sColl->string):-1;
int32_t targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(tColl->endp-tColl->string):-1;
if(sColl->flags & UCOL_USE_ITERATOR) {
sColl->iterator->move(sColl->iterator, 0, UITER_START);
tColl->iterator->move(tColl->iterator, 0, UITER_START);
source = sStackBuf;
UChar *sBufp = source;
target = tStackBuf;
UChar *tBufp = target;
while(sColl->iterator->hasNext(sColl->iterator)) {
*sBufp++ = (UChar)sColl->iterator->next(sColl->iterator);
}
while(tColl->iterator->hasNext(tColl->iterator)) {
*tBufp++ = (UChar)tColl->iterator->next(tColl->iterator);
}
sourceLength = sBufp - source;
targetLength = tBufp - target;
} else { sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(sColl->endp-sColl->string):-1;
targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(tColl->endp-tColl->string):-1;
source = sColl->string;
target = tColl->string;
}
sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen);
if(sourceKeyLen > UCOL_MAX_BUFFER) {
sourceKeyP = (uint8_t*)uprv_malloc(sourceKeyLen*sizeof(uint8_t));
if(sourceKeyP == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
goto cleanup_and_do_compare;
}
sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen);
}
targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen);
if(targetKeyLen > UCOL_MAX_BUFFER) {
targetKeyP = (uint8_t*)uprv_malloc(targetKeyLen*sizeof(uint8_t));
if(targetKeyP == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
goto cleanup_and_do_compare;
}
targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen);
}
result = uprv_strcmp((const char*)sourceKeyP, (const char*)targetKeyP);
cleanup_and_do_compare:
if(sourceKeyP != NULL && sourceKeyP != sourceKey) {
uprv_free(sourceKeyP);
}
if(targetKeyP != NULL && targetKeyP != targetKey) {
uprv_free(targetKeyP);
}
if(result<0) {
return UCOL_LESS;
} else if(result>0) {
return UCOL_GREATER;
} else {
return UCOL_EQUAL;
}
}
/**
 * General string comparison over two collIterates.
 *
 * The comparison proceeds level by level and returns at the first difference:
 *   1. primary, fetching CEs with ucol_IGetNextCE() and storing them in two
 *      ucol_CEBuf buffers (with a separate loop when alternate handling is
 *      UCOL_SHIFTED);
 *   2. secondary, reading the buffered CEs forwards, or backwards when
 *      French collation is on;
 *   3. case level, if enabled;
 *   4. tertiary;
 *   5. quaternary for shifted collators, or the recorded Hiragana result;
 *   6. identical level via ucol_checkIdent().
 * Which levels run is derived from coll->strength and the other attributes
 * read at the top of the function.
 *
 * When both Hiragana quaternary mode and shifted handling apply, the whole
 * comparison is delegated to ucol_compareUsingSortKeys().
 *
 * @param sColl  source iterator (also supplies the collator)
 * @param tColl  target iterator
 * @param status in/out error code, passed to the CE fetch and buffer helpers
 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
 */
static inline UCollationResult
ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
UErrorCode *status)
{
U_ALIGN_CODE(16);
const UCollator *coll = sColl->coll;
// Work out which comparison levels are needed for this collator.
UColAttributeValue strength = coll->strength;
UBool initialCheckSecTer = (strength >= UCOL_SECONDARY);
UBool checkSecTer = initialCheckSecTer;
UBool checkTertiary = (strength >= UCOL_TERTIARY);
UBool checkQuad = (strength >= UCOL_QUATERNARY);
UBool checkIdent = (strength == UCOL_IDENTICAL);
UBool checkCase = (coll->caseLevel == UCOL_ON);
UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && checkSecTer;
UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
UBool qShifted = shifted && checkQuad;
UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && checkQuad;
// Hiragana quaternary combined with shifted handling is not done here.
if(doHiragana && shifted) {
return (ucol_compareUsingSortKeys(sColl, tColl, status));
}
uint8_t caseSwitch = coll->caseSwitch;
uint8_t tertiaryMask = coll->tertiaryMask;
// Primaries at or below LVT are treated as shifted; 0 when not shifted.
uint32_t LVT = (shifted)?(coll->variableTopValue<<16):0;
UCollationResult result = UCOL_EQUAL;
UCollationResult hirResult = UCOL_EQUAL;
// Buffers that keep the CEs seen during the primary pass for the later levels.
ucol_CEBuf sCEs;
ucol_CEBuf tCEs;
UCOL_INIT_CEBUF(&sCEs);
UCOL_INIT_CEBUF(&tCEs);
uint32_t secS = 0, secT = 0;
uint32_t sOrder=0, tOrder=0;
// ---- Primary level, non-shifted ----
if(!shifted) {
for(;;) {
// Fetch and buffer source CEs until one has a non-zero primary.
do {
sOrder = ucol_IGetNextCE(coll, sColl, status);
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
sOrder &= UCOL_PRIMARYMASK;
} while(sOrder == 0);
// Same for the target.
do {
tOrder = ucol_IGetNextCE(coll, tColl, status);
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
tOrder &= UCOL_PRIMARYMASK;
} while(tOrder == 0);
if(sOrder == tOrder) {
if(sOrder == UCOL_NO_MORE_CES_PRIMARY) {
break;
}
// Record the first Hiragana mismatch; it is only applied after the tertiary level.
if(doHiragana && hirResult == UCOL_EQUAL) {
if((sColl->flags & UCOL_WAS_HIRAGANA) != (tColl->flags & UCOL_WAS_HIRAGANA)) {
hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl->flags & UCOL_WAS_HIRAGANA))
? UCOL_LESS:UCOL_GREATER;
}
}
} else {
result = (sOrder < tOrder) ? UCOL_LESS: UCOL_GREATER;
goto commonReturn;
}
} } else { for(;;) {
// ---- Primary level, shifted ----
UBool sInShifted = FALSE;
UBool tInShifted = FALSE;
// Advance the source to its next CE that counts at the primary level,
// buffering CEs (in possibly modified form) along the way.
for(;;) {
sOrder = ucol_IGetNextCE(coll, sColl, status);
if(sOrder == UCOL_NO_MORE_CES) {
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
break;
} else if(sOrder == 0 || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) {
continue;
} else if(isContinuation(sOrder)) {
if((sOrder & UCOL_PRIMARYMASK) > 0) {
if(sInShifted) {
// Continuation of a shifted CE: keep only its primary plus the 0xC0 marker bits.
sOrder = (sOrder & UCOL_PRIMARYMASK) | 0xC0;
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
continue;
} else {
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
break;
}
} else {
if(sInShifted) {
continue;
} else {
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
continue;
}
}
} else {
if((sOrder & UCOL_PRIMARYMASK) > LVT) {
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
break;
} else {
if((sOrder & UCOL_PRIMARYMASK) > 0) {
// Primary at or below LVT: buffer only the primary and enter shifted mode.
sInShifted = TRUE;
sOrder &= UCOL_PRIMARYMASK;
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
continue;
} else {
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
sInShifted = FALSE;
continue;
}
}
}
}
sOrder &= UCOL_PRIMARYMASK;
sInShifted = FALSE;
// Same scan for the target.
for(;;) {
tOrder = ucol_IGetNextCE(coll, tColl, status);
if(tOrder == UCOL_NO_MORE_CES) {
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
break;
} else if(tOrder == 0 || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) {
continue;
} else if(isContinuation(tOrder)) {
if((tOrder & UCOL_PRIMARYMASK) > 0) {
if(tInShifted) {
tOrder = (tOrder & UCOL_PRIMARYMASK) | 0xC0;
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
continue;
} else {
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
break;
}
} else {
if(tInShifted) {
continue;
} else {
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
continue;
}
}
} else {
if((tOrder & UCOL_PRIMARYMASK) > LVT) {
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
break;
} else {
if((tOrder & UCOL_PRIMARYMASK) > 0) {
tInShifted = TRUE;
tOrder &= UCOL_PRIMARYMASK;
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
continue;
} else {
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
tInShifted = FALSE;
continue;
}
}
}
}
tOrder &= UCOL_PRIMARYMASK;
tInShifted = FALSE;
if(sOrder == tOrder) {
if(sOrder == UCOL_NO_MORE_CES_PRIMARY) {
break;
} else {
sOrder = 0;
tOrder = 0;
continue;
}
} else {
result = (sOrder < tOrder) ? UCOL_LESS : UCOL_GREATER;
goto commonReturn;
}
}
}
// From here on the comparison works on the buffered CEs only.
uint32_t *sCE;
uint32_t *tCE;
// ---- Secondary level ----
if(checkSecTer) {
if(!isFrenchSec) {
// Forward scan: skip CEs whose secondary is zero.
sCE = sCEs.buf;
tCE = tCEs.buf;
for(;;) {
while (secS == 0) {
secS = *(sCE++) & UCOL_SECONDARYMASK;
}
while(secT == 0) {
secT = *(tCE++) & UCOL_SECONDARYMASK;
}
if(secS == secT) {
if(secS == UCOL_NO_MORE_CES_SECONDARY) {
break;
} else {
secS = 0; secT = 0;
continue;
}
} else {
result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
goto commonReturn;
}
}
} else {
// French secondary: scan backwards from the CE before the terminator.
// A run of continuation CEs is replayed in forward order; sCESave/tCESave
// remember where the backward scan resumes afterwards.
uint32_t *sCESave = NULL;
uint32_t *tCESave = NULL;
sCE = sCEs.pos-2;
tCE = tCEs.pos-2;
for(;;) {
while (secS == 0 && sCE >= sCEs.buf) {
if(sCESave == 0) {
secS = *(sCE--);
if(isContinuation(secS)) {
while(isContinuation(secS = *(sCE--)))
;
sCESave = sCE;
sCE+=2;
}
} else {
secS = *(sCE++);
if(!isContinuation(secS)) {
sCE = sCESave;
sCESave = 0;
continue;
}
}
secS &= UCOL_SECONDARYMASK;
}
while(secT == 0 && tCE >= tCEs.buf) {
if(tCESave == 0) {
secT = *(tCE--);
if(isContinuation(secT)) {
while(isContinuation(secT = *(tCE--)))
;
tCESave = tCE;
tCE+=2;
}
} else {
secT = *(tCE++);
if(!isContinuation(secT)) {
tCE = tCESave;
tCESave = 0;
continue;
}
}
secT &= UCOL_SECONDARYMASK;
}
if(secS == secT) {
if(secS == UCOL_NO_MORE_CES_SECONDARY || (sCE < sCEs.buf && tCE < tCEs.buf)) {
break;
} else {
secS = 0; secT = 0;
continue;
}
} else {
result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
goto commonReturn;
}
}
}
}
// ---- Case level ----
if(checkCase) {
sCE = sCEs.buf;
tCE = tCEs.buf;
for(;;) {
// Continuations are skipped; so are CEs with a zero primary when strength is primary.
while((secS & UCOL_REMOVE_CASE) == 0) {
if(!isContinuation(*sCE++)) {
secS =*(sCE-1);
if(((secS & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) {
secS &= UCOL_TERT_CASE_MASK;
secS ^= caseSwitch;
} else {
secS = 0;
}
} else {
secS = 0;
}
}
while((secT & UCOL_REMOVE_CASE) == 0) {
if(!isContinuation(*tCE++)) {
secT = *(tCE-1);
if(((secT & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) {
secT &= UCOL_TERT_CASE_MASK;
secT ^= caseSwitch;
} else {
secT = 0;
}
} else {
secT = 0;
}
}
if((secS & UCOL_CASE_BIT_MASK) < (secT & UCOL_CASE_BIT_MASK)) {
result = UCOL_LESS;
goto commonReturn;
} else if((secS & UCOL_CASE_BIT_MASK) > (secT & UCOL_CASE_BIT_MASK)) {
result = UCOL_GREATER;
goto commonReturn;
}
if((secS & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY || (secT & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY ) {
break;
} else {
secS = 0;
secT = 0;
}
}
}
// ---- Tertiary level ----
if(checkTertiary) {
secS = 0;
secT = 0;
sCE = sCEs.buf;
tCE = tCEs.buf;
for(;;) {
// The case switch is applied to non-continuation CEs only.
while((secS & UCOL_REMOVE_CASE) == 0) {
secS = *(sCE++) & tertiaryMask;
if(!isContinuation(secS)) {
secS ^= caseSwitch;
} else {
secS &= UCOL_REMOVE_CASE;
}
}
while((secT & UCOL_REMOVE_CASE) == 0) {
secT = *(tCE++) & tertiaryMask;
if(!isContinuation(secT)) {
secT ^= caseSwitch;
} else {
secT &= UCOL_REMOVE_CASE;
}
}
if(secS == secT) {
if((secS & UCOL_REMOVE_CASE) == 1) {
break;
} else {
secS = 0; secT = 0;
continue;
}
} else {
result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
goto commonReturn;
}
}
}
// ---- Quaternary level (shifted) or deferred Hiragana result ----
if(qShifted ) {
UBool sInShifted = TRUE;
UBool tInShifted = TRUE;
secS = 0;
secT = 0;
sCE = sCEs.buf;
tCE = tCEs.buf;
for(;;) {
// CEs above LVT or with a zero primary are replaced by UCOL_PRIMARYMASK;
// the remaining (shifted) CEs contribute their primary.
while(secS == 0 && secS != UCOL_NO_MORE_CES || (isContinuation(secS) && !sInShifted)) {
secS = *(sCE++);
if(isContinuation(secS)) {
if(!sInShifted) {
continue;
}
} else if(secS > LVT || (secS & UCOL_PRIMARYMASK) == 0) {
secS = UCOL_PRIMARYMASK;
sInShifted = FALSE;
} else {
sInShifted = TRUE;
}
}
secS &= UCOL_PRIMARYMASK;
while(secT == 0 && secT != UCOL_NO_MORE_CES || (isContinuation(secT) && !tInShifted)) {
secT = *(tCE++);
if(isContinuation(secT)) {
if(!tInShifted) {
continue;
}
} else if(secT > LVT || (secT & UCOL_PRIMARYMASK) == 0) {
secT = UCOL_PRIMARYMASK;
tInShifted = FALSE;
} else {
tInShifted = TRUE;
}
}
secT &= UCOL_PRIMARYMASK;
if(secS == secT) {
if(secS == UCOL_NO_MORE_CES_PRIMARY) {
break;
} else {
secS = 0; secT = 0;
continue;
}
} else {
result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
goto commonReturn;
}
}
} else if(doHiragana && hirResult != UCOL_EQUAL) {
result = hirResult;
goto commonReturn;
}
// ---- Identical level ----
if(checkIdent)
{
result = ucol_checkIdent(sColl, tColl, TRUE, status);
}
commonReturn:
// Heap memory is released only if one of the iterators was flagged as having allocated.
if ((sColl->flags | tColl->flags) & UCOL_ITER_ALLOCATED) {
freeHeapWritableBuffer(sColl);
freeHeapWritableBuffer(tColl);
if (sCEs.buf != sCEs.localArray ) {
uprv_free(sCEs.buf);
}
if (tCEs.buf != tCEs.localArray ) {
uprv_free(tCEs.buf);
}
}
return result;
}
/**
 * Resolves a contraction CE taken from the Latin-1 table.
 *
 * Bits 0x00FFF000 of `CE` give the table index of the contraction's results
 * and its low 12 bits locate the contraction's sorted list of following
 * code units. The next code unit of `s` is looked up in that list:
 *   - on a match, *index is advanced and the matching result is returned;
 *   - at end of string, the base result (list position 0) is returned;
 *   - a non-matching code unit above 0xFF yields UCOL_BAIL_OUT_CE;
 *   - a non-matching code unit whose trie value is 0 is skipped and the
 *     search continues with the following one;
 *   - any other non-matching code unit yields the base result.
 *
 * @param coll     collator owning the tables
 * @param strength selects the table section (multiplied by latinOneTableLen)
 * @param CE       contraction CE from the Latin-1 table
 * @param s        string being compared
 * @param index    in/out position in s of the next code unit
 * @param len      length of s, or -1 if NUL-terminated
 * @return the selected table entry, or UCOL_BAIL_OUT_CE
 */
static inline uint32_t
ucol_getLatinOneContraction(const UCollator *coll, int32_t strength,
                            uint32_t CE, const UChar *s, int32_t *index, int32_t len)
{
    const UChar *followers = (UChar *)coll->image+getContractOffset(CE&0xFFF);
    const int32_t baseSlot = strength*coll->latinOneTableLen + (int32_t)((CE & 0x00FFF000) >> 12);
    // Position in the follower list; deliberately not reset when a code unit is skipped.
    int32_t candidate = 1;

    for(;;) {
        const UBool exhausted = (UBool)((len == -1) ? (s[*index] == 0) : (*index == len));
        if(exhausted) {
            return(coll->latinOneCEs[baseSlot]);
        }

        const UChar next = s[*index];
        // The follower list is ordered, so stop at the first entry not below `next`.
        while(next > *(followers+candidate)) {
            candidate++;
        }

        if(next == *(followers+candidate)) {
            (*index)++;
            return(coll->latinOneCEs[baseSlot+candidate]);
        }
        if(next & 0xFF00 ) {
            return UCOL_BAIL_OUT_CE;
        }

        uint32_t trieValue = UTRIE_GET32_FROM_LEAD(&coll->mapping, next);
        if(trieValue != 0) {
            return(coll->latinOneCEs[baseSlot]);
        }
        // Trie value 0: step over this code unit and try the next one.
        (*index)++;
    }
}
/*
 * Compares source and target using the per-collator Latin-1 table
 * (coll->latinOneCEs), one strength level at a time.
 *
 * coll   - collator supplying strength, frenchCollation, latinOneCEs and
 *          latinOneTableLen.
 * source - first string; sLen is its length or -1 if NUL-terminated.
 * target - second string; tLen is its length or -1 if NUL-terminated.
 * status - only forwarded to ucol_strcollRegular on the fallback path.
 *
 * Returns UCOL_LESS, UCOL_GREATER or UCOL_EQUAL. Falls back to
 * ucol_strcollRegular (label returnRegular) when
 *   - a code unit has any of the bits 0xFF00 set,
 *   - a primary table entry is >= UCOL_NOT_FOUND and is not turned into a
 *     smaller value by ucol_getLatinOneContraction, or
 *   - frenchCollation is not UCOL_OFF and a contraction was seen during the
 *     primary pass.
 */
static inline UCollationResult
ucol_strcollUseLatin1( const UCollator *coll,
const UChar *source,
int32_t sLen,
const UChar *target,
int32_t tLen,
UErrorCode *status)
{
U_ALIGN_CODE(16);
int32_t strength = coll->strength;
int32_t sIndex = 0, tIndex = 0;
UChar sChar = 0, tChar = 0;
// Current table values for each side; 0 means "fetch the next one".
uint32_t sOrder=0, tOrder=0;
UBool endOfSource = FALSE;
// Table slice for the current level; starts at the first slice and is
// advanced by latinOneTableLen before the secondary and tertiary passes.
uint32_t *elements = coll->latinOneCEs;
// Set when a contraction is met in the primary pass; it disables the
// backwards (French) secondary pass below.
UBool haveContractions = FALSE;
// ---- Primary pass ----
for(;;) {
// Fetch the next non-zero source value, handling both NUL-terminated
// (sLen == -1) and explicit-length input.
while(sOrder==0) { if(sLen==-1) { sChar=source[sIndex++];
if(sChar==0) {
endOfSource = TRUE;
break;
}
} else { if(sIndex==sLen) {
endOfSource = TRUE;
break;
}
sChar=source[sIndex++];
}
// Any bit of 0xFF00 set: not covered by the table, use the regular path.
if(sChar&0xFF00) { goto returnRegular;
}
sOrder = elements[sChar];
// Values >= UCOL_NOT_FOUND are specials: a contraction is resolved
// through the helper, anything still special afterwards bails out.
if(sOrder >= UCOL_NOT_FOUND) { if(getCETag(sOrder) == CONTRACTION_TAG) {
sOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, sOrder, source, &sIndex, sLen);
haveContractions = TRUE; }
if(sOrder >= UCOL_NOT_FOUND ) {
goto returnRegular;
}
}
}
// Same fetch for the target. Reaching the end of the target decides the
// primary pass: both ended -> go on to the next level, otherwise the
// source still has a value and is GREATER.
while(tOrder==0) { if(tLen==-1) { tChar=target[tIndex++];
if(tChar==0) {
if(endOfSource) { goto endOfPrimLoop;
} else {
return UCOL_GREATER;
}
}
} else { if(tIndex==tLen) {
if(endOfSource) {
goto endOfPrimLoop;
} else {
return UCOL_GREATER;
}
}
tChar=target[tIndex++];
}
if(tChar&0xFF00) { goto returnRegular;
}
tOrder = elements[tChar];
if(tOrder >= UCOL_NOT_FOUND) {
if(getCETag(tOrder) == CONTRACTION_TAG) {
tOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, tOrder, target, &tIndex, tLen);
haveContractions = TRUE;
}
if(tOrder >= UCOL_NOT_FOUND ) {
goto returnRegular;
}
}
}
// Source ended but the target produced a value.
if(endOfSource) { return UCOL_LESS;
}
// Equal values: clear both so the next iteration fetches fresh ones.
if(sOrder == tOrder) { sOrder = 0; tOrder = 0;
continue;
} else {
// Decide on the top byte if it differs; otherwise shift both values left
// by one byte and loop again, so the next bytes are compared (a value
// that becomes 0 after the shift triggers a new fetch for that side).
if(((sOrder^tOrder)&0xFF000000)!=0) {
if(sOrder < tOrder) {
return UCOL_LESS;
} else if(sOrder > tOrder) {
return UCOL_GREATER;
}
}
sOrder<<=8;
tOrder<<=8;
}
}
endOfPrimLoop:
// Both strings were fully scanned, so the later passes can use explicit
// lengths only.
// NOTE(review): for NUL-terminated input the index was already advanced past
// the terminator when the loop stopped, so these lengths include it — confirm
// that this is intended.
sLen = sIndex; tLen = tIndex;
// ---- Secondary pass ----
if(strength >= UCOL_SECONDARY) {
elements += coll->latinOneTableLen;
endOfSource = FALSE;
// Forward secondary comparison.
if(coll->frenchCollation == UCOL_OFF) { sIndex = 0; tIndex = 0;
for(;;) {
while(sOrder==0) {
if(sIndex==sLen) {
endOfSource = TRUE;
break;
}
sChar=source[sIndex++];
sOrder = elements[sChar];
// Note the strict '>' here, unlike the '>=' used in the primary pass.
if(sOrder > UCOL_NOT_FOUND) {
sOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, sOrder, source, &sIndex, sLen);
}
}
while(tOrder==0) {
if(tIndex==tLen) {
if(endOfSource) {
goto endOfSecLoop;
} else {
return UCOL_GREATER;
}
}
tChar=target[tIndex++];
tOrder = elements[tChar];
if(tOrder > UCOL_NOT_FOUND) {
tOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, tOrder, target, &tIndex, tLen);
}
}
if(endOfSource) {
return UCOL_LESS;
}
if(sOrder == tOrder) {
sOrder = 0; tOrder = 0;
continue;
} else {
// Same byte-by-byte comparison as in the primary pass.
if(((sOrder^tOrder)&0xFF000000)!=0) {
if(sOrder < tOrder) {
return UCOL_LESS;
} else if(sOrder > tOrder) {
return UCOL_GREATER;
}
}
sOrder<<=8;
tOrder<<=8;
}
}
// Backwards secondary comparison: walks both strings from the end towards
// index 0 and performs no contraction lookup, so it is only used when no
// contraction was seen in the primary pass.
// NOTE(review): the fallback taken here passes the sLen/tLen values
// reassigned at endOfPrimLoop, not the caller's original lengths.
} else { if(haveContractions) { goto returnRegular;
}
sIndex = sLen; tIndex = tLen;
for(;;) {
while(sOrder==0) {
if(sIndex==0) {
endOfSource = TRUE;
break;
}
sChar=source[--sIndex];
sOrder = elements[sChar];
}
while(tOrder==0) {
if(tIndex==0) {
if(endOfSource) {
goto endOfSecLoop;
} else {
return UCOL_GREATER;
}
}
tChar=target[--tIndex];
tOrder = elements[tChar];
}
if(endOfSource) {
return UCOL_LESS;
}
if(sOrder == tOrder) {
sOrder = 0; tOrder = 0;
continue;
} else {
if(((sOrder^tOrder)&0xFF000000)!=0) {
if(sOrder < tOrder) {
return UCOL_LESS;
} else if(sOrder > tOrder) {
return UCOL_GREATER;
}
}
sOrder<<=8;
tOrder<<=8;
}
}
}
}
endOfSecLoop:
// ---- Tertiary pass ---- (always forward)
if(strength >= UCOL_TERTIARY) {
elements += coll->latinOneTableLen;
sIndex = 0; tIndex = 0;
endOfSource = FALSE;
for(;;) {
while(sOrder==0) {
if(sIndex==sLen) {
endOfSource = TRUE;
break;
}
sChar=source[sIndex++];
sOrder = elements[sChar];
if(sOrder > UCOL_NOT_FOUND) {
sOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, sOrder, source, &sIndex, sLen);
}
}
while(tOrder==0) {
if(tIndex==tLen) {
// Last level handled here: both sides exhausted means the strings are equal.
if(endOfSource) {
return UCOL_EQUAL; } else {
return UCOL_GREATER;
}
}
tChar=target[tIndex++];
tOrder = elements[tChar];
if(tOrder > UCOL_NOT_FOUND) {
tOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, tOrder, target, &tIndex, tLen);
}
}
if(endOfSource) {
return UCOL_LESS;
}
if(sOrder == tOrder) {
sOrder = 0; tOrder = 0;
continue;
} else {
if(((sOrder^tOrder)&0xff000000)!=0) {
if(sOrder < tOrder) {
return UCOL_LESS;
} else if(sOrder > tOrder) {
return UCOL_GREATER;
}
}
sOrder<<=8;
tOrder<<=8;
}
}
}
// Reached when the collator strength stops below tertiary and no
// difference was found at the levels that were compared.
return UCOL_EQUAL;
returnRegular:
// Fallback: redo the whole comparison with the general algorithm.
collIterate sColl, tColl;
IInit_collIterate(coll, source, sLen, &sColl);
IInit_collIterate(coll, target, tLen, &tColl);
return ucol_strcollRegular(&sColl, &tColl, status);
}
/*
 * Compares the text delivered by two character iterators.
 *
 * coll   - collator to use; must not be NULL.
 * sIter  - iterator over the source text; must not be NULL.
 * tIter  - iterator over the target text; must not be NULL.
 * status - in/out error code. Nothing is done if it is NULL or already a
 *          failure. Set to U_ILLEGAL_ARGUMENT_ERROR when coll, sIter or
 *          tIter is NULL (and sIter != tIter).
 *
 * Returns UCOL_EQUAL when both arguments are the same iterator, on any
 * error, or when the texts compare equal; otherwise the result of
 * ucol_strcollRegular on the part of the texts that starts shortly before
 * the first difference.
 */
U_CAPI UCollationResult U_EXPORT2
ucol_strcollIter( const UCollator *coll,
                 UCharIterator *sIter,
                 UCharIterator *tIter,
                 UErrorCode *status)
{
    if(!status || U_FAILURE(*status)) {
        return UCOL_EQUAL;
    }

    UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
    UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);

    if (sIter == tIter) {
        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
        return UCOL_EQUAL;
    }
    if(sIter == NULL || tIter == NULL || coll == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
        return UCOL_EQUAL;
    }

    UCollationResult result = UCOL_EQUAL;

    // Iteration contexts plus stack storage for the optional normalizing
    // iterators.
    collIterate sColl, tColl;
    UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
    UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
    UNormIterator *sNormIter = NULL, *tNormIter = NULL;
    // Declared ahead of the first goto so that no jump bypasses an
    // initialised declaration.
    UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL;

    IInit_collIterate(coll, NULL, -1, &sColl);
    sColl.iterator = sIter;
    sColl.flags |= UCOL_USE_ITERATOR;
    IInit_collIterate(coll, NULL, -1, &tColl);
    tColl.flags |= UCOL_USE_ITERATOR;
    tColl.iterator = tIter;

    if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) {
        // Wrap both inputs in FCD-normalizing iterators and clear the
        // UCOL_ITER_NORM flag on the collation iterators.
        sNormIter = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);
        sColl.iterator = unorm_setIter(sNormIter, sIter, UNORM_FCD, status);
        sColl.flags &= ~UCOL_ITER_NORM;

        tNormIter = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);
        tColl.iterator = unorm_setIter(tNormIter, tIter, UNORM_FCD, status);
        tColl.flags &= ~UCOL_ITER_NORM;
    }

    // The calls above report problems through status. After a failure the
    // iterator pointers stored in sColl/tColl may not be usable, so stop
    // before dereferencing them.
    if(U_FAILURE(*status)) {
        goto end_compare;
    }

    // Step through both texts while they deliver identical code units.
    while((sChar = sColl.iterator->next(sColl.iterator)) ==
        (tChar = tColl.iterator->next(tColl.iterator))) {
        if(sChar == U_SENTINEL) {
            // Both texts ended together: they are identical.
            result = UCOL_EQUAL;
            goto end_compare;
        }
    }

    // When one side returned U_SENTINEL, the other side is stepped back once.
    if(sChar == U_SENTINEL) {
        tChar = tColl.iterator->previous(tColl.iterator);
    }

    if(tChar == U_SENTINEL) {
        sChar = sColl.iterator->previous(sColl.iterator);
    }

    // Then both sides step back once more.
    sChar = sColl.iterator->previous(sColl.iterator);
    tChar = tColl.iterator->previous(tColl.iterator);

    if (ucol_unsafeCP((UChar)sChar, coll) || ucol_unsafeCP((UChar)tChar, coll))
    {
        // Keep moving both iterators backwards in lock step while
        // ucol_unsafeCP reports the source code unit as unsafe, or until the
        // start of the text is reached.
        do
        {
            sChar = sColl.iterator->previous(sColl.iterator);
            tChar = tColl.iterator->previous(tColl.iterator);
        }
        while (sChar != U_SENTINEL && ucol_unsafeCP((UChar)sChar, coll));
    }

    // status is known to be a success here (checked above, not touched since).
    result = ucol_strcollRegular(&sColl, &tColl, status);

end_compare:
    if(sNormIter || tNormIter) {
        unorm_closeIter(sNormIter);
        unorm_closeIter(tNormIter);
    }

    UTRACE_EXIT_VALUE_STATUS(result, *status)
    return result;
}
/*
 * Compares two UTF-16 strings with the given collator.
 *
 * coll         - collator to use.
 * source       - first string.
 * sourceLength - length of source in code units; a negative value (normally
 *                -1) means the string is NUL-terminated.
 * target       - second string.
 * targetLength - length of target, same convention as sourceLength.
 *
 * Returns UCOL_EQUAL if either string pointer is NULL (there is no status
 * parameter to report the error), if both arguments denote the same string,
 * or if the strings are identical code unit for code unit. Otherwise the
 * identical leading part is skipped and the rest is compared by
 * ucol_strcollUseLatin1 or ucol_strcollRegular.
 */
U_CAPI UCollationResult U_EXPORT2
ucol_strcoll( const UCollator *coll,
              const UChar *source,
              int32_t sourceLength,
              const UChar *target,
              int32_t targetLength)
{
    U_ALIGN_CODE(16);

    UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
        UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
        UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
    }

    if(source == NULL || target == NULL) {
        // Do not crash on NULL input; just report "equal".
        UTRACE_EXIT_VALUE(UCOL_EQUAL);
        return UCOL_EQUAL;
    }

    // Same pointer and same length specification: trivially equal.
    if (source==target && sourceLength==targetLength) {
        UTRACE_EXIT_VALUE(UCOL_EQUAL);
        return UCOL_EQUAL;
    }

    // End pointers are only formed for explicit lengths. The previous code
    // computed "source + (-1)" for NUL-terminated input, which is pointer
    // arithmetic outside the array and therefore undefined behaviour.
    const UBool srcIsNulTerminated  = (UBool)(sourceLength < 0);
    const UBool targIsNulTerminated = (UBool)(targetLength < 0);
    const UChar *pSrcEnd  = srcIsNulTerminated  ? NULL : source + sourceLength;
    const UChar *pTargEnd = targIsNulTerminated ? NULL : target + targetLength;

    // Scan the identical leading part of both strings.
    const UChar *pSrc = source;
    const UChar *pTarg = target;
    int32_t equalLength;

    if (srcIsNulTerminated && targIsNulTerminated) {
        // Fast path: both strings are NUL-terminated.
        while (*pSrc == *pTarg && *pSrc != 0) {
            pSrc++;
            pTarg++;
        }
        if (*pSrc == 0 && *pTarg == 0) {
            UTRACE_EXIT_VALUE(UCOL_EQUAL);
            return UCOL_EQUAL;
        }
        equalLength = (int32_t)(pSrc - source);
    }
    else
    {
        // At least one string has an explicit length. A string is at its end
        // either at its terminator or at its end pointer, depending on how
        // its length was given; the end test never reads past an explicit end.
        UBool srcAtEnd;
        UBool targAtEnd;
        for (;;) {
            srcAtEnd  = (UBool)(srcIsNulTerminated  ? (*pSrc == 0)  : (pSrc == pSrcEnd));
            targAtEnd = (UBool)(targIsNulTerminated ? (*pTarg == 0) : (pTarg == pTargEnd));
            if (srcAtEnd || targAtEnd) {
                break;
            }
            if (*pSrc != *pTarg) {
                break;
            }
            pSrc++;
            pTarg++;
        }
        equalLength = (int32_t)(pSrc - source);

        // Both strings exhausted at the same time: they are identical.
        if (srcAtEnd && targAtEnd) {
            UTRACE_EXIT_VALUE(UCOL_EQUAL);
            return UCOL_EQUAL;
        }
    }

    if (equalLength > 0) {
        // If the first differing position of either string holds a code unit
        // that ucol_unsafeCP flags, back up through the identical part until
        // a code unit that is not flagged (or the string start) is reached.
        // For a NUL-terminated string the code unit examined may be the
        // terminator itself, exactly as before.
        if (((srcIsNulTerminated  || pSrc  != pSrcEnd)  && ucol_unsafeCP(*pSrc, coll)) ||
            ((targIsNulTerminated || pTarg != pTargEnd) && ucol_unsafeCP(*pTarg, coll)))
        {
            // Only the source is scanned; both strings are identical here.
            do
            {
                equalLength--;
                pSrc--;
            }
            while (equalLength>0 && ucol_unsafeCP(*pSrc, coll));
        }

        // Drop the identical prefix from both strings.
        source += equalLength;
        target += equalLength;
        if (sourceLength > 0) {
            sourceLength -= equalLength;
        }
        if (targetLength > 0) {
            targetLength -= equalLength;
        }
    }

    UErrorCode status = U_ZERO_ERROR;
    UCollationResult returnVal;
    // Use the general algorithm when the collator has no Latin-1 fast path or
    // when the first remaining code unit of an explicit-length string has any
    // of the bits 0xff00 set; otherwise try the Latin-1 fast path.
    if(!coll->latinOneUse ||
       (sourceLength > 0 && (*source&0xff00)) ||
       (targetLength > 0 && (*target&0xff00))) {
        collIterate sColl, tColl;
        IInit_collIterate(coll, source, sourceLength, &sColl);
        IInit_collIterate(coll, target, targetLength, &tColl);
        returnVal = ucol_strcollRegular(&sColl, &tColl, &status);
    } else {
        returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, targetLength, &status);
    }
    UTRACE_EXIT_VALUE(returnVal);
    return returnVal;
}
/*
 * Convenience wrapper around ucol_strcoll.
 * Returns TRUE exactly when ucol_strcoll reports UCOL_GREATER for
 * (source, sourceLength) versus (target, targetLength).
 */
U_CAPI UBool U_EXPORT2
ucol_greater( const UCollator *coll,
              const UChar *source,
              int32_t sourceLength,
              const UChar *target,
              int32_t targetLength)
{
    const UCollationResult order =
        ucol_strcoll(coll, source, sourceLength, target, targetLength);
    return (order == UCOL_GREATER);
}
/*
 * Convenience wrapper around ucol_strcoll.
 * Returns TRUE unless ucol_strcoll reports UCOL_LESS for
 * (source, sourceLength) versus (target, targetLength).
 */
U_CAPI UBool U_EXPORT2
ucol_greaterOrEqual( const UCollator *coll,
                     const UChar *source,
                     int32_t sourceLength,
                     const UChar *target,
                     int32_t targetLength)
{
    const UCollationResult order =
        ucol_strcoll(coll, source, sourceLength, target, targetLength);
    return (order != UCOL_LESS);
}
/*
 * Convenience wrapper around ucol_strcoll.
 * Returns TRUE exactly when ucol_strcoll reports UCOL_EQUAL for
 * (source, sourceLength) versus (target, targetLength).
 */
U_CAPI UBool U_EXPORT2
ucol_equal( const UCollator *coll,
            const UChar *source,
            int32_t sourceLength,
            const UChar *target,
            int32_t targetLength)
{
    const UCollationResult order =
        ucol_strcoll(coll, source, sourceLength, target, targetLength);
    return (order == UCOL_EQUAL);
}
/*
 * Copies the UCAVersion field of the collator's UCA image into info.
 * Leaves info untouched when coll is NULL or coll->UCA is NULL.
 */
U_CAPI void U_EXPORT2
ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
    if(coll == NULL || coll->UCA == NULL) {
        return;
    }
    uprv_memcpy(info, coll->UCA->image->UCAVersion, sizeof(UVersionInfo));
}
#endif