#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
#include "unicode/ucoleitr.h"
#include "unicode/uchar.h"
#include "ucol_bld.h"
#include "ucln_in.h"
#include "umutex.h"
#include "unicode/uniset.h"
/* Process-wide cached inverse UCA table header. Published under the global
   mutex by ucol_initInverseUCA() and reset to NULL by ucol_bld_cleanup(). */
static const InverseUCATableHeader* _staticInvUCA = NULL;
/* The UDataMemory handle that _staticInvUCA was obtained from; closed and
   reset to NULL by ucol_bld_cleanup(). */
static UDataMemory* invUCA_DATA_MEM = NULL;
U_CDECL_BEGIN
/**
 * Acceptance callback handed to udata_openChoice() for the inverse UCA data.
 *
 * The first three parameters are unused. The data item is accepted only when
 * its header is large enough, matches this platform's endianness and charset
 * family, carries the inverse-UCA data format signature and a compatible
 * format version, and its data version's first two fields equal the first two
 * fields of the Unicode version reported by u_getUnicodeVersion().
 *
 * @param pInfo header information of the candidate data item
 * @return TRUE when the item is usable, FALSE otherwise
 */
static UBool U_CALLCONV
isAcceptableInvUCA(void * ,
                   const char * , const char * ,
                   const UDataInfo *pInfo){
    /* Reject on the first header field that does not match. */
    if( pInfo->size<20 ||
        pInfo->isBigEndian!=U_IS_BIG_ENDIAN ||
        pInfo->charsetFamily!=U_CHARSET_FAMILY ||
        pInfo->dataFormat[0]!=INVUCA_DATA_FORMAT_0 ||
        pInfo->dataFormat[1]!=INVUCA_DATA_FORMAT_1 ||
        pInfo->dataFormat[2]!=INVUCA_DATA_FORMAT_2 ||
        pInfo->dataFormat[3]!=INVUCA_DATA_FORMAT_3 ||
        pInfo->formatVersion[0]!=INVUCA_FORMAT_VERSION_0 ||
        pInfo->formatVersion[1]<INVUCA_FORMAT_VERSION_1 ) {
        return FALSE;
    }

    /* The data must have been built for the Unicode version in use. */
    UVersionInfo UCDVersion;
    u_getUnicodeVersion(UCDVersion);
    if(pInfo->dataVersion[0]!=UCDVersion[0]) {
        return FALSE;
    }
    if(pInfo->dataVersion[1]!=UCDVersion[1]) {
        return FALSE;
    }
    return TRUE;
}
U_CDECL_END
/**
 * Orders two (CE, continuation CE) pairs level by level.
 *
 * A second CE that is not a continuation is treated as 0. The pairs are
 * compared first on the combined upper 16 bits of both CEs, then on the
 * combined bits 8..15, then on the combined low 8 bits.
 *
 * @return 0 when both pairs are identical, -1 when the source pair sorts
 *         before the target pair, 1 otherwise
 */
static int32_t compareCEs(uint32_t source0, uint32_t source1, uint32_t target0, uint32_t target1) {
    const uint32_t srcCont = isContinuation(source1) ? source1 : 0;
    const uint32_t tgtCont = isContinuation(target1) ? target1 : 0;

    if(source0 == target0 && srcCont == tgtCont) {
        return 0;
    }

    /* upper 16 bits of each CE */
    uint32_t lhs = (source0 & 0xFFFF0000)|((srcCont & 0xFFFF0000)>>16);
    uint32_t rhs = (target0 & 0xFFFF0000)|((tgtCont & 0xFFFF0000)>>16);
    if(lhs != rhs) {
        return (lhs < rhs) ? -1 : 1;
    }

    /* bits 8..15 of each CE */
    lhs = (source0 & 0x0000FF00) | (srcCont & 0x0000FF00)>>8;
    rhs = (target0 & 0x0000FF00) | (tgtCont & 0x0000FF00)>>8;
    if(lhs != rhs) {
        return (lhs < rhs) ? -1 : 1;
    }

    /* low 8 bits of each CE; anything that is not smaller reports 1 */
    lhs = (source0 & 0x000000FF)<<8 | (srcCont & 0x000000FF);
    rhs = (target0 & 0x000000FF)<<8 | (tgtCont & 0x000000FF);
    return (lhs < rhs) ? -1 : 1;
}
/**
 * Binary search of the inverse UCA table for a (CE, continuation CE) pair.
 *
 * The table consists of rows of three 32-bit words; the first two words of a
 * row are compared against the requested pair with compareCEs().
 *
 * @param src      parser whose invUCA table is searched
 * @param CE       first CE of the pair
 * @param SecondCE second CE of the pair
 * @return the index of the last row probed; when an exact match exists the
 *         search stops on that row
 */
static
int32_t ucol_inv_findCE(const UColTokenParser *src, uint32_t CE, uint32_t SecondCE) {
    const uint32_t *table = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
    uint32_t lo = 0;
    uint32_t hi = src->invUCA->tableSize;
    uint32_t mid = 0;

    while(lo < hi-1) {
        mid = (hi+lo)/2;
        const uint32_t *row = table + 3*mid;
        const int32_t cmp = compareCEs(row[0], row[1], CE, SecondCE);
        if(cmp == 0) {
            break;
        }
        if(cmp > 0) {
            hi = mid;
        } else {
            lo = mid;
        }
    }

    return mid;
}
/* Bit masks applied to a 32-bit CE, indexed by strength (the code below uses
   UCOL_PRIMARY and UCOL_SECONDARY as indices): the first keeps the upper
   16 bits, the second the upper 24 bits, the third all 32 bits. */
static const uint32_t strengthMask[UCOL_CE_STRENGTH_LIMIT] = {
0xFFFF0000,
0xFFFFFF00,
0xFFFFFFFF
};
/**
 * Finds the next inverse-UCA table entry that differs from (CE, contCE) at
 * the given strength.
 *
 * Starting from the row located by ucol_inv_findCE(), rows are visited in
 * ascending order until the masked CE pair no longer equals the masked input.
 *
 * @param nextCE      receives the CE of the row found (UCOL_NOT_FOUND on error)
 * @param nextContCE  receives the continuation CE of the row found
 * @param strength    index into strengthMask
 * @return the row index of the entry found, or -1 when the lookup fails
 */
U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
                                            uint32_t CE, uint32_t contCE,
                                            uint32_t *nextCE, uint32_t *nextContCE,
                                            uint32_t strength) {
    const uint32_t *table = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
    int32_t row = ucol_inv_findCE(src, CE, contCE);

    if(row < 0) {
        *nextCE = UCOL_NOT_FOUND;
        return -1;
    }

    const uint32_t mask = strengthMask[strength];
    CE &= mask;
    contCE &= mask;

    *nextCE = CE;
    *nextContCE = contCE;

    /* Advance while the row still equals the input at this strength. */
    while((*nextCE & mask) == CE && (*nextContCE & mask) == contCE) {
        ++row;
        *nextCE = table[3*row];
        *nextContCE = table[3*row+1];
    }

    return row;
}
/**
 * Finds the previous inverse-UCA table entry that differs from (CE, contCE)
 * at the given strength.
 *
 * Starting from the row located by ucol_inv_findCE(), rows are visited in
 * descending order until the masked CE pair no longer equals the masked input
 * or row 0 has been reached.
 *
 * @param prevCE      receives the CE of the row found (UCOL_NOT_FOUND on error)
 * @param prevContCE  receives the continuation CE of the row found
 * @param strength    index into strengthMask
 * @return the row index of the entry found, or -1 when the lookup fails
 */
U_CAPI int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
                                            uint32_t CE, uint32_t contCE,
                                            uint32_t *prevCE, uint32_t *prevContCE,
                                            uint32_t strength) {
    const uint32_t *table = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
    int32_t row = ucol_inv_findCE(src, CE, contCE);

    if(row < 0) {
        *prevCE = UCOL_NOT_FOUND;
        return -1;
    }

    const uint32_t mask = strengthMask[strength];
    CE &= mask;
    contCE &= mask;

    *prevCE = CE;
    *prevContCE = contCE;

    /* Step back while the row still equals the input at this strength;
       never step below the first row. */
    while((*prevCE & mask) == CE
        && (*prevContCE & mask) == contCE
        && row > 0) {
        --row;
        *prevCE = table[3*row];
        *prevContCE = table[3*row+1];
    }

    return row;
}
/**
 * Reports the strongest level at which two (CE, continuation CE) pairs differ.
 *
 * @return UCOL_IDENTICAL when both pairs are equal; UCOL_PRIMARY when they
 *         differ within the bits selected by strengthMask[UCOL_PRIMARY];
 *         UCOL_SECONDARY when they first differ within
 *         strengthMask[UCOL_SECONDARY]; UCOL_TERTIARY otherwise
 */
U_CAPI uint32_t U_EXPORT2 ucol_getCEStrengthDifference(uint32_t CE, uint32_t contCE,
                                                       uint32_t prevCE, uint32_t prevContCE)
{
    /* Bits that differ in either the CE or its continuation. */
    const uint32_t differing = (prevCE ^ CE) | (prevContCE ^ contCE);

    if(differing == 0) {
        return UCOL_IDENTICAL;
    }
    if((differing & strengthMask[UCOL_PRIMARY]) != 0) {
        return UCOL_PRIMARY;
    }
    if((differing & strengthMask[UCOL_SECONDARY]) != 0) {
        return UCOL_SECONDARY;
    }
    return UCOL_TERTIARY;
}
/**
 * Locates the inverse-UCA table entry preceding the list header's base CE at
 * the given strength and stores it in lh->previousCE / lh->previousContCE.
 *
 * Starting from the row located by ucol_inv_findCE(), rows are visited in
 * descending order until the masked CE pair differs from the masked base CE
 * pair. The scan stops at row 0 so that it never reads before the start of
 * the table (the same guard ucol_inv_getPrevCE() applies).
 *
 * @param src      parser providing the inverse UCA table
 * @param lh       list header supplying baseCE/baseContCE and receiving the result
 * @param strength index into strengthMask
 * @return the row index of the entry stored, or -1 when the lookup fails
 */
static
inline int32_t ucol_inv_getPrevious(UColTokenParser *src, UColTokListHeader *lh, uint32_t strength) {
    uint32_t CE = lh->baseCE;
    uint32_t SecondCE = lh->baseContCE;
    uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
    uint32_t previousCE, previousContCE;
    int32_t iCE;

    iCE = ucol_inv_findCE(src, CE, SecondCE);
    if(iCE<0) {
        return -1;
    }

    CE &= strengthMask[strength];
    SecondCE &= strengthMask[strength];

    previousCE = CE;
    previousContCE = SecondCE;

    while((previousCE & strengthMask[strength]) == CE
        && (previousContCE & strengthMask[strength]) == SecondCE
        && iCE > 0) {   /* do not walk off the front of the table */
        previousCE = (*(CETable+3*(--iCE)));
        previousContCE = (*(CETable+3*(iCE)+1));
    }

    lh->previousCE = previousCE;
    lh->previousContCE = previousContCE;
    return iCE;
}
/**
 * Locates the inverse-UCA table entry following the list header's base CE at
 * the given strength and stores it in lh->nextCE / lh->nextContCE.
 *
 * Starting from the row located by ucol_inv_findCE(), rows are visited in
 * ascending order until the masked CE pair differs from the masked base CE
 * pair.
 *
 * @param src      parser providing the inverse UCA table
 * @param lh       list header supplying baseCE/baseContCE and receiving the result
 * @param strength index into strengthMask
 * @return the row index of the entry stored, or -1 when the lookup fails
 */
static
inline int32_t ucol_inv_getNext(UColTokenParser *src, UColTokListHeader *lh, uint32_t strength) {
    const uint32_t *table = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
    uint32_t baseCE = lh->baseCE;
    uint32_t baseCont = lh->baseContCE;

    int32_t row = ucol_inv_findCE(src, baseCE, baseCont);
    if(row < 0) {
        return -1;
    }

    const uint32_t mask = strengthMask[strength];
    baseCE &= mask;
    baseCont &= mask;

    uint32_t candidateCE = baseCE;
    uint32_t candidateCont = baseCont;
    while((candidateCE & mask) == baseCE && (candidateCont & mask) == baseCont) {
        ++row;
        candidateCE = table[3*row];
        candidateCont = table[3*row+1];
    }

    lh->nextCE = candidateCE;
    lh->nextContCE = candidateCont;
    return row;
}
/*
 * Fills in the gap boundaries (lh->gapsLo / lh->gapsHi) and the per-strength
 * bookkeeping (lh->pos, lh->fStrToken, lh->lStrToken, lh->numStr) of one
 * token list.
 *
 * gapsLo/gapsHi each hold three words per strength row: the combined primary
 * bytes, the combined secondary bytes and the combined tertiary bytes of a
 * CE and its continuation, left-aligned in a 32-bit word.
 *
 * Three cases are handled:
 *  - the base CE's lead byte lies in the implicit range read from the UCA
 *    constants: row 0 spans from the base CE to the next implicit primary;
 *  - the list is indirect and has a non-zero nextCE: row 0 spans from the
 *    base CE to lh->nextCE;
 *  - otherwise the inverse UCA table is consulted per strength through
 *    ucol_inv_getNext().
 *
 * *status is set to U_INTERNAL_PROGRAM_ERROR when ucol_inv_getNext() fails.
 */
U_CFUNC void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *lh, UErrorCode *status) {
int32_t i = 0;
uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
uint32_t st = 0;
uint32_t t1, t2;
int32_t pos;
UColToken *tok = lh->first;
uint32_t tokStrength = tok->strength;
/* Reset all gap data and per-strength bookkeeping. */
for(i = 0; i<3; i++) {
lh->gapsHi[3*i] = 0;
lh->gapsHi[3*i+1] = 0;
lh->gapsHi[3*i+2] = 0;
lh->gapsLo[3*i] = 0;
lh->gapsLo[3*i+1] = 0;
lh->gapsLo[3*i+2] = 0;
lh->numStr[i] = 0;
lh->fStrToken[i] = NULL;
lh->lStrToken[i] = NULL;
lh->pos[i] = -1;
}
UCAConstants *consts = (UCAConstants *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts);
/* Case 1: the base CE's lead byte is within the implicit primary range. */
if((lh->baseCE & 0xFF000000)>= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && (lh->baseCE & 0xFF000000) <= (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) {
lh->pos[0] = 0;
/* Low boundary: the base CE itself. */
t1 = lh->baseCE;
t2 = lh->baseContCE & UCOL_REMOVE_CONTINUATION;
lh->gapsLo[0] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
lh->gapsLo[1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
lh->gapsLo[2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16;
/* High boundary: the implicit primary whose raw value is one greater,
   with 0x0505 placed in the low 16 bits of the first CE. */
uint32_t primaryCE = t1 & UCOL_PRIMARYMASK | (t2 & UCOL_PRIMARYMASK) >> 16;
primaryCE = uprv_uca_getImplicitFromRaw(uprv_uca_getRawFromImplicit(primaryCE)+1);
t1 = primaryCE & UCOL_PRIMARYMASK | 0x0505;
t2 = (primaryCE << 16) & UCOL_PRIMARYMASK;
lh->gapsHi[0] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
lh->gapsHi[1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
lh->gapsHi[2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16;
/* Case 2: indirect list with a known next CE; the gap is base CE .. next CE. */
} else if(lh->indirect == TRUE && lh->nextCE != 0) {
lh->pos[0] = 0;
t1 = lh->baseCE;
t2 = lh->baseContCE&UCOL_REMOVE_CONTINUATION;
lh->gapsLo[0] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
lh->gapsLo[1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
lh->gapsLo[2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16;
t1 = lh->nextCE;
t2 = lh->nextContCE&UCOL_REMOVE_CONTINUATION;
lh->gapsHi[0] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
lh->gapsHi[1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
lh->gapsHi[2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16;
/* Case 3: walk the token list and look up the next table entry per strength. */
} else {
for(;;) {
/* Strengths at or above UCOL_CE_STRENGTH_LIMIT have no slot in the per-strength arrays. */
if(tokStrength < UCOL_CE_STRENGTH_LIMIT) {
if((lh->pos[tokStrength] = ucol_inv_getNext(src, lh, tokStrength)) >= 0) {
lh->fStrToken[tokStrength] = tok;
} else {
*status = U_INTERNAL_PROGRAM_ERROR;
}
}
/* Skip forward over tokens whose strength value is >= tokStrength,
   remembering the last one visited. */
while(tok != NULL && tok->strength >= tokStrength) {
if(tokStrength < UCOL_CE_STRENGTH_LIMIT) {
lh->lStrToken[tokStrength] = tok;
}
tok = tok->next;
}
/* When this strength and the next one resolve to the same table row,
   fold the next strength's first token into this one and clear its slot. */
if(tokStrength < UCOL_CE_STRENGTH_LIMIT-1) {
if(lh->pos[tokStrength] == lh->pos[tokStrength+1]) {
lh->fStrToken[tokStrength] = lh->fStrToken[tokStrength+1];
lh->fStrToken[tokStrength+1] = NULL;
lh->lStrToken[tokStrength+1] = NULL;
lh->pos[tokStrength+1] = -1;
}
}
if(tok != NULL) {
tokStrength = tok->strength;
} else {
break;
}
}
/* For every strength with a table position: the high boundary comes from
   that inverse-table row, the low boundary from the base CE. The tertiary
   part is taken from the low 6 bits (0x3f) here. */
for(st = 0; st < 3; st++) {
if((pos = lh->pos[st]) >= 0) {
t1 = *(CETable+3*(pos));
t2 = *(CETable+3*(pos)+1);
lh->gapsHi[3*st] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
lh->gapsHi[3*st+1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
lh->gapsHi[3*st+2] = (t1&0x3f) << 24 | (t2&0x3f) << 16;
t1 = lh->baseCE;
t2 = lh->baseContCE;
lh->gapsLo[3*st] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
lh->gapsLo[3*st+1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
lh->gapsLo[3*st+2] = (t1&0x3f) << 24 | (t2&0x3f) << 16;
}
}
}
}
/*
 * Stores in noOfBytes how many of the masks 0xFFFFFFFF, 0x00FFFFFF,
 * 0x0000FFFF and 0x000000FF select a non-zero part of value. For a value
 * whose significant bytes are left-aligned this is the number of bytes from
 * the most significant byte down to the last non-zero byte (0 for value 0).
 * Both arguments are evaluated more than once; the expansion is a braced
 * block that declares a local named mask.
 */
#define ucol_countBytes(value, noOfBytes) \
{ \
uint32_t mask = 0xFFFFFFFF; \
(noOfBytes) = 0; \
while(mask != 0) { \
if(((value) & mask) != 0) { \
(noOfBytes)++; \
} \
mask >>= 8; \
} \
}
/**
 * Advances a weight generator to its next weight.
 *
 * When *status already signals a failure the generator is left untouched and
 * its current weight is returned unchanged.
 *
 * @param g      generator whose ranges are consumed via ucol_nextWeight()
 * @param status in/out error code; only read here
 * @return the generator's current weight after the call
 */
U_CFUNC uint32_t ucol_getNextGenerated(ucolCEGenerator *g, UErrorCode *status) {
    if(U_FAILURE(*status)) {
        return g->current;
    }
    g->current = ucol_nextWeight(g->ranges, &g->noOfRanges);
    return g->current;
}
/**
 * Initializes a weight generator for a level that starts from the common
 * weight instead of from a gap in the UCA.
 *
 * For UCOL_SECONDARY the range runs from UCOL_COMMON_TOP2<<24 to 0xFFFFFFFF;
 * for every other strength it runs from UCOL_BYTE_COMMON<<24 to 0x40000000.
 * The number of weights requested defaults to the size of that byte range and
 * is replaced by tok->next->toInsert when the following token has the
 * requested strength.
 *
 * @param g        generator to initialize
 * @param tok      token the weights are generated for
 * @param strength level the generator is used for
 * @param status   set to U_INTERNAL_PROGRAM_ERROR when no range was allocated
 * @return UCOL_BYTE_COMMON<<24, which is also stored in g->current
 */
U_CFUNC uint32_t ucol_getSimpleCEGenerator(ucolCEGenerator *g, UColToken *tok, uint32_t strength, UErrorCode *status) {
    const uint32_t maxByte = (strength == UCOL_TERTIARY)?0x3F:0xFF;
    uint32_t low, high, count;

    if(strength != UCOL_SECONDARY) {
        low = UCOL_BYTE_COMMON << 24;
        high = 0x40000000;
        count = 0x40 - UCOL_BYTE_COMMON;
    } else {
        low = UCOL_COMMON_TOP2<<24;
        high = 0xFFFFFFFF;
        count = 0xFF - UCOL_COMMON_TOP2;
    }

    /* A following token of the same strength dictates how many weights are needed. */
    const UColToken *following = tok->next;
    if(following != NULL && following->strength == strength) {
        count = following->toInsert;
    }

    g->noOfRanges = ucol_allocWeights(low, high, count, maxByte, g->ranges);
    g->current = UCOL_BYTE_COMMON<<24;

    if(g->noOfRanges == 0) {
        *status = U_INTERNAL_PROGRAM_ERROR;
    }
    return g->current;
}
/**
 * Initializes a weight generator for a token from the gap boundaries and
 * returns the first generated weight.
 *
 * lows and highs hold three words per strength row (primary, secondary,
 * tertiary); the boundaries used are those in row fStrength at column
 * tok->strength.
 *
 * When the selected gap is empty (low >= high) for a secondary or tertiary
 * token, the stronger levels of the same row are searched for a real gap; if
 * one exists the full range of the current level may be used. If no stronger
 * level has a gap, *status is set to U_INTERNAL_PROGRAM_ERROR and 0 is
 * returned.
 *
 * @param g         generator to initialize
 * @param lows      low gap boundaries
 * @param highs     high gap boundaries
 * @param tok       token supplying the strength and the number of weights (toInsert)
 * @param fStrength row of lows/highs to use
 * @param status    set to U_INTERNAL_PROGRAM_ERROR on failure
 * @return the first generated weight, or 0 when no usable gap was found
 */
U_CFUNC uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_t* highs, UColToken *tok, uint32_t fStrength, UErrorCode *status) {
    uint32_t strength = tok->strength;
    uint32_t low = lows[fStrength*3+strength];
    uint32_t high = highs[fStrength*3+strength];
    uint32_t maxByte = 0;

    if(strength == UCOL_TERTIARY) {
        maxByte = 0x3F;
    } else if(strength == UCOL_PRIMARY) {
        maxByte = 0xFE;
    } else {
        maxByte = 0xFF;
    }

    uint32_t count = tok->toInsert;

    if(low >= high && strength > UCOL_PRIMARY) {
        int32_t s = (int32_t)strength;
        for(;;) {
            s--;
            /* Check the bound before indexing: once s drops below 0 there is
               no stronger level left in this row to look at. */
            if(s<0) {
                *status = U_INTERNAL_PROGRAM_ERROR;
                return 0;
            }
            if(lows[fStrength*3+s] != highs[fStrength*3+s]) {
                if(strength == UCOL_SECONDARY) {
                    low = UCOL_COMMON_TOP2<<24;
                    high = 0xFFFFFFFF;
                } else {
                    high = 0x40000000;
                }
                break;
            }
        }
    }

    if(low == 0) {
        low = 0x01000000;
    }

    if(strength == UCOL_SECONDARY) {
        /* Move boundaries that fall inside the common secondary range up to its top. */
        if(low >= (UCOL_COMMON_BOT2<<24) && low < (uint32_t)(UCOL_COMMON_TOP2<<24)) {
            low = UCOL_COMMON_TOP2<<24;
        }
        if(high > (UCOL_COMMON_BOT2<<24) && high < (uint32_t)(UCOL_COMMON_TOP2<<24)) {
            high = UCOL_COMMON_TOP2<<24;
        }
        /* Below the common range, allocation starts at UCOL_BYTE_UNSHIFTED_MIN. */
        if(low < (UCOL_COMMON_BOT2<<24)) {
            g->noOfRanges = ucol_allocWeights(UCOL_BYTE_UNSHIFTED_MIN<<24, high, count, maxByte, g->ranges);
            g->current = ucol_nextWeight(g->ranges, &g->noOfRanges);
            return g->current;
        }
    }

    g->noOfRanges = ucol_allocWeights(low, high, count, maxByte, g->ranges);
    if(g->noOfRanges == 0) {
        *status = U_INTERNAL_PROGRAM_ERROR;
    }
    g->current = ucol_nextWeight(g->ranges, &g->noOfRanges);
    return g->current;
}
/**
 * Copies source to resBuf, replacing each small kana letter handled by the
 * switch below with its full-size counterpart.
 *
 * Most small letters sit immediately before their large form and are mapped
 * by adding one; U+30F5 and U+30F6 map to U+30AB and U+30B1. All other code
 * units are copied unchanged.
 *
 * The range test covers U+3041..U+30FA so that every case label of the switch
 * is reachable, including U+3041, U+30F5 and U+30F6.
 *
 * @param source    input code units
 * @param sourceLen number of code units in source
 * @param resBuf    output buffer
 * @param resLen    capacity of resBuf in code units
 * @param status    in/out error code; set to U_MEMORY_ALLOCATION_ERROR when
 *                  resBuf is too small
 * @return sourceLen on success, 0 on failure
 */
static
uint32_t u_toLargeKana(const UChar *source, const uint32_t sourceLen, UChar *resBuf, const uint32_t resLen, UErrorCode *status) {
    uint32_t i = 0;
    UChar c;

    if(U_FAILURE(*status)) {
        return 0;
    }

    if(sourceLen > resLen) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }

    for(i = 0; i < sourceLen; i++) {
        c = source[i];
        if(0x3041 <= c && c <= 0x30FA) { /* kana letters */
            switch(c - 0x3000) {
            case 0x41: case 0x43: case 0x45: case 0x47: case 0x49: case 0x63: case 0x83: case 0x85: case 0x8E:
            case 0xA1: case 0xA3: case 0xA5: case 0xA7: case 0xA9: case 0xC3: case 0xE3: case 0xE5: case 0xEE:
                c++;
                break;
            case 0xF5:
                c = 0x30AB;
                break;
            case 0xF6:
                c = 0x30B1;
                break;
            default:
                break;
            }
        }
        resBuf[i] = c;
    }
    return sourceLen;
}
/**
 * Copies source to resBuf, replacing each full-size kana letter handled by
 * the switch below with its small counterpart.
 *
 * Most large letters sit immediately after their small form and are mapped by
 * subtracting one; U+30AB and U+30B1 map to U+30F5 and U+30F6. All other code
 * units are copied unchanged.
 *
 * The range test covers U+3041..U+30FA so that every case label of the switch
 * is reachable, including U+3042 and U+30EF.
 *
 * @param source    input code units
 * @param sourceLen number of code units in source
 * @param resBuf    output buffer
 * @param resLen    capacity of resBuf in code units
 * @param status    in/out error code; set to U_MEMORY_ALLOCATION_ERROR when
 *                  resBuf is too small
 * @return sourceLen on success, 0 on failure
 */
static
uint32_t u_toSmallKana(const UChar *source, const uint32_t sourceLen, UChar *resBuf, const uint32_t resLen, UErrorCode *status) {
    uint32_t i = 0;
    UChar c;

    if(U_FAILURE(*status)) {
        return 0;
    }

    if(sourceLen > resLen) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }

    for(i = 0; i < sourceLen; i++) {
        c = source[i];
        if(0x3041 <= c && c <= 0x30FA) { /* kana letters */
            switch(c - 0x3000) {
            case 0x42: case 0x44: case 0x46: case 0x48: case 0x4A: case 0x64: case 0x84: case 0x86: case 0x8F:
            case 0xA2: case 0xA4: case 0xA6: case 0xA8: case 0xAA: case 0xC4: case 0xE4: case 0xE6: case 0xEF:
                c--;
                break;
            case 0xAB:
                c = 0x30F5;
                break;
            case 0xB1:
                c = 0x30F6;
                break;
            default:
                break;
            }
        }
        resBuf[i] = c;
    }
    return sourceLen;
}
/**
 * Derives the case bits for a multi-code-unit string.
 *
 * The string is normalized to NFKD (into a 128-unit buffer) and each
 * resulting code unit is classified on its own: it counts as upper case when
 * the case bits of its first UCA CE equal UCOL_UPPER_CASE; otherwise it
 * counts as lower case when u_islower() says so or when it is a small kana
 * (unchanged by u_toSmallKana() but changed by u_toLargeKana()).
 *
 * @param UCA    collator used to look up CEs
 * @param src    string to classify
 * @param len    length of src
 * @param status in/out error code; set to U_INTERNAL_PROGRAM_ERROR when a
 *               code unit's first CE is a continuation
 * @return UCOL_MIXED_CASE when both upper and lower case units were seen,
 *         UCOL_UPPER_CASE when only upper case units were seen,
 *         UCOL_LOWER_CASE otherwise (including on error)
 */
static
uint8_t ucol_uprv_getCaseBits(const UCollator *UCA, const UChar *src, uint32_t len, UErrorCode *status) {
    UChar n[128];
    collIterate s;
    uint32_t upperSeen = 0, lowerSeen = 0;

    if(U_FAILURE(*status)) {
        return UCOL_LOWER_CASE;
    }

    const uint32_t nLen = unorm_normalize(src, len, UNORM_NFKD, 0, n, 128, status);
    if(U_SUCCESS(*status)) {
        for(uint32_t idx = 0; idx < nLen; idx++) {
            uprv_init_collIterate(UCA, &n[idx], 1, &s);
            const uint32_t order = ucol_getNextCE(UCA, &s, status);
            if(isContinuation(order)) {
                *status = U_INTERNAL_PROGRAM_ERROR;
                return UCOL_LOWER_CASE;
            }

            if((order&UCOL_CASE_BIT_MASK)== UCOL_UPPER_CASE) {
                upperSeen++;
                continue;
            }
            if(u_islower(n[idx])) {
                lowerSeen++;
                continue;
            }

            /* Neither upper nor lower by the tests above: check for small kana. */
            UChar sk[1], lk[1];
            u_toSmallKana(&n[idx], 1, sk, 1, status);
            u_toLargeKana(&n[idx], 1, lk, 1, status);
            if(sk[0] == n[idx] && lk[0] != n[idx]) {
                lowerSeen++;
            }
        }
    }

    if(upperSeen == 0) {
        return UCOL_LOWER_CASE;
    }
    if(lowerSeen != 0) {
        return UCOL_MIXED_CASE;
    }
    return UCOL_UPPER_CASE;
}
/**
 * Packs the per-level weights in CEparts into the token's CE array.
 *
 * CEparts holds one left-aligned 32-bit weight per level (primary, secondary,
 * tertiary). Each CE produced takes two primary bytes, one secondary byte and
 * the low 6 bits of one tertiary byte; CEs after the first carry
 * UCOL_CONTINUATION_MARKER. If no level has any bytes, a single CE of 0 is
 * stored. For a non-zero first CE, bits 6..7 are cleared and replaced with
 * case bits derived from the token's source characters.
 *
 * @param src     parser providing the rule source and the UCA collator
 * @param CEparts the three level weights
 * @param tok     token receiving CEs and noOfCEs
 * @param status  in/out error code passed to the case-bit lookups
 */
U_CFUNC void ucol_doCE(UColTokenParser *src, uint32_t *CEparts, UColToken *tok, UErrorCode *status) {
    uint32_t noOfBytes[3];
    uint32_t i;

    for(i = 0; i<3; i++) {
        ucol_countBytes(CEparts[i], noOfBytes[i]);
    }

    /* Emit CEs until every level has run out of bytes. */
    uint32_t ceCount = 0;
    for(;;) {
        const UBool morePrimary = 2*ceCount<noOfBytes[0];
        const UBool moreSecondary = ceCount<noOfBytes[1];
        const UBool moreTertiary = ceCount<noOfBytes[2];
        if(!(morePrimary || moreSecondary || moreTertiary)) {
            break;
        }

        uint32_t ce = (ceCount > 0) ? UCOL_CONTINUATION_MARKER : 0;
        if(morePrimary) {
            ce |= ((CEparts[0]>>(32-16*(ceCount+1))) & 0xFFFF) << 16;
        }
        if(moreSecondary) {
            ce |= ((CEparts[1]>>(32-8*(ceCount+1))) & 0xFF) << 8;
        }
        if(moreTertiary) {
            ce |= ((CEparts[2]>>(32-8*(ceCount+1))) & 0x3F);
        }
        tok->CEs[ceCount] = ce;
        ceCount++;
    }

    if(ceCount != 0) {
        tok->noOfCEs = ceCount;
    } else {
        tok->noOfCEs = 1;
        tok->CEs[0] = 0;
    }

    if(tok->CEs[0] != 0) {
        /* Clear bits 6..7 of the first CE, then fill in the case bits. */
        tok->CEs[0] &= 0xFFFFFF3F;
        /* tok->source packs the length in the top byte and the offset below it. */
        int32_t cSize = (tok->source & 0xFF000000) >> 24;
        UChar *cPoints = (tok->source & 0x00FFFFFF) + src->source;
        if(cSize <= 1) {
            uint32_t caseCE = ucol_getFirstCE(src->UCA, cPoints[0], status);
            tok->CEs[0] |= (caseCE & 0xC0);
        } else {
            tok->CEs[0] |= ucol_uprv_getCaseBits(src->UCA, cPoints, cSize, status);
        }
    }
#if UCOL_DEBUG==2
    fprintf(stderr, "%04X str: %i, [%08X, %08X, %08X]: tok: ", tok->debugSource, tok->strength, CEparts[0] >> (32-8*noOfBytes[0]), CEparts[1] >> (32-8*noOfBytes[1]), CEparts[2]>> (32-8*noOfBytes[2]));
    for(i = 0; i<tok->noOfCEs; i++) {
        fprintf(stderr, "%08X ", tok->CEs[i]);
    }
    fprintf(stderr, "\n");
#endif
}
/*
 * Generates the CEs for every token of one token list.
 *
 * Step 1 walks the list backwards from lh->last and fills in tok->toInsert
 * from a per-strength counter array. Step 2 computes the gap boundaries with
 * ucol_inv_getGapPositions(). Step 3 walks the list forwards, producing the
 * primary/secondary/tertiary weights for each token from the weight
 * generators and packing them with ucol_doCE().
 *
 * *status is set to U_INTERNAL_PROGRAM_ERROR when no gap position exists for
 * a token's strength or any stronger one; processing stops at the first
 * failure.
 */
U_CFUNC void ucol_initBuffers(UColTokenParser *src, UColTokListHeader *lh, UErrorCode *status) {
ucolCEGenerator Gens[UCOL_CE_STRENGTH_LIMIT];
uint32_t CEparts[UCOL_CE_STRENGTH_LIMIT];
UColToken *tok = lh->last;
/* t[s] is the running counter for strength s used to fill in toInsert. */
uint32_t t[UCOL_STRENGTH_LIMIT];
uprv_memset(t, 0, UCOL_STRENGTH_LIMIT*sizeof(uint32_t));
tok->toInsert = 1;
t[tok->strength] = 1;
/* Backward pass: update the counters from each pair of neighbouring tokens,
   then store the counter for the earlier token's strength in its toInsert. */
while(tok->previous != NULL) {
if(tok->previous->strength < tok->strength) {
/* previous token has a smaller strength value: restart this strength's counter */
t[tok->strength] = 0;
t[tok->previous->strength]++;
} else if(tok->previous->strength > tok->strength) {
/* previous token has a larger strength value: it starts a new run */
t[tok->previous->strength] = 1;
} else {
t[tok->strength]++;
}
tok=tok->previous;
tok->toInsert = t[tok->strength];
}
tok->toInsert = t[tok->strength];
ucol_inv_getGapPositions(src, lh, status);
#if UCOL_DEBUG
fprintf(stderr, "BaseCE: %08X %08X\n", lh->baseCE, lh->baseContCE);
int32_t j = 2;
for(j = 2; j >= 0; j--) {
fprintf(stderr, "gapsLo[%i] [%08X %08X %08X]\n", j, lh->gapsLo[j*3], lh->gapsLo[j*3+1], lh->gapsLo[j*3+2]);
fprintf(stderr, "gapsHi[%i] [%08X %08X %08X]\n", j, lh->gapsHi[j*3], lh->gapsHi[j*3+1], lh->gapsHi[j*3+2]);
}
tok=lh->first[UCOL_TOK_POLARITY_POSITIVE];
do {
fprintf(stderr,"%i", tok->strength);
tok = tok->next;
} while(tok != NULL);
fprintf(stderr, "\n");
tok=lh->first[UCOL_TOK_POLARITY_POSITIVE];
do {
fprintf(stderr,"%i", tok->toInsert);
tok = tok->next;
} while(tok != NULL);
#endif
/* Forward pass: start from the base CE split into its three levels. */
tok = lh->first;
uint32_t fStrength = UCOL_IDENTICAL;
uint32_t initStrength = UCOL_IDENTICAL;
CEparts[UCOL_PRIMARY] = (lh->baseCE & UCOL_PRIMARYMASK) | (lh->baseContCE & UCOL_PRIMARYMASK) >> 16;
CEparts[UCOL_SECONDARY] = (lh->baseCE & UCOL_SECONDARYMASK) << 16 | (lh->baseContCE & UCOL_SECONDARYMASK) << 8;
CEparts[UCOL_TERTIARY] = (UCOL_TERTIARYORDER(lh->baseCE)) << 24 | (UCOL_TERTIARYORDER(lh->baseContCE)) << 16;
while (tok != NULL && U_SUCCESS(*status)) {
fStrength = tok->strength;
/* First token at a strength value smaller than any seen so far: the
   generator for that level is (re)initialized from the gap data. */
if(fStrength < initStrength) {
initStrength = fStrength;
/* No gap position for this strength: fall back to the nearest smaller
   strength value that has one. */
if(lh->pos[fStrength] == -1) {
while(lh->pos[fStrength] == -1 && fStrength > 0) {
fStrength--;
}
if(lh->pos[fStrength] == -1) {
*status = U_INTERNAL_PROGRAM_ERROR;
return;
}
}
/* Levels above the token's strength are taken from the low gap boundary,
   the token's own level from a gap-based generator, and levels below it
   from simple generators. */
if(initStrength == UCOL_TERTIARY) {
CEparts[UCOL_PRIMARY] = lh->gapsLo[fStrength*3];
CEparts[UCOL_SECONDARY] = lh->gapsLo[fStrength*3+1];
CEparts[UCOL_TERTIARY] = ucol_getCEGenerator(&Gens[UCOL_TERTIARY], lh->gapsLo, lh->gapsHi, tok, fStrength, status);
} else if(initStrength == UCOL_SECONDARY) {
CEparts[UCOL_PRIMARY] = lh->gapsLo[fStrength*3];
CEparts[UCOL_SECONDARY] = ucol_getCEGenerator(&Gens[UCOL_SECONDARY], lh->gapsLo, lh->gapsHi, tok, fStrength, status);
CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TERTIARY], tok, UCOL_TERTIARY, status);
} else {
CEparts[UCOL_PRIMARY] = ucol_getCEGenerator(&Gens[UCOL_PRIMARY], lh->gapsLo, lh->gapsHi, tok, fStrength, status);
CEparts[UCOL_SECONDARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_SECONDARY], tok, UCOL_SECONDARY, status);
CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TERTIARY], tok, UCOL_TERTIARY, status);
}
} else {
/* Otherwise take the next weight from the generator of the token's level
   and restart the generators of the levels below it. */
if(tok->strength == UCOL_TERTIARY) {
CEparts[UCOL_TERTIARY] = ucol_getNextGenerated(&Gens[UCOL_TERTIARY], status);
} else if(tok->strength == UCOL_SECONDARY) {
CEparts[UCOL_SECONDARY] = ucol_getNextGenerated(&Gens[UCOL_SECONDARY], status);
CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TERTIARY], tok, UCOL_TERTIARY, status);
} else if(tok->strength == UCOL_PRIMARY) {
CEparts[UCOL_PRIMARY] = ucol_getNextGenerated(&Gens[UCOL_PRIMARY], status);
CEparts[UCOL_SECONDARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_SECONDARY], tok, UCOL_SECONDARY, status);
CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TERTIARY], tok, UCOL_TERTIARY, status);
}
}
ucol_doCE(src, CEparts, tok, status);
tok = tok->next;
}
}
/*
 * Turns every token of one token list into a UCAElements record and adds it
 * to the temporary table t.
 *
 * For a token with an expansion, the expansion's CEs are collected first:
 * the longest leading piece of the remaining expansion text that is found in
 * src->tailored (and is not a reset) contributes its CEs; when no piece is
 * found, the CEs of a single code unit are taken from the UCA. The element's
 * CE list is the token's own CEs followed by the expansion CEs.
 *
 * tok->source, tok->prefix and tok->expansion pack a length in the top byte
 * and an offset into src->source in the low 24 bits.
 *
 * The loop stops at the first failure reported in *status.
 */
U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokListHeader *lh, UErrorCode *status) {
UCAElements el;
UColToken *tok = lh->first;
UColToken *expt = NULL;
uint32_t i = 0, j = 0;
while(tok != NULL && U_SUCCESS(*status)) {
if(tok->expansion != 0) {
uint32_t len = tok->expansion >> 24;
uint32_t currentSequenceLen = len;
uint32_t expOffset = tok->expansion & 0x00FFFFFF;
/* Scratch token used only as a lookup key for src->tailored. */
UColToken exp;
/* This value is overwritten below before exp is used for a lookup. */
exp.source = currentSequenceLen | expOffset;
exp.rulesToParse = src->source;
while(len > 0) {
currentSequenceLen = len;
/* Try the longest remaining piece first, shrinking by one code unit per miss. */
while(currentSequenceLen > 0) {
exp.source = (currentSequenceLen << 24) | expOffset;
if((expt = (UColToken *)uhash_get(src->tailored, &exp)) != NULL && expt->strength != UCOL_TOK_RESET) {
uint32_t noOfCEsToCopy = expt->noOfCEs;
for(j = 0; j<noOfCEsToCopy; j++) {
tok->expCEs[tok->noOfExpCEs + j] = expt->CEs[j];
}
tok->noOfExpCEs += noOfCEsToCopy;
/* Consume the matched piece and continue with the rest of the expansion. */
expOffset += currentSequenceLen; len -= currentSequenceLen; break;
} else {
currentSequenceLen--;
}
}
/* Nothing tailored matched: take the UCA CEs of one code unit. */
if(currentSequenceLen == 0) {
collIterate s;
uint32_t order = 0;
uprv_init_collIterate(src->UCA, expOffset + src->source, 1, &s);
for(;;) {
order = ucol_getNextCE(src->UCA, &s, status);
if(order == UCOL_NO_MORE_CES) {
break;
}
tok->expCEs[tok->noOfExpCEs++] = order;
}
expOffset++;
len--;
}
}
} else {
tok->noOfExpCEs = 0;
}
/* Element CEs: the token's own CEs followed by its expansion CEs. */
el.noOfCEs = tok->noOfCEs + tok->noOfExpCEs;
for(i = 0; i<tok->noOfCEs; i++) {
el.CEs[i] = tok->CEs[i];
}
for(i = 0; i<tok->noOfExpCEs; i++) {
el.CEs[i+tok->noOfCEs] = tok->expCEs[i];
}
el.prefix = el.prefixChars;
el.cPoints = el.uchars;
/* With a prefix, the element's code units are taken from the token's source
   starting after the first (prefix length) code units. */
if(tok->prefix != 0) { el.prefixSize = tok->prefix>>24;
uprv_memcpy(el.prefix, src->source + (tok->prefix & 0x00FFFFFF), el.prefixSize*sizeof(UChar));
el.cSize = (tok->source >> 24)-(tok->prefix>>24);
uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF)+(tok->prefix>>24) + src->source, el.cSize*sizeof(UChar));
} else {
el.prefixSize = 0;
*el.prefix = 0;
el.cSize = (tok->source >> 24);
uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF) + src->source, el.cSize*sizeof(UChar));
}
/* Flag the table when any code unit of the element satisfies UCOL_ISJAMO. */
if(src->UCA != NULL) {
for(i = 0; i<el.cSize; i++) {
if(UCOL_ISJAMO(el.cPoints[i])) {
t->image->jamoSpecial = TRUE;
}
}
}
#if UCOL_DEBUG==2
fprintf(stderr, "Adding: %04X with %08X\n", el.cPoints[0], el.CEs[0]);
#endif
uprv_uca_addAnElement(t, &el, status);
#if UCOL_DEBUG_DUPLICATES
if(*status != U_ZERO_ERROR) {
fprintf(stderr, "replaced CE for %04X with CE for %04X\n", el.cPoints[0], tok->debugSource);
*status = U_ZERO_ERROR;
}
#endif
tok = tok->next;
}
}
U_CDECL_BEGIN
/**
 * utrie_enum() range callback used on the UCA mapping trie.
 *
 * For a range whose enumerated value is 0, every code point in
 * [start, limit) that has no entry yet in the temporary table's mapping
 * (lookup yields UCOL_NOT_FOUND) is added with a single CE of 0.
 *
 * @param context the tempUCATable being built
 * @param start   first code point of the range
 * @param limit   one past the last code point of the range
 * @param value   trie value shared by the whole range
 * @return FALSE when adding an element failed, TRUE otherwise
 */
static UBool U_CALLCONV
_processUCACompleteIgnorables(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
    UErrorCode status = U_ZERO_ERROR;
    tempUCATable *t = (tempUCATable *)context;

    /* Only ranges with value 0 are of interest. */
    if(value != 0) {
        return TRUE;
    }

    for(; start < limit; start++) {
        if(utrie_get32(t->mapping, start, NULL) != UCOL_NOT_FOUND) {
            continue;   /* already present in the table being built */
        }
        UCAElements el;
        el.isThai = FALSE;
        el.prefixSize = 0;
        el.prefixChars[0] = 0;
        el.prefix = el.prefixChars;
        el.cPoints = el.uchars;
        el.cSize = 0;
        UTF_APPEND_CHAR(el.uchars, el.cSize, 1024, start);
        el.noOfCEs = 1;
        el.CEs[0] = 0;
        uprv_uca_addAnElement(t, &el, &status);
    }

    return U_FAILURE(status) ? FALSE : TRUE;
}
U_CDECL_END
/**
 * Copies the UCA collation elements of a code point range into the temporary
 * table, for code points that are not yet covered there.
 *
 * A code point is copied when its lookup in t->mapping yields
 * UCOL_NOT_FOUND, or yields a contraction-table element whose entry 0 is
 * UCOL_NOT_FOUND. Nothing is done when *status already signals a failure.
 *
 * @param src    parser providing the UCA collator
 * @param t      temporary table receiving the elements
 * @param start  first code point of the range
 * @param end    last code point of the range (inclusive)
 * @param status in/out error code
 */
static void
ucol_uprv_bld_copyRangeFromUCA(UColTokenParser *src, tempUCATable *t,
                               UChar32 start, UChar32 end,
                               UErrorCode *status) {
    uint32_t CE = UCOL_NOT_FOUND;
    UCAElements el;
    collIterate colIt;

    el.isThai = FALSE;
    el.prefixSize = 0;
    el.prefixChars[0] = 0;

    if(U_FAILURE(*status)) {
        return;
    }

    for(UChar32 u = start; u<=end; u++) {
        CE = utrie_get32(t->mapping, u, NULL);
        const UBool missing =
            CE == UCOL_NOT_FOUND
            || ((isCntTableElement(CE)) &&
                (uprv_cnttab_getCE(t->contractions, CE, 0, status) == UCOL_NOT_FOUND));
        if(!missing) {
            continue;
        }

        el.cSize = 0;
        U16_APPEND_UNSAFE(el.uchars, el.cSize, u);
        el.cPoints = el.uchars;
        el.noOfCEs = 0;
        el.prefix = el.prefixChars;
        el.prefixSize = 0;

        /* Collect every CE the UCA produces for this code point. */
        uprv_init_collIterate(src->UCA, el.uchars, el.cSize, &colIt);
        while(CE != UCOL_NO_MORE_CES) {
            CE = ucol_getNextCE(src->UCA, &colIt, status);
            if(CE != UCOL_NO_MORE_CES) {
                el.CEs[el.noOfCEs++] = CE;
            }
        }

        uprv_uca_addAnElement(t, &el, status);
    }
}
/**
 * Builds the binary tailoring table from the parsed rules.
 *
 * Steps:
 *  1. copy the UCA image header into a scratch header;
 *  2. generate CEs for every non-empty token list (ucol_initBuffers);
 *  3. if a variable-top token exists, record its value in the options and
 *     unlink it from its list;
 *  4. create a temporary table and add an element for every token;
 *  5. copy U+0000..U+00FF and every range of src->copySet from the UCA;
 *  6. re-add UCA contractions whose first code unit is tailored, unless the
 *     contraction itself is tailored or its first code unit is in
 *     src->removeSet;
 *  7. add completely ignorable code points, close over canonical
 *     equivalents and assemble the final table.
 *
 * The scratch header is released on every exit path after its allocation,
 * including the early return taken when CE generation fails.
 *
 * @param src    parser state holding the token lists, options and UCA
 * @param status in/out error code
 * @return the assembled table, or NULL when called with a failure status,
 *         when allocation fails, or when CE generation fails
 */
UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *status) {
    uint32_t i = 0;
    if(U_FAILURE(*status)) {
        return NULL;
    }

    UCATableHeader *image = (UCATableHeader *)uprv_malloc(sizeof(UCATableHeader));
    if (image == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(image, src->UCA->image, sizeof(UCATableHeader));

    /* Generate the CEs for every token list that still has tokens. */
    for(i = 0; i<src->resultLen; i++) {
        if(src->lh[i].first) {
            ucol_initBuffers(src, &src->lh[i], status);
        }
        if(U_FAILURE(*status)) {
            uprv_free(image);   /* do not leak the scratch header */
            return NULL;
        }
    }

    /* Record the variable top and unlink its token from the list. */
    if(src->varTop != NULL) {
        src->opts->variableTopValue = (*(src->varTop->CEs))>>16;
        if(src->varTop->listHeader->first == src->varTop) {
            src->varTop->listHeader->first = src->varTop->next;
        }
        if(src->varTop->listHeader->last == src->varTop) {
            src->varTop->listHeader->last = src->varTop->previous;
        }
        if(src->varTop->next != NULL) {
            src->varTop->next->previous = src->varTop->previous;
        }
        if(src->varTop->previous != NULL) {
            src->varTop->previous->next = src->varTop->next;
        }
    }

    tempUCATable *t = uprv_uca_initTempTable(image, src->opts, src->UCA, NOT_FOUND_TAG, NOT_FOUND_TAG, status);

    /* Add the tailored elements. */
    for(i = 0; i<src->resultLen; i++) {
        if(U_SUCCESS(*status)) {
            ucol_createElements(src, t, &src->lh[i], status);
        }
    }

    UCAElements el;
    el.isThai = FALSE;
    el.prefixSize = 0;
    el.prefixChars[0] = 0;

    /* U+0000..U+00FF is always copied from the UCA. */
    ucol_uprv_bld_copyRangeFromUCA(src, t, 0, 0xFF, status);

    /* Copy any additional ranges requested by the rules. */
    if(src->copySet != NULL) {
        UnicodeSet *set = (UnicodeSet *)src->copySet;
        for(int32_t r = 0; r < set->getRangeCount(); r++) {
            ucol_uprv_bld_copyRangeFromUCA(src, t, set->getRangeStart(r), set->getRangeEnd(r), status);
        }
    }

    if(U_SUCCESS(*status)) {
        /* The UCA contraction list is read three UChars per entry and ends
           at an entry whose first UChar is 0. */
        uint32_t tailoredCE = UCOL_NOT_FOUND;
        UChar *conts = (UChar *)((uint8_t *)src->UCA->image + src->UCA->image->contractionUCACombos);
        UCollationElements *ucaEl = ucol_openElements(src->UCA, NULL, 0, status);
        while(*conts != 0) {
            tailoredCE = utrie_get32(t->mapping, *conts, NULL);
            if(tailoredCE != UCOL_NOT_FOUND) {
                /* The first code unit is tailored: the UCA contraction must
                   be re-added unless it is tailored itself or was removed. */
                UBool needToAdd = TRUE;
                if(isCntTableElement(tailoredCE)) {
                    if(uprv_cnttab_isTailored(t->contractions, tailoredCE, conts+1, status) == TRUE) {
                        needToAdd = FALSE;
                    }
                }
                if(src->removeSet != NULL && uset_contains(src->removeSet, *conts)) {
                    needToAdd = FALSE;
                }

                if(needToAdd == TRUE) {
                    el.prefix = el.prefixChars;
                    el.prefixSize = 0;
                    el.cPoints = el.uchars;
                    el.noOfCEs = 0;
                    el.uchars[0] = *conts;
                    el.uchars[1] = *(conts+1);
                    if(*(conts+2)!=0) {
                        el.uchars[2] = *(conts+2);
                        el.cSize = 3;
                    } else {
                        el.cSize = 2;
                    }
                    ucol_setText(ucaEl, el.uchars, el.cSize, status);
                    while ((int32_t)(el.CEs[el.noOfCEs] = ucol_next(ucaEl, status)) != UCOL_NULLORDER) {
                        el.noOfCEs++;
                    }
                    uprv_uca_addAnElement(t, &el, status);
                }
            } else if(src->removeSet != NULL && uset_contains(src->removeSet, *conts)) {
                ucol_uprv_bld_copyRangeFromUCA(src, t, *conts, *conts, status);
            }
            conts+=3;
        }
        ucol_closeElements(ucaEl);
    }

    /* Add completely ignorable code points from the UCA. */
    utrie_enum(&t->UCA->mapping, NULL, _processUCACompleteIgnorables, t);

    /* Canonical closure, then flatten into the final table. */
    uprv_uca_canonicalClosure(t, status);
    UCATableHeader *myData = uprv_uca_assembleTable(t, status);

    uprv_uca_closeTempTable(t);
    uprv_free(image);

    return myData;
}
U_CDECL_BEGIN
/**
 * Library cleanup hook registered by ucol_initInverseUCA(): closes the
 * inverse UCA data and clears both cached pointers.
 *
 * @return always TRUE
 */
static UBool U_CALLCONV
ucol_bld_cleanup(void)
{
    UDataMemory *dataMem = invUCA_DATA_MEM;

    /* Drop the cached pointers, then release the data they pointed into. */
    invUCA_DATA_MEM = NULL;
    _staticInvUCA = NULL;
    udata_close(dataMem);

    return TRUE;
}
U_CDECL_END
/**
 * Returns the process-wide inverse UCA table, loading it on first use.
 *
 * The data is opened with udata_openChoice() and accepted only when its UCA
 * version matches that of the UCA collator. The pointer is published under
 * the global mutex; if another thread published first, the data opened here
 * is closed again. The thread that publishes also registers
 * ucol_bld_cleanup().
 *
 * Failure handling: if opening the data fails, any handle returned is closed
 * and not used afterwards; if ucol_initUCA() fails, the data is closed and
 * NULL is returned before the collator is dereferenced.
 *
 * @param status in/out error code; set to U_INVALID_FORMAT_ERROR when the UCA
 *               versions do not match
 * @return the cached inverse UCA table header, or NULL on failure
 */
U_CAPI const InverseUCATableHeader * U_EXPORT2
ucol_initInverseUCA(UErrorCode *status)
{
    if(U_FAILURE(*status)) return NULL;

    umtx_lock(NULL);
    UBool f = (_staticInvUCA == NULL);
    umtx_unlock(NULL);

    if(f) {
        InverseUCATableHeader *newInvUCA = NULL;
        UDataMemory *result = udata_openChoice(NULL, INVC_DATA_TYPE, INVC_DATA_NAME, isAcceptableInvUCA, NULL, status);

        if(U_FAILURE(*status)) {
            if (result) {
                udata_close(result);
            }
            /* The handle is closed (or was never valid): make sure it is not used below. */
            result = NULL;
        }

        if(result != NULL) {
            newInvUCA = (InverseUCATableHeader *)udata_getMemory(result);
            UCollator *UCA = ucol_initUCA(status);
            if(U_FAILURE(*status)) {
                /* Without a UCA collator there is nothing to compare against. */
                udata_close(result);
                return NULL;
            }
            /* The UCA versions of the collator and the inverse table must match. */
            if(uprv_memcmp(newInvUCA->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0) {
                *status = U_INVALID_FORMAT_ERROR;
                udata_close(result);
                return NULL;
            }

            umtx_lock(NULL);
            if(_staticInvUCA == NULL) {
                _staticInvUCA = newInvUCA;
                invUCA_DATA_MEM = result;
                result = NULL;
                newInvUCA = NULL;
            }
            umtx_unlock(NULL);

            if(newInvUCA != NULL) {
                /* Another thread published first: discard the copy opened here. */
                udata_close(result);
            }
            else {
                ucln_i18n_registerCleanup(UCLN_I18N_UCOL_BLD, ucol_bld_cleanup);
            }
        }
    }
    return _staticInvUCA;
}
#endif