utrie2_builder.cpp [plain text]
#ifdef UTRIE2_DEBUG
# include <stdio.h>
#endif
#include "unicode/utypes.h"
#include "cmemory.h"
#include "utrie2.h"
#include "utrie2_impl.h"
#include "utrie.h"
enum {
UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
};
#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
static int32_t
allocIndex2Block(UNewTrie2 *trie);
U_CAPI UTrie2 * U_EXPORT2
utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
UTrie2 *trie;
UNewTrie2 *newTrie;
uint32_t *data;
int32_t i, j;
if(U_FAILURE(*pErrorCode)) {
return NULL;
}
trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
if(trie==NULL || newTrie==NULL || data==NULL) {
uprv_free(trie);
uprv_free(newTrie);
uprv_free(data);
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return 0;
}
uprv_memset(trie, 0, sizeof(UTrie2));
trie->initialValue=initialValue;
trie->errorValue=errorValue;
trie->highStart=0x110000;
trie->newTrie=newTrie;
newTrie->data=data;
newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
newTrie->initialValue=initialValue;
newTrie->errorValue=errorValue;
newTrie->highStart=0x110000;
newTrie->firstFreeBlock=0;
newTrie->isCompacted=FALSE;
for(i=0; i<0x80; ++i) {
newTrie->data[i]=initialValue;
}
for(; i<0xc0; ++i) {
newTrie->data[i]=errorValue;
}
for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
newTrie->data[i]=initialValue;
}
newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
newTrie->index2[i]=j;
newTrie->map[i]=1;
}
for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
newTrie->map[i]=0;
}
newTrie->map[i++]=
(0x110000>>UTRIE2_SHIFT_2)-
(0x80>>UTRIE2_SHIFT_2)+
1+
UTRIE2_LSCP_INDEX_2_LENGTH;
j+=UTRIE2_DATA_BLOCK_LENGTH;
for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
newTrie->map[i]=0;
}
for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
}
for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
}
for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
}
newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
for(i=0, j=0;
i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
) {
newTrie->index1[i]=j;
}
for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
}
for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
utrie2_set32(trie, i, initialValue, pErrorCode);
}
return trie;
}
static UNewTrie2 *
cloneBuilder(const UNewTrie2 *other) {
UNewTrie2 *trie;
trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
if(trie==NULL) {
return NULL;
}
trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4);
if(trie->data==NULL) {
uprv_free(trie);
return NULL;
}
trie->dataCapacity=other->dataCapacity;
uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));
uprv_memcpy(trie->index2, other->index2, other->index2Length*4);
trie->index2NullOffset=other->index2NullOffset;
trie->index2Length=other->index2Length;
uprv_memcpy(trie->data, other->data, other->dataLength*4);
trie->dataNullOffset=other->dataNullOffset;
trie->dataLength=other->dataLength;
if(other->isCompacted) {
trie->firstFreeBlock=0;
} else {
uprv_memcpy(trie->map, other->map, (other->dataLength>>UTRIE2_SHIFT_2)*4);
trie->firstFreeBlock=other->firstFreeBlock;
}
trie->initialValue=other->initialValue;
trie->errorValue=other->errorValue;
trie->highStart=other->highStart;
trie->isCompacted=other->isCompacted;
return trie;
}
U_CAPI UTrie2 * U_EXPORT2
utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
UTrie2 *trie;
if(U_FAILURE(*pErrorCode)) {
return NULL;
}
if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
if(trie==NULL) {
return NULL;
}
uprv_memcpy(trie, other, sizeof(UTrie2));
if(other->memory!=NULL) {
trie->memory=uprv_malloc(other->length);
if(trie->memory!=NULL) {
trie->isMemoryOwned=TRUE;
uprv_memcpy(trie->memory, other->memory, other->length);
trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);
if(other->data16!=NULL) {
trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);
}
if(other->data32!=NULL) {
trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);
}
}
} else {
trie->newTrie=cloneBuilder(other->newTrie);
}
if(trie->memory==NULL && trie->newTrie==NULL) {
uprv_free(trie);
trie=NULL;
}
return trie;
}
typedef struct NewTrieAndStatus {
UTrie2 *trie;
UErrorCode errorCode;
UBool exclusiveLimit;
} NewTrieAndStatus;
static UBool U_CALLCONV
copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
NewTrieAndStatus *nt=(NewTrieAndStatus *)context;
if(value!=nt->trie->initialValue) {
if(nt->exclusiveLimit) {
--end;
}
if(start==end) {
utrie2_set32(nt->trie, start, value, &nt->errorCode);
} else {
utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode);
}
return U_SUCCESS(nt->errorCode);
} else {
return TRUE;
}
}
#ifdef UTRIE2_DEBUG
static void
utrie_printLengths(const UTrie *trie) {
long indexLength=trie->indexLength;
long dataLength=(long)trie->dataLength;
long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n",
indexLength, dataLength, totalLength);
}
static void
utrie2_printLengths(const UTrie2 *trie, const char *which) {
long indexLength=trie->indexLength;
long dataLength=(long)trie->dataLength;
long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
printf("**UTrie2Lengths(%s)** index:%6ld data:%6ld serialized:%6ld\n",
which, indexLength, dataLength, totalLength);
}
#endif
U_CAPI UTrie2 * U_EXPORT2
utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
NewTrieAndStatus context;
UChar lead;
if(U_FAILURE(*pErrorCode)) {
return NULL;
}
if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
if(other->newTrie!=NULL && !other->newTrie->isCompacted) {
return utrie2_clone(other, pErrorCode);
}
context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return NULL;
}
context.exclusiveLimit=FALSE;
context.errorCode=*pErrorCode;
utrie2_enum(other, NULL, copyEnumRange, &context);
*pErrorCode=context.errorCode;
for(lead=0xd800; lead<0xdc00; ++lead) {
uint32_t value;
if(other->data32==NULL) {
value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
} else {
value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
}
if(value!=other->initialValue) {
utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
}
}
if(U_FAILURE(*pErrorCode)) {
utrie2_close(context.trie);
context.trie=NULL;
}
return context.trie;
}
U_CAPI UTrie2 * U_EXPORT2
utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
NewTrieAndStatus context;
UChar lead;
if(U_FAILURE(*pErrorCode)) {
return NULL;
}
if(trie1==NULL) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return NULL;
}
context.exclusiveLimit=TRUE;
context.errorCode=*pErrorCode;
utrie_enum(trie1, NULL, copyEnumRange, &context);
*pErrorCode=context.errorCode;
for(lead=0xd800; lead<0xdc00; ++lead) {
uint32_t value;
if(trie1->data32==NULL) {
value=UTRIE_GET16_FROM_LEAD(trie1, lead);
} else {
value=UTRIE_GET32_FROM_LEAD(trie1, lead);
}
if(value!=trie1->initialValue) {
utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
}
}
if(U_SUCCESS(*pErrorCode)) {
utrie2_freeze(context.trie,
trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
pErrorCode);
}
#ifdef UTRIE2_DEBUG
if(U_SUCCESS(*pErrorCode)) {
utrie_printLengths(trie1);
utrie2_printLengths(context.trie, "fromUTrie");
}
#endif
if(U_FAILURE(*pErrorCode)) {
utrie2_close(context.trie);
context.trie=NULL;
}
return context.trie;
}
static inline UBool
isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
int32_t i2, block;
if(U_IS_LEAD(c) && forLSCP) {
i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
(c>>UTRIE2_SHIFT_2);
} else {
i2=trie->index1[c>>UTRIE2_SHIFT_1]+
((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
}
block=trie->index2[i2];
return (UBool)(block==trie->dataNullOffset);
}
static int32_t
allocIndex2Block(UNewTrie2 *trie) {
int32_t newBlock, newTop;
newBlock=trie->index2Length;
newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
if(newTop>UPRV_LENGTHOF(trie->index2)) {
return -1;
}
trie->index2Length=newTop;
uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
return newBlock;
}
static int32_t
getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
int32_t i1, i2;
if(U_IS_LEAD(c) && forLSCP) {
return UTRIE2_LSCP_INDEX_2_OFFSET;
}
i1=c>>UTRIE2_SHIFT_1;
i2=trie->index1[i1];
if(i2==trie->index2NullOffset) {
i2=allocIndex2Block(trie);
if(i2<0) {
return -1;
}
trie->index1[i1]=i2;
}
return i2;
}
static int32_t
allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
int32_t newBlock, newTop;
if(trie->firstFreeBlock!=0) {
newBlock=trie->firstFreeBlock;
trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];
} else {
newBlock=trie->dataLength;
newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
if(newTop>trie->dataCapacity) {
int32_t capacity;
uint32_t *data;
if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
} else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
capacity=UNEWTRIE2_MAX_DATA_LENGTH;
} else {
return -1;
}
data=(uint32_t *)uprv_malloc(capacity*4);
if(data==NULL) {
return -1;
}
uprv_memcpy(data, trie->data, trie->dataLength*4);
uprv_free(trie->data);
trie->data=data;
trie->dataCapacity=capacity;
}
trie->dataLength=newTop;
}
uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
trie->map[newBlock>>UTRIE2_SHIFT_2]=0;
return newBlock;
}
static void
releaseDataBlock(UNewTrie2 *trie, int32_t block) {
trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;
trie->firstFreeBlock=block;
}
static inline UBool
isWritableBlock(UNewTrie2 *trie, int32_t block) {
return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]);
}
static inline void
setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
int32_t oldBlock;
++trie->map[block>>UTRIE2_SHIFT_2];
oldBlock=trie->index2[i2];
if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) {
releaseDataBlock(trie, oldBlock);
}
trie->index2[i2]=block;
}
static int32_t
getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
int32_t i2, oldBlock, newBlock;
i2=getIndex2Block(trie, c, forLSCP);
if(i2<0) {
return -1;
}
i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
oldBlock=trie->index2[i2];
if(isWritableBlock(trie, oldBlock)) {
return oldBlock;
}
newBlock=allocDataBlock(trie, oldBlock);
if(newBlock<0) {
return -1;
}
setIndex2Entry(trie, i2, newBlock);
return newBlock;
}
static void
set32(UNewTrie2 *trie,
UChar32 c, UBool forLSCP, uint32_t value,
UErrorCode *pErrorCode) {
int32_t block;
if(trie==NULL || trie->isCompacted) {
*pErrorCode=U_NO_WRITE_PERMISSION;
return;
}
block=getDataBlock(trie, c, forLSCP);
if(block<0) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
}
U_CAPI void U_EXPORT2
utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
if(U_FAILURE(*pErrorCode)) {
return;
}
if((uint32_t)c>0x10ffff) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
set32(trie->newTrie, c, TRUE, value, pErrorCode);
}
U_CAPI void U_EXPORT2
utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
UChar32 c, uint32_t value,
UErrorCode *pErrorCode) {
if(U_FAILURE(*pErrorCode)) {
return;
}
if(!U_IS_LEAD(c)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
set32(trie->newTrie, c, FALSE, value, pErrorCode);
}
static void
writeBlock(uint32_t *block, uint32_t value) {
uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;
while(block<limit) {
*block++=value;
}
}
static void
fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
uint32_t value, uint32_t initialValue, UBool overwrite) {
uint32_t *pLimit;
pLimit=block+limit;
block+=start;
if(overwrite) {
while(block<pLimit) {
*block++=value;
}
} else {
while(block<pLimit) {
if(*block==initialValue) {
*block=value;
}
++block;
}
}
}
U_CAPI void U_EXPORT2
utrie2_setRange32(UTrie2 *trie,
UChar32 start, UChar32 end,
uint32_t value, UBool overwrite,
UErrorCode *pErrorCode) {
UNewTrie2 *newTrie;
int32_t block, rest, repeatBlock;
UChar32 limit;
if(U_FAILURE(*pErrorCode)) {
return;
}
if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
newTrie=trie->newTrie;
if(newTrie==NULL || newTrie->isCompacted) {
*pErrorCode=U_NO_WRITE_PERMISSION;
return;
}
if(!overwrite && value==newTrie->initialValue) {
return;
}
limit=end+1;
if(start&UTRIE2_DATA_MASK) {
UChar32 nextStart;
block=getDataBlock(newTrie, start, TRUE);
if(block<0) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK;
if(nextStart<=limit) {
fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
value, newTrie->initialValue, overwrite);
start=nextStart;
} else {
fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
value, newTrie->initialValue, overwrite);
return;
}
}
rest=limit&UTRIE2_DATA_MASK;
limit&=~UTRIE2_DATA_MASK;
if(value==newTrie->initialValue) {
repeatBlock=newTrie->dataNullOffset;
} else {
repeatBlock=-1;
}
while(start<limit) {
int32_t i2;
UBool setRepeatBlock=FALSE;
if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) {
start+=UTRIE2_DATA_BLOCK_LENGTH;
continue;
}
i2=getIndex2Block(newTrie, start, TRUE);
if(i2<0) {
*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
return;
}
i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
block=newTrie->index2[i2];
if(isWritableBlock(newTrie, block)) {
if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
setRepeatBlock=TRUE;
} else {
fillBlock(newTrie->data+block,
0, UTRIE2_DATA_BLOCK_LENGTH,
value, newTrie->initialValue, overwrite);
}
} else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
setRepeatBlock=TRUE;
}
if(setRepeatBlock) {
if(repeatBlock>=0) {
setIndex2Entry(newTrie, i2, repeatBlock);
} else {
repeatBlock=getDataBlock(newTrie, start, TRUE);
if(repeatBlock<0) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
writeBlock(newTrie->data+repeatBlock, value);
}
}
start+=UTRIE2_DATA_BLOCK_LENGTH;
}
if(rest>0) {
block=getDataBlock(newTrie, start, TRUE);
if(block<0) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
}
return;
}
static inline UBool
equal_int32(const int32_t *s, const int32_t *t, int32_t length) {
while(length>0 && *s==*t) {
++s;
++t;
--length;
}
return (UBool)(length==0);
}
static inline UBool
equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
while(length>0 && *s==*t) {
++s;
++t;
--length;
}
return (UBool)(length==0);
}
static int32_t
findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
int32_t block;
index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
for(block=0; block<=index2Length; ++block) {
if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
return block;
}
}
return -1;
}
static int32_t
findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
int32_t block;
dataLength-=blockLength;
for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
if(equal_uint32(data+block, data+otherBlock, blockLength)) {
return block;
}
}
return -1;
}
static UChar32
findHighStart(UNewTrie2 *trie, uint32_t highValue) {
const uint32_t *data32;
uint32_t value, initialValue;
UChar32 c, prev;
int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
data32=trie->data;
initialValue=trie->initialValue;
index2NullOffset=trie->index2NullOffset;
nullBlock=trie->dataNullOffset;
if(highValue==initialValue) {
prevI2Block=index2NullOffset;
prevBlock=nullBlock;
} else {
prevI2Block=-1;
prevBlock=-1;
}
prev=0x110000;
i1=UNEWTRIE2_INDEX_1_LENGTH;
c=prev;
while(c>0) {
i2Block=trie->index1[--i1];
if(i2Block==prevI2Block) {
c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
continue;
}
prevI2Block=i2Block;
if(i2Block==index2NullOffset) {
if(highValue!=initialValue) {
return c;
}
c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
} else {
for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
block=trie->index2[i2Block+ --i2];
if(block==prevBlock) {
c-=UTRIE2_DATA_BLOCK_LENGTH;
continue;
}
prevBlock=block;
if(block==nullBlock) {
if(highValue!=initialValue) {
return c;
}
c-=UTRIE2_DATA_BLOCK_LENGTH;
} else {
for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
value=data32[block+ --j];
if(value!=highValue) {
return c;
}
--c;
}
}
}
}
}
return 0;
}
static void
compactData(UNewTrie2 *trie) {
int32_t start, newStart, movedStart;
int32_t blockLength, overlap;
int32_t i, mapIndex, blockCount;
newStart=UTRIE2_DATA_START_OFFSET;
for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
trie->map[i]=start;
}
blockLength=64;
blockCount=blockLength>>UTRIE2_SHIFT_2;
for(start=newStart; start<trie->dataLength;) {
if(start==UNEWTRIE2_DATA_0800_OFFSET) {
blockLength=UTRIE2_DATA_BLOCK_LENGTH;
blockCount=1;
}
if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
start+=blockLength;
continue;
}
if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
>=0
) {
for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
trie->map[mapIndex++]=movedStart;
movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
}
start+=blockLength;
continue;
}
for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
overlap-=UTRIE2_DATA_GRANULARITY) {}
if(overlap>0 || newStart<start) {
movedStart=newStart-overlap;
for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
trie->map[mapIndex++]=movedStart;
movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
}
start+=overlap;
for(i=blockLength-overlap; i>0; --i) {
trie->data[newStart++]=trie->data[start++];
}
} else {
for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
trie->map[mapIndex++]=start;
start+=UTRIE2_DATA_BLOCK_LENGTH;
}
newStart=start;
}
}
for(i=0; i<trie->index2Length; ++i) {
if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
i+=UNEWTRIE2_INDEX_GAP_LENGTH;
}
trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
}
trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
trie->data[newStart++]=trie->initialValue;
}
#ifdef UTRIE2_DEBUG
printf("compacting UTrie2: count of 32-bit data words %lu->%lu\n",
(long)trie->dataLength, (long)newStart);
#endif
trie->dataLength=newStart;
}
static void
compactIndex2(UNewTrie2 *trie) {
int32_t i, start, newStart, movedStart, overlap;
newStart=UTRIE2_INDEX_2_BMP_LENGTH;
for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
trie->map[i]=start;
}
newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
>=0
) {
trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
continue;
}
for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
--overlap) {}
if(overlap>0 || newStart<start) {
trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
start+=overlap;
for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
trie->index2[newStart++]=trie->index2[start++];
}
} else {
trie->map[start>>UTRIE2_SHIFT_1_2]=start;
start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
newStart=start;
}
}
for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
}
trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT;
}
#ifdef UTRIE2_DEBUG
printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n",
(long)trie->index2Length, (long)newStart);
#endif
trie->index2Length=newStart;
}
static void
compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
UNewTrie2 *newTrie;
UChar32 highStart, suppHighStart;
uint32_t highValue;
newTrie=trie->newTrie;
highValue=utrie2_get32(trie, 0x10ffff);
highStart=findHighStart(newTrie, highValue);
highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
if(highStart==0x110000) {
highValue=trie->errorValue;
}
trie->highStart=newTrie->highStart=highStart;
#ifdef UTRIE2_DEBUG
printf("UTrie2: highStart U+%04lx highValue 0x%lx initialValue 0x%lx\n",
(long)highStart, (long)highValue, (long)trie->initialValue);
#endif
if(highStart<0x110000) {
suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
}
compactData(newTrie);
if(highStart>0x10000) {
compactIndex2(newTrie);
#ifdef UTRIE2_DEBUG
} else {
printf("UTrie2: highStart U+%04lx count of 16-bit index-2 words %lu->%lu\n",
(long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);
#endif
}
newTrie->data[newTrie->dataLength++]=highValue;
while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
newTrie->data[newTrie->dataLength++]=trie->initialValue;
}
newTrie->isCompacted=TRUE;
}
#define UTRIE2_MAX_INDEX_LENGTH 0xffff
#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
U_CAPI void U_EXPORT2
utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
UNewTrie2 *newTrie;
UTrie2Header *header;
uint32_t *p;
uint16_t *dest16;
int32_t i, length;
int32_t allIndexesLength;
int32_t dataMove;
UChar32 highStart;
if(U_FAILURE(*pErrorCode)) {
return;
}
if( trie==NULL ||
valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
newTrie=trie->newTrie;
if(newTrie==NULL) {
UTrie2ValueBits frozenValueBits=
trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
if(valueBits!=frozenValueBits) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
}
return;
}
if(!newTrie->isCompacted) {
compactTrie(trie, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
}
highStart=trie->highStart;
if(highStart<=0x10000) {
allIndexesLength=UTRIE2_INDEX_1_OFFSET;
} else {
allIndexesLength=newTrie->index2Length;
}
if(valueBits==UTRIE2_16_VALUE_BITS) {
dataMove=allIndexesLength;
} else {
dataMove=0;
}
if(
allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
(dataMove+newTrie->dataNullOffset)>0xffff ||
(dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
(dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
length=sizeof(UTrie2Header)+allIndexesLength*2;
if(valueBits==UTRIE2_16_VALUE_BITS) {
length+=newTrie->dataLength*2;
} else {
length+=newTrie->dataLength*4;
}
trie->memory=uprv_malloc(length);
if(trie->memory==NULL) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
trie->length=length;
trie->isMemoryOwned=TRUE;
trie->indexLength=allIndexesLength;
trie->dataLength=newTrie->dataLength;
if(highStart<=0x10000) {
trie->index2NullOffset=0xffff;
} else {
trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset;
}
trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
header=(UTrie2Header *)trie->memory;
header->signature=UTRIE2_SIG;
header->options=(uint16_t)valueBits;
header->indexLength=(uint16_t)trie->indexLength;
header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
header->index2NullOffset=trie->index2NullOffset;
header->dataNullOffset=trie->dataNullOffset;
header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
dest16=(uint16_t *)(header+1);
trie->index=dest16;
p=(uint32_t *)newTrie->index2;
for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
*dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
}
for(i=0; i<(0xc2-0xc0); ++i) {
*dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
}
for(; i<(0xe0-0xc0); ++i) {
*dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
}
if(highStart>0x10000) {
int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
for(i=index1Length; i>0; --i) {
*dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
}
p=(uint32_t *)newTrie->index2+index2Offset;
for(i=newTrie->index2Length-index2Offset; i>0; --i) {
*dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
}
}
switch(valueBits) {
case UTRIE2_16_VALUE_BITS:
trie->data16=dest16;
trie->data32=NULL;
p=newTrie->data;
for(i=newTrie->dataLength; i>0; --i) {
*dest16++=(uint16_t)*p++;
}
break;
case UTRIE2_32_VALUE_BITS:
trie->data16=NULL;
trie->data32=(uint32_t *)dest16;
uprv_memcpy(dest16, newTrie->data, newTrie->dataLength*4);
break;
default:
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
uprv_free(newTrie->data);
uprv_free(newTrie);
trie->newTrie=NULL;
}
U_CAPI int32_t U_EXPORT2
utrie2_swapAnyVersion(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode) {
if(U_SUCCESS(*pErrorCode)) {
switch(utrie2_getVersion(inData, length, TRUE)) {
case 1:
return utrie_swap(ds, inData, length, outData, pErrorCode);
case 2:
return utrie2_swap(ds, inData, length, outData, pErrorCode);
default:
*pErrorCode=U_INVALID_FORMAT_ERROR;
return 0;
}
}
return 0;
}