#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/udata.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h"
#include "unicode/uiter.h"
#include "unicode/uniset.h"
#include "unicode/usetiter.h"
#include "unicode/unorm.h"
#include "ucln_cmn.h"
#include "unormimp.h"
#include "ucase.h"
#include "cmemory.h"
#include "umutex.h"
#include "utrie.h"
#include "unicode/uset.h"
#include "udataswp.h"
#include "putilimp.h"
/* Number of elements of a real array (not a pointer), as an int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
U_NAMESPACE_USE
/* Size, in UChars, of the on-stack scratch buffer that _compose() starts with. */
enum {
_STACK_BUFFER_CAPACITY=100
};
/*
 * Layout of the "options" word as it is used in this file:
 * - the low 7 bits (_NORM_OPTIONS_SETS_MASK) select an exclusion set and are
 *   used directly as the index into nxCache[];
 *   they are the union of _NORM_OPTIONS_NX_MASK and _NORM_OPTIONS_UNICODE_MASK
 * - _NORM_OPTIONS_COMPAT makes _composePart() decompose with compat=TRUE
 * - _NORM_OPTIONS_COMPOSE_CONTIGUOUS makes _recompose() forget the current
 *   starter after any character with a non-zero combining class
 */
enum {
_NORM_OPTIONS_NX_MASK=0x1f,
_NORM_OPTIONS_UNICODE_MASK=0x60,
_NORM_OPTIONS_SETS_MASK=0x7f,
_NORM_OPTIONS_UNICODE_SHIFT=5,
_NORM_OPTIONS_COMPAT=0x1000,
_NORM_OPTIONS_COMPOSE_CONTIGUOUS=0x2000
};
U_CDECL_BEGIN
/*
 * Tests whether c lies in the Hangul syllable block at an offset that is a
 * multiple of JAMO_T_COUNT (a syllable without a trailing Jamo T).
 */
static inline UBool
isHangulWithoutJamoT(UChar c) {
    /* The subtraction is done in UChar, so values below HANGUL_BASE wrap to a large offset. */
    const UChar syllableOffset=(UChar)(c-HANGUL_BASE);
    if(syllableOffset>=HANGUL_COUNT) {
        return FALSE;
    }
    return (UBool)((syllableOffset%JAMO_T_COUNT)==0);
}
/* A norm32 value is "regular" when it is below the special range starting at _NORM_MIN_SPECIAL. */
static inline UBool
isNorm32Regular(uint32_t norm32) {
    return (UBool)(_NORM_MIN_SPECIAL>norm32);
}
/* Tests whether norm32 lies in the range [_NORM_MIN_SPECIAL, _NORM_SURROGATES_TOP). */
static inline UBool
isNorm32LeadSurrogate(uint32_t norm32) {
    if(norm32<_NORM_MIN_SPECIAL) {
        return FALSE;
    }
    return (UBool)(norm32<_NORM_SURROGATES_TOP);
}
/* Tests whether norm32 is at or above _NORM_MIN_HANGUL, the range used for Hangul/Jamo values. */
static inline UBool
isNorm32HangulOrJamo(uint32_t norm32) {
    return (UBool)!(norm32<_NORM_MIN_HANGUL);
}
/*
 * Given a norm32 that the caller already knows belongs to a Jamo V or T,
 * returns TRUE when it is below _NORM_JAMO_V_TOP.
 */
static inline UBool
isJamoVTNorm32JamoV(uint32_t norm32) {
    return (UBool)(_NORM_JAMO_V_TOP>norm32);
}
/*
 * Folding-offset callback installed on the main normalization trie.
 * For a lead-surrogate norm32 it returns the trie index offset of the
 * supplementary block encoded in the value; for anything else it returns 0.
 */
static int32_t U_CALLCONV
getFoldingNormOffset(uint32_t norm32) {
    if(!isNorm32LeadSurrogate(norm32)) {
        return 0;
    }
    /* Move the 10-bit block number from the "extra" field into index-offset position. */
    const int32_t shifted=(int32_t)norm32>>(_NORM_EXTRA_SHIFT-UTRIE_SURROGATE_BLOCK_BITS);
    const int32_t blockMask=0x3ff<<UTRIE_SURROGATE_BLOCK_BITS;
    return UTRIE_BMP_INDEX_LENGTH+(shifted&blockMask);
}
/*
 * Folding-offset callback installed on the auxiliary trie: the
 * _NORM_AUX_FNC_MASK bits of the lead-surrogate value, scaled by the
 * surrogate block size, give the index offset.
 */
static int32_t U_CALLCONV
getFoldingAuxOffset(uint32_t data) {
    const int32_t blockNumber=(int32_t)(data&_NORM_AUX_FNC_MASK);
    return blockNumber<<UTRIE_SURROGATE_BLOCK_BITS;
}
U_CDECL_END
/*
 * Selects the source of the normalization data.
 * Non-zero: the tables are compiled in by including unorm_props_data.c.
 * Zero: the "unorm.icu" data item is opened at run time and the globals in
 * the #else branch below are filled in from it.
 */
#define UNORM_HARDCODE_DATA 1
#if UNORM_HARDCODE_DATA
#include "unorm_props_data.c"
/* NOTE(review): presumably the compiled-in data is always format version 2.2 or later -- confirm against unorm_props_data.c. */
static const UBool formatVersion_2_2=TRUE;
#else
#define DATA_NAME "unorm"
#define DATA_TYPE "icu"
/* Handle of the opened data item; non-NULL once loading has succeeded. */
static UDataMemory *normData=NULL;
/* Error code remembered from the load attempt; handed back by _haveData() after a failed load. */
static UErrorCode dataErrorCode=U_ZERO_ERROR;
/* Load state: 0 = not tried yet, 1 = data loaded, -1 = loading failed. */
static int8_t haveNormData=0;
/* Copy of the int32_t index array at the start of the data item. */
static int32_t indexes[_NORM_INDEX_TOP]={ 0 };
static UTrie normTrie={ 0,0,0,0,0,0,0 }, fcdTrie={ 0,0,0,0,0,0,0 }, auxTrie={ 0,0,0,0,0,0,0 };
/* Pointers into the loaded data: 16-bit extra data, the combining table, and the serialized sets. */
static const uint16_t *extraData=NULL,
*combiningTable=NULL,
*canonStartSets=NULL;
/* Format and data versions captured by isAcceptable() from the data header. */
static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
static UBool formatVersion_2_1=FALSE, formatVersion_2_2=FALSE;
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
#endif
/*
 * Lazily built exclusion sets, indexed by (options & _NORM_OPTIONS_SETS_MASK).
 * Entries are owned by this array and deleted in unorm_cleanup().
 */
static UnicodeSet *nxCache[_NORM_OPTIONS_SETS_MASK+1]={ NULL };
U_CDECL_BEGIN
/*
 * Library cleanup callback: releases the run-time loaded data (if that build
 * mode is active), resets the load state, and frees every cached exclusion set.
 * Always returns TRUE.
 */
static UBool U_CALLCONV
unorm_cleanup(void) {
#if !UNORM_HARDCODE_DATA
    if(normData!=NULL) {
        udata_close(normData);
        normData=NULL;
    }
    dataErrorCode=U_ZERO_ERROR;
    haveNormData=0;
#endif
    UnicodeSet **slot=nxCache;
    UnicodeSet **const slotLimit=nxCache+LENGTHOF(nxCache);
    for(; slot!=slotLimit; ++slot) {
        /* deleting a NULL pointer is a no-op, so empty slots need no special case */
        delete *slot;
        *slot=NULL;
    }
    return TRUE;
}
#if !UNORM_HARDCODE_DATA
/*
 * Acceptance callback passed to udata_openChoice().
 * Accepts a data item only if its header is large enough, matches the
 * platform endianness and charset family, carries the data format bytes
 * 0x4e 0x6f 0x72 0x6d ("Norm"), has major format version 2, and was built
 * with this library's UTRIE_SHIFT/UTRIE_INDEX_SHIFT.
 * On acceptance the format and data versions are copied into the file-level
 * formatVersion/dataVersion arrays.
 */
static UBool U_CALLCONV
isAcceptable(void * ,
             const char * , const char * ,
             const UDataInfo *pInfo) {
    if(pInfo->size<20) {
        return FALSE;
    }
    if( pInfo->isBigEndian!=U_IS_BIG_ENDIAN ||
        pInfo->charsetFamily!=U_CHARSET_FAMILY
    ) {
        return FALSE;
    }
    if( pInfo->dataFormat[0]!=0x4e ||
        pInfo->dataFormat[1]!=0x6f ||
        pInfo->dataFormat[2]!=0x72 ||
        pInfo->dataFormat[3]!=0x6d
    ) {
        return FALSE;
    }
    if( pInfo->formatVersion[0]!=2 ||
        pInfo->formatVersion[2]!=UTRIE_SHIFT ||
        pInfo->formatVersion[3]!=UTRIE_INDEX_SHIFT
    ) {
        return FALSE;
    }

    uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
    uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
    return TRUE;
}
#endif
/*
 * utrie_enum() range callback: adds the start of each enumerated range to the
 * set behind the USetAdder passed as context. The range limit and value are
 * ignored. Always returns TRUE.
 */
static UBool U_CALLCONV
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 , uint32_t ) {
    const USetAdder *adder=static_cast<const USetAdder *>(context);
    adder->add(adder->set, start);
    return TRUE;
}
U_CDECL_END
#if !UNORM_HARDCODE_DATA
/*
 * Loads the "unorm.icu" data item on first use and publishes it in the
 * file-level globals.
 *
 * @param errorCode in/out ICU error code; nothing is attempted if it already
 *                  indicates failure
 * @return the resulting haveNormData state: 1 if data is available, -1 if
 *         loading failed, 0 if nothing was attempted because of errorCode
 */
static int8_t
loadNormData(UErrorCode &errorCode) {
if(haveNormData==0) {
/* Unserialize into local tries first; globals are only written under the mutex below. */
UTrie _normTrie={ 0,0,0,0,0,0,0 }, _fcdTrie={ 0,0,0,0,0,0,0 }, _auxTrie={ 0,0,0,0,0,0,0 };
UDataMemory *data;
const int32_t *p=NULL;
const uint8_t *pb;
if(&errorCode==NULL || U_FAILURE(errorCode)) {
return 0;
}
data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errorCode);
dataErrorCode=errorCode;
if(U_FAILURE(errorCode)) {
return haveNormData=-1;
}
/* The item starts with _NORM_INDEX_TOP int32_t indexes, followed by the main trie. */
p=(const int32_t *)udata_getMemory(data);
pb=(const uint8_t *)(p+_NORM_INDEX_TOP);
utrie_unserialize(&_normTrie, pb, p[_NORM_INDEX_TRIE_SIZE], &errorCode);
_normTrie.getFoldingOffset=getFoldingNormOffset;
/* Skip the main trie, the extra data and the combining table (both counted in 16-bit units). */
pb+=p[_NORM_INDEX_TRIE_SIZE]+p[_NORM_INDEX_UCHAR_COUNT]*2+p[_NORM_INDEX_COMBINE_DATA_COUNT]*2;
/* The FCD and auxiliary tries are optional: a size of 0 means absent. */
if(p[_NORM_INDEX_FCD_TRIE_SIZE]!=0) {
utrie_unserialize(&_fcdTrie, pb, p[_NORM_INDEX_FCD_TRIE_SIZE], &errorCode);
}
pb+=p[_NORM_INDEX_FCD_TRIE_SIZE];
if(p[_NORM_INDEX_AUX_TRIE_SIZE]!=0) {
utrie_unserialize(&_auxTrie, pb, p[_NORM_INDEX_AUX_TRIE_SIZE], &errorCode);
_auxTrie.getFoldingOffset=getFoldingAuxOffset;
}
if(U_FAILURE(errorCode)) {
dataErrorCode=errorCode;
udata_close(data);
return haveNormData=-1;
}
/* Publish under the global mutex; if normData was already set, keep it and close our copy below. */
umtx_lock(NULL);
if(normData==NULL) {
normData=data;
data=NULL;
uprv_memcpy(&indexes, p, sizeof(indexes));
uprv_memcpy(&normTrie, &_normTrie, sizeof(UTrie));
uprv_memcpy(&fcdTrie, &_fcdTrie, sizeof(UTrie));
uprv_memcpy(&auxTrie, &_auxTrie, sizeof(UTrie));
} else {
p=(const int32_t *)udata_getMemory(normData);
}
/* Derive the table pointers from the data that is actually published. */
extraData=(uint16_t *)((uint8_t *)(p+_NORM_INDEX_TOP)+indexes[_NORM_INDEX_TRIE_SIZE]);
combiningTable=extraData+indexes[_NORM_INDEX_UCHAR_COUNT];
formatVersion_2_1=formatVersion[0]>2 || (formatVersion[0]==2 && formatVersion[1]>=1);
formatVersion_2_2=formatVersion[0]>2 || (formatVersion[0]==2 && formatVersion[1]>=2);
/* canonStartSets is only set for format version 2.1 and later; it follows the two optional tries. */
if(formatVersion_2_1) {
canonStartSets=combiningTable+
indexes[_NORM_INDEX_COMBINE_DATA_COUNT]+
(indexes[_NORM_INDEX_FCD_TRIE_SIZE]+indexes[_NORM_INDEX_AUX_TRIE_SIZE])/2;
}
haveNormData=1;
ucln_common_registerCleanup(UCLN_COMMON_UNORM, unorm_cleanup);
umtx_unlock(NULL);
/* data is still non-NULL only if normData was already set when we took the mutex */
if(data!=NULL) {
udata_close(data);
}
}
return haveNormData;
}
#endif
/*
 * Reports whether normalization data is usable.
 * With compiled-in data this is simply "errorCode is not a failure".
 * With run-time loading it triggers the load on first use and, after a failed
 * load, sets errorCode to the remembered load error.
 */
static inline UBool
_haveData(UErrorCode &errorCode) {
#if UNORM_HARDCODE_DATA
    return U_SUCCESS(errorCode);
#else
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    if(haveNormData>0) {
        return TRUE;
    }
    if(haveNormData==0) {
        /* first use: try to load now */
        return (UBool)(loadNormData(errorCode)>0);
    }
    /* an earlier load attempt failed */
    errorCode=dataErrorCode;
    return FALSE;
#endif
}
/*
 * C-linkage wrapper around _haveData().
 * pErrorCode must not be NULL; it is dereferenced unconditionally.
 */
U_CAPI UBool U_EXPORT2
unorm_haveData(UErrorCode *pErrorCode) {
    UErrorCode &status=*pErrorCode;
    return _haveData(status);
}
/*
 * Returns the index array of the FCD trie, or NULL if the normalization data
 * is not available (see *pErrorCode in that case).
 */
U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrie(UErrorCode *pErrorCode) {
    return _haveData(*pErrorCode) ? fcdTrie.index : NULL;
}
/* Looks up the 32-bit normalization value for one UTF-16 code unit in the main trie. */
static inline uint32_t
_getNorm32(UChar c) {
    const uint32_t props=UTRIE_GET32_FROM_LEAD(&normTrie, c);
    return props;
}
/*
 * Given the norm32 of a lead surrogate and the trail surrogate c2, looks up
 * the norm32 of the supplementary code point they form.
 * The index offset is derived from norm32 the same way getFoldingNormOffset() does it.
 */
static inline uint32_t
_getNorm32FromSurrogatePair(uint32_t norm32, UChar c2) {
    const uint32_t blockBits=
        (norm32>>(_NORM_EXTRA_SHIFT-UTRIE_SURROGATE_BLOCK_BITS))&
        (0x3ff<<UTRIE_SURROGATE_BLOCK_BITS);
    const uint32_t offset=UTRIE_BMP_INDEX_LENGTH+blockBits;
    return UTRIE_GET32_FROM_OFFSET_TRAIL(&normTrie, offset, c2);
}
/*
 * Looks up the norm32 for the code point starting at p.
 * The second code unit p[1] is only read when the lead unit's value has any
 * of the mask bits set and is a lead-surrogate value; the caller must
 * guarantee that p[1] is readable in that case.
 */
static inline uint32_t
_getNorm32(const UChar *p, uint32_t mask) {
    const uint32_t leadProps=_getNorm32(*p);
    if((leadProps&mask)==0 || !isNorm32LeadSurrogate(leadProps)) {
        return leadProps;
    }
    return _getNorm32FromSurrogatePair(leadProps, p[1]);
}
/* Looks up the 16-bit FCD value for one UTF-16 code unit in the FCD trie. */
static inline uint16_t
_getFCD16(UChar c) {
    const uint16_t fcd16=UTRIE_GET16_FROM_LEAD(&fcdTrie, c);
    return fcd16;
}
/*
 * Given the FCD value of a lead surrogate (used as the trie index offset) and
 * the trail surrogate c2, looks up the FCD value of the supplementary code point.
 */
static inline uint16_t
_getFCD16FromSurrogatePair(uint16_t fcd16, UChar c2) {
    const uint16_t pairFCD16=UTRIE_GET16_FROM_OFFSET_TRAIL(&fcdTrie, fcd16, c2);
    return pairFCD16;
}
/* Returns the address in extraData[] that the "extra" field of norm32 (bits above _NORM_EXTRA_SHIFT) points to. */
static inline const uint16_t *
_getExtraData(uint32_t norm32) {
    return &extraData[norm32>>_NORM_EXTRA_SHIFT];
}
#if 0
/*
 * Disabled alternative that derives the FCD value (lead cc in the high byte,
 * trail cc in the low byte) from the main normalization data instead of the
 * FCD trie.
 *
 * NOTE(review): _getNorm32() takes a UChar, so a supplementary c would be
 * truncated here; this needs attention before the code is ever enabled.
 */
static inline uint16_t
_getFCD16FromNormData(UChar32 c) {
    uint32_t norm32;
    /* Default to 0: a decomposition without a cc word has lead cc == trail cc == 0. */
    uint32_t fcd=0;

    norm32=_getNorm32(c);
    if((norm32&_NORM_QC_NFD) && isNorm32Regular(norm32)) {
        /* take the lead/trail cc pair stored with the decomposition, if there is one */
        const uint16_t *nfd=_getExtraData(norm32);
        if(*nfd&_NORM_DECOMP_FLAG_LENGTH_HAS_CC) {
            fcd=nfd[1];
        }
    } else {
        /* no decomposition: lead cc and trail cc are both the character's own cc */
        fcd=norm32&_NORM_CC_MASK;
        if(fcd!=0) {
            fcd|=fcd>>_NORM_CC_SHIFT;
        }
    }
    return (uint16_t)fcd;
}
#endif
/*
 * Returns the cached exclusion set for UNORM_NX_HANGUL (U+AC00..U+D7A3),
 * building and caching it on first use.
 * Sets errorCode to U_MEMORY_ALLOCATION_ERROR and returns NULL if the set
 * cannot be allocated.
 */
static const UnicodeSet *
internalGetNXHangul(UErrorCode &errorCode) {
    UBool alreadyCached;
    UMTX_CHECK(NULL, (UBool)(nxCache[UNORM_NX_HANGUL]!=NULL), alreadyCached);
    if(alreadyCached) {
        return nxCache[UNORM_NX_HANGUL];
    }

    UnicodeSet *fresh=new UnicodeSet(0xac00, 0xd7a3);
    if(fresh==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    fresh->compact();

    /* Install our set unless the slot was filled in the meantime. */
    umtx_lock(NULL);
    if(nxCache[UNORM_NX_HANGUL]==NULL) {
        nxCache[UNORM_NX_HANGUL]=fresh;
        fresh=NULL;
        ucln_common_registerCleanup(UCLN_COMMON_UNORM, unorm_cleanup);
    }
    umtx_unlock(NULL);

    /* non-NULL only if the slot was already occupied and our copy is redundant */
    delete fresh;
    return nxCache[UNORM_NX_HANGUL];
}
/*
 * Returns the exclusion set cached in nxCache[options], building it on first
 * use from the serialized set that canonStartSets[nxIndex] points to.
 *
 * If there is no set table, or the table has no non-empty set at nxIndex, the
 * current cache entry is returned unchanged (NULL if nothing was cached).
 * On a malformed serialized set errorCode becomes U_INVALID_FORMAT_ERROR; on
 * allocation failure U_MEMORY_ALLOCATION_ERROR; both return NULL.
 */
static const UnicodeSet *
internalGetSerializedNX(int32_t options, int32_t nxIndex, UErrorCode &errorCode) {
    UBool alreadyCached;
    UMTX_CHECK(NULL, (UBool)(nxCache[options]!=NULL), alreadyCached);

    if( alreadyCached ||
        canonStartSets==NULL ||
        canonStartSets[nxIndex]==0 ||
        canonStartSets[nxIndex+1]<=canonStartSets[nxIndex]
    ) {
        return nxCache[options];
    }

    /* The set occupies [canonStartSets[nxIndex], canonStartSets[nxIndex+1]) within the table. */
    const uint16_t *serialized=canonStartSets+canonStartSets[nxIndex];
    const int32_t serializedLength=canonStartSets[nxIndex+1]-canonStartSets[nxIndex];
    USerializedSet sset;
    if(!uset_getSerializedSet(&sset, serialized, serializedLength)) {
        errorCode=U_INVALID_FORMAT_ERROR;
        return NULL;
    }

    UnicodeSet *built=new UnicodeSet();
    if(built==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    UChar32 rangeStart, rangeEnd;
    int32_t rangeIndex=0;
    while(uset_getSerializedRange(&sset, rangeIndex, &rangeStart, &rangeEnd)) {
        built->add(rangeStart, rangeEnd);
        ++rangeIndex;
    }
    built->compact();

    /* Install our set unless the slot was filled in the meantime. */
    umtx_lock(NULL);
    if(nxCache[options]==NULL) {
        nxCache[options]=built;
        built=NULL;
        ucln_common_registerCleanup(UCLN_COMMON_UNORM, unorm_cleanup);
    }
    umtx_unlock(NULL);

    delete built;
    return nxCache[options];
}
/* Returns the UNORM_NX_CJK_COMPAT exclusion set, loading it from the serialized set table on first use. */
static const UnicodeSet *
internalGetNXCJKCompat(UErrorCode &errorCode) {
    const int32_t cacheSlot=UNORM_NX_CJK_COMPAT;
    const int32_t tableIndex=_NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET;
    return internalGetSerializedNX(cacheSlot, tableIndex, errorCode);
}
/*
 * Returns the exclusion set for the Unicode-version bits of options.
 * Only UNORM_UNICODE_3_2 is supported: no version bits yields NULL without an
 * error, any other value yields NULL with U_ILLEGAL_ARGUMENT_ERROR.
 */
static const UnicodeSet *
internalGetNXUnicode(uint32_t options, UErrorCode &errorCode) {
    options&=_NORM_OPTIONS_UNICODE_MASK;
    if(options==0) {
        return NULL;
    }
    if(options!=UNORM_UNICODE_3_2) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    const int32_t nxIndex=_NORM_SET_INDEX_NX_UNICODE32_OFFSET;
    return internalGetSerializedNX(options, nxIndex, errorCode);
}
/*
 * Returns the exclusion set for the set-selection bits of options, building
 * and caching it in nxCache[] on first use.
 * A request for a single component (Hangul only, CJK compatibility only, or a
 * Unicode version only) is delegated to that component's getter; any other
 * combination is the union of its components.
 * Returns NULL with errorCode set on failure.
 */
static const UnicodeSet *
internalGetNX(int32_t options, UErrorCode &errorCode) {
    options&=_NORM_OPTIONS_SETS_MASK;

    UBool alreadyCached;
    UMTX_CHECK(NULL, (UBool)(nxCache[options]!=NULL), alreadyCached);
    if(alreadyCached) {
        return nxCache[options];
    }

    /* single-component requests */
    if(options==UNORM_NX_HANGUL) {
        return internalGetNXHangul(errorCode);
    }
    if(options==UNORM_NX_CJK_COMPAT) {
        return internalGetNXCJKCompat(errorCode);
    }
    const UBool wantsUnicodeSet=(UBool)((options&_NORM_OPTIONS_UNICODE_MASK)!=0);
    if(wantsUnicodeSet && (options&_NORM_OPTIONS_NX_MASK)==0) {
        return internalGetNXUnicode(options, errorCode);
    }

    /* combination: build the union of the requested components */
    UnicodeSet *merged=new UnicodeSet();
    if(merged==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    const UnicodeSet *part;
    if((options&UNORM_NX_HANGUL)!=0) {
        part=internalGetNXHangul(errorCode);
        if(part!=NULL) {
            merged->addAll(*part);
        }
    }
    if((options&UNORM_NX_CJK_COMPAT)!=0) {
        part=internalGetNXCJKCompat(errorCode);
        if(part!=NULL) {
            merged->addAll(*part);
        }
    }
    if(wantsUnicodeSet) {
        part=internalGetNXUnicode(options, errorCode);
        if(part!=NULL) {
            merged->addAll(*part);
        }
    }
    if(U_FAILURE(errorCode)) {
        delete merged;
        return NULL;
    }
    merged->compact();

    /* Install our set unless the slot was filled in the meantime. */
    umtx_lock(NULL);
    if(nxCache[options]==NULL) {
        nxCache[options]=merged;
        merged=NULL;
        ucln_common_registerCleanup(UCLN_COMMON_UNORM, unorm_cleanup);
    }
    umtx_unlock(NULL);

    delete merged;
    return nxCache[options];
}
/*
 * Fast-path front end for internalGetNX(): returns NULL right away when
 * errorCode already indicates failure or when options selects no exclusion set.
 */
static inline const UnicodeSet *
getNX(int32_t options, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    options&=_NORM_OPTIONS_SETS_MASK;
    return options==0 ? NULL : internalGetNX(options, errorCode);
}
/*
 * Pointer-based wrapper around getNX().
 * pErrorCode must not be NULL; it is dereferenced unconditionally.
 */
U_CFUNC const UnicodeSet *
unorm_getNX(int32_t options, UErrorCode *pErrorCode) {
    UErrorCode &status=*pErrorCode;
    return getNX(options, status);
}
/* Tests whether code point c is excluded; a NULL set excludes nothing. */
static inline UBool
nx_contains(const UnicodeSet *nx, UChar32 c) {
    if(nx==NULL) {
        return FALSE;
    }
    return nx->contains(c);
}
/*
 * Tests whether the code point given as UTF-16 code units is excluded.
 * c2==0 means c is a single code unit; otherwise (c, c2) is a surrogate pair.
 * A NULL set excludes nothing.
 */
static inline UBool
nx_contains(const UnicodeSet *nx, UChar c, UChar c2) {
    if(nx==NULL) {
        return FALSE;
    }
    UChar32 codePoint;
    if(c2==0) {
        codePoint=c;
    } else {
        codePoint=U16_GET_SUPPLEMENTARY(c, c2);
    }
    return nx->contains(codePoint);
}
/*
 * Reads the decomposition that the "extra" field of norm32 points to.
 *
 * The first 16-bit unit is a header. When a compatibility decomposition is
 * requested (qcMask and norm32 both have _NORM_QC_NFKD) and the header is
 * >=0x100, the canonical part is skipped and the header's upper byte takes
 * over as header. If the resulting header has
 * _NORM_DECOMP_FLAG_LENGTH_HAS_CC, the next unit holds lead cc (high byte)
 * and trail cc (low byte); otherwise both are 0.
 *
 * @param length  out: number of UChars in the decomposition
 * @param cc      out: lead combining class of the decomposition
 * @param trailCC out: trail combining class of the decomposition
 * @return pointer to the first UChar of the decomposition
 */
static inline const UChar *
_decompose(uint32_t norm32, uint32_t qcMask, int32_t &length,
           uint8_t &cc, uint8_t &trailCC) {
    const UChar *cursor=(const UChar *)_getExtraData(norm32);
    int32_t header=*cursor++;

    if(header>=0x100 && (norm32&qcMask&_NORM_QC_NFKD)!=0) {
        /* step over the optional cc unit (bit 7) and the canonical decomposition */
        cursor+=((header>>7)&1)+(header&_NORM_DECOMP_LENGTH_MASK);
        header>>=8;
    }

    if((header&_NORM_DECOMP_FLAG_LENGTH_HAS_CC)!=0) {
        const UChar bothCCs=*cursor++;
        cc=(uint8_t)(bothCCs>>8);
        trailCC=(uint8_t)bothCCs;
    } else {
        cc=0;
        trailCC=0;
    }

    length=header&_NORM_DECOMP_LENGTH_MASK;
    return cursor;
}
/*
 * Reads the decomposition stored first at the "extra" field of norm32,
 * without the compatibility handling of the qcMask overload.
 *
 * @param length  out: number of UChars in the decomposition
 * @param cc      out: lead combining class (0 if none is stored)
 * @param trailCC out: trail combining class (0 if none is stored)
 * @return pointer to the first UChar of the decomposition
 */
static inline const UChar *
_decompose(uint32_t norm32, int32_t &length,
           uint8_t &cc, uint8_t &trailCC) {
    const UChar *cursor=(const UChar *)_getExtraData(norm32);
    const int32_t header=*cursor++;

    if((header&_NORM_DECOMP_FLAG_LENGTH_HAS_CC)!=0) {
        /* one unit holding lead cc in the high byte and trail cc in the low byte */
        const UChar bothCCs=*cursor++;
        cc=(uint8_t)(bothCCs>>8);
        trailCC=(uint8_t)bothCCs;
    } else {
        cc=0;
        trailCC=0;
    }

    length=header&_NORM_DECOMP_LENGTH_MASK;
    return cursor;
}
/*
 * Returns the canonical decomposition of c, or NULL if c has none.
 *
 * Hangul syllables are decomposed arithmetically into buffer (2 or 3 Jamo)
 * and buffer is returned; for all other characters the returned pointer
 * refers to the internal data and buffer is not touched.
 *
 * @param c       code point to decompose
 * @param buffer  caller-provided scratch space, used only for Hangul
 * @param pLength out: length of the decomposition; only set when the result is non-NULL
 */
U_CFUNC const UChar *
unorm_getCanonicalDecomposition(UChar32 c, UChar buffer[4], int32_t *pLength) {
    /* below this threshold no character has the NFD quick-check bit set */
    if(c<indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE]) {
        return NULL;
    }

    uint32_t props;
    UTRIE_GET32(&normTrie, c, props);
    if((props&_NORM_QC_NFD)==0) {
        return NULL;
    }

    if(!isNorm32HangulOrJamo(props)) {
        uint8_t leadCC, trailCC;
        return _decompose(props, *pLength, leadCC, trailCC);
    }

    /* Hangul syllable: split the syllable index into L, V and optional T */
    const UChar32 syllableIndex=c-HANGUL_BASE;
    const UChar jamoT=(UChar)(syllableIndex%JAMO_T_COUNT);
    const UChar32 lvIndex=syllableIndex/JAMO_T_COUNT;
    if(jamoT>0) {
        buffer[2]=(UChar)(JAMO_T_BASE+jamoT);
        *pLength=3;
    } else {
        *pLength=2;
    }
    buffer[0]=(UChar)(JAMO_L_BASE+lvIndex/JAMO_V_COUNT);
    buffer[1]=(UChar)(JAMO_V_BASE+lvIndex%JAMO_V_COUNT);
    return buffer;
}
/*
 * Reads the next code point at p (advancing p) and returns its combining class.
 *
 * @param p     in/out: current position; moved past the code point that was read
 * @param limit end of the text
 * @param c     out: the (first) code unit read
 * @param c2    out: the trail surrogate if a pair with non-zero cc data was read, else 0
 * @return the combining class; 0 also for an unpaired lead surrogate
 */
static inline uint8_t
_getNextCC(const UChar *&p, const UChar *limit, UChar &c, UChar &c2) {
    c=*p++;
    c2=0;

    uint32_t props=_getNorm32(c);
    if((props&_NORM_CC_MASK)==0) {
        return 0;
    }

    if(isNorm32LeadSurrogate(props)) {
        /* need the trail surrogate to find the real value */
        if(p==limit) {
            return 0;
        }
        const UChar trail=*p;
        if(!UTF_IS_SECOND_SURROGATE(trail)) {
            return 0;
        }
        c2=trail;
        ++p;
        props=_getNorm32FromSurrogatePair(props, c2);
    }
    return (uint8_t)(props>>_NORM_CC_SHIFT);
}
/*
 * Reads the code point that ends just before src (moving src back over it)
 * and returns its norm32.
 *
 * @param start beginning of the text; src is never moved before it
 * @param src   in/out: current position; moved back over the code point read
 * @param minC  code units below this value yield 0 without a trie lookup
 * @param mask  for a surrogate pair, 0 is returned unless the lead
 *              surrogate's norm32 has at least one of these bits
 * @param c     out: the code unit read last (the trail surrogate for a pair)
 * @param c2    out: the lead surrogate if a pair was read, else 0
 * @return the norm32 value; 0 for unpaired surrogates
 */
static inline uint32_t
_getPrevNorm32(const UChar *start, const UChar *&src,
               uint32_t minC, uint32_t mask,
               UChar &c, UChar &c2) {
    c=*--src;
    c2=0;

    if(c<minC) {
        return 0;
    }
    if(!UTF_IS_SURROGATE(c)) {
        return _getNorm32(c);
    }
    if(UTF_IS_SURROGATE_FIRST(c)) {
        /* a lead surrogate at the end of a backward step is unpaired */
        return 0;
    }
    if(src==start) {
        /* trail surrogate with nothing before it */
        return 0;
    }

    const UChar before=*(src-1);
    if(!UTF_IS_FIRST_SURROGATE(before)) {
        return 0;
    }
    c2=before;
    --src;

    const uint32_t leadProps=_getNorm32(c2);
    if((leadProps&mask)==0) {
        return 0;
    }
    return _getNorm32FromSurrogatePair(leadProps, c);
}
/*
 * Moves p back over one code point (not before start) and returns that code
 * point's combining class.
 */
static inline uint8_t
_getPrevCC(const UChar *start, const UChar *&p) {
    UChar unit, otherUnit;
    const uint32_t props=
        _getPrevNorm32(start, p, _NORM_MIN_WITH_LEAD_CC, _NORM_CC_MASK, unit, otherUnit);
    return (uint8_t)(props>>_NORM_CC_SHIFT);
}
/*
 * Tests whether a character with the given norm32 is "safe" for
 * decomposition: either it has none of the ccOrQCMask bits, or its lead
 * combining class (taken from its decomposition if it has one under
 * decompQCMask, else from norm32 itself) is 0.
 */
static inline UBool
_isNFDSafe(uint32_t norm32, uint32_t ccOrQCMask, uint32_t decompQCMask) {
    if((norm32&ccOrQCMask)==0) {
        return TRUE;
    }
    if(!isNorm32Regular(norm32) || (norm32&decompQCMask)==0) {
        /* no decomposition to consult: use the character's own cc */
        return (UBool)((norm32&_NORM_CC_MASK)==0);
    }

    int32_t decompLength;
    uint8_t leadCC, trailCC;
    _decompose(norm32, decompQCMask, decompLength, leadCC, trailCC);
    return (UBool)(leadCC==0);
}
/*
 * Tests whether a character with the given norm32 is a "true starter":
 * either it has none of the ccOrQCMask bits, or it decomposes (under
 * decompQCMask) to a sequence whose lead cc is 0 and whose first code point
 * has none of the quick-check bits of ccOrQCMask.
 */
static inline UBool
_isTrueStarter(uint32_t norm32, uint32_t ccOrQCMask, uint32_t decompQCMask) {
    if((norm32&ccOrQCMask)==0) {
        return TRUE;
    }
    if((norm32&decompQCMask)==0) {
        return FALSE;
    }

    int32_t decompLength;
    uint8_t leadCC, trailCC;
    const UChar *decomp=_decompose(norm32, decompQCMask, decompLength, leadCC, trailCC);
    if(leadCC!=0) {
        return FALSE;
    }

    /* the decomposition must itself begin with a character that passes the quick check */
    const uint32_t qcMask=ccOrQCMask&_NORM_QC_MASK;
    return (UBool)((_getNorm32(decomp, qcMask)&qcMask)==0);
}
/*
 * Returns the canonical combining class of c, read from the main trie.
 * In the run-time loading build, 0 is returned when the data is unavailable.
 */
U_CAPI uint8_t U_EXPORT2
u_getCombiningClass(UChar32 c) {
#if !UNORM_HARDCODE_DATA
    UErrorCode errorCode=U_ZERO_ERROR;
    if(!_haveData(errorCode)) {
        return 0;
    }
#endif
    uint32_t props;
    UTRIE_GET32(&normTrie, c, props);
    return (uint8_t)(props>>_NORM_CC_SHIFT);
}
/*
 * Tests the _NORM_AUX_COMP_EX_MASK bits of c's auxiliary trie value.
 * Returns FALSE when there is no auxiliary trie (or, in the run-time loading
 * build, no data at all).
 */
U_CFUNC UBool U_EXPORT2
unorm_internalIsFullCompositionExclusion(UChar32 c) {
#if !UNORM_HARDCODE_DATA
    UErrorCode errorCode=U_ZERO_ERROR;
    if(!_haveData(errorCode)) {
        return FALSE;
    }
#endif
    if(auxTrie.index==NULL) {
        return FALSE;
    }
    uint16_t auxValue;
    UTRIE_GET16(&auxTrie, c, auxValue);
    return (UBool)((auxValue&_NORM_AUX_COMP_EX_MASK)!=0);
}
/*
 * Returns TRUE when c's auxiliary trie value has none of the
 * _NORM_AUX_UNSAFE_MASK bits set.
 * Returns FALSE when there is no auxiliary trie (or, in the run-time loading
 * build, no data at all).
 */
U_CFUNC UBool U_EXPORT2
unorm_isCanonSafeStart(UChar32 c) {
#if !UNORM_HARDCODE_DATA
    UErrorCode errorCode=U_ZERO_ERROR;
    if(!_haveData(errorCode)) {
        return FALSE;
    }
#endif
    if(auxTrie.index==NULL) {
        return FALSE;
    }
    uint16_t auxValue;
    UTRIE_GET16(&auxTrie, c, auxValue);
    return (UBool)((auxValue&_NORM_AUX_UNSAFE_MASK)==0);
}
/*
 * Copies the version of the normalization data into *versionInfo.
 *
 * @param versionInfo out: receives the data version; must not be NULL
 * @param pErrorCode  in/out ICU error code. Nothing is done if it is NULL or
 *                    already indicates failure. It is set to
 *                    U_ILLEGAL_ARGUMENT_ERROR if versionInfo is NULL.
 */
U_CAPI void U_EXPORT2
unorm_getUnicodeVersion(UVersionInfo *versionInfo, UErrorCode *pErrorCode){
    /* Same entry guard as u_getFC_NFKC_Closure(): never dereference a NULL error code. */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }
    if(versionInfo==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(unorm_haveData(pErrorCode)){
        uprv_memcpy(*versionInfo, dataVersion, sizeof(UVersionInfo));
    }
}
/*
 * Looks up the set stored for c in the canonStartSets search tables and
 * returns it through fillSet.
 *
 * @param c       code point to look up (0..0x10ffff)
 * @param fillSet out: receives the set; must not be NULL
 * @return TRUE if an entry for c was found and fillSet was filled in;
 *         FALSE for invalid arguments, missing data, or no entry for c
 */
U_CAPI UBool U_EXPORT2
unorm_getCanonStartSet(UChar32 c, USerializedSet *fillSet) {
#if !UNORM_HARDCODE_DATA
UErrorCode errorCode=U_ZERO_ERROR;
#endif
if( fillSet!=NULL && (uint32_t)c<=0x10ffff &&
#if !UNORM_HARDCODE_DATA
_haveData(errorCode) &&
#endif
canonStartSets!=NULL
) {
const uint16_t *table;
int32_t i, start, limit;
if(c<=0xffff) {
/* BMP search table: entries are pairs (code unit, value), located right after the serialized sets. */
table=canonStartSets+canonStartSets[_NORM_SET_INDEX_CANON_SETS_LENGTH];
start=0;
limit=canonStartSets[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH];
/* Binary search; the midpoint is rounded down to a pair boundary. */
while(start<limit-2) {
i=(uint16_t)(((start+limit)/4)*2);
if(c<table[i]) {
limit=i;
} else {
start=i;
}
}
if(c==table[start]) {
i=table[start+1];
/* The value is either an offset to a serialized set or a single code point. */
if((i&_NORM_CANON_SET_BMP_MASK)==_NORM_CANON_SET_BMP_IS_INDEX) {
i&=(_NORM_MAX_CANON_SETS-1);
return uset_getSerializedSet(fillSet,
canonStartSets+i,
canonStartSets[_NORM_SET_INDEX_CANON_SETS_LENGTH]-i);
} else {
uset_setSerializedToOne(fillSet, (UChar32)i);
return TRUE;
}
}
} else {
/*
 * Supplementary search table: entries are triples. The low 5 bits of the
 * first unit and the second unit form the code point (high and low parts);
 * the third unit is the value. The table follows the BMP table.
 */
uint16_t high, low, h;
table=canonStartSets+canonStartSets[_NORM_SET_INDEX_CANON_SETS_LENGTH]+
canonStartSets[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH];
start=0;
limit=canonStartSets[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH];
high=(uint16_t)(c>>16);
low=(uint16_t)c;
/* Binary search; the midpoint is rounded down to a triple boundary. */
while(start<limit-3) {
i=(uint16_t)(((start+limit)/6)*3);
h=table[i]&0x1f;
if(high<h || (high==h && low<table[i+1])) {
limit=i;
} else {
start=i;
}
}
h=table[start];
if(high==(h&0x1f) && low==table[start+1]) {
i=table[start+2];
/* Bit 15 of the first unit clear: the value is an offset to a serialized set. */
if((h&0x8000)==0) {
return uset_getSerializedSet(fillSet,
canonStartSets+i,
canonStartSets[_NORM_SET_INDEX_CANON_SETS_LENGTH]-i);
} else {
/* Bit 15 set: the result is one code point; bits 8..12 of the first unit are its bits 16..20. */
i|=((int32_t)h&0x1f00)<<8;
uset_setSerializedToOne(fillSet, (UChar32)i);
return TRUE;
}
}
}
}
return FALSE;
}
/*
 * Writes the FC_NFKC_Closure string of c to dest.
 *
 * @param c            code point to look up
 * @param dest         output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity capacity of dest in UChars; must not be negative
 * @param pErrorCode   in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *                     for inconsistent dest/destCapacity
 * @return the length of the closure string (0 if c has none, or on error);
 *         termination and overflow reporting are done by u_terminateUChars()
 */
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(!_haveData(*pErrorCode) || auxTrie.index==NULL) {
        return 0;
    }

    uint16_t auxValue;
    UTRIE_GET16(&auxTrie, c, auxValue);
    const uint16_t closureIndex=(uint16_t)(auxValue&_NORM_AUX_FNC_MASK);
    if(closureIndex==0) {
        /* no closure string for c */
        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    }

    /* The string is either a single unit below 0xff00, or a 0xffNN unit (NN=length) followed by the string. */
    const UChar *closure=(const UChar *)(extraData+closureIndex);
    int32_t closureLength=1;
    if(*closure>=0xff00) {
        closureLength=*closure&0xff;
        ++closure;
    }
    if(0<closureLength && closureLength<=destCapacity) {
        uprv_memcpy(dest, closure, closureLength*U_SIZEOF_UCHAR);
    }
    return u_terminateUChars(dest, destCapacity, closureLength, pErrorCode);
}
/*
 * Tests whether c is "skippable" for the given normalization mode, based on
 * the combining-class, quick-check and combines-with bits of its norm32 value
 * (and, for the composing modes, the auxiliary trie).
 *
 * @return TRUE if c is skippable; FALSE otherwise, for an unknown mode, or
 *         when required data is missing
 */
U_CAPI UBool U_EXPORT2
unorm_isNFSkippable(UChar32 c, UNormalizationMode mode) {
    uint32_t props, disqualifyingBits;
    uint16_t auxValue, fcd16;
#if !UNORM_HARDCODE_DATA
    UErrorCode errorCode=U_ZERO_ERROR;
    if(!_haveData(errorCode)) {
        return FALSE;
    }
#endif

    /* pick the norm32 bits that rule out skipping for this mode */
    switch(mode) {
    case UNORM_NONE:
        return TRUE;
    case UNORM_NFD:
        disqualifyingBits=_NORM_CC_MASK|_NORM_QC_NFD;
        break;
    case UNORM_NFKD:
        disqualifyingBits=_NORM_CC_MASK|_NORM_QC_NFKD;
        break;
    case UNORM_NFC:
        disqualifyingBits=_NORM_CC_MASK|_NORM_COMBINES_ANY|(_NORM_QC_NFC&_NORM_QC_ANY_NO);
        break;
    case UNORM_NFKC:
        disqualifyingBits=_NORM_CC_MASK|_NORM_COMBINES_ANY|(_NORM_QC_NFKC&_NORM_QC_ANY_NO);
        break;
    case UNORM_FCD:
        /* FCD is decided by the FCD trie alone */
        if(fcdTrie.index==NULL) {
            return FALSE;
        }
        UTRIE_GET16(&fcdTrie, c, fcd16);
        return fcd16<=1;
    default:
        return FALSE;
    }

    UTRIE_GET32(&normTrie, c, props);
    if((props&disqualifyingBits)!=0) {
        return FALSE;
    }

    /* decomposing modes are done; composing modes are done if c does not decompose */
    if(mode<UNORM_NFC || (props&_NORM_QC_NFD)==0) {
        return TRUE;
    }

    if(isNorm32HangulOrJamo(props)) {
        /* a Hangul syllable is skippable unless it lacks a Jamo T */
        return !isHangulWithoutJamoT((UChar)c);
    }

    /* the remaining decision needs the skip flag from the auxiliary trie (format version 2.2) */
    if(formatVersion_2_2 && auxTrie.index!=NULL) {
        UTRIE_GET16(&auxTrie, c, auxValue);
        return (auxValue&_NORM_AUX_NFC_SKIP_F_MASK)==0;
    }
    return FALSE;
}
/*
 * Adds to the set behind sa the start of every range enumerated from the
 * normalization tries, plus the boundaries inside the Hangul syllable block
 * (each syllable at a multiple of JAMO_T_COUNT, the syllable after it, and
 * the first code point after the block).
 * Does nothing if the data is unavailable or *pErrorCode indicates failure.
 */
U_CAPI void U_EXPORT2
unorm_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    if(!_haveData(*pErrorCode)) {
        return;
    }

    /* range starts from the main trie and from the optional tries that are present */
    utrie_enum(&normTrie, NULL, _enumPropertyStartsRange, sa);
    if(fcdTrie.index!=NULL) {
        utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, sa);
    }
    if(auxTrie.index!=NULL) {
        utrie_enum(&auxTrie, NULL, _enumPropertyStartsRange, sa);
    }

    /* Hangul is handled algorithmically, so its boundaries are added by hand */
    const UChar32 hangulLimit=HANGUL_BASE+HANGUL_COUNT;
    for(UChar32 syllable=HANGUL_BASE; syllable<hangulLimit; syllable+=JAMO_T_COUNT) {
        sa->add(sa->set, syllable);
        sa->add(sa->set, syllable+1);
    }
    sa->add(sa->set, hangulLimit);
}
/*
 * Returns the quick-check value of c for the given normalization mode.
 *
 * @param c    code point to test
 * @param mode normalization mode; modes without a quick-check mask in the
 *             table below, and values outside [0, UNORM_MODE_COUNT), yield UNORM_YES
 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE; UNORM_YES also when the data is
 *         unavailable in the run-time loading build
 */
U_CFUNC UNormalizationCheckResult U_EXPORT2
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    /* indexed by mode; entries not listed are zero-initialized */
    static const uint32_t qcMask[UNORM_MODE_COUNT]={
        0, 0, _NORM_QC_NFD, _NORM_QC_NFKD, _NORM_QC_NFC, _NORM_QC_NFKC
    };
    uint32_t norm32;

    /*
     * Guard the table lookup: an out-of-range mode would read outside
     * qcMask[]. Treat it like the modes that have a zero mask.
     */
    if((int32_t)mode<0 || (int32_t)mode>=(int32_t)UNORM_MODE_COUNT) {
        return UNORM_YES;
    }
#if !UNORM_HARDCODE_DATA
    UErrorCode errorCode=U_ZERO_ERROR;
    if(!_haveData(errorCode)) {
        return UNORM_YES;
    }
#endif
    UTRIE_GET32(&normTrie, c, norm32);
    norm32&=qcMask[mode];
    if(norm32==0) {
        return UNORM_YES;
    } else if(norm32&_NORM_QC_ANY_NO) {
        return UNORM_NO;
    } else {
        return UNORM_MAYBE;
    }
}
/*
 * Returns the 16-bit FCD trie value of code point c, or 0 if there is no FCD
 * trie (or, in the run-time loading build, no data at all).
 */
U_CFUNC uint16_t U_EXPORT2
unorm_getFCD16FromCodePoint(UChar32 c) {
#if !UNORM_HARDCODE_DATA
    UErrorCode errorCode=U_ZERO_ERROR;
    if(!_haveData(errorCode)) {
        return 0;
    }
#endif
    if(fcdTrie.index==NULL) {
        return 0;
    }
    uint16_t fcd16;
    UTRIE_GET16(&fcdTrie, c, fcd16);
    return fcd16;
}
/*
 * Inserts the code point (c, c2) with combining class cc into the text
 * [start, current) so that combining classes stay in non-decreasing order.
 *
 * @param start   beginning of the segment that may be reordered
 * @param current end of the existing text; the code point is stored here if
 *                nothing has to move
 * @param p       end of the text after the insertion (current plus the
 *                number of code units of the new code point)
 * @param c       the code unit to insert (lead surrogate for a pair)
 * @param c2      trail surrogate, or 0 if c is a single code unit
 * @param cc      combining class of the new code point
 * @return the combining class of the last code point after the insertion
 */
static uint8_t
_insertOrdered(const UChar *start, UChar *current, UChar *p,
UChar c, UChar c2, uint8_t cc) {
const UChar *pBack, *pPreBack;
UChar *r;
uint8_t prevCC, trailCC=cc;
if(start<current && cc!=0) {
/* look at the last existing code point */
pPreBack=pBack=current;
prevCC=_getPrevCC(start, pPreBack);
if(cc<prevCC) {
/* the new code point goes before the last one, so that one stays last */
trailCC=prevCC;
pBack=pPreBack;
/* keep stepping back while the preceding code point has a higher cc */
while(start<pPreBack) {
prevCC=_getPrevCC(start, pPreBack);
if(cc>=prevCC) {
break;
}
pBack=pPreBack;
}
/* shift [pBack, current) up so that it ends at p; afterwards current==pBack */
r=p;
do {
*--r=*--current;
} while(pBack!=current);
}
}
/* store the new code point at the insertion position */
*current=c;
if(c2!=0) {
*(current+1)=c2;
}
return trailCC;
}
/*
 * Merges the text [next, limit) into the text [start, current), inserting
 * each code point with a non-zero combining class at its ordered position.
 *
 * @param start     beginning of the destination segment that may be reordered
 * @param current   end of the destination text
 * @param next      beginning of the text to merge in
 * @param limit     end of the text to merge in
 * @param isOrdered if TRUE, merging stops at the first code point with cc==0
 *                  and the rest of [next, limit) is appended unchanged;
 *                  if FALSE, every code point is processed and each cc==0
 *                  code point starts a new reordering segment
 * @return the combining class of the last code point of the merged text
 */
static uint8_t
_mergeOrdered(UChar *start, UChar *current,
const UChar *next, const UChar *limit, UBool isOrdered=TRUE) {
UChar *r;
UChar c, c2;
uint8_t cc, trailCC=0;
UBool adjacent;
/* adjacent: the source text directly follows the destination text in memory */
adjacent= current==next;
/* with an empty, ordered destination there is nothing to reorder */
if(start!=current || !isOrdered) {
while(next<limit) {
cc=_getNextCC(next, limit, c, c2);
if(cc==0) {
trailCC=0;
if(adjacent) {
/* the code point is already in place; just move the end of the destination */
current=(UChar *)next;
} else {
*current++=c;
if(c2!=0) {
*current++=c2;
}
}
if(isOrdered) {
break;
} else {
start=current;
}
} else {
/* r: end of the destination once this code point has been inserted */
r=current+(c2==0 ? 1 : 2);
trailCC=_insertOrdered(start, current, r, c, c2, cc);
current=r;
}
}
}
if(next==limit) {
return trailCC;
} else {
/* append whatever was not processed; when adjacent it is already in place */
if(!adjacent) {
do {
*current++=*next++;
} while(next!=limit);
limit=current;
}
/* the trail cc is the cc of the last code point of the result */
return _getPrevCC(start, limit);
}
}
/*
 * Walks backward from src (not before start) over whole code points until it
 * has stepped over one that _isTrueStarter() accepts, and returns the
 * position of that code point; returns start if none is found.
 */
static const UChar *
_findPreviousStarter(const UChar *start, const UChar *src,
                     uint32_t ccOrQCMask, uint32_t decompQCMask, UChar minNoMaybe) {
    const uint32_t lookupMask=ccOrQCMask|decompQCMask;
    UChar unit, otherUnit;

    while(start<src) {
        const uint32_t props=_getPrevNorm32(start, src, minNoMaybe, lookupMask, unit, otherUnit);
        if(_isTrueStarter(props, ccOrQCMask, decompQCMask)) {
            return src;
        }
    }
    return src;
}
/*
 * Walks forward from src and returns the position of the first code point
 * that is a true starter, or limit if there is none.
 *
 * The walk stops in front of a code point when: its code unit is below
 * minNoMaybe; it has neither a combining class nor any qcMask bit; it is an
 * unpaired lead surrogate; or it decomposes (under decompQCMask) to a
 * sequence with lead cc 0 whose first code point has no qcMask bit.
 */
static const UChar *
_findNextStarter(const UChar *src, const UChar *limit,
                 uint32_t qcMask, uint32_t decompQCMask, UChar minNoMaybe) {
    const uint32_t ccOrQCMask=_NORM_CC_MASK|qcMask;

    while(src!=limit) {
        const UChar unit=*src;
        if(unit<minNoMaybe) {
            break;
        }

        uint32_t props=_getNorm32(unit);
        if((props&ccOrQCMask)==0) {
            break;
        }

        int32_t unitCount=1;
        if(isNorm32LeadSurrogate(props)) {
            if((src+1)==limit) {
                break;
            }
            const UChar trail=*(src+1);
            if(!UTF_IS_SECOND_SURROGATE(trail)) {
                /* unpaired lead surrogate */
                break;
            }
            props=_getNorm32FromSurrogatePair(props, trail);
            if((props&ccOrQCMask)==0) {
                break;
            }
            unitCount=2;
        }

        if((props&decompQCMask)!=0) {
            /* a decomposable code point is a starter if its decomposition begins with one */
            int32_t decompLength;
            uint8_t leadCC, trailCC;
            const UChar *decomp=_decompose(props, decompQCMask, decompLength, leadCC, trailCC);
            if(leadCC==0 && (_getNorm32(decomp, qcMask)&qcMask)==0) {
                break;
            }
        }

        src+=unitCount;
    }
    return src;
}
/*
 * Writes the canonical or compatibility decomposition of a single code point
 * to dest.
 *
 * @param c            code point to decompose (0..0x10ffff)
 * @param compat       FALSE for the canonical, TRUE for the compatibility decomposition
 * @param dest         output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity capacity of dest in UChars
 * @return 0 for invalid arguments (or missing data in the run-time loading build);
 *         a negative value if c does not decompose: minus the number of code
 *         units of c itself, which is written to dest if it fits;
 *         otherwise the length of the decomposition, even if it did not fit
 */
U_CAPI int32_t U_EXPORT2
unorm_getDecomposition(UChar32 c, UBool compat,
UChar *dest, int32_t destCapacity) {
#if !UNORM_HARDCODE_DATA
UErrorCode errorCode=U_ZERO_ERROR;
#endif
if( (uint32_t)c<=0x10ffff &&
#if !UNORM_HARDCODE_DATA
_haveData(errorCode) &&
#endif
((dest!=NULL && destCapacity>0) || destCapacity==0)
) {
uint32_t norm32, qcMask;
UChar32 minNoMaybe;
int32_t length;
/* choose the quick-check bit and the threshold below which nothing decomposes */
if(!compat) {
minNoMaybe=(UChar32)indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE];
qcMask=_NORM_QC_NFD;
} else {
minNoMaybe=(UChar32)indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE];
qcMask=_NORM_QC_NFKD;
}
/* fast path: below the threshold, c is returned as itself */
if(c<minNoMaybe) {
if(destCapacity>0) {
dest[0]=(UChar)c;
}
return -1;
}
UTRIE_GET32(&normTrie, c, norm32);
if((norm32&qcMask)==0) {
/* no decomposition: write c itself as one or two UTF-16 code units */
if(c<=0xffff) {
if(destCapacity>0) {
dest[0]=(UChar)c;
}
return -1;
} else {
if(destCapacity>=2) {
dest[0]=UTF16_LEAD(c);
dest[1]=UTF16_TRAIL(c);
}
return -2;
}
} else if(isNorm32HangulOrJamo(norm32)) {
/* Hangul syllable: decompose arithmetically into L, V and optional T */
UChar c2;
c-=HANGUL_BASE;
c2=(UChar)(c%JAMO_T_COUNT);
c/=JAMO_T_COUNT;
if(c2>0) {
if(destCapacity>=3) {
dest[2]=(UChar)(JAMO_T_BASE+c2);
}
length=3;
} else {
length=2;
}
/* NOTE(review): L and V are written whenever destCapacity>=2, even if a required T did not fit */
if(destCapacity>=2) {
dest[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
dest[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
}
return length;
} else {
/* table-driven decomposition: copy it only if it fits completely */
const UChar *p, *limit;
uint8_t cc, trailCC;
p=_decompose(norm32, qcMask, length, cc, trailCC);
if(length<=destCapacity) {
limit=p+length;
/* NOTE(review): do-while copies at least one unit; this assumes stored decompositions are never empty */
do {
*dest++=*p++;
} while(p<limit);
}
return length;
}
} else {
return 0;
}
}
/*
 * Decomposes src into dest and puts combining marks into canonical order.
 *
 * @param dest         output buffer
 * @param destCapacity capacity of dest in UChars; nothing is written beyond
 *                     it, but the returned length keeps counting
 * @param src          input text
 * @param srcLength    length of src, or negative if src is NUL-terminated
 * @param compat       FALSE for canonical, TRUE for compatibility decomposition
 * @param nx           set of code points that must not be decomposed, or NULL
 * @param outTrailCC   out: combining class of the last code point of the result
 * @return the length of the decomposed text, which may exceed destCapacity
 */
static int32_t
_decompose(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBool compat, const UnicodeSet *nx,
uint8_t &outTrailCC) {
/* scratch space for an arithmetic Hangul decomposition (L, V, T) */
UChar buffer[3];
const UChar *limit, *prevSrc, *p;
uint32_t norm32, ccOrQCMask, qcMask;
int32_t destIndex, reorderStartIndex, length;
UChar c, c2, minNoMaybe;
uint8_t cc, prevCC, trailCC;
if(!compat) {
minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE];
qcMask=_NORM_QC_NFD;
} else {
minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE];
qcMask=_NORM_QC_NFKD;
}
ccOrQCMask=_NORM_CC_MASK|qcMask;
destIndex=reorderStartIndex=0;
prevCC=0;
norm32=0;
c=0;
cc=0;
trailCC=0;
/* limit==NULL marks NUL-terminated input */
if(srcLength>=0) {
limit=src+srcLength;
} else {
limit=NULL;
}
U_ALIGN_CODE(16);
for(;;) {
/*
 * Fast path: skip code units that are below minNoMaybe or have neither a
 * combining class nor the relevant quick-check bit.
 */
prevSrc=src;
if(limit==NULL) {
while((c=*src)<minNoMaybe ? c!=0 : ((norm32=_getNorm32(c))&ccOrQCMask)==0) {
prevCC=0;
++src;
}
} else {
while(src!=limit && ((c=*src)<minNoMaybe || ((norm32=_getNorm32(c))&ccOrQCMask)==0)) {
prevCC=0;
++src;
}
}
/* copy the skipped run unchanged; reordering cannot reach back into it */
if(src!=prevSrc) {
length=(int32_t)(src-prevSrc);
if((destIndex+length)<=destCapacity) {
uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR);
}
destIndex+=length;
reorderStartIndex=destIndex;
}
/* end of input? */
if(limit==NULL ? c==0 : src==limit) {
break;
}
/* c (with norm32) needs attention; consume it */
++src;
/*
 * Determine what to write for this code point:
 * p!=NULL: the decomposition [p, p+length) with lead cc and trailCC;
 * p==NULL: the code point (c, c2) itself, length code units, with cc.
 */
if(isNorm32HangulOrJamo(norm32)) {
if(nx_contains(nx, c)) {
/* excluded: copy the Hangul character as is */
c2=0;
p=NULL;
length=1;
} else {
/* decompose the Hangul syllable arithmetically into buffer */
p=buffer;
cc=trailCC=0;
c-=HANGUL_BASE;
c2=(UChar)(c%JAMO_T_COUNT);
c/=JAMO_T_COUNT;
if(c2>0) {
buffer[2]=(UChar)(JAMO_T_BASE+c2);
length=3;
} else {
length=2;
}
buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
}
} else {
if(isNorm32Regular(norm32)) {
c2=0;
length=1;
} else {
/* c is a lead surrogate: fetch the data of the pair, or treat an unpaired one as inert */
if(src!=limit && UTF_IS_SECOND_SURROGATE(c2=*src)) {
++src;
length=2;
norm32=_getNorm32FromSurrogatePair(norm32, c2);
} else {
c2=0;
length=1;
norm32=0;
}
}
if(nx_contains(nx, c, c2)) {
/* excluded: copy unchanged and treat as cc==0 */
cc=trailCC=0;
p=NULL;
} else if((norm32&qcMask)==0) {
/* does not decompose: only its combining class matters */
cc=trailCC=(uint8_t)(norm32>>_NORM_CC_SHIFT);
p=NULL;
} else {
p=_decompose(norm32, qcMask, length, cc, trailCC);
if(length==1) {
/* a one-unit decomposition is handled like a plain code unit */
c=*p;
c2=0;
p=NULL;
}
}
}
/* write the result if it fits; otherwise only count its length */
if((destIndex+length)<=destCapacity) {
UChar *reorderSplit=dest+destIndex;
if(p==NULL) {
if(cc!=0 && cc<prevCC) {
/* out of order: insert (c, c2) at its place within the current reordering segment */
destIndex+=length;
trailCC=_insertOrdered(dest+reorderStartIndex, reorderSplit, dest+destIndex, c, c2, cc);
} else {
dest[destIndex++]=c;
if(c2!=0) {
dest[destIndex++]=c2;
}
}
} else {
if(cc!=0 && cc<prevCC) {
/* out of order: merge the decomposition into the current reordering segment */
destIndex+=length;
trailCC=_mergeOrdered(dest+reorderStartIndex, reorderSplit, p, p+length);
} else {
do {
dest[destIndex++]=*p++;
} while(--length>0);
}
}
} else {
destIndex+=length;
}
/* a trailing cc of 0 starts a new reordering segment */
prevCC=trailCC;
if(prevCC==0) {
reorderStartIndex=destIndex;
}
}
outTrailCC=prevCC;
return destIndex;
}
/*
 * Decomposes src into dest (canonically, or with compatibility mappings if
 * compat is TRUE), honoring the exclusion sets selected by options, and
 * terminates the result with u_terminateUChars().
 *
 * @param pErrorCode in/out ICU error code; must not be NULL
 * @return the length of the decomposed text, or 0 if the data or the
 *         exclusion set could not be obtained
 */
U_CAPI int32_t U_EXPORT2
unorm_decompose(UChar *dest, int32_t destCapacity,
                const UChar *src, int32_t srcLength,
                UBool compat, int32_t options,
                UErrorCode *pErrorCode) {
    if(!_haveData(*pErrorCode)) {
        return 0;
    }

    const UnicodeSet *exclusions=getNX(options, *pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* the trailing combining class is of no interest to this caller */
    uint8_t ignoredTrailCC;
    const int32_t resultLength=
        _decompose(dest, destCapacity, src, srcLength, compat, exclusions, ignoredTrailCC);
    return u_terminateUChars(dest, destCapacity, resultLength, pErrorCode);
}
/*
 * Reads the next code point at p (advancing p) and returns its
 * "combines with" flags for recomposition.
 *
 * @param p              in/out: current position; moved past the code point read
 * @param limit          end of the text
 * @param c              out: the (first) code unit read
 * @param c2             out: the trail surrogate if a pair was read, else 0
 * @param combiningIndex out: index used for the combining-table lookup; for
 *                       Hangul/Jamo a special value 0xfff0|extra-bits; 0 if
 *                       the code point does not combine
 * @param cc             out: combining class (left 0 for Hangul/Jamo and for
 *                       excluded code points)
 * @param nx             set of excluded code points, or NULL
 * @return the _NORM_COMBINES_ANY bits of the code point; 0 if it does not
 *         combine, is excluded, or is an unpaired surrogate
 */
static inline uint32_t
_getNextCombining(UChar *&p, const UChar *limit,
                  UChar &c, UChar &c2,
                  uint16_t &combiningIndex, uint8_t &cc,
                  const UnicodeSet *nx) {
    c=*p++;
    c2=0;
    combiningIndex=0;
    cc=0;

    uint32_t props=_getNorm32(c);
    if((props&(_NORM_CC_MASK|_NORM_COMBINES_ANY))==0) {
        return 0;
    }

    if(!isNorm32Regular(props)) {
        if(isNorm32HangulOrJamo(props)) {
            /* Hangul/Jamo combine arithmetically; mark that with a special index */
            combiningIndex=(uint16_t)(0xfff0|(props>>_NORM_EXTRA_SHIFT));
            return props&_NORM_COMBINES_ANY;
        }
        /* lead surrogate: the real data belongs to the pair */
        if(p!=limit && UTF_IS_SECOND_SURROGATE(c2=*p)) {
            ++p;
            props=_getNorm32FromSurrogatePair(props, c2);
        } else {
            c2=0;
            return 0;
        }
    }

    if(nx_contains(nx, c, c2)) {
        return 0;
    }

    cc=(uint8_t)(props>>_NORM_CC_SHIFT);
    const uint32_t combineFlags=props&_NORM_COMBINES_ANY;
    if(combineFlags!=0) {
        /* the combining index is stored in the unit just before the extra data */
        combiningIndex=*(_getExtraData(props)-1);
    }
    return combineFlags;
}
/*
 * Returns the combining index of the starter (c, c2), read from the unit
 * just before the starter's extra data. c2 is 0 for a single code unit.
 */
static inline uint16_t
_getCombiningIndexFromStarter(UChar c, UChar c2) {
    uint32_t props=_getNorm32(c);
    if(c2!=0) {
        props=_getNorm32FromSurrogatePair(props, c2);
    }
    const uint16_t *extra=_getExtraData(props);
    return extra[-1];
}
/*
 * Searches one starter's list in the combining table for the entry that
 * belongs to a given back-combining character.
 *
 * @param table            start of the starter's list: a sequence of
 *                         (key, value[, value2]) entries with ascending keys
 * @param combineBackIndex index of the back-combining character to look for
 * @param value            out: the composite's code unit (lead surrogate for a pair)
 * @param value2           out: the composite's trail surrogate, or 0
 * @return 0 if the pair does not combine; otherwise 1 or 2, where 2 means
 *         bit 0x2000 of the entry is set (the caller _recompose() then
 *         looks up the composite's own forward-combining index)
 */
static inline uint16_t
_combine(const uint16_t *table, uint16_t combineBackIndex,
uint16_t &value, uint16_t &value2) {
uint16_t key;
/* linear search: skip entries with smaller keys; bit 15 of a value unit marks a two-unit value */
for(;;) {
key=*table++;
if(key>=combineBackIndex) {
break;
}
table+= *table&0x8000 ? 2 : 1;
}
/* bit 15 of the key is ignored for the comparison */
if((key&0x7fff)==combineBackIndex) {
value=*table;
/* key is reused as the return value from here on */
key=(uint16_t)((value&0x2000)+1);
if(value&0x8000) {
if(value&0x4000) {
/* surrogate pair: lead surrogate from the low 10 bits, trail surrogate in the next unit */
value=(uint16_t)((value&0x3ff)|0xd800);
value2=*(table+1);
} else {
/* BMP code unit stored in the next unit */
value=*(table+1);
value2=0;
}
} else {
/* BMP code unit stored in the low 13 bits */
value&=0x1fff;
value2=0;
}
return key;
} else {
return 0;
}
}
/*
 * Tries to compose prev with the Jamo V or T character c into a Hangul syllable.
 *
 * @param prev   the preceding code unit (a Jamo L, or a Hangul syllable)
 * @param c      the current Jamo V or T
 * @param norm32 the norm32 of c; decides whether c is handled as V or as T
 * @param src    in/out: position after c; advanced if a following Jamo T
 *               (or, with compat, a character whose compatibility
 *               decomposition is a single Jamo T) is consumed as well
 * @param limit  end of the input
 * @param compat TRUE to also accept compatibility variants of Jamo T
 * @param dest   where to store the composed syllable, or NULL to store nothing
 * @param nx     set of excluded code points, or NULL
 * @return TRUE if a syllable was composed (and stored if dest!=NULL)
 */
static inline UBool
_composeHangul(UChar prev, UChar c, uint32_t norm32, const UChar *&src, const UChar *limit,
UBool compat, UChar *dest, const UnicodeSet *nx) {
if(isJamoVTNorm32JamoV(norm32)) {
/* c is a Jamo V: compose with a preceding Jamo L */
prev=(UChar)(prev-JAMO_L_BASE);
if(prev<JAMO_L_COUNT) {
c=(UChar)(HANGUL_BASE+(prev*JAMO_V_COUNT+(c-JAMO_V_BASE))*JAMO_T_COUNT);
/* check whether a Jamo T follows that can be added right away */
if(src!=limit) {
UChar next, t;
next=*src;
if((t=(UChar)(next-JAMO_T_BASE))<JAMO_T_COUNT) {
++src;
c+=t;
} else if(compat) {
/* with compat, the next character may decompose to a single Jamo T */
norm32=_getNorm32(next);
if(isNorm32Regular(norm32) && (norm32&_NORM_QC_NFKD)) {
const UChar *p;
int32_t length;
uint8_t cc, trailCC;
p=_decompose(norm32, _NORM_QC_NFKD, length, cc, trailCC);
if(length==1 && (t=(UChar)(*p-JAMO_T_BASE))<JAMO_T_COUNT) {
++src;
c+=t;
}
}
}
}
if(nx_contains(nx, c)) {
/* the syllable is excluded: un-read the Jamo T if one was consumed */
if(!isHangulWithoutJamoT(c)) {
--src;
}
return FALSE;
}
if(dest!=0) {
*dest=c;
}
return TRUE;
}
} else if(isHangulWithoutJamoT(prev)) {
/* c is a Jamo T: add it to a preceding syllable that has none yet */
c=(UChar)(prev+(c-JAMO_T_BASE));
if(nx_contains(nx, c)) {
return FALSE;
}
if(dest!=0) {
*dest=c;
}
return TRUE;
}
return FALSE;
}
/*
 * Recomposes the text [p, limit) in place: combines starters with following
 * combining characters, removes the combined characters, and shortens the text.
 *
 * @param p       beginning of the text; must not be empty, since the first
 *                code unit is read unconditionally
 * @param limit   in/out: end of the text; moved down when characters are removed
 * @param options UNORM_BEFORE_PRI_29 and _NORM_OPTIONS_COMPOSE_CONTIGUOUS are
 *                tested here
 * @param nx      set of excluded code points, or NULL
 * @return the combining class of the last code point processed
 */
static uint8_t
_recompose(UChar *p, UChar *&limit, int32_t options, const UnicodeSet *nx) {
UChar *starter, *pRemove, *q, *r;
uint32_t combineFlags;
UChar c, c2;
uint16_t combineFwdIndex, combineBackIndex;
uint16_t result, value, value2;
uint8_t cc, prevCC;
UBool starterIsSupplementary;
/* starter: the last forward-combining character with cc==0, or NULL if there is none */
starter=NULL;
combineFwdIndex=0;
combineBackIndex=0;
value=value2=0;
starterIsSupplementary=FALSE;
prevCC=0;
for(;;) {
combineFlags=_getNextCombining(p, limit, c, c2, combineBackIndex, cc, nx);
if((combineFlags&_NORM_COMBINES_BACK) && starter!=NULL) {
if(combineBackIndex&0x8000) {
/* special index from _getNextCombining(): c is a Jamo and is composed arithmetically */
if((options&UNORM_BEFORE_PRI_29) || prevCC==0) {
pRemove=NULL;
combineFlags=0;
c2=*starter;
if(combineBackIndex==0xfff2) {
/* c is treated as a Jamo V: the starter must be a Jamo L */
c2=(UChar)(c2-JAMO_L_BASE);
if(c2<JAMO_L_COUNT) {
pRemove=p-1;
c=(UChar)(HANGUL_BASE+(c2*JAMO_V_COUNT+(c-JAMO_V_BASE))*JAMO_T_COUNT);
if(p!=limit && (c2=(UChar)(*p-JAMO_T_BASE))<JAMO_T_COUNT) {
/* a Jamo T follows directly: take it into the syllable too */
++p;
c+=c2;
} else {
/* no Jamo T yet: the new syllable can still combine forward */
combineFlags=_NORM_COMBINES_FWD;
}
if(!nx_contains(nx, c)) {
*starter=c;
} else {
/* excluded: undo the Jamo T consumption and remove nothing */
if(!isHangulWithoutJamoT(c)) {
--p;
}
pRemove=NULL;
}
}
} else {
/* c is treated as a Jamo T: the starter must be a syllable without one */
if(isHangulWithoutJamoT(c2)) {
c2+=(UChar)(c-JAMO_T_BASE);
if(!nx_contains(nx, c2)) {
pRemove=p-1;
*starter=c2;
}
}
}
/* remove the consumed Jamo by moving the rest of the text down */
if(pRemove!=NULL) {
q=pRemove;
r=p;
while(r<limit) {
*q++=*r++;
}
p=pRemove;
limit=q;
}
c2=0;
if(combineFlags!=0) {
if(p==limit) {
return prevCC;
}
/* keep the starter; 0xfff0 has bit 15 set, which blocks the table lookup below */
combineFwdIndex=0xfff0;
continue;
}
}
} else if(
/*
 * Table-driven composition: the starter must have a regular forward index,
 * the character must not be blocked from the starter (the test depends on
 * UNORM_BEFORE_PRI_29), the pair must be in the combining table, and the
 * composite must not be excluded.
 */
!(combineFwdIndex&0x8000) &&
((options&UNORM_BEFORE_PRI_29) ?
(prevCC!=cc || prevCC==0) :
(prevCC<cc || prevCC==0)) &&
0!=(result=_combine(combiningTable+combineFwdIndex, combineBackIndex, value, value2)) &&
!nx_contains(nx, value, value2)
) {
/* pRemove: start of the combining character that is about to disappear */
pRemove= c2==0 ? p-1 : p-2;
*starter=(UChar)value;
if(starterIsSupplementary) {
if(value2!=0) {
/* pair replaced by pair */
*(starter+1)=(UChar)value2;
} else {
/* pair replaced by a single unit: close the gap after the starter */
starterIsSupplementary=FALSE;
q=starter+1;
r=q+1;
while(r<pRemove) {
*q++=*r++;
}
--pRemove;
}
} else if(value2!=0) {
/* single unit replaced by a pair: open a gap after the starter for the trail surrogate */
starterIsSupplementary=TRUE;
++starter;
q=pRemove;
r=++pRemove;
while(starter<q) {
*--r=*--q;
}
*starter=(UChar)value2;
--starter;
}
/* remove what is left of the combining character by moving the rest of the text down */
if(pRemove<p) {
q=pRemove;
r=p;
while(r<limit) {
*q++=*r++;
}
p=pRemove;
limit=q;
}
if(p==limit) {
return prevCC;
}
/* result>1: look up the composite's own forward index and keep it as starter; otherwise drop the starter */
if(result>1) {
combineFwdIndex=_getCombiningIndexFromStarter((UChar)value, (UChar)value2);
} else {
starter=NULL;
}
continue;
}
}
/* no composition happened with this character */
prevCC=cc;
if(p==limit) {
return prevCC;
}
if(cc==0) {
/* a character with cc==0 replaces the current starter, if it combines forward */
if(combineFlags&_NORM_COMBINES_FWD) {
if(c2==0) {
starterIsSupplementary=FALSE;
starter=p-1;
} else {
starterIsSupplementary=TRUE;
starter=p-2;
}
combineFwdIndex=combineBackIndex;
} else {
starter=NULL;
}
} else if(options&_NORM_OPTIONS_COMPOSE_CONTIGUOUS) {
/* contiguous composition: any character that did not combine ends the starter's reach */
starter=NULL;
}
}
}
/*
 * Decomposes the source segment [prevStarter..src[ into the side buffer and
 * then recomposes it in place.
 *
 * stackBuffer/buffer/bufferCapacity describe the side buffer; it is grown via
 * u_growBufferFromStatic() when the decomposition does not fit.
 * length receives the number of code units of the recomposed text.
 * prevCC is only written when the decomposition has at least two code units
 * (it then receives the return value of _recompose()).
 *
 * Returns the side buffer, or NULL with *pErrorCode set to
 * U_MEMORY_ALLOCATION_ERROR when the buffer could not be grown.
 */
static const UChar *
_composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_t &length,
             const UChar *prevStarter, const UChar *src,
             uint8_t &prevCC,
             int32_t options, const UnicodeSet *nx,
             UErrorCode *pErrorCode) {
    const int32_t segmentLength=(int32_t)(src-prevStarter);
    const UBool isCompat=(UBool)((options&_NORM_OPTIONS_COMPAT)!=0);
    uint8_t lastCC;
    UChar *composedEnd;

    /* first attempt: decompose into whatever buffer we currently have */
    length=_decompose(buffer, bufferCapacity,
                      prevStarter, segmentLength,
                      isCompat, nx,
                      lastCC);
    if(length>bufferCapacity) {
        /* did not fit: enlarge generously, then decompose again from scratch */
        if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*length, 0)) {
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        length=_decompose(buffer, bufferCapacity,
                          prevStarter, segmentLength,
                          isCompat, nx,
                          lastCC);
    }

    /*
     * Recompose in place; a single code unit cannot combine with anything.
     * The final length is taken from composedEnd afterwards, which relies on
     * _recompose() adjusting the limit it is handed (its signature is not
     * visible here - confirm it takes the limit by reference).
     */
    composedEnd=buffer+length;
    if(length>=2) {
        prevCC=_recompose(buffer, composedEnd, options, nx);
    }

    length=(int32_t)(composedEnd-buffer);
    return buffer;
}
/*
 * Composes src[0..srcLength[ into dest and returns the number of code units
 * that the complete result needs (destIndex). Writes to dest are skipped once
 * the result would exceed destCapacity, but the returned length keeps counting.
 *
 * srcLength<0 means that src is NUL-terminated (limit is then NULL).
 * options selects canonical vs. compatibility data via _NORM_OPTIONS_COMPAT
 * and is passed on to _composePart(); nx is passed to nx_contains() to test
 * whether a character is excluded.
 * If _composePart() fails (returns NULL) the function returns 0; the error code
 * is set by _composePart().
 */
static int32_t
_compose(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
int32_t options, const UnicodeSet *nx,
UErrorCode *pErrorCode) {
UChar stackBuffer[_STACK_BUFFER_CAPACITY];
UChar *buffer;
int32_t bufferCapacity;
const UChar *limit, *prevSrc, *prevStarter;
uint32_t norm32, ccOrQCMask, qcMask;
int32_t destIndex, reorderStartIndex, length;
UChar c, c2, minNoMaybe;
uint8_t cc, prevCC;
/* pick the threshold and quick check mask for NFKC or NFC */
if(options&_NORM_OPTIONS_COMPAT) {
minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE];
qcMask=_NORM_QC_NFKC;
} else {
minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE];
qcMask=_NORM_QC_NFC;
}
/* the side buffer starts out on the stack; _composePart() may replace it with a heap buffer */
buffer=stackBuffer;
bufferCapacity=_STACK_BUFFER_CAPACITY;
prevStarter=src;
ccOrQCMask=_NORM_CC_MASK|qcMask;
destIndex=reorderStartIndex=0;
prevCC=0;
norm32=0;
c=0;
if(srcLength>=0) {
limit=src+srcLength;
} else {
limit=NULL;
}
U_ALIGN_CODE(16);
for(;;) {
/*
 * fast loop: skip code units that are below minNoMaybe or whose norm32
 * has none of the ccOrQCMask bits set; in NUL-terminated mode stop at c==0
 */
prevSrc=src;
if(limit==NULL) {
while((c=*src)<minNoMaybe ? c!=0 : ((norm32=_getNorm32(c))&ccOrQCMask)==0) {
prevCC=0;
++src;
}
} else {
while(src!=limit && ((c=*src)<minNoMaybe || ((norm32=_getNorm32(c))&ccOrQCMask)==0)) {
prevCC=0;
++src;
}
}
/* copy the skipped run in one go (only if it fits) and remember its last character as prevStarter */
if(src!=prevSrc) {
length=(int32_t)(src-prevSrc);
if((destIndex+length)<=destCapacity) {
uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR);
}
destIndex+=length;
reorderStartIndex=destIndex;
prevStarter=src-1;
/* if the run ended with a surrogate pair, point prevStarter at its lead surrogate */
if(UTF_IS_SECOND_SURROGATE(*prevStarter) && prevSrc<prevStarter && UTF_IS_FIRST_SURROGATE(*(prevStarter-1))) {
--prevStarter;
}
prevSrc=src;
}
/* end of input: NUL in NUL-terminated mode, otherwise src==limit */
if(limit==NULL ? c==0 : src==limit) {
break;
}
/* c holds *src and norm32 was fetched for it in the fast loop; step over it */
++src;
if(isNorm32HangulOrJamo(norm32)) {
/* try to compose with the previously written character; needs at least one character in dest */
prevCC=cc=0;
reorderStartIndex=destIndex;
if(
destIndex>0 &&
_composeHangul(
*(prevSrc-1), c, norm32, src, limit, (UBool)((options&_NORM_OPTIONS_COMPAT)!=0),
destIndex<=destCapacity ? dest+(destIndex-1) : 0,
nx)
) {
prevStarter=src;
continue;
}
/* no composition happened: append c itself further below */
c2=0;
length=1;
prevStarter=prevSrc;
} else {
if(isNorm32Regular(norm32)) {
c2=0;
length=1;
} else {
/* special norm32 that is not Hangul/Jamo: treated as a lead surrogate; look for its trail */
if(src!=limit && UTF_IS_SECOND_SURROGATE(c2=*src)) {
++src;
length=2;
norm32=_getNorm32FromSurrogatePair(norm32, c2);
} else {
/* unpaired lead surrogate: handled as a character without data */
c2=0;
length=1;
norm32=0;
}
}
/* the current character is (c, c2) at [prevSrc..src[ */
if(nx_contains(nx, c, c2)) {
/* excluded character: treated as having cc==0 and no composition data */
cc=0;
} else if((norm32&qcMask)==0) {
/* quick check "yes": only the combining class matters */
cc=(uint8_t)(norm32>>_NORM_CC_SHIFT);
} else {
/* quick check bits set: decompose and recompose the surrounding segment in the side buffer */
const UChar *p;
uint32_t decompQCMask;
decompQCMask=(qcMask<<2)&0xf;
if(_isTrueStarter(norm32, ccOrQCMask, decompQCMask)) {
prevStarter=prevSrc;
} else {
/* back out what was already copied to dest since prevStarter; it gets recomposed */
destIndex-=(int32_t)(prevSrc-prevStarter);
}
/* advance src to the end of the segment that must be processed together */
src=_findNextStarter(src, limit, qcMask, decompQCMask, minNoMaybe);
/* length and prevCC are outputs of _composePart() */
p=_composePart(stackBuffer, buffer, bufferCapacity,
length,
prevStarter, src,
prevCC,
options, nx,
pErrorCode);
if(p==NULL) {
/* failure inside _composePart(): report length 0 */
destIndex=0;
break;
}
/* append the recomposed text, or just count it if it does not fit */
if((destIndex+length)<=destCapacity) {
while(length>0) {
dest[destIndex++]=*p++;
--length;
}
} else {
destIndex+=length;
}
prevStarter=src;
continue;
}
}
/* append the single character (c, c2), inserting it in combining-class order if needed */
if((destIndex+length)<=destCapacity) {
if(cc!=0 && cc<prevCC) {
/* out of order relative to the preceding text: insert into [reorderStartIndex..destIndex[ */
UChar *reorderSplit=dest+destIndex;
destIndex+=length;
prevCC=_insertOrdered(dest+reorderStartIndex, reorderSplit, dest+destIndex, c, c2, cc);
} else {
dest[destIndex++]=c;
if(c2!=0) {
dest[destIndex++]=c2;
}
prevCC=cc;
}
} else {
/* overflow: keep counting without writing */
destIndex+=length;
prevCC=cc;
}
}
/* release the side buffer if it was moved off the stack */
if(buffer!=stackBuffer) {
uprv_free(buffer);
}
return destIndex;
}
/*
 * Composes src into dest (NFKC data if compat is TRUE, NFC data otherwise)
 * and terminates the result with u_terminateUChars().
 *
 * Returns 0 without composing when the normalization data is unavailable or
 * when getNX() reports a failure. The set-selection, compat and
 * compose-contiguous bits of options are cleared before the internal call;
 * the compat bit is then re-derived from the compat parameter only.
 */
U_CAPI int32_t U_EXPORT2
unorm_compose(UChar *dest, int32_t destCapacity,
              const UChar *src, int32_t srcLength,
              UBool compat, int32_t options,
              UErrorCode *pErrorCode) {
    if(!_haveData(*pErrorCode)) {
        return 0;
    }

    /* resolve the exclusion set from the caller's original option bits */
    const UnicodeSet *exclusions=getNX(options, *pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* strip the bits that are reserved for internal use */
    const int32_t internalBits=
        _NORM_OPTIONS_SETS_MASK|_NORM_OPTIONS_COMPAT|_NORM_OPTIONS_COMPOSE_CONTIGUOUS;
    int32_t composeOptions=options&~internalBits;
    if(compat) {
        composeOptions|=_NORM_OPTIONS_COMPAT;
    }

    const int32_t composedLength=_compose(dest, destCapacity,
                                          src, srcLength,
                                          composeOptions, exclusions,
                                          pErrorCode);
    return u_terminateUChars(dest, destCapacity, composedLength, pErrorCode);
}
/*
 * Starting at src, with fcd16 holding the FCD data of the character just
 * before src, advances over following characters and returns the position
 * where scanning stops. Scanning stops
 * - when the low byte of the current fcd16 is 0,
 * - at limit,
 * - before a code unit below _NORM_MIN_WITH_LEAD_CC or with fcd16==0,
 * - before a character whose fcd16 is <=0xff (high byte is 0),
 * - before an unpaired lead surrogate.
 */
static const UChar *
_findSafeFCD(const UChar *src, const UChar *limit, uint16_t fcd16) {
    while((fcd16&0xff)!=0 && src!=limit) {
        const UChar lead=*src;
        int32_t unitCount;

        /* also catches a terminating NUL since 0 is below the threshold */
        if(lead<_NORM_MIN_WITH_LEAD_CC || (fcd16=_getFCD16(lead))==0) {
            break;
        }

        if(!UTF_IS_FIRST_SURROGATE(lead)) {
            unitCount=1;
        } else {
            if((src+1)==limit) {
                break;
            }
            const UChar trail=*(src+1);
            if(!UTF_IS_SECOND_SURROGATE(trail)) {
                break;
            }
            /* supplementary character: replace the lead-surrogate data with the real value */
            fcd16=_getFCD16FromSurrogatePair(fcd16, trail);
            unitCount=2;
        }

        /* high byte 0: do not step over this character */
        if(fcd16<=0xff) {
            break;
        }
        src+=unitCount;
    }
    return src;
}
/*
 * Decomposes the source range [src..decompLimit[ and appends the result to
 * dest starting at destIndex, keeping characters in combining-class order via
 * _insertOrdered()/_mergeOrdered().
 *
 * destIndex is advanced by the full length of the output even when it no
 * longer fits into destCapacity (nothing is written in that case).
 * Characters for which nx_contains() is true are copied unchanged with cc 0.
 * Returns the trail combining class value tracked for the last character
 * processed (0 if the range is empty).
 */
static uint8_t
_decomposeFCD(const UChar *src, const UChar *decompLimit,
UChar *dest, int32_t &destIndex, int32_t destCapacity,
const UnicodeSet *nx) {
const UChar *p;
uint32_t norm32;
int32_t reorderStartIndex, length;
UChar c, c2;
uint8_t cc, prevCC, trailCC;
/* reordering never reaches back before the position where this call started writing */
reorderStartIndex=destIndex;
prevCC=0;
while(src<decompLimit) {
c=*src++;
norm32=_getNorm32(c);
if(isNorm32Regular(norm32)) {
c2=0;
length=1;
} else {
/* non-regular norm32: handled as a lead surrogate; fetch the pair's data if a trail follows */
if(src!=decompLimit && UTF_IS_SECOND_SURROGATE(c2=*src)) {
++src;
length=2;
norm32=_getNorm32FromSurrogatePair(norm32, c2);
} else {
c2=0;
length=1;
norm32=0;
}
}
/* determine lead cc, trail cc and, if any, the decomposition p (NULL means "copy (c, c2)") */
if(nx_contains(nx, c, c2)) {
cc=trailCC=0;
p=NULL;
} else if((norm32&_NORM_QC_NFD)==0) {
/* NFD quick check bit not set: keep the character, cc comes straight from norm32 */
cc=trailCC=(uint8_t)(norm32>>_NORM_CC_SHIFT);
p=NULL;
} else {
/* length, cc and trailCC are passed to _decompose() and used afterwards - presumably outputs; p is its result */
p=_decompose(norm32, length, cc, trailCC);
if(length==1) {
/* single-unit decomposition: reuse the single-character path below */
c=*p;
c2=0;
p=NULL;
}
}
if((destIndex+length)<=destCapacity) {
UChar *reorderSplit=dest+destIndex;
if(p==NULL) {
if(cc!=0 && cc<prevCC) {
/* out of order: insert (c, c2) into the already written text */
destIndex+=length;
trailCC=_insertOrdered(dest+reorderStartIndex, reorderSplit, dest+destIndex, c, c2, cc);
} else {
dest[destIndex++]=c;
if(c2!=0) {
dest[destIndex++]=c2;
}
}
} else {
if(cc!=0 && cc<prevCC) {
/* out of order: merge the decomposition into the already written text */
destIndex+=length;
trailCC=_mergeOrdered(dest+reorderStartIndex, reorderSplit, p, p+length);
} else {
/* in order: append all length units (length is at least 2 here) */
do {
dest[destIndex++]=*p++;
} while(--length>0);
}
}
} else {
/* overflow: only count */
destIndex+=length;
}
prevCC=trailCC;
/* after a character with trail cc 0, later reordering starts from here */
if(prevCC==0) {
reorderStartIndex=destIndex;
}
}
return prevCC;
}
/*
 * Copies src to dest, decomposing and reordering only those pieces whose
 * lead/trail combining classes (from the FCD data) are out of order.
 *
 * srcLength<0 means src is NUL-terminated. Characters for which
 * nx_contains() is true are treated as having fcd16==0.
 * Returns 0 if the normalization data is not available; otherwise the result
 * of u_terminateUChars() for the full output length (which keeps counting
 * past destCapacity).
 */
static int32_t
unorm_makeFCD(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const UnicodeSet *nx,
UErrorCode *pErrorCode) {
const UChar *limit, *prevSrc, *decompStart;
int32_t destIndex, length;
UChar c, c2;
uint16_t fcd16;
int16_t prevCC, cc;
if(!_haveData(*pErrorCode)) {
return 0;
}
decompStart=src;
destIndex=0;
prevCC=0;
c=0;
fcd16=0;
if(srcLength>=0) {
limit=src+srcLength;
} else {
limit=NULL;
}
U_ALIGN_CODE(16);
for(;;) {
/*
 * fast loop: skip code units below _NORM_MIN_WITH_LEAD_CC or with fcd16==0.
 * For a code unit below the threshold, prevCC temporarily stores the
 * negated code unit; its FCD lookup is deferred until it is needed.
 */
prevSrc=src;
if(limit==NULL) {
for(;;) {
c=*src;
if(c<_NORM_MIN_WITH_LEAD_CC) {
if(c==0) {
break;
}
prevCC=(int16_t)-c;
} else if((fcd16=_getFCD16(c))==0) {
prevCC=0;
} else {
break;
}
++src;
}
} else {
for(;;) {
if(src==limit) {
break;
} else if((c=*src)<_NORM_MIN_WITH_LEAD_CC) {
prevCC=(int16_t)-c;
} else if((fcd16=_getFCD16(c))==0) {
prevCC=0;
} else {
break;
}
++src;
}
}
/* copy the skipped run in one go (only if it fits) */
if(src!=prevSrc) {
length=(int32_t)(src-prevSrc);
if((destIndex+length)<=destCapacity) {
uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR);
}
destIndex+=length;
prevSrc=src;
if(prevCC<0) {
/* resolve the deferred lookup: low byte of fcd16 of the previous code unit, or 0 if excluded */
if(!nx_contains(nx, (UChar32)-prevCC)) {
prevCC=(int16_t)(_getFCD16((UChar)-prevCC)&0xff);
} else {
prevCC=0;
}
/* a later decomposition may have to start at that previous code unit */
decompStart=prevSrc-1;
}
}
/* end of input: NUL in NUL-terminated mode, otherwise src==limit */
if(limit==NULL ? c==0 : src==limit) {
break;
}
/* with prevCC==0 nothing before the current character needs to be re-processed */
if(prevCC==0) {
decompStart=prevSrc;
}
/* c holds *src and fcd16 was fetched for it in the fast loop; step over it */
++src;
if(UTF_IS_FIRST_SURROGATE(c)) {
if(src!=limit && UTF_IS_SECOND_SURROGATE(c2=*src)) {
++src;
fcd16=_getFCD16FromSurrogatePair(fcd16, c2);
} else {
/* unpaired lead surrogate: no FCD data */
c2=0;
fcd16=0;
}
} else {
c2=0;
}
if(nx_contains(nx, c, c2)) {
fcd16=0;
}
/* high byte of fcd16 is compared against the previous character's low byte */
cc=(int16_t)(fcd16>>8);
if(cc==0 || cc>=prevCC) {
/* order is fine: append (c, c2) unchanged */
if(cc==0) {
decompStart=prevSrc;
}
prevCC=(int16_t)(fcd16&0xff);
length= c2==0 ? 1 : 2;
if((destIndex+length)<=destCapacity) {
dest[destIndex++]=c;
if(c2!=0) {
dest[destIndex++]=c2;
}
} else {
destIndex+=length;
}
} else {
/*
 * out of order: take back what was copied since decompStart,
 * extend the piece with _findSafeFCD(), and let _decomposeFCD()
 * write the decomposed, reordered text for [decompStart..src[
 */
destIndex-=(int32_t)(prevSrc-decompStart);
src=_findSafeFCD(src, limit, fcd16);
prevCC=_decomposeFCD(decompStart, src,
dest, destIndex, destCapacity,
nx);
decompStart=src;
}
}
return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode);
}
/*
 * Checks whether src[0..srcLength[ has its combining classes in FCD order:
 * returns FALSE as soon as a character's lead value (high byte of fcd16) is
 * non-zero and smaller than the previous character's trail value (low byte),
 * and TRUE when the end of the text is reached.
 *
 * srcLength<0 means src is NUL-terminated. Characters for which
 * nx_contains() is true reset prevCC to 0 and are otherwise ignored.
 */
static UBool
unorm_checkFCD(const UChar *src, int32_t srcLength, const UnicodeSet *nx) {
const UChar *limit;
UChar c, c2;
uint16_t fcd16;
int16_t prevCC, cc;
prevCC=0;
if(srcLength>=0) {
limit=src+srcLength;
} else {
limit=NULL;
}
U_ALIGN_CODE(16);
for(;;) {
/*
 * fast loop: skip code units below _NORM_MIN_WITH_LEAD_CC or with fcd16==0.
 * For a code unit below the threshold, prevCC stores the negated code unit;
 * its FCD lookup is deferred until a following character needs it.
 * Unlike in unorm_makeFCD(), src is already incremented past c here.
 */
if(limit==NULL) {
for(;;) {
c=*src++;
if(c<_NORM_MIN_WITH_LEAD_CC) {
if(c==0) {
return TRUE;
}
prevCC=(int16_t)-c;
} else if((fcd16=_getFCD16(c))==0) {
prevCC=0;
} else {
break;
}
}
} else {
for(;;) {
if(src==limit) {
return TRUE;
} else if((c=*src++)<_NORM_MIN_WITH_LEAD_CC) {
prevCC=(int16_t)-c;
} else if((fcd16=_getFCD16(c))==0) {
prevCC=0;
} else {
break;
}
}
}
/* c has non-zero FCD data; for a lead surrogate get the data of the whole pair */
if(UTF_IS_FIRST_SURROGATE(c)) {
if(src!=limit && UTF_IS_SECOND_SURROGATE(c2=*src)) {
++src;
fcd16=_getFCD16FromSurrogatePair(fcd16, c2);
} else {
/* unpaired lead surrogate: no FCD data */
c2=0;
fcd16=0;
}
} else {
c2=0;
}
if(nx_contains(nx, c, c2)) {
prevCC=0;
continue;
}
cc=(int16_t)(fcd16>>8);
if(cc!=0) {
if(prevCC<0) {
/* resolve the deferred lookup: low byte of fcd16 of the previous code unit, or 0 if excluded */
if(!nx_contains(nx, (UChar32)-prevCC)) {
prevCC=(int16_t)(_getFCD16((UChar)-prevCC)&0xff);
} else {
prevCC=0;
}
}
if(cc<prevCC) {
return FALSE;
}
}
prevCC=(int16_t)(fcd16&0xff);
}
}
/*
 * Checks whether src[0..srcLength[ is normalized according to mode.
 *
 * Returns UNORM_MAYBE without checking when pErrorCode is NULL or already a
 * failure, when the arguments are invalid (U_ILLEGAL_ARGUMENT_ERROR is set
 * for src==NULL, srcLength<-1 or an unknown mode), or when the normalization
 * data is not available. UNORM_FCD is delegated to unorm_checkFCD() and sets
 * U_UNSUPPORTED_ERROR if fcdTrie.index is NULL.
 *
 * For the other modes the text is scanned character by character:
 * - a non-zero combining class smaller than the previous one gives UNORM_NO
 * - a quick check "no" bit (_NORM_QC_ANY_NO) gives UNORM_NO
 * - any other quick check bit: with allowMaybe the result becomes UNORM_MAYBE
 *   and scanning continues; without allowMaybe the surrounding segment is
 *   run through _composePart() and compared with the source, giving
 *   UNORM_NO on a difference
 * Characters for which nx_contains() is true are treated as norm32==0.
 */
static UNormalizationCheckResult
_quickCheck(const UChar *src,
int32_t srcLength,
UNormalizationMode mode,
UBool allowMaybe,
const UnicodeSet *nx,
UErrorCode *pErrorCode) {
UChar stackBuffer[_STACK_BUFFER_CAPACITY];
UChar *buffer;
int32_t bufferCapacity;
const UChar *start, *limit;
uint32_t norm32, qcNorm32, ccOrQCMask, qcMask;
int32_t options;
UChar c, c2, minNoMaybe;
uint8_t cc, prevCC;
UNormalizationCheckResult result;
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return UNORM_MAYBE;
}
if(src==NULL || srcLength<-1) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return UNORM_MAYBE;
}
if(!_haveData(*pErrorCode)) {
return UNORM_MAYBE;
}
/* per-mode setup: scan threshold, quick check mask, and the options handed to _composePart() */
switch(mode) {
case UNORM_NFC:
minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE];
qcMask=_NORM_QC_NFC;
options=0;
break;
case UNORM_NFKC:
minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE];
qcMask=_NORM_QC_NFKC;
options=_NORM_OPTIONS_COMPAT;
break;
case UNORM_NFD:
minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE];
qcMask=_NORM_QC_NFD;
options=0;
break;
case UNORM_NFKD:
minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE];
qcMask=_NORM_QC_NFKD;
options=_NORM_OPTIONS_COMPAT;
break;
case UNORM_FCD:
if(fcdTrie.index==NULL) {
*pErrorCode=U_UNSUPPORTED_ERROR;
return UNORM_MAYBE;
}
return unorm_checkFCD(src, srcLength, nx) ? UNORM_YES : UNORM_NO;
default:
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return UNORM_MAYBE;
}
/* the side buffer is only used by _composePart(), which may move it to the heap */
buffer=stackBuffer;
bufferCapacity=_STACK_BUFFER_CAPACITY;
ccOrQCMask=_NORM_CC_MASK|qcMask;
result=UNORM_YES;
prevCC=0;
start=src;
if(srcLength>=0) {
limit=src+srcLength;
} else {
limit=NULL;
}
U_ALIGN_CODE(16);
for(;;) {
/*
 * fast loop: skip code units below minNoMaybe or without any ccOrQCMask
 * bits; leaves via goto endloop at the end of the text.
 * src is already incremented past c when the loop breaks.
 */
if(limit==NULL) {
for(;;) {
c=*src++;
if(c<minNoMaybe) {
if(c==0) {
goto endloop;
}
} else if(((norm32=_getNorm32(c))&ccOrQCMask)!=0) {
break;
}
prevCC=0;
}
} else {
for(;;) {
if(src==limit) {
goto endloop;
} else if((c=*src++)>=minNoMaybe && ((norm32=_getNorm32(c))&ccOrQCMask)!=0) {
break;
}
prevCC=0;
}
}
/* for a lead surrogate get the data of the whole pair; an unpaired one has no data */
if(isNorm32LeadSurrogate(norm32)) {
if(src!=limit && UTF_IS_SECOND_SURROGATE(c2=*src)) {
++src;
norm32=_getNorm32FromSurrogatePair(norm32, c2);
} else {
c2=0;
norm32=0;
}
} else {
c2=0;
}
if(nx_contains(nx, c, c2)) {
norm32=0;
}
/* combining classes must not decrease among consecutive non-zero values */
cc=(uint8_t)(norm32>>_NORM_CC_SHIFT);
if(cc!=0 && cc<prevCC) {
result=UNORM_NO;
break;
}
prevCC=cc;
qcNorm32=norm32&qcMask;
if(qcNorm32&_NORM_QC_ANY_NO) {
result=UNORM_NO;
break;
} else if(qcNorm32!=0) {
if(allowMaybe) {
/* remember "maybe" but keep scanning: a later character can still yield "no" */
result=UNORM_MAYBE;
} else {
/* definite answer required: normalize the segment around this character and compare */
const UChar *prevStarter;
uint32_t decompQCMask;
int32_t length;
decompQCMask=(qcMask<<2)&0xf;
/* start of the current character: step back over a trail surrogate */
prevStarter=src-1;
if(UTF_IS_TRAIL(*prevStarter)) {
--prevStarter;
}
prevStarter=_findPreviousStarter(start, prevStarter, ccOrQCMask, decompQCMask, minNoMaybe);
src=_findNextStarter(src, limit, qcMask, decompQCMask, minNoMaybe);
/* length and prevCC are outputs; the text ends up in buffer */
_composePart(stackBuffer, buffer, bufferCapacity,
length,
prevStarter,
src,
prevCC,
options, nx, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
result=UNORM_MAYBE;
break;
}
if(0!=uprv_strCompare(prevStarter, (int32_t)(src-prevStarter), buffer, length, FALSE, FALSE)) {
result=UNORM_NO;
break;
}
}
}
}
endloop:
/* release the side buffer if it was moved off the stack */
if(buffer!=stackBuffer) {
uprv_free(buffer);
}
return result;
}
/*
 * Public quick check without options: "maybe" answers are allowed and no
 * exclusion set is used. All argument checking happens in _quickCheck().
 */
U_CAPI UNormalizationCheckResult U_EXPORT2
unorm_quickCheck(const UChar *src,
                 int32_t srcLength,
                 UNormalizationMode mode,
                 UErrorCode *pErrorCode) {
    const UBool allowMaybe=TRUE;
    const UnicodeSet *noExclusions=NULL;

    return _quickCheck(src, srcLength, mode, allowMaybe, noExclusions, pErrorCode);
}
/*
 * Public quick check with options: "maybe" answers are allowed and the
 * exclusion set is derived from options via getNX().
 *
 * A NULL pErrorCode yields UNORM_MAYBE, the same answer _quickCheck() gives
 * for it. The check has to happen here because getNX() is handed
 * *pErrorCode before _quickCheck() gets a chance to validate the pointer.
 */
U_CAPI UNormalizationCheckResult U_EXPORT2
unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
                            UNormalizationMode mode, int32_t options,
                            UErrorCode *pErrorCode) {
    if(pErrorCode==NULL) {
        return UNORM_MAYBE;
    }
    return _quickCheck(src, srcLength, mode, TRUE, getNX(options, *pErrorCode), pErrorCode);
}
/*
 * Library-internal entry point to _quickCheck(): the caller chooses whether
 * "maybe" answers are acceptable and supplies the exclusion set directly.
 */
U_CFUNC UNormalizationCheckResult
unorm_internalQuickCheck(const UChar *src,
                         int32_t srcLength,
                         UNormalizationMode mode,
                         UBool allowMaybe,
                         const UnicodeSet *nx,
                         UErrorCode *pErrorCode) {
    const UNormalizationCheckResult checkResult=
        _quickCheck(src, srcLength, mode, allowMaybe, nx, pErrorCode);
    return checkResult;
}
/*
 * Returns TRUE only if _quickCheck() answers UNORM_YES. "Maybe" answers are
 * disabled, so ambiguous segments get normalized and compared; no exclusion
 * set is used.
 */
U_CAPI UBool U_EXPORT2
unorm_isNormalized(const UChar *src, int32_t srcLength,
                   UNormalizationMode mode,
                   UErrorCode *pErrorCode) {
    const UNormalizationCheckResult checkResult=
        _quickCheck(src, srcLength, mode, FALSE, NULL, pErrorCode);
    return (UBool)(checkResult==UNORM_YES);
}
/*
 * Returns TRUE only if _quickCheck() answers UNORM_YES, with "maybe" answers
 * disabled and the exclusion set derived from options via getNX().
 *
 * A NULL pErrorCode yields FALSE, matching what the comparison against
 * _quickCheck()'s UNORM_MAYBE would produce. The check has to happen here
 * because getNX() is handed *pErrorCode before _quickCheck() gets a chance
 * to validate the pointer.
 */
U_CAPI UBool U_EXPORT2
unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
                              UNormalizationMode mode, int32_t options,
                              UErrorCode *pErrorCode) {
    if(pErrorCode==NULL) {
        return FALSE;
    }
    return (UBool)(UNORM_YES==_quickCheck(src, srcLength, mode, FALSE, getNX(options, *pErrorCode), pErrorCode));
}
/*
 * Normalizes src into dest according to mode, using the exclusion set nx.
 *
 * NFD/NFKD go through _decompose(), NFC/NFKC through _compose() (NFKC adds
 * _NORM_OPTIONS_COMPAT to options), FCD through unorm_makeFCD() (which
 * terminates the result itself; U_UNSUPPORTED_ERROR if fcdTrie.index is
 * NULL), and UNORM_NONE copies the source if it fits.
 * Any other mode sets U_ILLEGAL_ARGUMENT_ERROR and returns 0.
 * Except for FCD, the result is terminated with u_terminateUChars().
 */
U_CFUNC int32_t
unorm_internalNormalizeWithNX(UChar *dest, int32_t destCapacity,
                              const UChar *src, int32_t srcLength,
                              UNormalizationMode mode, int32_t options, const UnicodeSet *nx,
                              UErrorCode *pErrorCode) {
    int32_t resultLength;
    uint8_t lastCC;

    switch(mode) {
    case UNORM_NFD:
    case UNORM_NFKD:
        /* compatibility decomposition only for NFKD */
        resultLength=_decompose(dest, destCapacity,
                                src, srcLength,
                                (UBool)(mode==UNORM_NFKD), nx, lastCC);
        break;
    case UNORM_NFC:
    case UNORM_NFKC:
        /* compatibility composition only for NFKC */
        if(mode==UNORM_NFKC) {
            options|=_NORM_OPTIONS_COMPAT;
        }
        resultLength=_compose(dest, destCapacity,
                              src, srcLength,
                              options, nx, pErrorCode);
        break;
    case UNORM_FCD:
        if(fcdTrie.index==NULL) {
            *pErrorCode=U_UNSUPPORTED_ERROR;
            return 0;
        }
        return unorm_makeFCD(dest, destCapacity,
                             src, srcLength,
                             nx,
                             pErrorCode);
#if 0
    case UNORM_FCC:
        resultLength=_compose(dest, destCapacity,
                              src, srcLength,
                              options|_NORM_OPTIONS_COMPOSE_CONTIGUOUS, nx, pErrorCode);
        break;
#endif
    case UNORM_NONE:
        /* plain copy; the length is still reported when the text does not fit */
        if(srcLength==-1) {
            srcLength=u_strlen(src);
        }
        if(srcLength>0 && srcLength<=destCapacity) {
            uprv_memcpy(dest, src, srcLength*U_SIZEOF_UCHAR);
        }
        resultLength=srcLength;
        break;
    default:
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    return u_terminateUChars(dest, destCapacity, resultLength, pErrorCode);
}
/*
 * Normalizes src into dest according to mode after resolving the exclusion
 * set from options.
 *
 * Returns 0 when the normalization data is unavailable or getNX() reports a
 * failure. The set-selection, compat and compose-contiguous bits are removed
 * from options before the call to unorm_internalNormalizeWithNX().
 */
U_CAPI int32_t U_EXPORT2
unorm_internalNormalize(UChar *dest, int32_t destCapacity,
                        const UChar *src, int32_t srcLength,
                        UNormalizationMode mode, int32_t options,
                        UErrorCode *pErrorCode) {
    if(!_haveData(*pErrorCode)) {
        return 0;
    }

    /* resolve the exclusion set from the caller's original option bits */
    const UnicodeSet *exclusions=getNX(options, *pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* strip the bits that are reserved for internal use */
    const int32_t internalBits=
        _NORM_OPTIONS_SETS_MASK|_NORM_OPTIONS_COMPAT|_NORM_OPTIONS_COMPOSE_CONTIGUOUS;
    const int32_t publicOptions=options&~internalBits;

    return unorm_internalNormalizeWithNX(dest, destCapacity,
                                         src, srcLength,
                                         mode, publicOptions, exclusions,
                                         pErrorCode);
}
/*
 * Public normalization entry point: validates the arguments and forwards to
 * unorm_internalNormalize().
 *
 * Returns 0 immediately if pErrorCode is NULL or already a failure.
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a negative destCapacity,
 * a NULL dest with positive capacity, a NULL src, srcLength<-1, or when the
 * source and destination ranges overlap.
 */
U_CAPI int32_t U_EXPORT2
unorm_normalize(const UChar *src, int32_t srcLength,
                UNormalizationMode mode, int32_t options,
                UChar *dest, int32_t destCapacity,
                UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* basic sanity of the buffers */
    const UBool badDest=(UBool)(destCapacity<0 || (dest==NULL && destCapacity>0));
    const UBool badSrc=(UBool)(src==NULL || srcLength<-1);
    if(badDest || badSrc) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* source and destination must not overlap */
    if(dest!=NULL) {
        const UBool srcStartsInDest=(UBool)(src>=dest && src<(dest+destCapacity));
        const UBool destStartsInSrc=(UBool)(srcLength>0 && dest>=src && dest<(src+srcLength));
        if(srcStartsInDest || destStartsInSrc) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
    }

    return unorm_internalNormalize(dest, destCapacity,
                                   src, srcLength,
                                   mode, options,
                                   pErrorCode);
}
/*
 * Moves the iterator back by one character and returns its norm32 value.
 * The caller must ensure that the iterator has a previous code unit.
 *
 * On return c holds the code unit read first (going backwards) and c2 the
 * lead surrogate if a surrogate pair was read, otherwise 0.
 * Returns 0 for code units below minC, for unpaired surrogates, and for
 * surrogate pairs whose lead surrogate data has none of the mask bits set.
 */
static inline uint32_t
_getPrevNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) {
    c=(UChar)src.previous(&src);
    c2=0;

    if(c<minC) {
        return 0;
    }
    if(!UTF_IS_SURROGATE(c)) {
        return _getNorm32(c);
    }

    /* a lead surrogate read backwards, or nothing before a trail surrogate: unpaired */
    if(UTF_IS_SURROGATE_FIRST(c) || !src.hasPrevious(&src)) {
        return 0;
    }

    /* c is a trail surrogate: look at the code unit before it */
    c2=(UChar)src.previous(&src);
    if(!UTF_IS_FIRST_SURROGATE(c2)) {
        /* not a lead surrogate: undo the extra step, c stays unpaired */
        src.move(&src, 1, UITER_CURRENT);
        c2=0;
        return 0;
    }

    const uint32_t leadNorm32=_getNorm32(c2);
    if((leadNorm32&mask)==0) {
        return 0;
    }
    return _getNorm32FromSurrogatePair(leadNorm32, c);
}
/*
 * Signature of the boundary tests used by _findPreviousIterationBoundary():
 * the function is called with the iterator, minC and mask, and reports
 * through c and c2 the code unit(s) it read (c2==0 means a single code unit).
 * It returns whether that character is a boundary.
 */
typedef UBool
IsPrevBoundaryFn(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2);
/*
 * IsPrevBoundaryFn implementation used for the decomposing modes: reads the
 * previous character and asks _isNFDSafe() about its norm32 value.
 * The decomposition mask is the quick check part of ccOrQCMask.
 */
static UBool
_isPrevNFDSafe(UCharIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
    const uint32_t prevNorm32=_getPrevNorm32(src, minC, ccOrQCMask, c, c2);
    const uint32_t decompQCMask=ccOrQCMask&_NORM_QC_MASK;

    return _isNFDSafe(prevNorm32, ccOrQCMask, decompQCMask);
}
/*
 * IsPrevBoundaryFn implementation used for the composing modes: reads the
 * previous character and asks _isTrueStarter() about its norm32 value.
 * The decomposition quick check mask is derived from ccOrQCMask by shifting
 * it left by two and keeping the low four bits.
 */
static UBool
_isPrevTrueStarter(UCharIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
    const uint32_t decompQCMask=(ccOrQCMask<<2)&0xf;
    const uint32_t prevNorm32=_getPrevNorm32(src, minC, ccOrQCMask|decompQCMask, c, c2);

    return _isTrueStarter(prevNorm32, ccOrQCMask, decompQCMask);
}
/*
 * Iterates backwards from the current iterator position and collects the
 * characters read into buffer, filling it from the end towards the front,
 * until isPrevBoundary() reports a boundary or the start of the text is
 * reached. The boundary character itself is included.
 *
 * On entry buffer/bufferCapacity describe the caller's initial buffer; both
 * may be replaced if the buffer has to grow. startIndex receives the index
 * of the first collected code unit in buffer.
 * Returns the number of collected code units, or 0 with *pErrorCode set to
 * U_MEMORY_ALLOCATION_ERROR (and the iterator moved to UITER_START) if the
 * buffer could not be grown.
 */
static int32_t
_findPreviousIterationBoundary(UCharIterator &src,
IsPrevBoundaryFn *isPrevBoundary, uint32_t minC, uint32_t mask,
UChar *&buffer, int32_t &bufferCapacity,
int32_t &startIndex,
UErrorCode *pErrorCode) {
UChar *stackBuffer;
UChar c, c2;
UBool isBoundary;
/* remember the initial buffer so that u_growBufferFromStatic() can tell it apart from a grown one */
stackBuffer=buffer;
startIndex=bufferCapacity;
while(src.hasPrevious(&src)) {
isBoundary=isPrevBoundary(src, minC, mask, c, c2);
/* need room for one or two code units in front of startIndex */
if(startIndex < (c2==0 ? 1 : 2)) {
int32_t bufferLength=bufferCapacity;
if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferCapacity, bufferLength)) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
src.move(&src, 0, UITER_START);
return 0;
}
/* the old contents sit at the front of the grown buffer: shift them to its end */
uprv_memmove(buffer+(bufferCapacity-bufferLength), buffer, bufferLength*U_SIZEOF_UCHAR);
startIndex+=bufferCapacity-bufferLength;
}
/* written back to front: c was read first, c2 (if any) precedes it in the text */
buffer[--startIndex]=c;
if(c2!=0) {
buffer[--startIndex]=c2;
}
if(isBoundary) {
break;
}
}
return bufferCapacity-startIndex;
}
/*
 * Iterates backwards from the current position of src to the previous
 * normalization boundary for mode and writes that piece of text to dest,
 * normalized if doNormalize is TRUE and copied verbatim otherwise.
 *
 * @param src                iterator; on return it is positioned before the text that was read
 * @param dest               output buffer, may be NULL if destCapacity==0
 * @param destCapacity       capacity of dest in UChars
 * @param mode               normalization mode; UNORM_NONE returns one code point
 * @param options            passed through to unorm_internalNormalize()
 * @param doNormalize        whether the collected text gets normalized
 * @param pNeededToNormalize optional output: set to FALSE initially and, when
 *                           normalizing succeeds, to whether the output
 *                           differs from the collected source text
 * @param pErrorCode         in/out error code; nothing is done if it is NULL or a failure
 * @return the length of the output text; the value produced by
 *         u_terminateUChars()/unorm_internalNormalize(), so it can exceed destCapacity
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR for invalid buffer arguments, a NULL src or
 * an unknown mode, and U_UNSUPPORTED_ERROR for UNORM_FCD when fcdTrie.index
 * is NULL.
 */
U_CAPI int32_t U_EXPORT2
unorm_previous(UCharIterator *src,
               UChar *dest, int32_t destCapacity,
               UNormalizationMode mode, int32_t options,
               UBool doNormalize, UBool *pNeededToNormalize,
               UErrorCode *pErrorCode) {
    UChar stackBuffer[100];
    UChar *buffer=NULL;
    IsPrevBoundaryFn *isPreviousBoundary=NULL;
    uint32_t mask=0;
    int32_t startIndex=0, bufferLength=0, bufferCapacity=0, destLength=0;
    int32_t c=0, c2=0;
    UChar minC=0;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if( destCapacity<0 || (dest==NULL && destCapacity>0) ||
        src==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(!_haveData(*pErrorCode)) {
        return 0;
    }
    if(pNeededToNormalize!=NULL) {
        *pNeededToNormalize=FALSE;
    }

    /* select the boundary test, scan threshold and data mask for the mode */
    switch(mode) {
    case UNORM_FCD:
        if(fcdTrie.index==NULL) {
            *pErrorCode=U_UNSUPPORTED_ERROR;
            return 0;
        }
        /* fall through: FCD uses the same boundaries as NFD */
    case UNORM_NFD:
        isPreviousBoundary=_isPrevNFDSafe;
        minC=_NORM_MIN_WITH_LEAD_CC;
        mask=_NORM_CC_MASK|_NORM_QC_NFD;
        break;
    case UNORM_NFKD:
        isPreviousBoundary=_isPrevNFDSafe;
        minC=_NORM_MIN_WITH_LEAD_CC;
        mask=_NORM_CC_MASK|_NORM_QC_NFKD;
        break;
    case UNORM_NFC:
        isPreviousBoundary=_isPrevTrueStarter;
        minC=(UChar)indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE];
        mask=_NORM_CC_MASK|_NORM_QC_NFC;
        break;
    case UNORM_NFKC:
        isPreviousBoundary=_isPrevTrueStarter;
        minC=(UChar)indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE];
        mask=_NORM_CC_MASK|_NORM_QC_NFKC;
        break;
    case UNORM_NONE:
        /* special case: return one code point without normalization */
        destLength=0;
        if((c=src->previous(src))>=0) {
            destLength=1;
            if(UTF_IS_TRAIL(c) && (c2=src->previous(src))>=0) {
                if(UTF_IS_LEAD(c2)) {
                    /*
                     * The iterator has consumed both surrogates, so the
                     * result is two code units long regardless of whether
                     * the trail surrogate fits; reporting 2 lets
                     * u_terminateUChars() flag the overflow instead of
                     * silently dropping half of the pair.
                     */
                    if(destCapacity>=2) {
                        dest[1]=(UChar)c;
                    }
                    destLength=2;
                    c=c2; /* the lead surrogate is written to dest[0] below */
                } else {
                    /* not a pair: undo reading the second code unit */
                    src->move(src, 1, UITER_CURRENT);
                }
            }
            if(destCapacity>0) {
                dest[0]=(UChar)c;
            }
        }
        return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
    default:
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* collect the text back to the previous boundary; it ends up at buffer+startIndex */
    buffer=stackBuffer;
    bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR);
    bufferLength=_findPreviousIterationBoundary(*src,
                                                isPreviousBoundary, minC, mask,
                                                buffer, bufferCapacity,
                                                startIndex,
                                                pErrorCode);
    if(bufferLength>0) {
        if(doNormalize) {
            destLength=unorm_internalNormalize(dest, destCapacity,
                                               buffer+startIndex, bufferLength,
                                               mode, options,
                                               pErrorCode);
            if(pNeededToNormalize!=0 && U_SUCCESS(*pErrorCode)) {
                *pNeededToNormalize=
                    (UBool)(destLength!=bufferLength ||
                            0!=uprv_memcmp(dest, buffer+startIndex, destLength*U_SIZEOF_UCHAR));
            }
        } else {
            /* copy as much of the source text as fits; the full length is still reported */
            if(destCapacity>0) {
                uprv_memcpy(dest, buffer+startIndex, uprv_min(bufferLength, destCapacity)*U_SIZEOF_UCHAR);
            }
            destLength=u_terminateUChars(dest, destCapacity, bufferLength, pErrorCode);
        }
    } else {
        destLength=u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    }

    /* release the collection buffer if it was moved off the stack */
    if(buffer!=stackBuffer) {
        uprv_free(buffer);
    }
    return destLength;
}
/*
 * Moves the iterator forward by one character and returns its norm32 value.
 * The caller must ensure that the iterator has a next code unit.
 *
 * On return c holds the first code unit read and c2 the trail surrogate if
 * a surrogate pair was read, otherwise 0.
 * Returns 0 for code units below minC, for unpaired lead surrogates, and for
 * surrogate pairs whose lead surrogate data has none of the mask bits set.
 */
static inline uint32_t
_getNextNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2) {
    c=(UChar)src.next(&src);
    c2=0;

    if(c<minC) {
        return 0;
    }

    const uint32_t leadNorm32=_getNorm32(c);
    if(!UTF_IS_FIRST_SURROGATE(c)) {
        return leadNorm32;
    }

    /* c is a lead surrogate: peek at the following code unit without consuming it */
    if(!src.hasNext(&src)) {
        c2=0;
        return 0;
    }
    c2=(UChar)src.current(&src);
    if(!UTF_IS_SECOND_SURROGATE(c2)) {
        /* unpaired lead surrogate */
        c2=0;
        return 0;
    }

    /* a real pair: consume the trail surrogate */
    src.move(&src, 1, UITER_CURRENT);
    if((leadNorm32&mask)==0) {
        return 0;
    }
    return _getNorm32FromSurrogatePair(leadNorm32, c2);
}
/*
 * Signature of the boundary tests used by _findNextIterationBoundary():
 * the function is called with the iterator, minC and mask, and reports
 * through c and c2 the code unit(s) it read (c2==0 means a single code unit).
 * It returns whether that character is a boundary.
 */
typedef UBool
IsNextBoundaryFn(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar &c2);
/*
 * IsNextBoundaryFn implementation used for the decomposing modes: reads the
 * next character and asks _isNFDSafe() about its norm32 value.
 * The decomposition mask is the quick check part of ccOrQCMask.
 */
static UBool
_isNextNFDSafe(UCharIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
    const uint32_t nextNorm32=_getNextNorm32(src, minC, ccOrQCMask, c, c2);
    const uint32_t decompQCMask=ccOrQCMask&_NORM_QC_MASK;

    return _isNFDSafe(nextNorm32, ccOrQCMask, decompQCMask);
}
/*
 * IsNextBoundaryFn implementation used for the composing modes: reads the
 * next character and asks _isTrueStarter() about its norm32 value.
 * The decomposition quick check mask is derived from ccOrQCMask by shifting
 * it left by two and keeping the low four bits.
 */
static UBool
_isNextTrueStarter(UCharIterator &src, uint32_t minC, uint32_t ccOrQCMask, UChar &c, UChar &c2) {
    const uint32_t decompQCMask=(ccOrQCMask<<2)&0xf;
    const uint32_t nextNorm32=_getNextNorm32(src, minC, ccOrQCMask|decompQCMask, c, c2);

    return _isTrueStarter(nextNorm32, ccOrQCMask, decompQCMask);
}
/*
 * Iterates forward from the current iterator position and collects the
 * characters read into buffer until isNextBoundary() reports a boundary or
 * the end of the text is reached. The first character is always taken
 * without a boundary test; the boundary character itself is not included
 * and the iterator is moved back to just before it.
 *
 * On entry buffer/bufferCapacity describe the caller's initial buffer; both
 * may be replaced if the buffer has to grow.
 * Returns the number of collected code units; 0 if the iterator is already
 * at the end, or 0 with *pErrorCode set to U_MEMORY_ALLOCATION_ERROR (and
 * the iterator moved to UITER_LIMIT) if the buffer could not be grown.
 */
static int32_t
_findNextIterationBoundary(UCharIterator &src,
IsNextBoundaryFn *isNextBoundary, uint32_t minC, uint32_t mask,
UChar *&buffer, int32_t &bufferCapacity,
UErrorCode *pErrorCode) {
UChar *stackBuffer;
int32_t bufferIndex;
UChar c, c2;
if(!src.hasNext(&src)) {
return 0;
}
/* remember the initial buffer so that u_growBufferFromStatic() can tell it apart from a grown one */
stackBuffer=buffer;
/* take the first character unconditionally, including the trail of a surrogate pair */
buffer[0]=c=(UChar)src.next(&src);
bufferIndex=1;
if(UTF_IS_FIRST_SURROGATE(c) && src.hasNext(&src)) {
if(UTF_IS_SECOND_SURROGATE(c2=(UChar)src.next(&src))) {
buffer[bufferIndex++]=c2;
} else {
/* not a trail surrogate: put it back */
src.move(&src, -1, UITER_CURRENT);
}
}
while(src.hasNext(&src)) {
if(isNextBoundary(src, minC, mask, c, c2)) {
/* un-read the boundary character (one or two code units) and stop */
src.move(&src, c2==0 ? -1 : -2, UITER_CURRENT);
break;
} else {
/* append if there is room, growing the buffer (keeping bufferIndex units) if needed */
if(bufferIndex+(c2==0 ? 1 : 2)<=bufferCapacity ||
u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity,
2*bufferCapacity,
bufferIndex)
) {
buffer[bufferIndex++]=c;
if(c2!=0) {
buffer[bufferIndex++]=c2;
}
} else {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
src.move(&src, 0, UITER_LIMIT);
return 0;
}
}
}
return bufferIndex;
}
/*
 * Iterates forward from the current position of src to the next
 * normalization boundary for mode and writes that piece of text to dest,
 * normalized if doNormalize is TRUE and copied verbatim otherwise.
 *
 * @param src                iterator; on return it is positioned after the text that was read
 * @param dest               output buffer, may be NULL if destCapacity==0
 * @param destCapacity       capacity of dest in UChars
 * @param mode               normalization mode; UNORM_NONE returns one code point
 * @param options            passed through to unorm_internalNormalize()
 * @param doNormalize        whether the collected text gets normalized
 * @param pNeededToNormalize optional output: set to FALSE initially and, when
 *                           normalizing succeeds, to whether the output
 *                           differs from the collected source text
 * @param pErrorCode         in/out error code; nothing is done if it is NULL or a failure
 * @return the length of the output text; the value produced by
 *         u_terminateUChars()/unorm_internalNormalize(), so it can exceed destCapacity
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR for invalid buffer arguments, a NULL src or
 * an unknown mode, and U_UNSUPPORTED_ERROR for UNORM_FCD when fcdTrie.index
 * is NULL.
 */
U_CAPI int32_t U_EXPORT2
unorm_next(UCharIterator *src,
           UChar *dest, int32_t destCapacity,
           UNormalizationMode mode, int32_t options,
           UBool doNormalize, UBool *pNeededToNormalize,
           UErrorCode *pErrorCode) {
    UChar stackBuffer[100];
    UChar *buffer;
    IsNextBoundaryFn *isNextBoundary;
    uint32_t mask;
    int32_t bufferLength, bufferCapacity, destLength;
    int32_t c, c2;
    UChar minC;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if( destCapacity<0 || (dest==NULL && destCapacity>0) ||
        src==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(!_haveData(*pErrorCode)) {
        return 0;
    }
    if(pNeededToNormalize!=NULL) {
        *pNeededToNormalize=FALSE;
    }

    /* select the boundary test, scan threshold and data mask for the mode */
    switch(mode) {
    case UNORM_FCD:
        if(fcdTrie.index==NULL) {
            *pErrorCode=U_UNSUPPORTED_ERROR;
            return 0;
        }
        /* fall through: FCD uses the same boundaries as NFD */
    case UNORM_NFD:
        isNextBoundary=_isNextNFDSafe;
        minC=_NORM_MIN_WITH_LEAD_CC;
        mask=_NORM_CC_MASK|_NORM_QC_NFD;
        break;
    case UNORM_NFKD:
        isNextBoundary=_isNextNFDSafe;
        minC=_NORM_MIN_WITH_LEAD_CC;
        mask=_NORM_CC_MASK|_NORM_QC_NFKD;
        break;
    case UNORM_NFC:
        isNextBoundary=_isNextTrueStarter;
        minC=(UChar)indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE];
        mask=_NORM_CC_MASK|_NORM_QC_NFC;
        break;
    case UNORM_NFKC:
        isNextBoundary=_isNextTrueStarter;
        minC=(UChar)indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE];
        mask=_NORM_CC_MASK|_NORM_QC_NFKC;
        break;
    case UNORM_NONE:
        /* special case: return one code point without normalization */
        destLength=0;
        if((c=src->next(src))>=0) {
            destLength=1;
            if(UTF_IS_LEAD(c) && (c2=src->next(src))>=0) {
                if(UTF_IS_TRAIL(c2)) {
                    /*
                     * The iterator has consumed both surrogates, so the
                     * result is two code units long regardless of whether
                     * the trail surrogate fits; reporting 2 lets
                     * u_terminateUChars() flag the overflow instead of
                     * silently dropping half of the pair.
                     */
                    if(destCapacity>=2) {
                        dest[1]=(UChar)c2;
                    }
                    destLength=2;
                } else {
                    /* not a pair: undo reading the second code unit */
                    src->move(src, -1, UITER_CURRENT);
                }
            }
            if(destCapacity>0) {
                dest[0]=(UChar)c;
            }
        }
        return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
    default:
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* collect the text up to the next boundary at the start of buffer */
    buffer=stackBuffer;
    bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR);
    bufferLength=_findNextIterationBoundary(*src,
                                            isNextBoundary, minC, mask,
                                            buffer, bufferCapacity,
                                            pErrorCode);
    if(bufferLength>0) {
        if(doNormalize) {
            destLength=unorm_internalNormalize(dest, destCapacity,
                                               buffer, bufferLength,
                                               mode, options,
                                               pErrorCode);
            if(pNeededToNormalize!=0 && U_SUCCESS(*pErrorCode)) {
                *pNeededToNormalize=
                    (UBool)(destLength!=bufferLength ||
                            0!=uprv_memcmp(dest, buffer, destLength*U_SIZEOF_UCHAR));
            }
        } else {
            /* copy as much of the source text as fits; the full length is still reported */
            if(destCapacity>0) {
                uprv_memcpy(dest, buffer, uprv_min(bufferLength, destCapacity)*U_SIZEOF_UCHAR);
            }
            destLength=u_terminateUChars(dest, destCapacity, bufferLength, pErrorCode);
        }
    } else {
        destLength=u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    }

    /* release the collection buffer if it was moved off the stack */
    if(buffer!=stackBuffer) {
        uprv_free(buffer);
    }
    return destLength;
}
/*
 * Concatenates left and right so that the result is normalized around the
 * junction, assuming that each input is handled correctly outside of it:
 *
 *   buffer = left[leftBoundary..leftLength[ + right[0..rightBoundary[
 *   dest   = left[0..leftBoundary[ + normalize(buffer) + right[rightBoundary..rightLength[
 *
 * leftBoundary and rightBoundary are the boundaries found by
 * unorm_previous() at the end of left and unorm_next() at the start of right.
 *
 * @param left, leftLength    first string (length -1 for NUL-terminated)
 * @param right, rightLength  second string (length -1 for NUL-terminated);
 *                            must not overlap dest
 * @param dest, destCapacity  output buffer; dest may be the same pointer as left
 * @param mode, options       passed to unorm_previous(), unorm_next() and
 *                            unorm_internalNormalize()
 * @param pErrorCode          in/out error code; nothing is done if it is NULL or a failure
 * @return the result of u_terminateUChars() for the full length of the result
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR for invalid arguments or overlapping
 * right/dest, and U_MEMORY_ALLOCATION_ERROR (returning 0) if the
 * intermediate buffer cannot be grown.
 */
U_CAPI int32_t U_EXPORT2
unorm_concatenate(const UChar *left, int32_t leftLength,
                  const UChar *right, int32_t rightLength,
                  UChar *dest, int32_t destCapacity,
                  UNormalizationMode mode, int32_t options,
                  UErrorCode *pErrorCode) {
    UChar stackBuffer[100];
    UChar *buffer;
    int32_t bufferLength, bufferCapacity;
    UCharIterator iter;
    int32_t leftBoundary, rightBoundary, destLength;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if( destCapacity<0 || (dest==NULL && destCapacity>0) ||
        left==NULL || leftLength<-1 ||
        right==NULL || rightLength<-1
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* right and dest must not overlap; left==dest is allowed */
    if( dest!=NULL &&
        ((right>=dest && right<(dest+destCapacity)) ||
         (rightLength>0 && dest>=right && dest<(right+rightLength)))
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* intermediate buffer for the text around the junction */
    buffer=stackBuffer;
    bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR);

    /* copy the end of left, back to its last boundary, into the buffer */
    uiter_setString(&iter, left, leftLength);
    iter.index=leftLength=iter.length; /* start at the end; also resolves leftLength==-1 */
    bufferLength=unorm_previous(&iter, buffer, bufferCapacity,
                                mode, options,
                                FALSE, NULL,
                                pErrorCode);
    leftBoundary=iter.index;
    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
        *pErrorCode=U_ZERO_ERROR;
        /* nothing worth keeping in the buffer yet, so copy length 0 */
        if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferLength, 0)) {
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        /* the boundary is known already: copy straight from left */
        uprv_memcpy(buffer, left+leftBoundary, bufferLength*U_SIZEOF_UCHAR);
    }

    /* append the start of right, up to its first boundary, to the buffer */
    uiter_setString(&iter, right, rightLength);
    rightLength=iter.length; /* resolves rightLength==-1 */
    rightBoundary=unorm_next(&iter, buffer+bufferLength, bufferCapacity-bufferLength,
                             mode, options,
                             FALSE, NULL,
                             pErrorCode);
    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
        *pErrorCode=U_ZERO_ERROR;
        /*
         * The buffer already holds the bufferLength code units taken from
         * left; they must be carried over into the grown buffer, otherwise
         * the normalization below would read uninitialized memory in their
         * place.
         */
        if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, bufferLength+rightBoundary, bufferLength)) {
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        /* the boundary is known already: copy straight from right */
        uprv_memcpy(buffer+bufferLength, right, rightBoundary*U_SIZEOF_UCHAR);
    }
    bufferLength+=rightBoundary;

    /* dest starts with left[0..leftBoundary[ (already in place if left==dest) */
    if(left!=dest && leftBoundary>0 && destCapacity>0) {
        uprv_memcpy(dest, left, uprv_min(leftBoundary, destCapacity)*U_SIZEOF_UCHAR);
    }
    destLength=leftBoundary;

    /* then the normalized junction text; preflight only if dest is already full */
    if(destCapacity>destLength) {
        destLength+=unorm_internalNormalize(dest+destLength, destCapacity-destLength,
                                            buffer, bufferLength,
                                            mode, options,
                                            pErrorCode);
    } else {
        destLength+=unorm_internalNormalize(NULL, 0,
                                            buffer, bufferLength,
                                            mode, options,
                                            pErrorCode);
    }

    /* finally right[rightBoundary..rightLength[, as far as it fits */
    right+=rightBoundary;
    rightLength-=rightBoundary;
    if(rightLength>0 && destCapacity>destLength) {
        uprv_memcpy(dest+destLength, right, uprv_min(rightLength, destCapacity-destLength)*U_SIZEOF_UCHAR);
    }
    destLength+=rightLength;

    /* release the intermediate buffer if it was moved off the stack */
    if(buffer!=stackBuffer) {
        uprv_free(buffer);
    }
    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
#endif