#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include <stdio.h>
#include <stdlib.h>
#include "unicode/errorcode.h"
#include "unicode/unistr.h"
#include "unicode/utf16.h"
#include "extradata.h"
#include "normalizer2impl.h"
#include "norms.h"
#include "toolutil.h"
#include "utrie2.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
// Sets up the extra-data buffers used while enumerating the Norms in n.
//   n    - the Norms collection handed to the Norms::Enumerator base.
//   fast - stored in optimizeFast; when true, writeExtraData() does not try
//          the compact delta encoding for no-no mappings.
// Each pre-filled buffer starts with placeholder code units so that real data
// begins at a non-zero index:
//   yesYesCompositions            - two U+FFFF units
//   yesNoMappingsAndCompositions  - one unit, set to 2 below
//   yesNoMappingsOnly             - one unit, set to 3 below
ExtraData::ExtraData(Norms &n, UBool fast) :
        Norms::Enumerator(n),
        yesYesCompositions(1000, static_cast<UChar32>(0xffff), 2),
        yesNoMappingsAndCompositions(1000, static_cast<UChar32>(0), 1),
        yesNoMappingsOnly(1000, static_cast<UChar32>(0), 1),
        optimizeFast(fast) {
    // NOTE(review): the values 2 and 3 look like dummy "first units" for the
    // algorithmic Hangul LV / LVT decompositions (2 and 3 Jamo) -- confirm
    // against the data format description in normalizer2impl.h.
    yesNoMappingsAndCompositions.setCharAt(0, 2);
    yesNoMappingsOnly.setCharAt(0, 3);
}
// Appends the encoded mapping data for code point c to dataString.
//
// Units are appended in this order:
//   [raw mapping data]   only if norm.rawMapping is set (see below)
//   [ccc/lccc word]      only if norm.cc|(norm.leadCC<<8) is non-zero
//   firstUnit            mapping length | trailCC<<8 | flag bits
//   mapping              the code units of *norm.mapping
//
// Returns the number of units written before firstUnit, so that
// (length of dataString before the call)+(return value) is the index of firstUnit.
// Exits the program if the raw mapping is longer than MAPPING_LENGTH_MASK.
int32_t ExtraData::writeMapping(UChar32 c, const Norm &norm, UnicodeString &dataString) {
    const UnicodeString &mapping=*norm.mapping;
    const int32_t mappingLength=mapping.length();
    int32_t header=mappingLength|(norm.trailCC<<8);
    int32_t unitsBeforeHeader=0;

    const UnicodeString *raw=norm.rawMapping;
    if(raw!=nullptr) {
        const int32_t rawLength=raw->length();
        if(rawLength>Normalizer2Impl::MAPPING_LENGTH_MASK) {
            fprintf(stderr,
                    "gennorm2 error: "
                    "raw mapping for U+%04lX longer than maximum of %d\n",
                    static_cast<long>(c), Normalizer2Impl::MAPPING_LENGTH_MASK);
            exit(U_INVALID_FORMAT_ERROR);
        }
        const UChar rawFirst=raw->charAt(0);
        // The raw mapping can be stored as its first unit alone when it equals
        // the final mapping with the first two units replaced by rawFirst, and
        // rawFirst cannot be confused with a stored raw-mapping length.
        // The 99s are deliberately overlong substring lengths.
        const bool storeFirstUnitOnly=
            rawLength==mappingLength-1 &&
            0==raw->compare(1, 99, mapping, 2, 99) &&
            rawFirst>Normalizer2Impl::MAPPING_LENGTH_MASK;
        if(storeFirstUnitOnly) {
            dataString.append(rawFirst);
            unitsBeforeHeader=1;
        } else {
            // Full raw mapping followed by its length.
            dataString.append(*raw);
            dataString.append(static_cast<UChar>(rawLength));
            unitsBeforeHeader=rawLength+1;
        }
        header|=Normalizer2Impl::MAPPING_HAS_RAW_MAPPING;
    }

    const int32_t cccLccc=norm.cc|(norm.leadCC<<8);
    if(cccLccc!=0) {
        dataString.append(static_cast<UChar>(cccLccc));
        ++unitsBeforeHeader;
        header|=Normalizer2Impl::MAPPING_HAS_CCC_LCCC_WORD;
    }

    dataString.append(static_cast<UChar>(header));
    dataString.append(mapping);
    return unitsBeforeHeader;
}
// Stores the encoded mapping for c in dataString unless an identical encoding
// was stored before, and returns the index of its firstUnit in dataString.
//
// previousMappings maps an encoded mapping to (offset+1); the +1 keeps 0 free
// because this code treats a geti() result of 0 as "not stored yet".
int32_t ExtraData::writeNoNoMapping(UChar32 c, const Norm &norm,
                                    UnicodeString &dataString,
                                    Hashtable &previousMappings) {
    // Encode into a scratch string first so it can serve as the lookup key.
    UnicodeString encoded;
    const int32_t unitsBeforeFirstUnit=writeMapping(c, norm, encoded);

    const int32_t knownOffsetPlusOne=previousMappings.geti(encoded);
    if(knownOffsetPlusOne!=0) {
        // Duplicate: reuse the earlier copy.
        return knownOffsetPlusOne-1;
    }

    // New encoding: the offset is computed before appending.
    const int32_t firstUnitOffset=dataString.length()+unitsBeforeFirstUnit;
    dataString.append(encoded);
    IcuToolErrorCode errorCode("gennorm2/writeExtraData()/Hashtable.puti()");
    previousMappings.puti(encoded, firstUnitOffset+1, errorCode);
    return firstUnitOffset;
}
// Tries to replace the mapping of c with a compact "delta" to a single code point.
//
// This applies only when all of the following hold:
//   - norm.mappingCP is non-negative,
//   - it is not a mapping from c<=0x7f to a target >0x7f,
//   - the target's Norm type orders before Norm::NO_NO_COMP_YES,
//   - the difference target-c lies within +-Normalizer2Impl::MAX_DELTA.
// On success sets norm.type to Norm::NO_NO_DELTA, stores the delta in
// norm.offset and returns TRUE; otherwise leaves norm untouched and returns FALSE.
UBool ExtraData::setNoNoDelta(UChar32 c, Norm &norm) const {
    if(norm.mappingCP<0) {
        return FALSE;
    }
    if(c<=0x7f && norm.mappingCP>0x7f) {
        return FALSE;
    }
    if(!(norms.getNormRef(norm.mappingCP).type<Norm::NO_NO_COMP_YES)) {
        return FALSE;
    }
    const int32_t delta=norm.mappingCP-c;
    const bool inRange=
        -Normalizer2Impl::MAX_DELTA<=delta && delta<=Normalizer2Impl::MAX_DELTA;
    if(!inRange) {
        return FALSE;
    }
    norm.type=Norm::NO_NO_DELTA;
    norm.offset=delta;
    return TRUE;
}
// Appends the composition pairs of norm (belonging to code point c) to dataString.
//
// Each pair is encoded in two or three 16-bit units:
//   - compositeAndFwd is (composite<<1), with bit 0 set when the composite's
//     own Norm has a non-null compositions list.
//   - Two units are used when trail<COMP_1_TRAIL_LIMIT and compositeAndFwd
//     fits into 16 bits; otherwise three units are used and COMP_1_TRIPLE is
//     set in the first unit.
//   - The first unit of the final pair gets COMP_1_LAST_TUPLE.
// Exits the program if norm.cc is non-zero.
void ExtraData::writeCompositions(UChar32 c, const Norm &norm, UnicodeString &dataString) {
    if(norm.cc!=0) {
        fprintf(stderr,
                "gennorm2 error: "
                "U+%04lX combines-forward and has ccc!=0, not possible in Unicode normalization\n",
                static_cast<long>(c));
        exit(U_INVALID_FORMAT_ERROR);
    }

    int32_t pairCount=0;
    const CompositionPair *pairs=norm.getCompositionPairs(pairCount);
    for(int32_t index=0; index<pairCount; ++index) {
        const CompositionPair &entry=pairs[index];

        UChar32 compositeAndFwd=entry.composite<<1;
        if(norms.getNormRef(entry.composite).compositions!=nullptr) {
            compositeAndFwd|=1;
        }

        const bool smallTrail=entry.trail<Normalizer2Impl::COMP_1_TRAIL_LIMIT;
        int32_t unit1;
        int32_t unit2;
        // Negative means "no third unit"; the triple forms keep the low bits
        // of compositeAndFwd here.
        int32_t unit3=compositeAndFwd;
        if(smallTrail && compositeAndFwd<=0xffff) {
            unit1=entry.trail<<1;
            unit2=compositeAndFwd;
            unit3=-1;
        } else if(smallTrail) {
            unit1=(entry.trail<<1)|Normalizer2Impl::COMP_1_TRIPLE;
            unit2=compositeAndFwd>>16;
        } else {
            // Large trail: split it across the first and second units.
            unit1=(Normalizer2Impl::COMP_1_TRAIL_LIMIT+
                   (entry.trail>>Normalizer2Impl::COMP_1_TRAIL_SHIFT))|
                  Normalizer2Impl::COMP_1_TRIPLE;
            unit2=(entry.trail<<Normalizer2Impl::COMP_2_TRAIL_SHIFT)|
                  (compositeAndFwd>>16);
        }

        if(index+1==pairCount) {
            unit1|=Normalizer2Impl::COMP_1_LAST_TUPLE;
        }

        dataString.append(static_cast<UChar>(unit1));
        dataString.append(static_cast<UChar>(unit2));
        if(unit3>=0) {
            dataString.append(static_cast<UChar>(unit3));
        }
    }
}
// Handles one enumerated range [start, end] sharing the data in norm.
// Only single-code-point ranges are accepted; a wider range or a Norm that
// carries an error message terminates the program. Otherwise the extra data
// for the code point is written via writeExtraData().
// NOTE(review): presumably invoked through the Norms::Enumerator base class --
// confirm in norms.h.
void ExtraData::rangeHandler(UChar32 start, UChar32 end, Norm &norm) {
    const bool isSingleCodePoint=(start==end);
    if(!isSingleCodePoint) {
        fprintf(stderr,
                "gennorm2 error: unexpected shared data for "
                "multiple code points U+%04lX..U+%04lX\n",
                static_cast<long>(start), static_cast<long>(end));
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

    const auto problem=norm.error;
    if(problem!=nullptr) {
        fprintf(stderr, "gennorm2 error: U+%04lX %s\n", static_cast<long>(start), problem);
        exit(U_INVALID_FORMAT_ERROR);
    }

    writeExtraData(start, norm);
}
#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
#pragma optimize( "", off )
#endif
// Writes the extra data for code point c according to norm.type and records
// in norm.offset where that data starts in the corresponding buffer.
//
// Types without extra data (INERT, MAYBE_YES_SIMPLE, YES_YES_WITH_CC) leave
// norm untouched. For the two NO_NO_COMP_* types that allow it, a compact
// delta encoding is tried first via setNoNoDelta() unless optimizeFast is set.
// An unknown type terminates the program.
//
// The buffer length is captured in a local *before* calling writeMapping().
// writeMapping() appends to the same buffer, and C++ does not specify the
// evaluation order of the operands of '+', so "buf.length()+writeMapping(..., buf)"
// could yield an offset past the newly written data.
// NOTE(review): the MSVC "#pragma optimize" guards around this function may be
// a workaround for that; confirm before removing them.
void ExtraData::writeExtraData(UChar32 c, Norm &norm) {
    switch(norm.type) {
    case Norm::INERT:
        break;
    case Norm::YES_YES_COMBINES_FWD:
        norm.offset=yesYesCompositions.length();
        writeCompositions(c, norm, yesYesCompositions);
        break;
    case Norm::YES_NO_COMBINES_FWD: {
        const int32_t start=yesNoMappingsAndCompositions.length();
        norm.offset=start+writeMapping(c, norm, yesNoMappingsAndCompositions);
        writeCompositions(c, norm, yesNoMappingsAndCompositions);
        break;
    }
    case Norm::YES_NO_MAPPING_ONLY: {
        const int32_t start=yesNoMappingsOnly.length();
        norm.offset=start+writeMapping(c, norm, yesNoMappingsOnly);
        break;
    }
    case Norm::NO_NO_COMP_YES:
        if(!optimizeFast && setNoNoDelta(c, norm)) {
            break;
        }
        norm.offset=writeNoNoMapping(c, norm, noNoMappingsCompYes, previousNoNoMappingsCompYes);
        break;
    case Norm::NO_NO_COMP_BOUNDARY_BEFORE:
        if(!optimizeFast && setNoNoDelta(c, norm)) {
            break;
        }
        norm.offset=writeNoNoMapping(
            c, norm, noNoMappingsCompBoundaryBefore, previousNoNoMappingsCompBoundaryBefore);
        break;
    case Norm::NO_NO_COMP_NO_MAYBE_CC:
        norm.offset=writeNoNoMapping(
            c, norm, noNoMappingsCompNoMaybeCC, previousNoNoMappingsCompNoMaybeCC);
        break;
    case Norm::NO_NO_EMPTY:
        norm.offset=writeNoNoMapping(c, norm, noNoMappingsEmpty, previousNoNoMappingsEmpty);
        break;
    case Norm::MAYBE_YES_COMBINES_FWD:
        norm.offset=maybeYesCompositions.length();
        writeCompositions(c, norm, maybeYesCompositions);
        break;
    case Norm::MAYBE_YES_SIMPLE:
        break;
    case Norm::YES_YES_WITH_CC:
        break;
    default:
        // Unexpected Norm type.
        exit(U_INTERNAL_PROGRAM_ERROR);
    }
}
#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
#pragma optimize( "", on )
#endif
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_NORMALIZATION