#include "unicode/utypes.h"
#if !UCONFIG_NO_TRANSLITERATION
#include "unicode/rep.h"
#include "unicode/unifilt.h"
#include "unicode/uchar.h"
#include "hextouni.h"
U_NAMESPACE_BEGIN
// Storage for the class ID.
// NOTE(review): presumably only the address of this byte is significant --
// confirm against the class declaration in hextouni.h.
const char HexToUnicodeTransliterator::fgClassID=0;
// ID handed to the Transliterator base class by the non-copy constructors.
const char HexToUnicodeTransliterator::_ID[] = "Hex-Any";
// Pattern applied by the filter-only constructor.  The code units spell four
// ';'-separated specs.  In applyPattern() a BACKSLASH makes the following
// character a literal, so each doubled 0x5C contributes one literal backslash.
// NOTE(review): the reading below assumes the header's BACKSLASH, ZERO and
// SEMICOLON constants are 0x5C, 0x30 and 0x3B -- confirm in hextouni.h.
const UChar HexToUnicodeTransliterator::DEFAULT_PATTERN[] = {
0x5C, 0x5C, 0x75, 0x30, 0x30, 0x30, 0x30, 0x3B,  // two backslashes, then "u0000;"
0x5C, 0x5C, 0x55, 0x30, 0x30, 0x30, 0x30, 0x3B,  // two backslashes, then "U0000;"
0x75, 0x2B, 0x30, 0x30, 0x30, 0x30, 0x3B,        // "u+0000;"
0x55, 0x2B, 0x30, 0x30, 0x30, 0x30, 0            // "U+0000", NUL-terminated
};
// Four placeholder code units ("AAAA", NUL-terminated).  applyPattern()
// appends them to reserve room for a spec's 4-unit header and overwrites them
// with the real header values once the spec's terminating SEMICOLON is seen.
static const UChar gQuadA[] = {
0x41, 0x41, 0x41, 0x41, 0
};
/**
 * Constructs a transliterator that recognises the specs in DEFAULT_PATTERN.
 *
 * @param adoptedFilter filter forwarded unchanged to the Transliterator base
 *                      class (the name suggests the base takes ownership --
 *                      confirm against Transliterator).
 */
HexToUnicodeTransliterator::HexToUnicodeTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(_ID, adoptedFilter)
{
    // This constructor has no way to report an error, so the result of
    // parsing the built-in pattern goes into a throwaway error code.
    UErrorCode ec = U_ZERO_ERROR;
    applyPattern(DEFAULT_PATTERN, ec);
}
/**
 * Constructs a transliterator from a caller-supplied pattern; the base class
 * is given 0 as its filter argument.
 *
 * @param thePattern pattern handed to applyPattern()
 * @param status     in/out error code; applyPattern() returns immediately if
 *                   it already holds a failure and sets
 *                   U_ILLEGAL_ARGUMENT_ERROR for a malformed pattern
 */
HexToUnicodeTransliterator::HexToUnicodeTransliterator(
        const UnicodeString& thePattern,
        UErrorCode& status)
    : Transliterator(_ID, 0)
{
    applyPattern(thePattern, status);
}
/**
 * Constructs a transliterator from a caller-supplied pattern and filter.
 *
 * @param thePattern    pattern handed to applyPattern()
 * @param adoptedFilter filter forwarded unchanged to the Transliterator base
 *                      class (the name suggests the base takes ownership --
 *                      confirm against Transliterator)
 * @param status        in/out error code; applyPattern() returns immediately
 *                      if it already holds a failure and sets
 *                      U_ILLEGAL_ARGUMENT_ERROR for a malformed pattern
 */
HexToUnicodeTransliterator::HexToUnicodeTransliterator(
        const UnicodeString& thePattern,
        UnicodeFilter* adoptedFilter,
        UErrorCode& status)
    : Transliterator(_ID, adoptedFilter)
{
    applyPattern(thePattern, status);
}
/**
 * Copy constructor.  Copies the base-class part and then the pattern text,
 * the compiled spec table and the spec count, so the pattern is not parsed
 * again.
 *
 * @param o object to copy
 */
HexToUnicodeTransliterator::HexToUnicodeTransliterator(
        const HexToUnicodeTransliterator& o)
    : Transliterator(o),
      pattern(o.pattern),
      affixes(o.affixes),
      affixCount(o.affixCount)
{
}
/**
 * Assignment operator.  Assigns the base-class part, then copies the three
 * data members of this class.
 *
 * @param o object to copy from
 * @return  reference to this object
 */
HexToUnicodeTransliterator&
HexToUnicodeTransliterator::operator=(const HexToUnicodeTransliterator& o)
{
    Transliterator::operator=(o);

    // The three members are independent of each other; the compiled table
    // and its count are copied together, followed by the source pattern.
    affixCount = o.affixCount;
    affixes    = o.affixes;
    pattern    = o.pattern;

    return *this;
}
/**
 * Creates a heap-allocated copy of this object via the copy constructor.
 *
 * @return pointer to the new copy, allocated with new
 */
Transliterator* HexToUnicodeTransliterator::clone(void) const
{
    Transliterator* duplicate = new HexToUnicodeTransliterator(*this);
    return duplicate;
}
void HexToUnicodeTransliterator::applyPattern(const UnicodeString& thePattern,
UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
pattern = thePattern;
affixes.truncate(0);
affixCount = 0;
int32_t mode = 0;
int32_t prefixLen = 0, suffixLen = 0, minDigits = 0, maxDigits = 0;
int32_t start = 0;
UChar c = 0; UBool isLiteral = FALSE; for (int32_t i=0; i<=pattern.length(); ++i) {
if (i == pattern.length()) {
if (i > 0 && !(c == SEMICOLON && !isLiteral)) {
c = SEMICOLON;
isLiteral = FALSE;
} else {
break;
}
} else {
c = pattern.charAt(i);
isLiteral = FALSE;
}
if (c == BACKSLASH) {
if ((i+1)<pattern.length()) {
isLiteral = TRUE;
c = pattern.charAt(++i);
} else {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
}
if (!isLiteral) {
switch (c) {
case POUND:
if (mode == 0) {
++mode;
} else if (mode != 1) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
++maxDigits;
break;
case ZERO:
if (mode < 2) {
mode = 2;
} else if (mode != 2) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
++minDigits;
++maxDigits;
break;
case SEMICOLON:
if (minDigits < 1 || maxDigits > 4
|| prefixLen > 0xFFFF || suffixLen > 0xFFFF) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if (start == affixes.length()) {
affixes.append(gQuadA);
}
affixes.setCharAt(start++, (UChar) prefixLen);
affixes.setCharAt(start++, (UChar) suffixLen);
affixes.setCharAt(start++, (UChar) minDigits);
affixes.setCharAt(start++, (UChar) maxDigits);
start = affixes.length();
++affixCount;
prefixLen = suffixLen = minDigits = maxDigits = mode = 0;
break;
default:
isLiteral = TRUE;
break;
}
}
if (isLiteral) {
if (start == affixes.length()) {
affixes.append(gQuadA);
}
affixes.append(c);
if (mode == 0) {
++prefixLen;
} else {
mode = 3;
++suffixLen;
}
}
}
}
/**
 * Returns the pattern text most recently stored by applyPattern().
 *
 * @return reference to the stored pattern member
 */
const UnicodeString& HexToUnicodeTransliterator::toPattern(void) const
{
    return this->pattern;
}
/**
 * Scans text from offsets.start up to offsets.limit and replaces every
 * escape that matches one of the compiled specs with the single UChar whose
 * value is given by the escape's hexadecimal digits.
 *
 * At each position the specs are tried in table order and the first match is
 * used.  A match consists of the spec's prefix, then between minDigits and
 * maxDigits hex digits (as recognised by u_digit(..., 16)), then its suffix.
 *
 * In incremental mode, if the limit is reached in the middle of a possible
 * match, scanning stops with the cursor left at the start of that partial
 * match.
 *
 * On return, offsets.limit and offsets.contextLimit are adjusted by the net
 * change in text length and offsets.start is set to the final cursor.
 *
 * @param text          text to modify in place
 * @param offsets       in/out positions; start and limit bound the scan
 * @param isIncremental TRUE to stop at a partial match instead of skipping it
 */
void HexToUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                     UBool isIncremental) const {
    int32_t cursor = offsets.start;
    int32_t limit = offsets.limit;

    for (; cursor < limit; ++cursor) {
        int32_t specPos = 0;

        for (int32_t spec = 0; spec < affixCount; ++spec) {
            // Decode the 4-unit header and locate the affix characters.
            const int32_t prefixLen = affixes.charAt(specPos);
            const int32_t suffixLen = affixes.charAt(specPos + 1);
            const int32_t minDigits = affixes.charAt(specPos + 2);
            const int32_t maxDigits = affixes.charAt(specPos + 3);
            const int32_t prefixPos = specPos + 4;
            const int32_t suffixPos = prefixPos + prefixLen;

            // Advance to the next spec now so that every mismatch path below
            // can simply `continue`.
            specPos = suffixPos + suffixLen;

            int32_t scan = cursor;
            UBool matched = TRUE;

            // --- prefix ---
            for (int32_t k = 0; k < prefixLen; ++k) {
                if (scan >= limit && k > 0) {
                    // Text ran out after at least one prefix character matched.
                    if (isIncremental) {
                        goto finish;
                    }
                    matched = FALSE;
                    break;
                }
                const UChar actual = text.charAt(scan++);
                if (actual != affixes.charAt(prefixPos + k)) {
                    matched = FALSE;
                    break;
                }
            }
            if (!matched) {
                continue;
            }

            // --- hex digits ---
            UChar value = 0;
            int32_t digitCount = 0;
            for (;;) {
                if (scan >= limit) {
                    if (isIncremental && scan > cursor) {
                        goto finish;
                    }
                    break;
                }
                const int32_t digit = u_digit(text.charAt(scan), 16);
                if (digit < 0) {
                    break;
                }
                ++scan;
                value = (UChar) ((value << 4) | digit);
                if (++digitCount == maxDigits) {
                    break;
                }
            }
            if (digitCount < minDigits) {
                continue;
            }

            // --- suffix ---
            for (int32_t k = 0; k < suffixLen; ++k) {
                if (scan >= limit) {
                    if (isIncremental && scan > cursor) {
                        goto finish;
                    }
                    matched = FALSE;
                    break;
                }
                const UChar actual = text.charAt(scan++);
                if (actual != affixes.charAt(suffixPos + k)) {
                    matched = FALSE;
                    break;
                }
            }
            if (!matched) {
                continue;
            }

            // Full match: (scan - cursor) code units collapse into one, so
            // the limit shrinks by one less than the matched length.
            UnicodeString replacement(value);
            text.handleReplaceBetween(cursor, scan, replacement);
            limit -= scan - cursor - 1;
            break;
        }
    }

finish:
    offsets.contextLimit += limit - offsets.limit;
    offsets.limit = limit;
    offsets.start = cursor;
}
U_NAMESPACE_END
#endif