#include "unicode/putil.h"
#include "unicode/ustring.h"
#include "cstring.h"
#include "cmemory.h"
#include "ustr_imp.h"
/**
 * Convert a UTF-32 string to UTF-16.
 *
 * Values up to 0xFFFF are copied as a single UChar (surrogate code points are
 * not filtered out); values up to 0x10FFFF are written as a lead/trail pair.
 * When the output does not fit, the remaining input is still scanned so that
 * the full required length can be reported.
 *
 * @param dest         Output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity Number of UChars available in dest; must be >= 0.
 * @param pDestLength  If not NULL, receives the number of UChars needed for
 *                     the complete output (which may exceed destCapacity).
 * @param src          UTF-32 input; must not be NULL.
 * @param srcLength    Number of input code points, or -1 if src ends with a
 *                     0 value.
 * @param pErrorCode   In/out error code. The function does nothing if this is
 *                     NULL or already a failure. Set to
 *                     U_ILLEGAL_ARGUMENT_ERROR for bad arguments, and to
 *                     U_INVALID_CHAR_FOUND if any input value is above
 *                     0x10FFFF -- whether or not that value still fit into
 *                     dest, so pre-flighting and converting report the same
 *                     error for the same input.
 * @return dest, or NULL on error. Termination/overflow reporting is delegated
 *         to u_terminateUChars().
 */
U_CAPI UChar* U_EXPORT2
u_strFromUTF32(UChar *dest,
               int32_t destCapacity,
               int32_t *pDestLength,
               const UChar32 *src,
               int32_t srcLength,
               UErrorCode *pErrorCode)
{
    int32_t reqLength = 0;
    uint32_t ch = 0;
    UChar *pDestLimit;
    UChar *pDest = dest;
    const uint32_t *pSrc = (const uint32_t *)src;

    /* args check */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
        return NULL;
    }
    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /* Only form the limit pointer once destCapacity is known to be non-negative. */
    pDestLimit = dest + destCapacity;

    if(srcLength == -1){
        /* NUL-terminated input: convert while there is room in dest. */
        while(((ch=*pSrc)!=0) && (pDest < pDestLimit)){
            ++pSrc;
            if(ch<=0xFFFF){
                *(pDest++)=(UChar)ch;
            }else if(ch<=0x10FFFF){
                *(pDest++)=UTF16_LEAD(ch);
                if(pDest<pDestLimit){
                    *(pDest++)=UTF16_TRAIL(ch);
                }else{
                    /* only the lead surrogate fit; count the missing trail */
                    reqLength++;
                    break;
                }
            }else{
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
        }
        /* Pre-flight the rest of the input, applying the same validity check. */
        while((ch=*pSrc++) != 0){
            if(ch>0x10FFFF){
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
            reqLength+=UTF_CHAR_LENGTH(ch);
        }
    }else{
        const uint32_t *pSrcLimit = pSrc + srcLength;

        /* Counted input: convert while there is room in dest. */
        while((pSrc < pSrcLimit) && (pDest < pDestLimit)){
            ch = *pSrc++;
            if(ch<=0xFFFF){
                *(pDest++)=(UChar)ch;
            }else if(ch<=0x10FFFF){
                *(pDest++)=UTF16_LEAD(ch);
                if(pDest<pDestLimit){
                    *(pDest++)=UTF16_TRAIL(ch);
                }else{
                    /* only the lead surrogate fit; count the missing trail */
                    reqLength++;
                    break;
                }
            }else{
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
        }
        /* Pre-flight the rest of the input, applying the same validity check. */
        while(pSrc < pSrcLimit){
            ch = *pSrc++;
            if(ch>0x10FFFF){
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
            reqLength+=UTF_CHAR_LENGTH(ch);
        }
    }

    /* total = units actually written + units that did not fit */
    reqLength += (int32_t)(pDest - dest);
    if(pDestLength){
        *pDestLength = reqLength;
    }

    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
    return dest;
}
/**
 * Convert a UTF-16 string to UTF-32.
 *
 * A lead surrogate immediately followed by a trail surrogate is combined into
 * one supplementary code point; any other code unit (including an unpaired
 * surrogate) is copied through as-is. Input that does not fit into dest is
 * still scanned so the full required length can be reported.
 *
 * @param dest         Output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity Number of UChar32 slots in dest; must be >= 0.
 * @param pDestLength  If not NULL, receives the number of code points needed
 *                     for the complete output.
 * @param src          UTF-16 input; must not be NULL.
 * @param srcLength    Number of UChars in src, or -1 if NUL-terminated.
 * @param pErrorCode   In/out error code; nothing happens if it is NULL or
 *                     already a failure. Set to U_ILLEGAL_ARGUMENT_ERROR for
 *                     bad arguments.
 * @return dest, or NULL on error. Termination/overflow reporting is delegated
 *         to u_terminateUChar32s().
 */
U_CAPI UChar32* U_EXPORT2
u_strToUTF32(UChar32 *dest,
             int32_t destCapacity,
             int32_t *pDestLength,
             const UChar *src,
             int32_t srcLength,
             UErrorCode *pErrorCode)
{
    const UChar *in = src;
    const UChar *inEnd;
    int32_t reqLength = 0;
    uint32_t cp = 0;
    uint32_t *out = (uint32_t *)dest;
    uint32_t *outEnd = out + destCapacity;
    UChar trail = 0;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(src==NULL || srcLength < -1 || destCapacity < 0 || (dest==NULL && destCapacity > 0)) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if(srcLength == -1) {
        /* NUL-terminated: convert until the terminator or until dest is full. */
        for(;;) {
            cp = *in;
            if(cp == 0 || out == outEnd) {
                break;
            }
            ++in;
            if(UTF_IS_LEAD(cp)) {
                /* a NUL here simply fails the trail test, so no extra check */
                trail = *in;
                if(UTF_IS_TRAIL(trail)) {
                    ++in;
                    cp = UTF16_GET_PAIR_VALUE(cp, trail);
                }
            }
            *out++ = cp;
        }
        /* Count the code points that did not fit. */
        while(*in != 0) {
            cp = *in++;
            if(UTF_IS_LEAD(cp)) {
                trail = *in;
                if(UTF_IS_TRAIL(trail)) {
                    ++in;
                }
            }
            ++reqLength;
        }
    } else {
        inEnd = in + srcLength;
        /* Counted input: convert until the input ends or dest is full. */
        while(in < inEnd && out < outEnd) {
            cp = *in++;
            if(UTF_IS_LEAD(cp) && in < inEnd) {
                trail = *in;
                if(UTF_IS_TRAIL(trail)) {
                    ++in;
                    cp = UTF16_GET_PAIR_VALUE(cp, trail);
                }
            }
            *out++ = cp;
        }
        /* Count the code points that did not fit. */
        while(in != inEnd) {
            cp = *in++;
            if(UTF_IS_LEAD(cp) && in < inEnd) {
                trail = *in;
                if(UTF_IS_TRAIL(trail)) {
                    ++in;
                }
            }
            ++reqLength;
        }
    }

    /* total = code points written + code points that did not fit */
    reqLength += (int32_t)(out - (uint32_t *)dest);
    if(pDestLength) {
        *pDestLength = reqLength;
    }

    u_terminateUChar32s(dest,destCapacity,reqLength,pErrorCode);
    return dest;
}
/*
 * Smallest code point that may be encoded with a given number of UTF-8 trail
 * bytes (index = trail byte count, 0..3). The decoding helpers compare their
 * result against this table to reject non-shortest ("overlong") sequences.
 */
static const UChar32
utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
/*
 * Decode the remainder of a multi-byte UTF-8 sequence from a NUL-terminated
 * string.
 *
 * ps  in/out: on entry *ps points just past the lead byte; on return it
 *     points past the bytes consumed (on error: past the lead byte plus any
 *     directly following trail bytes, but no more than the lead byte
 *     announced).
 * c   the lead byte that was already read by the caller.
 *
 * Returns the decoded code point, or U_SENTINEL if the sequence is illegal
 * (bad lead byte, missing/invalid trail byte, value above 0x10FFFF,
 * non-shortest form, or a surrogate code point).
 *
 * No explicit end-of-string check is needed: a NUL byte fails the trail byte
 * range test and therefore stops decoding.
 */
static UChar32
utf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) {
const uint8_t *s=*ps;
uint8_t trail, illegal=0;
/* number of trail bytes announced by the lead byte (per the macro's name) */
uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
/* presumably strips the length-marker bits from the lead byte -- see macro */
UTF8_MASK_LEAD_BYTE((c), count);
switch(count) {
/* the cases for 3, 2 and 1 trail bytes intentionally fall through */
case 5:
case 4:
/* more than 3 trail bytes is always treated as illegal */
illegal=1;
break;
case 3:
trail=(uint8_t)(*s++ - 0x80);
c=(c<<6)|trail;
/* not a trail byte, or the final value would exceed 0x10FFFF */
if(trail>0x3f || c>=0x110) {
illegal=1;
break;
}
/* fall through */
case 2:
trail=(uint8_t)(*s++ - 0x80);
if(trail>0x3f) {
/* not a trail byte; stop before reading any further */
illegal=1;
break;
}
c=(c<<6)|trail;
/* fall through */
case 1:
trail=(uint8_t)(*s++ - 0x80);
if(trail>0x3f) {
/* not a trail byte */
illegal=1;
}
c=(c<<6)|trail;
break;
case 0:
/* the "lead" byte does not start a multi-byte sequence */
return U_SENTINEL;
}
/* illegal is already set for count>=4, so the table is only indexed with 1..3 */
if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) {
/* error: restart after the lead byte and skip at most count trail bytes */
s=*ps;
while(count>0 && UTF8_IS_TRAIL(*s)) {
++s;
--count;
}
c=U_SENTINEL;
}
*ps=s;
return c;
}
/*
 * Decode the remainder of a multi-byte UTF-8 sequence from a buffer with an
 * explicit end pointer.
 *
 * ps     in/out: on entry *ps points just past the lead byte; on return it
 *        points past the bytes consumed (on error: past the lead byte plus
 *        any directly following trail bytes, bounded by the announced count
 *        and by limit).
 * limit  one past the last readable input byte.
 * c      the lead byte that was already read by the caller.
 *
 * Returns the decoded code point, or U_SENTINEL if the sequence is illegal
 * (bad lead byte, truncated input, invalid trail byte, value above 0x10FFFF,
 * non-shortest form, or a surrogate code point).
 */
static UChar32
utf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c) {
const uint8_t *s=*ps;
uint8_t trail, illegal=0;
/* number of trail bytes announced by the lead byte (per the macro's name) */
uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
/* only decode if all announced trail bytes are inside the buffer */
if((limit-s)>=count) {
/* presumably strips the length-marker bits from the lead byte -- see macro */
UTF8_MASK_LEAD_BYTE((c), count);
switch(count) {
/* the cases for 3, 2 and 1 trail bytes intentionally fall through */
case 5:
case 4:
/* more than 3 trail bytes is always treated as illegal */
illegal=1;
break;
case 3:
trail=*s++;
c=(c<<6)|(trail&0x3f);
if(c<0x110) {
/* becomes non-zero unless the top two bits of trail are 10 */
illegal|=(trail&0xc0)^0x80;
} else {
/* the final value would exceed 0x10FFFF */
illegal=1;
break;
}
/* fall through */
case 2:
trail=*s++;
c=(c<<6)|(trail&0x3f);
illegal|=(trail&0xc0)^0x80;
/* fall through */
case 1:
trail=*s++;
c=(c<<6)|(trail&0x3f);
illegal|=(trail&0xc0)^0x80;
break;
case 0:
/* the "lead" byte does not start a multi-byte sequence */
return U_SENTINEL;
}
} else {
/* too few bytes left for the announced sequence length */
illegal=1;
}
/* illegal short-circuits the table lookup whenever count is outside 1..3 or c is not fully decoded */
if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) {
/* error: restart after the lead byte and skip at most count trail bytes */
s=*ps;
while(count>0 && s<limit && UTF8_IS_TRAIL(*s)) {
++s;
--count;
}
c=U_SENTINEL;
}
*ps=s;
return c;
}
/**
 * Convert a UTF-8 string to UTF-16, optionally replacing illegal byte
 * sequences with a substitution character.
 *
 * @param dest              Output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity      Number of UChars available in dest; must be >= 0.
 * @param pDestLength       If not NULL, receives the number of UChars needed
 *                          for the complete output.
 * @param src               UTF-8 input; must not be NULL.
 * @param srcLength         Number of input bytes, or a negative value (only -1
 *                          is accepted) if src is NUL-terminated.
 * @param subchar           Code point written in place of each illegal
 *                          sequence. A negative value means "no substitution":
 *                          the first illegal sequence then fails with
 *                          U_INVALID_CHAR_FOUND. Values above 0x10FFFF and
 *                          surrogates are rejected as illegal arguments.
 * @param pNumSubstitutions If not NULL, receives the number of substitutions
 *                          counted (including those in the part of the input
 *                          that was only pre-flighted).
 * @param pErrorCode        In/out error code; nothing happens if it is NULL or
 *                          already a failure.
 * @return dest, or NULL on error. Termination/overflow reporting is delegated
 *         to u_terminateUChars().
 *
 * The most common sequences are decoded inline in every loop: ASCII first,
 * then lead bytes above 0xE0 (three-byte sequences) before lead bytes below
 * 0xE0 (two-byte sequences). Everything else, including 0xE0 itself, 0xED..0xEF,
 * four-byte sequences and all error cases, goes through the
 * utf8_nextCharSafeBody...() helpers.
 */
U_CAPI UChar* U_EXPORT2
u_strFromUTF8WithSub(UChar *dest,
int32_t destCapacity,
int32_t *pDestLength,
const char* src,
int32_t srcLength,
UChar32 subchar, int32_t *pNumSubstitutions,
UErrorCode *pErrorCode){
UChar *pDest = dest;
UChar *pDestLimit = dest+destCapacity;
UChar32 ch;
int32_t reqLength = 0;
const uint8_t* pSrc = (const uint8_t*) src;
/* trail bytes with 0x80 already subtracted */
uint8_t t1, t2;
int32_t numSubstitutions;
/* args check */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
return NULL;
}
if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0) ||
subchar > 0x10ffff || U_IS_SURROGATE(subchar)
) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
numSubstitutions=0;
if(srcLength < 0){
/*
 * NUL-terminated input. NUL is only tested in lead byte position; a NUL in
 * trail position fails the trail byte range check and so ends the sequence.
 */
while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
if(ch <= 0x7f){
*pDest++=(UChar)ch;
++pSrc;
} else {
if(ch > 0xe0) {
if(
/* lead bytes E1..EC with two valid trail bytes: U+1000..U+CFFF */
ch <= 0xec &&
(t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
(t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
) {
/* no (ch & 0xf) here: relies on the cast to UChar dropping the lead byte's upper bits */
*pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
pSrc += 3;
continue;
}
} else if(ch < 0xe0) {
if(
/* lead bytes C2..DF with one valid trail byte: U+0080..U+07FF */
ch >= 0xc2 &&
(t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
) {
*pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
pSrc += 2;
continue;
}
}
/* helper call for the less common and the error cases; it expects pSrc after the lead byte */
++pSrc;
ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
/* illegal sequence: count it and substitute; fail if there is no substitution character */
if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
} else if(ch<=0xFFFF) {
*(pDest++)=(UChar)ch;
} else {
*(pDest++)=UTF16_LEAD(ch);
if(pDest<pDestLimit) {
*(pDest++)=UTF16_TRAIL(ch);
} else {
/* only the lead surrogate fit; count the missing trail */
reqLength++;
break;
}
}
}
}
/* Pre-flight the rest of the string: same decoding, but only count output units. */
while((ch = *pSrc) != 0) {
if(ch <= 0x7f){
++reqLength;
++pSrc;
} else {
if(ch > 0xe0) {
if(
ch <= 0xec &&
(uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
(uint8_t)(pSrc[2] - 0x80) <= 0x3f
) {
++reqLength;
pSrc += 3;
continue;
}
} else if(ch < 0xe0) {
if(
ch >= 0xc2 &&
(uint8_t)(pSrc[1] - 0x80) <= 0x3f
) {
++reqLength;
pSrc += 2;
continue;
}
}
++pSrc;
ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
reqLength += U16_LENGTH(ch);
}
}
} else {
/* counted input (srcLength >= 0) */
const uint8_t *pSrcLimit = pSrc + srcLength;
int32_t count;
/*
 * Fast loop without per-byte limit checks. Each inner iteration normally
 * consumes at most 3 input bytes and writes one UChar, so it may run
 * min(remaining dest, remaining src/3) times before the limits need to be
 * looked at again. srcLength is reused as a scratch variable from here on.
 */
for(;;) {
count = (int32_t)(pDestLimit - pDest);
srcLength = (int32_t)((pSrcLimit - pSrc) / 3);
if(count > srcLength) {
count = srcLength;
}
if(count < 3) {
/* near the end of input or output: finish with the fully checked loop below */
break;
}
do {
ch = *pSrc;
if(ch <= 0x7f){
*pDest++=(UChar)ch;
++pSrc;
} else {
if(ch > 0xe0) {
if(
/* lead bytes E1..EC with two valid trail bytes: U+1000..U+CFFF */
ch <= 0xec &&
(t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
(t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
) {
*pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
pSrc += 3;
continue;
}
} else if(ch < 0xe0) {
if(
/* lead bytes C2..DF with one valid trail byte: U+0080..U+07FF */
ch >= 0xc2 &&
(t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
) {
*pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
pSrc += 2;
continue;
}
}
/*
 * This step may consume more than 3 bytes and/or write two UChars, which
 * the count estimate did not budget for, so spend one extra unit of count.
 * If none is left, leave the byte unread and recompute count.
 */
if(ch >= 0xf0 || subchar > 0xffff) {
if(--count == 0) {
break;
}
}
/* helper call for the less common and the error cases; it expects pSrc after the lead byte */
++pSrc;
ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}else if(ch<=0xFFFF){
*(pDest++)=(UChar)ch;
}else{
*(pDest++)=UTF16_LEAD(ch);
if(pDest<pDestLimit){
*(pDest++)=UTF16_TRAIL(ch);
}else{
/* only the lead surrogate fit; count the missing trail */
reqLength++;
break;
}
}
}
} while(--count > 0);
}
/* Fully checked loop for the last few characters: every read is bounded by pSrcLimit. */
while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
ch = *pSrc;
if(ch <= 0x7f){
*pDest++=(UChar)ch;
++pSrc;
} else {
if(ch > 0xe0) {
if(
ch <= 0xec &&
((pSrcLimit - pSrc) >= 3) &&
(t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
(t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
) {
*pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
pSrc += 3;
continue;
}
} else if(ch < 0xe0) {
if(
ch >= 0xc2 &&
((pSrcLimit - pSrc) >= 2) &&
(t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
) {
*pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
pSrc += 2;
continue;
}
}
++pSrc;
ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}else if(ch<=0xFFFF){
*(pDest++)=(UChar)ch;
}else{
*(pDest++)=UTF16_LEAD(ch);
if(pDest<pDestLimit){
*(pDest++)=UTF16_TRAIL(ch);
}else{
/* only the lead surrogate fit; count the missing trail */
reqLength++;
break;
}
}
}
}
/* Pre-flight whatever input is left: same decoding, but only count output units. */
while(pSrc < pSrcLimit){
ch = *pSrc;
if(ch <= 0x7f){
reqLength++;
++pSrc;
} else {
if(ch > 0xe0) {
if(
ch <= 0xec &&
((pSrcLimit - pSrc) >= 3) &&
(uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
(uint8_t)(pSrc[2] - 0x80) <= 0x3f
) {
reqLength++;
pSrc += 3;
continue;
}
} else if(ch < 0xe0) {
if(
ch >= 0xc2 &&
((pSrcLimit - pSrc) >= 2) &&
(uint8_t)(pSrc[1] - 0x80) <= 0x3f
) {
reqLength++;
pSrc += 2;
continue;
}
}
++pSrc;
ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
reqLength+=UTF_CHAR_LENGTH(ch);
}
}
}
/* total = units actually written + units that did not fit */
reqLength+=(int32_t)(pDest - dest);
if(pNumSubstitutions!=NULL) {
*pNumSubstitutions=numSubstitutions;
}
if(pDestLength){
*pDestLength = reqLength;
}
u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
return dest;
}
/**
 * Convert a UTF-8 string to UTF-16 without substitution.
 *
 * Forwards to u_strFromUTF8WithSub() with U_SENTINEL as the substitution
 * character and no substitution counter; all other arguments and the return
 * value are passed through unchanged.
 */
U_CAPI UChar* U_EXPORT2
u_strFromUTF8(UChar *dest,
              int32_t destCapacity,
              int32_t *pDestLength,
              const char* src,
              int32_t srcLength,
              UErrorCode *pErrorCode){
    UChar *result;

    result = u_strFromUTF8WithSub(dest, destCapacity, pDestLength,
                                  src, srcLength,
                                  U_SENTINEL, NULL,
                                  pErrorCode);
    return result;
}
/**
 * Convert UTF-8 to UTF-16 leniently, i.e. without validating the input.
 *
 * The lead byte alone decides how many bytes form a character; trail bytes
 * are not range-checked. A byte below 0xC0 is always copied as one UChar.
 * A multi-byte sequence cut short by the end of the input is replaced with
 * U+FFFD and ends the conversion.
 *
 * @param dest         Output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity Number of UChars available in dest; must be >= 0.
 *                     When srcLength >= 0 it must also be >= srcLength:
 *                     otherwise *pDestLength is set to srcLength (an upper
 *                     bound, not the exact length), *pErrorCode is set to
 *                     U_BUFFER_OVERFLOW_ERROR and NULL is returned.
 * @param pDestLength  If not NULL, receives the output length in UChars.
 * @param src          UTF-8 input; must not be NULL.
 * @param srcLength    Number of input bytes, or a negative value (only -1 is
 *                     accepted) if src is NUL-terminated.
 * @param pErrorCode   In/out error code; nothing happens if it is NULL or
 *                     already a failure.
 * @return dest, or NULL on error. Termination/overflow reporting is delegated
 *         to u_terminateUChars().
 */
U_CAPI UChar * U_EXPORT2
u_strFromUTF8Lenient(UChar *dest,
                     int32_t destCapacity,
                     int32_t *pDestLength,
                     const char *src,
                     int32_t srcLength,
                     UErrorCode *pErrorCode) {
    UChar *pDest = dest;
    UChar32 ch;
    int32_t reqLength = 0;
    /* the input is only read, so keep it const */
    const uint8_t *pSrc = (const uint8_t *)src;

    /* args check */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
        return NULL;
    }
    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if(srcLength < 0) {
        /* NUL-terminated input */
        UChar *pDestLimit = dest+destCapacity;
        uint8_t t1, t2, t3; /* trail bytes */

        while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
            if(ch < 0xc0) {
                /* ASCII, or a trail byte in lead position treated as a single byte */
                *pDest++=(UChar)ch;
                ++pSrc;
                continue;
            } else if(ch < 0xe0) {
                /* two-byte sequence */
                if((t1 = pSrc[1]) != 0) {
                    /* 0x3080 = (0xc0 << 6) + 0x80 */
                    *pDest++ = (UChar)((ch << 6) + t1 - 0x3080);
                    pSrc += 2;
                    continue;
                }
            } else if(ch < 0xf0) {
                /* three-byte sequence */
                if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) {
                    /* relies on the cast to UChar dropping the lead byte's upper bits */
                    /* 0x2080 = (0x80 << 6) + 0x80 */
                    *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080);
                    pSrc += 3;
                    continue;
                }
            } else {
                /* four-byte sequence */
                if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) {
                    pSrc += 4;
                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
                    ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080;
                    *(pDest++) = U16_LEAD(ch);
                    if(pDest < pDestLimit) {
                        *(pDest++) = U16_TRAIL(ch);
                    } else {
                        /* only the lead surrogate fit; count the missing trail */
                        reqLength = 1;
                        break;
                    }
                    continue;
                }
            }

            /* truncated character at the end: substitute and skip to the NUL */
            *pDest++ = 0xfffd;
            while(*++pSrc != 0) {}
            break;
        }

        /* Pre-flight the rest of the string. */
        while((ch = *pSrc) != 0) {
            if(ch < 0xc0) {
                ++reqLength;
                ++pSrc;
                continue;
            } else if(ch < 0xe0) {
                if(pSrc[1] != 0) {
                    ++reqLength;
                    pSrc += 2;
                    continue;
                }
            } else if(ch < 0xf0) {
                if(pSrc[1] != 0 && pSrc[2] != 0) {
                    ++reqLength;
                    pSrc += 3;
                    continue;
                }
            } else {
                if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) {
                    reqLength += 2;
                    pSrc += 4;
                    continue;
                }
            }

            /* truncated character at the end */
            ++reqLength;
            break;
        }
    } else {
        /* counted input (srcLength >= 0) */
        const uint8_t *pSrcLimit = pSrc + srcLength;

        /*
         * Requiring destCapacity >= srcLength means no output bounds checks
         * are needed below: no sequence yields more UChars than it has bytes.
         */
        if(destCapacity < srcLength) {
            if(pDestLength != NULL) {
                *pDestLength = srcLength; /* likely overestimates the true length */
            }
            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
            return NULL;
        }

        if((pSrcLimit - pSrc) >= 4) {
            pSrcLimit -= 3; /* temporarily reduce pSrcLimit */

            /* in this loop at least 4 bytes are readable, up to pSrc+3 */
            do {
                ch = *pSrc++;
                if(ch < 0xc0) {
                    *pDest++=(UChar)ch;
                } else if(ch < 0xe0) {
                    /* 0x3080 = (0xc0 << 6) + 0x80 */
                    *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
                } else if(ch < 0xf0) {
                    /* 0x2080 = (0x80 << 6) + 0x80 */
                    ch = (ch << 12) + (*pSrc++ << 6);
                    *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
                } else {
                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
                    ch = (ch << 18) + (*pSrc++ << 12);
                    ch += *pSrc++ << 6;
                    ch += *pSrc++ - 0x3c82080;
                    *(pDest++) = U16_LEAD(ch);
                    *(pDest++) = U16_TRAIL(ch);
                }
            } while(pSrc < pSrcLimit);

            pSrcLimit += 3; /* restore original pSrcLimit */
        }

        /*
         * Tail loop with explicit bounds checks. The trail bytes are consumed
         * by the *pSrc++ reads themselves, so pSrc must not be advanced again
         * afterwards (doing so would move it beyond the end of the buffer).
         */
        while(pSrc < pSrcLimit) {
            ch = *pSrc++;
            if(ch < 0xc0) {
                *pDest++=(UChar)ch;
                continue;
            } else if(ch < 0xe0) {
                if(pSrc < pSrcLimit) {
                    /* 0x3080 = (0xc0 << 6) + 0x80 */
                    *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
                    continue;
                }
            } else if(ch < 0xf0) {
                if((pSrcLimit - pSrc) >= 2) {
                    /* 0x2080 = (0x80 << 6) + 0x80 */
                    ch = (ch << 12) + (*pSrc++ << 6);
                    *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
                    continue;
                }
            } else {
                if((pSrcLimit - pSrc) >= 3) {
                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
                    ch = (ch << 18) + (*pSrc++ << 12);
                    ch += *pSrc++ << 6;
                    ch += *pSrc++ - 0x3c82080;
                    *(pDest++) = U16_LEAD(ch);
                    *(pDest++) = U16_TRAIL(ch);
                    continue;
                }
            }

            /* truncated character at the end */
            *pDest++ = 0xfffd;
            break;
        }
    }

    /* total = units actually written + units that did not fit */
    reqLength+=(int32_t)(pDest - dest);
    if(pDestLength){
        *pDestLength = reqLength;
    }

    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
    return dest;
}
/*
 * Write the UTF-8 encoding of c at pDest and return the position just past
 * the bytes written (1 to 4 bytes, chosen by the magnitude of c).
 * No validation is done here and the caller must guarantee enough room.
 */
static U_INLINE uint8_t *
_appendUTF8(uint8_t *pDest, UChar32 c) {
    if(c<=0x7f) {
        /* single byte */
        pDest[0]=(uint8_t)c;
        return pDest+1;
    }
    if(c<=0x7ff) {
        /* two bytes: 110xxxxx 10xxxxxx */
        pDest[0]=(uint8_t)((c>>6)|0xc0);
        pDest[1]=(uint8_t)((c&0x3f)|0x80);
        return pDest+2;
    }
    if(c<=0xffff) {
        /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        pDest[0]=(uint8_t)((c>>12)|0xe0);
        pDest[1]=(uint8_t)(((c>>6)&0x3f)|0x80);
        pDest[2]=(uint8_t)((c&0x3f)|0x80);
        return pDest+3;
    }
    /* four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    pDest[0]=(uint8_t)((c>>18)|0xf0);
    pDest[1]=(uint8_t)(((c>>12)&0x3f)|0x80);
    pDest[2]=(uint8_t)(((c>>6)&0x3f)|0x80);
    pDest[3]=(uint8_t)((c&0x3f)|0x80);
    return pDest+4;
}
/**
 * Convert a UTF-16 string to UTF-8, optionally replacing unpaired surrogates
 * with a substitution character.
 *
 * @param dest              Output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity      Number of bytes available in dest; must be >= 0.
 * @param pDestLength       If not NULL, receives the number of bytes needed
 *                          for the complete output.
 * @param pSrc              UTF-16 input; must not be NULL.
 * @param srcLength         Number of UChars in pSrc, or -1 if NUL-terminated.
 * @param subchar           Code point written in place of each unpaired
 *                          surrogate. A negative value means "no
 *                          substitution": the first unpaired surrogate then
 *                          fails with U_INVALID_CHAR_FOUND. Values above
 *                          0x10FFFF and surrogates are rejected as illegal
 *                          arguments.
 * @param pNumSubstitutions If not NULL, receives the number of substitutions
 *                          counted (including those in the part of the input
 *                          that was only pre-flighted).
 * @param pErrorCode        In/out error code; nothing happens if it is NULL or
 *                          already a failure.
 * @return dest, or NULL on error. Termination/overflow reporting is delegated
 *         to u_terminateChars().
 *
 * A character is only written if all of its bytes fit; otherwise its byte
 * length is recorded in reqLength and the rest of the input is pre-flighted.
 */
U_CAPI char* U_EXPORT2
u_strToUTF8WithSub(char *dest,
int32_t destCapacity,
int32_t *pDestLength,
const UChar *pSrc,
int32_t srcLength,
UChar32 subchar, int32_t *pNumSubstitutions,
UErrorCode *pErrorCode){
int32_t reqLength=0;
uint32_t ch=0,ch2=0;
uint8_t *pDest = (uint8_t *)dest;
uint8_t *pDestLimit = pDest + destCapacity;
int32_t numSubstitutions;
/* args check */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
return NULL;
}
if( (pSrc==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0) ||
subchar > 0x10ffff || U_IS_SURROGATE(subchar)
) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
numSubstitutions=0;
if(srcLength==-1) {
/* NUL-terminated input: convert until the terminator or until a character does not fit. */
while((ch=*pSrc)!=0) {
++pSrc;
if(ch <= 0x7f) {
/* one byte */
if(pDest<pDestLimit) {
*pDest++ = (char)ch;
} else {
reqLength = 1;
break;
}
} else if(ch <= 0x7ff) {
/* two bytes */
if((pDestLimit - pDest) >= 2) {
*pDest++=(uint8_t)((ch>>6)|0xc0);
*pDest++=(uint8_t)((ch&0x3f)|0x80);
} else {
reqLength = 2;
break;
}
} else if(ch <= 0xd7ff || ch >= 0xe000) {
/* three bytes: BMP code unit that is not a surrogate */
if((pDestLimit - pDest) >= 3) {
*pDest++=(uint8_t)((ch>>12)|0xe0);
*pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
*pDest++=(uint8_t)((ch&0x3f)|0x80);
} else {
reqLength = 3;
break;
}
} else {
/* ch is a surrogate code unit */
int32_t length;
/* no separate NUL check: a NUL fails the trail surrogate test */
if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
++pSrc;
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
} else if(subchar>=0) {
/* unpaired surrogate: substitute */
ch=subchar;
++numSubstitutions;
} else {
/* unpaired surrogate and no substitution character */
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
length = U8_LENGTH(ch);
if((pDestLimit - pDest) >= length) {
pDest=_appendUTF8(pDest, ch);
} else {
reqLength = length;
break;
}
}
}
/* Pre-flight the rest of the string: only add up the byte lengths. */
while((ch=*pSrc++)!=0) {
if(ch<=0x7f) {
++reqLength;
} else if(ch<=0x7ff) {
reqLength+=2;
} else if(!UTF_IS_SURROGATE(ch)) {
reqLength+=3;
} else if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
++pSrc;
reqLength+=4;
} else if(subchar>=0) {
reqLength+=U8_LENGTH(subchar);
++numSubstitutions;
} else {
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
}
} else {
/* counted input (srcLength >= 0) */
const UChar *pSrcLimit = pSrc+srcLength;
int32_t count;
/*
 * Fast loop without per-unit limit checks. Each inner iteration normally
 * consumes one UChar and writes at most 3 bytes, so it may run
 * min(remaining dest/3, remaining src) times before the limits need to be
 * looked at again. srcLength is reused as a scratch variable from here on.
 */
for(;;) {
count = (int32_t)((pDestLimit - pDest) / 3);
srcLength = (int32_t)(pSrcLimit - pSrc);
if(count > srcLength) {
count = srcLength;
}
if(count < 3) {
/* near the end of input or output: finish with the fully checked loop below */
break;
}
do {
ch=*pSrc++;
if(ch <= 0x7f) {
*pDest++ = (char)ch;
} else if(ch <= 0x7ff) {
*pDest++=(uint8_t)((ch>>6)|0xc0);
*pDest++=(uint8_t)((ch&0x3f)|0x80);
} else if(ch <= 0xd7ff || ch >= 0xe000) {
*pDest++=(uint8_t)((ch>>12)|0xe0);
*pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
*pDest++=(uint8_t)((ch&0x3f)|0x80);
} else {
/*
 * Surrogate: this step may read a second UChar and write up to four bytes,
 * which the count estimate did not budget for, so spend one extra unit of
 * count. If none is left, un-read the surrogate and recompute count.
 */
if(--count == 0) {
--pSrc;
break;
}
if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
++pSrc;
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
/* four bytes for a supplementary code point */
*pDest++=(uint8_t)((ch>>18)|0xf0);
*pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80);
*pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
*pDest++=(uint8_t)((ch&0x3f)|0x80);
} else {
/* unpaired surrogate: substitute, or fail if there is no substitution character */
if(subchar>=0) {
ch=subchar;
++numSubstitutions;
} else {
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
pDest=_appendUTF8(pDest, ch);
}
}
} while(--count > 0);
}
/* Fully checked loop for the last few characters: every read and write is bounded. */
while(pSrc<pSrcLimit) {
ch=*pSrc++;
if(ch <= 0x7f) {
if(pDest<pDestLimit) {
*pDest++ = (char)ch;
} else {
reqLength = 1;
break;
}
} else if(ch <= 0x7ff) {
if((pDestLimit - pDest) >= 2) {
*pDest++=(uint8_t)((ch>>6)|0xc0);
*pDest++=(uint8_t)((ch&0x3f)|0x80);
} else {
reqLength = 2;
break;
}
} else if(ch <= 0xd7ff || ch >= 0xe000) {
if((pDestLimit - pDest) >= 3) {
*pDest++=(uint8_t)((ch>>12)|0xe0);
*pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
*pDest++=(uint8_t)((ch&0x3f)|0x80);
} else {
reqLength = 3;
break;
}
} else {
/* ch is a surrogate code unit */
int32_t length;
if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
++pSrc;
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
} else if(subchar>=0) {
ch=subchar;
++numSubstitutions;
} else {
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
length = U8_LENGTH(ch);
if((pDestLimit - pDest) >= length) {
pDest=_appendUTF8(pDest, ch);
} else {
reqLength = length;
break;
}
}
}
/* Pre-flight whatever input is left: only add up the byte lengths. */
while(pSrc<pSrcLimit) {
ch=*pSrc++;
if(ch<=0x7f) {
++reqLength;
} else if(ch<=0x7ff) {
reqLength+=2;
} else if(!UTF_IS_SURROGATE(ch)) {
reqLength+=3;
} else if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
++pSrc;
reqLength+=4;
} else if(subchar>=0) {
reqLength+=U8_LENGTH(subchar);
++numSubstitutions;
} else {
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
}
}
/* total = bytes actually written + bytes that did not fit */
reqLength+=(int32_t)(pDest - (uint8_t *)dest);
if(pNumSubstitutions!=NULL) {
*pNumSubstitutions=numSubstitutions;
}
if(pDestLength){
*pDestLength = reqLength;
}
u_terminateChars((char*)dest,destCapacity,reqLength,pErrorCode);
return (char*)dest;
}
/**
 * Convert a UTF-16 string to UTF-8 without substitution.
 *
 * Forwards to u_strToUTF8WithSub() with U_SENTINEL as the substitution
 * character and no substitution counter; all other arguments and the return
 * value are passed through unchanged.
 */
U_CAPI char* U_EXPORT2
u_strToUTF8(char *dest,
            int32_t destCapacity,
            int32_t *pDestLength,
            const UChar *pSrc,
            int32_t srcLength,
            UErrorCode *pErrorCode){
    char *result;

    result = u_strToUTF8WithSub(dest, destCapacity, pDestLength,
                                pSrc, srcLength,
                                U_SENTINEL, NULL,
                                pErrorCode);
    return result;
}