CFStringEncodings.c [plain text]
#include "CFInternal.h"
#include <CoreFoundation/CFString.h>
#include <CoreFoundation/CFByteOrder.h>
#include "CFUtilities.h"
#include <string.h>
#include "CFStringEncodingConverterExt.h"
#include "CFUniChar.h"
#include "CFUnicodeDecomposition.h"
static UInt32 __CFWantsToUseASCIICompatibleConversion = (UInt32)-1;
CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) {
if (__CFWantsToUseASCIICompatibleConversion == (UInt32)-1) {
__CFWantsToUseASCIICompatibleConversion = false;
}
return (__CFWantsToUseASCIICompatibleConversion ? kCFStringEncodingASCIICompatibleConversion : 0);
}
void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
__CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);
}
Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;
UniChar __CFCharToUniCharTable[256] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
if (__CFCharToUniCharFunc != func) {
int ch;
__CFCharToUniCharFunc = func;
if (func) {
for (ch = 128; ch < 256; ch++) {
UniChar uch;
__CFCharToUniCharTable[ch] = (__CFCharToUniCharFunc(0, ch, &uch) ? uch : 0xFFFD);
}
} else { for (ch = 128; ch < 256; ch++) __CFCharToUniCharTable[ch] = ch;
}
}
}
__private_extern__ void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
CFIndex idx;
for (idx = 0; idx < numChars; idx++) buffer[idx] = __CFCharToUniCharTable[bytes[idx]];
}
#define kCFCharConversionBufferLength 512
#define MAX_LOCAL_CHARS (sizeof(buffer->localBuffer) / sizeof(uint8_t))
#define MAX_LOCAL_UNICHARS (sizeof(buffer->localBuffer) / sizeof(UniChar))
#if defined(__BIG_ENDIAN__)
#define SHOULD_SWAP(BOM) (BOM == 0xFFFE)
#else
#define SHOULD_SWAP(BOM) (BOM != 0xFEFF)
#endif
Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);
}
enum {
__NSNonLossyErrorMode = -1,
__NSNonLossyASCIIMode = 0,
__NSNonLossyBackslashMode = 1,
__NSNonLossyHexInitialMode = __NSNonLossyBackslashMode + 1,
__NSNonLossyHexFinalMode = __NSNonLossyHexInitialMode + 4,
__NSNonLossyOctalInitialMode = __NSNonLossyHexFinalMode + 1,
__NSNonLossyOctalFinalMode = __NSNonLossyHexFinalMode + 3
};
Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
UInt32 idx;
const UniChar *uniChars = (const UniChar *)bytes;
const uint8_t *chars = (const uint8_t *)bytes;
const uint8_t *end = chars + len;
uint16_t bom;
Boolean allASCII = false;
if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
buffer->isASCII = !alwaysUnicode;
buffer->shouldFreeChars = false;
buffer->numChars = 0;
if (0 == len) return true;
buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
switch (encoding) {
case kCFStringEncodingUnicode:
bom = (*uniChars == 0xfffe || *uniChars == 0xfeff) ? (*uniChars++) : 0;
len = len / 2 - (0 == bom ? 0 : 1);
if (buffer->isASCII) { if (SHOULD_SWAP(bom)) {
for (idx = 0; idx < len; idx++) if ((uniChars[idx] & 0x80ff) != 0) {buffer->isASCII = false; break;}
} else {
for (idx = 0; idx < len; idx++) if (uniChars[idx] > 127) {buffer->isASCII = false; break;}
}
}
if (buffer->isASCII) {
buffer->numChars = len;
buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
if (SHOULD_SWAP(bom)) { for (idx = 0; idx < len; idx++) buffer->chars.ascii[idx] = (uniChars[idx] >> 8);
} else {
for (idx = 0; idx < len; idx++) buffer->chars.ascii[idx] = uniChars[idx];
}
} else {
buffer->numChars = len;
if (useClientsMemoryPtr && (bom == 0) && !SHOULD_SWAP(bom)) { *useClientsMemoryPtr = true;
buffer->shouldFreeChars = false;
buffer->chars.unicode = (UniChar *)bytes;
} else {
buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
if (SHOULD_SWAP(bom)) {
for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = CFSwapInt16(uniChars[idx]);
} else {
memmove(buffer->chars.unicode, uniChars, len * sizeof(UniChar));
}
}
}
return true;
case kCFStringEncodingNonLossyASCII: {
UTF16Char currentValue = 0;
uint8_t character;
int8_t mode = __NSNonLossyASCIIMode;
buffer->isASCII = false;
buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
buffer->numChars = 0;
while (chars < end) {
character = (*chars++);
switch (mode) {
case __NSNonLossyASCIIMode:
if (character == '\\') {
mode = __NSNonLossyBackslashMode;
} else if (character < 0x80) {
currentValue = character;
} else {
mode = __NSNonLossyErrorMode;
}
break;
case __NSNonLossyBackslashMode:
if ((character == 'U') || (character == 'u')) {
mode = __NSNonLossyHexInitialMode;
currentValue = 0;
} else if ((character >= '0') && (character <= '9')) {
mode = __NSNonLossyOctalInitialMode;
currentValue = character - '0';
} else if (character == '\\') {
mode = __NSNonLossyASCIIMode;
currentValue = character;
} else {
mode = __NSNonLossyErrorMode;
}
break;
default:
if (mode < __NSNonLossyHexFinalMode) {
if ((character >= '0') && (character <= '9')) {
currentValue = (currentValue << 4) | (character - '0');
if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
} else {
if (character >= 'a') character -= ('a' - 'A');
if ((character >= 'A') && (character <= 'F')) {
currentValue = (currentValue << 4) | ((character - 'A') + 10);
if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
} else {
mode = __NSNonLossyErrorMode;
}
}
} else {
if ((character >= '0') && (character <= '9')) {
currentValue = (currentValue << 3) | (character - '0');
if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
} else {
mode = __NSNonLossyErrorMode;
}
}
break;
}
if (mode == __NSNonLossyASCIIMode) {
buffer->chars.unicode[buffer->numChars++] = currentValue;
} else if (mode == __NSNonLossyErrorMode) {
return false;
}
}
return (mode == __NSNonLossyASCIIMode);
}
case kCFStringEncodingUTF8:
if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { chars += 3;
len -= 3;
if (0 == len) return true;
}
allASCII = !alwaysUnicode;
if (allASCII) {
for (idx = 0; idx < len; idx++) {
if (128 <= chars[idx]) {
allASCII = false;
break;
}
}
}
buffer->isASCII = allASCII;
if (allASCII) {
buffer->numChars = len;
buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
} else {
UInt32 numDone;
static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
if (!__CFFromUTF8) {
const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
__CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
}
buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
buffer->numChars = 0;
while (chars < end) {
numDone = 0;
chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
if (0 == numDone) {
if (buffer->shouldFreeChars) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
buffer->isASCII = !alwaysUnicode;
buffer->shouldFreeChars = false;
buffer->chars.ascii = NULL;
buffer->numChars = 0;
return false;
}
buffer->numChars += numDone;
}
}
return true;
default:
if (CFStringEncodingIsValidEncoding(encoding)) {
const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
if (!converter) return false;
if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
allASCII = !alwaysUnicode && isASCIISuperset;
if (allASCII) {
for (idx = 0; idx < len; idx++) {
if (128 <= chars[idx]) {
allASCII = false;
break;
}
}
}
buffer->isASCII = allASCII;
if (allASCII) {
buffer->numChars = len;
buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
} else {
buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
buffer->numChars = len;
if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
} else {
for (idx = 0; idx < len; idx++)
if (chars[idx] < 0x80 && isASCIISuperset)
buffer->chars.unicode[idx] = (UniChar)chars[idx];
else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx))
return false;
}
}
return true;
} else {
allASCII = !alwaysUnicode && isASCIISuperset;
if (allASCII) {
for (idx = 0; idx < len; idx++)
if (128 <= chars[idx]) {
allASCII = false;
break;
}
}
buffer->isASCII = allASCII;
if (allASCII) {
buffer->numChars = len;
buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
} else {
UInt32 guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
static UInt32 lossyFlag = (UInt32)-1;
buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
if (lossyFlag == (UInt32)-1) lossyFlag = (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? 0 : kCFStringEncodingAllowLossyConversion);
if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) {
if (buffer->shouldFreeChars) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
buffer->isASCII = !alwaysUnicode;
buffer->shouldFreeChars = false;
buffer->chars.ascii = NULL;
buffer->numChars = 0;
return false;
}
}
return true;
}
} else {
return false;
}
}
}
CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
CFIndex totalBytesWritten = 0;
CFIndex numCharsProcessed = 0;
const UniChar *unichars;
if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
if (!__CFToUTF8) {
const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
__CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
}
numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
} else if (encoding == kCFStringEncodingNonLossyASCII) {
const char *hex = "0123456789abcdef";
UniChar ch;
CFStringInlineBuffer buf;
CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
while (numCharsProcessed < rangeLen) {
CFIndex reqLength;
CFIndex cnt;
char tmp[6];
ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
reqLength = 1;
tmp[0] = ch;
} else {
if (ch == '\\') {
tmp[1] = '\\';
reqLength = 2;
} else if (ch < 256) {
tmp[1] = '0' + (ch >> 6);
tmp[2] = '0' + ((ch >> 3) & 7);
tmp[3] = '0' + (ch & 7);
reqLength = 4;
} else {
tmp[1] = 'u'; tmp[2] = hex[(ch >> 12) & 0x0f];
tmp[3] = hex[(ch >> 8) & 0x0f];
tmp[4] = hex[(ch >> 4) & 0x0f];
tmp[5] = hex[ch & 0x0f];
reqLength = 6;
}
tmp[0] = '\\';
}
if (buffer) {
if (totalBytesWritten + reqLength > max) break;
for (cnt = 0; cnt < reqLength; cnt++) {
buffer[totalBytesWritten + cnt] = tmp[cnt];
}
}
totalBytesWritten += reqLength;
numCharsProcessed++;
}
} else if (encoding == kCFStringEncodingUnicode) {
CFIndex extraForBOM = generatingExternalFile ? sizeof(UniChar) : 0;
numCharsProcessed = rangeLen;
if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
}
totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
if (buffer) {
if (generatingExternalFile) {
#if defined(__BIG_ENDIAN__)
*buffer++ = 0xfe; *buffer++ = 0xff;
#else
*buffer++ = 0xff; *buffer++ = 0xfe;
#endif
}
CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
}
} else {
CFIndex numChars;
UInt32 flags;
const unsigned char *cString = NULL;
if (!CF_IS_OBJC(CFStringGetTypeID(), string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { const unsigned char *ptr;
if ((cString = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
ptr = (cString += rangeLoc);
if (__CFStringGetEightBitStringEncoding() == encoding) {
numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
if (buffer) memmove(buffer, cString, numCharsProcessed);
if (usedBufLen) *usedBufLen = numCharsProcessed;
return numCharsProcessed;
}
while (*ptr < 0x80 && rangeLen > 0) {
++ptr;
--rangeLen;
}
numCharsProcessed = ptr - cString;
if (buffer) {
numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
memmove(buffer, cString, numCharsProcessed);
buffer += numCharsProcessed;
max -= numCharsProcessed;
}
if (!rangeLen || (buffer && (max == 0))) {
if (usedBufLen) *usedBufLen = numCharsProcessed;
return numCharsProcessed;
}
rangeLoc += numCharsProcessed;
totalBytesWritten += numCharsProcessed;
}
if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
ptr = (cString += (rangeLoc + 1));
if (__CFStringGetEightBitStringEncoding() == encoding) {
numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
if (buffer) memmove(buffer, cString, numCharsProcessed);
if (usedBufLen) *usedBufLen = numCharsProcessed;
return numCharsProcessed;
}
while (*ptr < 0x80 && rangeLen > 0) {
++ptr;
--rangeLen;
}
numCharsProcessed = ptr - cString;
if (buffer) {
numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
memmove(buffer, cString, numCharsProcessed);
buffer += numCharsProcessed;
max -= numCharsProcessed;
}
if (!rangeLen || (buffer && (max == 0))) {
if (usedBufLen) *usedBufLen = numCharsProcessed;
return numCharsProcessed;
}
rangeLoc += numCharsProcessed;
totalBytesWritten += numCharsProcessed;
}
}
if (!buffer) max = 0;
flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | __CFGetASCIICompatibleFlag();
if (!cString && (cString = (const char*)CFStringGetCharactersPtr(string))) { if (CFStringEncodingIsValidEncoding(encoding)) { CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar*)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
} else {
return 0;
}
} else {
UniChar charBuf[kCFCharConversionBufferLength];
UInt32 currentLength;
UInt32 usedLen;
uint32_t lastUsedLen = 0, lastNumChars = 0;
uint32_t result;
Boolean isCFBuiltin = CFStringEncodingIsValidEncoding(encoding);
#define MAX_DECOMP_LEN (6)
while (rangeLen > 0) {
currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
if (isCFBuiltin) { if ((result = CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, currentLength, &numChars, buffer, max, &usedLen)) != kCFStringEncodingConversionSuccess) {
if (kCFStringEncodingInvalidInputStream == result) {
CFRange composedRange;
if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
result = CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
}
}
if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
CFStringGetCharacters(string, composedRange, charBuf);
if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { CFIndex lastRangeLoc = rangeLoc - lastNumChars;
currentLength = composedRange.location - lastRangeLoc;
CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
if ((result = CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen)) == kCFStringEncodingConversionSuccess) { totalBytesWritten -= lastUsedLen;
numCharsProcessed -= lastNumChars;
rangeLoc = lastRangeLoc;
rangeLen += lastNumChars;
if (max) {
buffer -= lastUsedLen;
max += lastUsedLen;
}
}
}
}
}
}
if (kCFStringEncodingConversionSuccess != result) { totalBytesWritten += usedLen;
numCharsProcessed += numChars;
break;
}
}
} else {
return 0;
}
totalBytesWritten += usedLen;
numCharsProcessed += numChars;
rangeLoc += numChars;
rangeLen -= numChars;
if (max) {
buffer += usedLen;
max -= usedLen;
if (max <= 0) break;
}
lastUsedLen = usedLen; lastNumChars = numChars;
flags &= ~kCFStringEncodingPrependBOM;
}
}
}
if (usedBufLen) *usedBufLen = totalBytesWritten;
return numCharsProcessed;
}
#define MAX_STACK_BUFFER_LEN (255)
CF_EXPORT Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
#if defined(__MACH__)
const UTF16Char *characters = CFStringGetCharactersPtr(string);
uint32_t usedBufLen;
if (NULL == characters) {
CFIndex length = CFStringGetLength(string);
if (length > MAX_STACK_BUFFER_LEN) {
UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
CFRange range = CFRangeMake(0, MAX_STACK_BUFFER_LEN);
uint32_t localUsedBufLen;
usedBufLen = 0;
while (length > 0) {
CFStringGetCharacters(string, range, charactersBuffer);
if (CFUniCharIsSurrogateHighCharacter(charactersBuffer[range.length - 1])) --range.length;
if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, maxBufLen - usedBufLen, &localUsedBufLen, true, kCFUniCharUTF8Format, true)) return false;
buffer += localUsedBufLen;
usedBufLen += localUsedBufLen;
length -= range.length;
range.location += range.length;
range.length = (length < MAX_STACK_BUFFER_LEN ? length : MAX_STACK_BUFFER_LEN);
}
} else {
UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
CFStringGetCharacters(string, CFRangeMake(0, length), charactersBuffer);
if (!CFUniCharDecompose(charactersBuffer, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
buffer += usedBufLen;
}
} else {
if (!CFUniCharDecompose(characters, CFStringGetLength(string), NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
buffer += usedBufLen;
}
if (usedBufLen < (uint32_t)maxBufLen) { *buffer = '\0';
return true;
} else {
return false;
}
#else __MACH__
return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
#endif __MACH__
}