#include <CoreFoundation/CFXMLParser.h>
#include <CoreFoundation/CFNumber.h>
#include "CFXMLInputStream.h"
#include "CFUniChar.h"
#include "CFInternal.h"
/* Internal state of a CFXMLParser instance. */
struct __CFXMLParser {
CFRuntimeBase _cfBase;
/* Input stream the parser reads from; set up by _initializeInputStream() and torn down by _freeInputStream(). */
_CFXMLInputStream input;
/* Stack of client-created structures. NULL until CFXMLParserParse() allocates it; slot 0 then holds the document structure. */
void **stack;
/* Points at the entry of `stack` currently being filled (the innermost open structure). */
void **top;
/* Number of slots allocated for `stack`; 0 until parsing starts. */
UInt32 capacity;
/* `node` is the scratch node that is filled in and handed to the createXMLStructure callback. */
/* `argDict` is released on deallocation; its use is not visible in this part of the file. */
struct __CFXMLNode *node; CFMutableDictionaryRef argDict;
/* Released on deallocation; its use is not visible in this part of the file. */
CFMutableArrayRef argArray;
/* Parse option flags (e.g. kCFXMLParserSkipWhitespace, kCFXMLParserSkipMetaData). */
UInt32 options;
/* Client callbacks copied in at creation time. */
CFXMLParserCallBacks callBacks;
/* Client context; `info` is retained/released through the context's own retain/release functions. */
CFXMLParserContext context;
/* Current parse status or error code. */
CFXMLParserStatusCode status;
/* Description of the current error, or NULL when none has been recorded. */
CFStringRef errorString;
};
/* Runtime description callback: returns a new string of the form "<CFXMLParser 0x...>". */
static CFStringRef __CFXMLParserCopyDescription(CFTypeRef cf) {
    CFAllocatorRef allocator = CFGetAllocator(cf);
    const struct __CFXMLParser *xmlParser = (const struct __CFXMLParser *)cf;

    return CFStringCreateWithFormat(allocator, NULL, CFSTR("<CFXMLParser %p>"), xmlParser);
}
/*
 * Runtime finalizer for CFXMLParser: frees the input stream, releases every
 * owned CF object, frees the structure stack and finally releases the client
 * context info through the client's own release function.
 */
static void __CFXMLParserDeallocate(CFTypeRef cf) {
    struct __CFXMLParser *xmlParser = (struct __CFXMLParser *)cf;
    CFAllocatorRef allocator = CFGetAllocator(xmlParser);

    _freeInputStream(&xmlParser->input);

    if (xmlParser->argDict != NULL) {
        CFRelease(xmlParser->argDict);
    }
    if (xmlParser->argArray != NULL) {
        CFRelease(xmlParser->argArray);
    }
    if (xmlParser->errorString != NULL) {
        CFRelease(xmlParser->errorString);
    }
    if (xmlParser->node != NULL) {
        CFRelease(xmlParser->node);
    }

    CFAllocatorDeallocate(allocator, xmlParser->stack);

    /* The context info is only released when the client supplied a release function. */
    if (xmlParser->context.info != NULL && xmlParser->context.release != NULL) {
        xmlParser->context.release(xmlParser->context.info);
    }
}
/* Type ID for CFXMLParser; stays _kCFRuntimeNotATypeID until the class has been registered. */
static CFTypeID __kCFXMLParserTypeID = _kCFRuntimeNotATypeID;
/*
 * Runtime class record for CFXMLParser. Only the class name, __CFXMLParserDeallocate and
 * __CFXMLParserCopyDescription are supplied; every other slot is left 0/NULL.
 * NOTE(review): the meaning of each positional slot follows the CFRuntimeClass
 * declaration in CFRuntime.h -- confirm against that header before reordering.
 */
static const CFRuntimeClass __CFXMLParserClass = {
0,
"CFXMLParser",
NULL, NULL, __CFXMLParserDeallocate,
NULL,
NULL,
NULL, __CFXMLParserCopyDescription
};
static void __CFXMLParserInitialize(void) {
__kCFXMLParserTypeID = _CFRuntimeRegisterClass(&__CFXMLParserClass);
}
/* Returns the CFXMLParser type ID, registering the class on first use. */
CFTypeID CFXMLParserGetTypeID(void) {
    if (__kCFXMLParserTypeID == _kCFRuntimeNotATypeID) {
        __CFXMLParserInitialize();
    }
    return __kCFXMLParserTypeID;
}
/*
 * Copies the parser's client context into *context.
 * A NULL `context` is accepted and makes the call a no-op (after validation).
 */
void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context) {
    CFAssert1(parser != NULL, __kCFLogAssertion, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__);
    __CFGenericValidateType(parser, CFXMLParserGetTypeID());

    if (!context) {
        return;
    }

    /* Field-by-field copy of the stored context. */
    context->version = parser->context.version;
    context->info = parser->context.info;
    context->retain = parser->context.retain;
    context->release = parser->context.release;
    context->copyDescription = parser->context.copyDescription;

    /* Convert the stored function pointers back to their caller-visible form. */
    UNFAULT_CALLBACK(context->retain);
    UNFAULT_CALLBACK(context->release);
    UNFAULT_CALLBACK(context->copyDescription);
}
/*
 * Copies the parser's callback table into *callBacks.
 * A NULL `callBacks` is accepted and makes the call a no-op (after validation).
 */
void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks) {
    __CFGenericValidateType(parser, CFXMLParserGetTypeID());

    if (!callBacks) {
        return;
    }

    /* Field-by-field copy of the stored callbacks. */
    callBacks->version = parser->callBacks.version;
    callBacks->createXMLStructure = parser->callBacks.createXMLStructure;
    callBacks->addChild = parser->callBacks.addChild;
    callBacks->endXMLStructure = parser->callBacks.endXMLStructure;
    callBacks->resolveExternalEntity = parser->callBacks.resolveExternalEntity;
    callBacks->handleError = parser->callBacks.handleError;

    /* Convert the stored function pointers back to their caller-visible form. */
    UNFAULT_CALLBACK(callBacks->createXMLStructure);
    UNFAULT_CALLBACK(callBacks->addChild);
    UNFAULT_CALLBACK(callBacks->endXMLStructure);
    UNFAULT_CALLBACK(callBacks->resolveExternalEntity);
    UNFAULT_CALLBACK(callBacks->handleError);
}
/* Returns the URL stored on the parser's input stream (not retained for the caller). */
CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser) {
    CFURLRef sourceURL;

    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
    sourceURL = parser->input.url;
    return sourceURL;
}
/* Returns the input stream's current location, as reported by _inputStreamCurrentLocation(). */
CFIndex CFXMLParserGetLocation(CFXMLParserRef parser) {
    CFIndex location;

    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
    location = _inputStreamCurrentLocation(&parser->input);
    return location;
}
/* Returns the input stream's current line, as reported by _inputStreamCurrentLine(). */
CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser) {
    CFIndex line;

    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
    line = _inputStreamCurrentLine(&parser->input);
    return line;
}
/*
 * Returns the structure stored in slot 0 of the parser's stack (the document
 * structure created by CFXMLParserParse), or NULL while no stack has been allocated.
 */
void *CFXMLParserGetDocument(CFXMLParserRef parser) {
    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
    return (parser->capacity > 0) ? parser->stack[0] : NULL;
}
/* Returns the parser's current status / error code. */
CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser) {
    CFXMLParserStatusCode currentStatus;

    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
    currentStatus = parser->status;
    return currentStatus;
}
/*
 * Returns a retained reference to the parser's error description, or NULL
 * when no error has been recorded. The caller owns the returned reference.
 */
CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser) {
    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
    /* errorString stays NULL until an error occurs; CFRetain(NULL) is not allowed. */
    if (parser->errorString == NULL) {
        return NULL;
    }
    return (CFStringRef)CFRetain(parser->errorString);
}
/*
 * Records a client-requested abort: stores `errorCode` as the parser status and
 * replaces the stored error string with a copy of `errorDescription`.
 * `errorCode` must be greater than zero and `errorDescription` must be a non-NULL CFString.
 */
void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription) {
    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
    CFAssert1(errorCode > 0, __kCFLogAssertion, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__);
    CFAssert1(errorDescription != NULL, __kCFLogAssertion, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__);
    __CFGenericValidateType(errorDescription, CFStringGetTypeID());

    parser->status = errorCode;

    /* Drop any previous description before installing the new copy. */
    if (parser->errorString != NULL) {
        CFRelease(parser->errorString);
    }
    parser->errorString = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, errorDescription);
}
/*
 * Forward declarations for the file-private parsing routines. Each parse*
 * function returns a Boolean; the ones visible in this file return false after
 * reporting an error through _CFReportError().
 */
static Boolean parseXML(CFXMLParserRef parser);
static Boolean parseComment(CFXMLParserRef parser, Boolean report);
static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report);
static Boolean parseInlineDTD(CFXMLParserRef parser);
static Boolean parseDTD(CFXMLParserRef parser);
static Boolean parsePhysicalEntityReference(CFXMLParserRef parser);
static Boolean parseCDSect(CFXMLParserRef parser);
static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report);
static Boolean parsePCData(CFXMLParserRef parser);
static Boolean parseWhitespace(CFXMLParserRef parser);
static Boolean parseAttributeListDeclaration(CFXMLParserRef parser);
static Boolean parseNotationDeclaration(CFXMLParserRef parser);
static Boolean parseElementDeclaration(CFXMLParserRef parser);
static Boolean parseEntityDeclaration(CFXMLParserRef parser);
static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID);
static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag);
static Boolean parseTagContent(CFXMLParserRef parser);
static Boolean parseTag(CFXMLParserRef parser);
static Boolean parseAttributes(CFXMLParserRef parser);
static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str);
/* Error reporting and structure-stack helpers. */
static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str);
static Boolean reportNewLeaf(CFXMLParserRef parser); static void pushXMLNode(CFXMLParserRef parser, void *node);
/*
 * Shared constructor behind CFXMLParserCreate() and CFXMLParserCreateWithDataFromURL().
 *
 * Allocates the parser and its scratch node, initializes the input stream from
 * `dataSource` / `xmlData`, copies the callbacks and (optionally) the client
 * context, retaining context->info through context->retain when both are set.
 *
 * Returns the new parser in the kCFXMLStatusParseNotBegun state, or NULL when
 * either runtime instance could not be created. `callBacks` must be non-NULL.
 */
static CFXMLParserRef __CFXMLParserInit(CFAllocatorRef alloc, CFURLRef dataSource, CFOptionFlags options, CFDataRef xmlData, CFIndex version, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
    struct __CFXMLParser *parser = (struct __CFXMLParser *)_CFRuntimeCreateInstance(alloc, CFXMLParserGetTypeID(), sizeof(struct __CFXMLParser) - sizeof(CFRuntimeBase), NULL);
    struct __CFXMLNode *node = (struct __CFXMLNode *)_CFRuntimeCreateInstance(alloc, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode) - sizeof(CFRuntimeBase), NULL);
    UniChar *buf;
    if (parser && node) {
        alloc = CFGetAllocator(parser);
        _initializeInputStream(&(parser->input), alloc, dataSource, xmlData);
        /* Assign the stack first so that `top` is derived from a defined value. */
        parser->stack = NULL;
        parser->top = parser->stack;
        parser->capacity = 0;
        buf = (UniChar *)CFAllocatorAllocate(alloc, 128*sizeof(UniChar), 0);
        parser->node = node;
        /* Only claim the 128-character external buffer when it was actually allocated. */
        parser->node->dataString = CFStringCreateMutableWithExternalCharactersNoCopy(alloc, buf, 0, buf ? 128 : 0, alloc);
        parser->node->additionalData = NULL;
        parser->node->version = version;
        parser->argDict = NULL;
        parser->argArray = NULL;
        parser->options = options;
        parser->callBacks = *callBacks;
        FAULT_CALLBACK((void **)&(parser->callBacks.createXMLStructure));
        FAULT_CALLBACK((void **)&(parser->callBacks.addChild));
        FAULT_CALLBACK((void **)&(parser->callBacks.endXMLStructure));
        FAULT_CALLBACK((void **)&(parser->callBacks.resolveExternalEntity));
        FAULT_CALLBACK((void **)&(parser->callBacks.handleError));
        if (context) {
            parser->context = *context;
            /* The parser keeps the client's info alive for its own lifetime. */
            if (parser->context.info && parser->context.retain) {
                parser->context.retain(parser->context.info);
            }
        } else {
            parser->context.version = 0;
            parser->context.info = NULL;
            parser->context.retain = NULL;
            parser->context.release = NULL;
            parser->context.copyDescription = NULL;
        }
        parser->status = kCFXMLStatusParseNotBegun;
        parser->errorString = NULL;
    } else {
        /* One of the two allocations failed: release whichever succeeded. */
        if (parser) CFRelease(parser);
        if (node) CFRelease(node);
        parser = NULL;
    }
    return parser;
}
/*
 * Creates a parser over in-memory XML data.
 *
 * Asserts that `xmlData` is a non-NULL CFData, that `dataSource` is NULL or a
 * CFURL, that the three mandatory callbacks are present, and that
 * `versionOfNodes` is at most 1 and not 0; then delegates to __CFXMLParserInit().
 */
CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
    CFXMLParserRef newParser;

    /* Input data checks. */
    CFAssert1(xmlData != NULL, __kCFLogAssertion,
              "%s(): NULL data not permitted", __PRETTY_FUNCTION__);
    __CFGenericValidateType(xmlData, CFDataGetTypeID());
    CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion,
              "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);

    /* Callback table checks. */
    CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion,
              "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);

    /* Node version checks. */
    CFAssert2(versionOfNodes <= 1, __kCFLogAssertion,
              "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
    CFAssert1(versionOfNodes != 0, __kCFLogAssertion,
              "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);

    newParser = __CFXMLParserInit(allocator, dataSource, parseOptions, xmlData, versionOfNodes, callBacks, context);
    return newParser;
}
/*
 * Creates a parser that is given only a source URL (no in-memory data is passed
 * to __CFXMLParserInit()).
 *
 * Asserts that `dataSource` is NULL or a CFURL, that the three mandatory
 * callbacks are present, and that `versionOfNodes` is at most 1 and not 0.
 */
CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
    CFXMLParserRef newParser;

    CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion,
              "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
    CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion,
              "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
    CFAssert2(versionOfNodes <= 1, __kCFLogAssertion,
              "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
    CFAssert1(versionOfNodes != 0, __kCFLogAssertion,
              "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);

    newParser = __CFXMLParserInit(allocator, dataSource, parseOptions, NULL, versionOfNodes, callBacks, context);
    return newParser;
}
/*
 * Runs the parse. May only be called while the parser is in the
 * kCFXMLStatusParseNotBegun state; any other state returns false immediately.
 *
 * Opens the input stream (recording kCFXMLErrorNoData or
 * kCFXMLErrorUnknownEncoding and invoking handleError on failure), allocates
 * the 16-slot structure stack, asks the client to create the document
 * structure, and then hands over to parseXML().
 */
Boolean CFXMLParserParse(CFXMLParserRef parser) {
    CFXMLDocumentInfo documentInfo;

    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
    if (parser->status != kCFXMLStatusParseNotBegun) {
        return false;
    }
    parser->status = kCFXMLStatusParseInProgress;

    if (!_openInputStream(&parser->input)) {
        if (parser->input.data) {
            /* Data is present, so the only expected failure is an unrecognized encoding. */
            CFAssert(parser->input.encoding == kCFStringEncodingInvalidId, __kCFLogAssertion, "CFXMLParser internal error: input stream could not be opened");
            parser->status = kCFXMLErrorUnknownEncoding;
            parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), "Encountered unknown encoding", kCFStringEncodingASCII);
        } else {
            parser->status = kCFXMLErrorNoData;
            parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("No data found at %@"), CFURLGetString(parser->input.url));
        }
        if (parser->callBacks.handleError) {
            INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
        }
        return false;
    }

    /* Set up the structure stack with its initial capacity. */
    parser->stack = (void **)CFAllocatorAllocate(CFGetAllocator(parser), 16 * sizeof(void *), 0);
    parser->capacity = 16;

    /* Describe the document to the client; documentInfo only lives for the duration of the callback. */
    parser->node->dataTypeID = kCFXMLNodeTypeDocument;
    documentInfo.encoding = _inputStreamGetEncoding(&parser->input);
    documentInfo.sourceURL = parser->input.url;
    parser->node->additionalData = &documentInfo;
    parser->stack[0] = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
    parser->top = parser->stack;
    parser->node->additionalData = NULL;

    /* The callback may have changed the status (e.g. via CFXMLParserAbort). */
    if (parser->status != kCFXMLStatusParseInProgress) {
        _CFReportError(parser, parser->status, NULL);
        return false;
    }
    return parseXML(parser);
}
/*
 * Consumes a run of whitespace. Unless kCFXMLParserSkipWhitespace is set, a
 * non-empty run is captured into the scratch node and reported as a
 * kCFXMLNodeTypeWhitespace leaf; otherwise the whitespace is silently skipped.
 */
static Boolean parseWhitespace(CFXMLParserRef parser) {
    Boolean wantsWhitespace = !(parser->options & kCFXMLParserSkipWhitespace);
    CFMutableStringRef sink = wantsWhitespace ? (CFMutableStringRef)(parser->node->dataString) : NULL;
    CFIndex skipped = _inputStreamSkipWhitespace(&parser->input, sink);

    if (!wantsWhitespace || !skipped) {
        return true;
    }

    parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
    parser->node->additionalData = NULL;
    return reportNewLeaf(parser);
}
/*
 * Scans a comment body up to "--" and requires the next character to be '>'.
 * The comment is reported as a kCFXMLNodeTypeComment leaf only when `report`
 * is true and kCFXMLParserSkipMetaData is not set.
 */
static Boolean parseComment(CFXMLParserRef parser, Boolean report) {
    const UniChar terminator[2] = {'-', '-'};
    UniChar next;
    Boolean wantsReport = report && (!(parser->options & kCFXMLParserSkipMetaData));
    CFMutableStringRef sink = wantsReport ? (CFMutableStringRef)(parser->node->dataString) : NULL;

    if (!_inputStreamScanToCharacters(&parser->input, terminator, 2, sink) || !_inputStreamGetCharacter(&parser->input, &next)) {
        _CFReportError(parser, kCFXMLErrorUnexpectedEOF,"Found unexpected EOF while parsing comment");
        return false;
    }
    if (next != '>') {
        _CFReportError(parser, kCFXMLErrorMalformedComment, "Found \"--\" within a comment");
        return false;
    }
    if (!wantsReport) {
        return true;
    }

    parser->node->dataTypeID = kCFXMLNodeTypeComment;
    parser->node->additionalData = NULL;
    return reportNewLeaf(parser);
}
/*
 * Parses a processing instruction: a name, optional whitespace, then content up
 * to "?>". When `report` is true and the current top-of-stack structure is
 * non-NULL, the instruction is reported as a kCFXMLNodeTypeProcessingInstruction
 * leaf whose name is the target and whose additional data carries the content.
 */
static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report) {
    const UniChar closing[2] = {'?', '>'};
    CFMutableStringRef content = NULL;
    CFStringRef target;
    CFXMLProcessingInstructionInfo info;
    CFStringRef savedString;
    Boolean reported;

    if (!_inputStreamScanXMLName(&parser->input, false, &target)) {
        _CFReportError(parser, kCFXMLErrorMalformedProcessingInstruction, "Found malformed processing instruction");
        return false;
    }
    _inputStreamSkipWhitespace(&parser->input, NULL);

    /* The content is only captured when it is going to be reported. */
    if (report && *parser->top) {
        content = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
    }

    if (!_inputStreamScanToCharacters(&parser->input, closing, 2, content)) {
        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing processing instruction");
        if (content) CFRelease(content);
        return false;
    }
    if (!content) {
        return true;
    }

    /* Temporarily swap the scratch node's string for the PI target, then restore it. */
    savedString = parser->node->dataString;
    parser->node->dataTypeID = kCFXMLNodeTypeProcessingInstruction;
    parser->node->dataString = target;
    info.dataString = content;
    parser->node->additionalData = &info;
    reported = reportNewLeaf(parser);
    parser->node->additionalData = NULL;
    parser->node->dataString = savedString;
    CFRelease(content);
    return reported;
}
/* The literal "DOCTYPE" that must open a document type declaration. */
static const UniChar _DoctypeOpening[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};

/*
 * Parses a document type declaration: "DOCTYPE", a name, an optional external
 * ID, an optional inline DTD in [...], and the closing '>'.
 *
 * Unless kCFXMLParserSkipMetaData is set (or the current top-of-stack structure
 * is NULL), the client is asked to create a kCFXMLNodeTypeDocumentType structure,
 * which is added to the current parent, pushed while the inline DTD is parsed
 * and finally closed with endXMLStructure.
 *
 * Returns false after reporting an error; true on success.
 */
static Boolean parseDTD(CFXMLParserRef parser) {
    UniChar ch;
    Boolean success;
    Boolean clientAborted = false;
    CFXMLDocumentTypeInfo docData = {{NULL, NULL}};
    void *dtdStructure = NULL;
    CFStringRef name;

    success = _inputStreamMatchString(&parser->input, _DoctypeOpening, 7);
    success = success && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
    success = success && _inputStreamScanXMLName(&parser->input, false, &name);
    if (success) {
        _inputStreamSkipWhitespace(&parser->input, NULL);
        success = _inputStreamPeekCharacter(&parser->input, &ch);
    } else {
        _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
        return false;
    }

    /* Anything other than '[' or '>' at this point must be an external ID. */
    if (success && ch != '[' && ch != '>') {
        success = parseExternalID(parser, false, &(docData.externalID));
        if (success) {
            _inputStreamSkipWhitespace(&parser->input, NULL);
            success = _inputStreamPeekCharacter(&parser->input, &ch);
        }
    }

    if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
        /* Temporarily swap the scratch node's string for the DTD name, then restore it. */
        CFStringRef tmp = parser->node->dataString;
        parser->node->dataTypeID = kCFXMLNodeTypeDocumentType;
        parser->node->dataString = name;
        parser->node->additionalData = &docData;
        dtdStructure = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
        if (dtdStructure && parser->status == kCFXMLStatusParseInProgress) {
            INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, dtdStructure, parser->context.info);
        }
        parser->node->additionalData = NULL;
        parser->node->dataString = tmp;
        clientAborted = (parser->status != kCFXMLStatusParseInProgress);
    } else {
        dtdStructure = NULL;
    }

    /* The external ID is no longer needed; release it before any return so it cannot leak. */
    if (docData.externalID.publicID) CFRelease(docData.externalID.publicID);
    if (docData.externalID.systemID) CFRelease(docData.externalID.systemID);

    if (clientAborted) {
        _CFReportError(parser, parser->status, NULL);
        return false;
    }

    pushXMLNode(parser, dtdStructure);

    if (success && ch == '[') {
        /* Inline DTD: consume '[', parse the declarations, then require '>'. */
        _inputStreamGetCharacter(&parser->input, &ch);
        if (!parseInlineDTD(parser)) return false;
        _inputStreamSkipWhitespace(&parser->input, NULL);
        success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
    } else if (success && ch == '>') {
        _inputStreamGetCharacter(&parser->input, &ch);
    }

    if (!success) {
        if (_inputStreamAtEOF(&parser->input)) {
            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing DTD");
        } else {
            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
        }
        return false;
    }

    /* Pop the DTD structure pushed above. */
    parser->top --;

    if (dtdStructure) {
        INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, dtdStructure, parser->context.info);
        if (parser->status != kCFXMLStatusParseInProgress) {
            _CFReportError(parser, parser->status, NULL);
            return false;
        }
    }
    return true;
}
/*
 * Parses the remainder of a parameter-entity reference: a name followed by ';'.
 * Unless kCFXMLParserSkipMetaData is set (or the current top-of-stack structure
 * is NULL), the reference is reported as a kCFXMLNodeTypeEntityReference leaf of
 * type kCFXMLEntityTypeParameter.
 */
static Boolean parsePhysicalEntityReference(CFXMLParserRef parser) {
    UniChar terminator;
    CFStringRef entityName;
    CFXMLEntityReferenceInfo refInfo;
    CFStringRef savedString;
    Boolean reported;

    if (!_inputStreamScanXMLName(&parser->input, false, &entityName)) {
        _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
        return false;
    }
    if (!_inputStreamGetCharacter(&parser->input, &terminator)) {
        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing physical entity reference");
        return false;
    }
    if (terminator != ';') {
        _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
        return false;
    }
    if ((parser->options & kCFXMLParserSkipMetaData) || !*(parser->top)) {
        /* Nothing to report. */
        return true;
    }

    /* Temporarily swap the scratch node's string for the entity name, then restore it. */
    savedString = parser->node->dataString;
    parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
    parser->node->dataString = entityName;
    refInfo.entityType = kCFXMLEntityTypeParameter;
    parser->node->additionalData = &refInfo;
    reported = reportNewLeaf(parser);
    parser->node->additionalData = NULL;
    parser->node->dataString = savedString;
    return reported;
}
/*
 * Parses a parenthesized, '|'-separated list of names: "(" name ("|" name)* ")".
 * `useNMTokens` is forwarded to _inputStreamScanXMLName() for every entry.
 * Returns false after reporting an error.
 */
static Boolean parseEnumeration(CFXMLParserRef parser, Boolean useNMTokens) {
    UniChar ch;

    if (!_inputStreamGetCharacter(&parser->input, &ch)) {
        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
        return false;
    }
    if (ch != '(') {
        _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
        return false;
    }

    /* First entry. */
    _inputStreamSkipWhitespace(&parser->input, NULL);
    if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
        _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
        return false;
    }

    /* Remaining entries, each introduced by '|', until the closing ')'. */
    for (;;) {
        _inputStreamSkipWhitespace(&parser->input, NULL);
        if (!_inputStreamGetCharacter(&parser->input, &ch)) {
            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
            return false;
        }
        if (ch == ')') {
            return true;
        }
        if (ch != '|') {
            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
            return false;
        }
        _inputStreamSkipWhitespace(&parser->input, NULL);
        if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
            return false;
        }
    }
}
/*
 * Parses an attribute type inside an ATTLIST declaration: one of the keyword
 * types, "NOTATION" followed by an enumeration of names, or a bare enumeration
 * of name tokens. When `str` is non-NULL the consumed text is copied into it
 * on success.
 */
static Boolean parseAttributeType(CFXMLParserRef parser, CFMutableStringRef str) {
    static const UniChar attTypeStrings[6][8] = {
        {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
        {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
        {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
        {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
        {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
        {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
    /*
     * Keyword types tried in order, as (row, prefix length) pairs. Longer forms
     * come before their prefixes: IDREFS, IDREF, ID and NMTOKENS, NMTOKEN.
     */
    static const struct { int row; int length; } keywordTypes[8] = {
        {0, 5}, {1, 6}, {1, 5}, {1, 2}, {2, 6}, {3, 8}, {4, 8}, {4, 7} };
    Boolean success = false;
    Boolean isKeyword = false;
    int i;

    if (str) _inputStreamSetMark(&parser->input);

    for (i = 0; i < 8 && !isKeyword; i++) {
        isKeyword = _inputStreamMatchString(&parser->input, attTypeStrings[keywordTypes[i].row], keywordTypes[i].length);
    }

    if (isKeyword) {
        success = true;
    } else if (_inputStreamMatchString(&parser->input, attTypeStrings[5], 8)) {
        /* NOTATION must be followed by whitespace and an enumeration of names. */
        if (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
            success = parseEnumeration(parser, false);
        } else {
            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
            success = false;
        }
    } else {
        /* Otherwise it has to be an enumeration of name tokens. */
        success = parseEnumeration(parser, true);
    }

    if (str) {
        if (success) {
            _inputStreamGetCharactersFromMark(&parser->input, str);
        }
        _inputStreamClearMark(&parser->input);
    }
    return success;
}
/*
 * Parses an attribute default inside an ATTLIST declaration: "#REQUIRED",
 * "#IMPLIED", "#FIXED" followed by whitespace and a value, or a bare value.
 * When `str` is non-NULL the consumed text is copied into it on success.
 */
static Boolean parseAttributeDefaultDeclaration(CFXMLParserRef parser, CFMutableStringRef str) {
    const UniChar keywords[3][8] = {
        {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
        {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
        {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
    UniChar first;
    Boolean success;

    if (str) _inputStreamSetMark(&parser->input);

    if (!_inputStreamGetCharacter(&parser->input, &first)) {
        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
        success = false;
    } else if (first != '#') {
        /* No keyword: put the character back and parse a plain default value. */
        _inputStreamReturnCharacter(&parser->input, first);
        success = parseAttributeValue(parser, NULL);
    } else if (_inputStreamMatchString(&parser->input, keywords[0], 8) ||
               _inputStreamMatchString(&parser->input, keywords[1], 7)) {
        success = true;
    } else if (_inputStreamMatchString(&parser->input, keywords[2], 5) && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
        /* #FIXED requires whitespace and then the fixed value. */
        success = parseAttributeValue(parser, NULL);
    } else {
        _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
        success = false;
    }

    if (str) {
        if (success) {
            _inputStreamGetCharactersFromMark(&parser->input, str);
        }
        _inputStreamClearMark(&parser->input);
    }
    return success;
}
/*
 * Parses an attribute-list declaration: "ATTLIST", the element name, zero or
 * more (name, type, default) triples, and the closing '>'.
 *
 * Unless kCFXMLParserSkipMetaData is set (or the current top-of-stack structure
 * is NULL), the attribute declarations are collected and reported as a
 * kCFXMLNodeTypeAttributeListDeclaration leaf. The first 8 declarations live in
 * a stack array; beyond that the array moves to the heap and doubles in size.
 *
 * Returns false after reporting an error; true on success.
 */
static Boolean parseAttributeListDeclaration(CFXMLParserRef parser) {
    const UniChar attList[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
    CFXMLAttributeListDeclarationInfo attListData;
    CFXMLAttributeDeclarationInfo attributeArray[8], *attributes = attributeArray;
    CFIndex capacity = 8;
    UniChar ch;
    Boolean success = true;
    CFStringRef name;

    if (!_inputStreamMatchString(&parser->input, attList, 7) ||
        _inputStreamSkipWhitespace(&parser->input, NULL) == 0 ||
        !_inputStreamScanXMLName(&parser->input, false, &name)) {
        _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
        return false;
    }
    attListData.numberOfAttributes = 0;

    /* A NULL `attributes` means "parse but do not collect or report". */
    if (!(*parser->top) || (parser->options & kCFXMLParserSkipMetaData)) {
        attributes = NULL;
    }

    while (_inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
        CFXMLAttributeDeclarationInfo *attribute = NULL;
        if (_inputStreamPeekCharacter(&parser->input, &ch) && ch == '>')
            break;
        if (attributes) {
            if (capacity == attListData.numberOfAttributes) {
                capacity = 2*capacity;
                if (attributes != attributeArray) {
                    attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorReallocate(CFGetAllocator(parser), attributes, capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
                } else {
                    /* Moving off the stack array: the entries gathered so far must be carried over. */
                    CFIndex copyIdx;
                    CFXMLAttributeDeclarationInfo *grown = (CFXMLAttributeDeclarationInfo *)CFAllocatorAllocate(CFGetAllocator(parser), capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
                    for (copyIdx = 0; copyIdx < attListData.numberOfAttributes; copyIdx ++) {
                        grown[copyIdx] = attributeArray[copyIdx];
                    }
                    attributes = grown;
                }
            }
            attribute = &(attributes[attListData.numberOfAttributes]);
            attribute->typeString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
            attribute->defaultString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
        }

        /* Name, whitespace, type, whitespace, default -- stop at the first failure. */
        if (!_inputStreamScanXMLName(&parser->input, false, attribute ? &(attribute->attributeName) : NULL) || (_inputStreamSkipWhitespace(&parser->input, NULL) == 0)) {
            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
            success = false;
        } else if (!parseAttributeType(parser, attribute ? (CFMutableStringRef)attribute->typeString : NULL)) {
            success = false;
        } else if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
            success = false;
        } else if (!parseAttributeDefaultDeclaration(parser, attribute ? (CFMutableStringRef)attribute->defaultString : NULL)) {
            success = false;
        }

        if (!success) {
            /* This entry is not counted in numberOfAttributes, so release its strings here. */
            if (attribute) {
                CFRelease(attribute->typeString);
                CFRelease(attribute->defaultString);
            }
            break;
        }
        attListData.numberOfAttributes ++;
    }

    if (success) {
        if (!_inputStreamGetCharacter(&parser->input, &ch)) {
            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
            success = false;
        } else if (ch != '>') {
            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
            success = false;
        } else if (attributes) {
            /* Temporarily swap the scratch node's string for the element name, then restore it. */
            CFStringRef tmp = parser->node->dataString;
            parser->node->dataTypeID = kCFXMLNodeTypeAttributeListDeclaration;
            parser->node->dataString = name;
            attListData.attributes = attributes;
            parser->node->additionalData = (void *)&attListData;
            success = reportNewLeaf(parser);
            parser->node->additionalData = NULL;
            parser->node->dataString = tmp;
        }
    }

    if (attributes) {
        CFIndex idx;
        for (idx = 0; idx < attListData.numberOfAttributes; idx ++) {
            CFRelease(attributes[idx].typeString);
            CFRelease(attributes[idx].defaultString);
        }
        if (attributes != attributeArray) {
            CFAllocatorDeallocate(CFGetAllocator(parser), attributes);
        }
    }
    return success;
}
/*
 * Scans a quoted system literal. When `extID` is non-NULL the literal is turned
 * into a URL (relative to the input's URL) and stored in extID->systemID, which
 * is set to NULL when the scan fails. With a NULL `extID` the literal is just consumed.
 */
CF_INLINE Boolean parseSystemLiteral(CFXMLParserRef parser, CFXMLExternalID *extID) {
    CFMutableStringRef literal;
    Boolean scanned;

    if (!extID) {
        return _inputStreamScanQuotedString(&parser->input, NULL);
    }

    literal = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
    if (_inputStreamScanQuotedString(&parser->input, literal)) {
        scanned = true;
        extID->systemID = CFURLCreateWithString(CFGetAllocator(parser), literal, parser->input.url);
    } else {
        scanned = false;
        extID->systemID = NULL;
    }
    CFRelease(literal);
    return scanned;
}
/*
 * Parses an external ID: either
 *   PUBLIC <quoted public id> <whitespace> <quoted system literal>
 * or
 *   SYSTEM <quoted system literal>
 *
 * With `alsoAcceptPublicID` set, a PUBLIC id without a following system literal
 * is also accepted; the input is rewound to just after the public id in that case.
 * When `extID` is non-NULL its fields are reset and then filled with whatever
 * was parsed; releasing them is left to the caller.
 */
static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID) {
    const UniChar publicString[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
    const UniChar systemString[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};

    if (extID) {
        extID->systemID = NULL;
        extID->publicID = NULL;
    }

    if (_inputStreamMatchString(&parser->input, publicString, 6)) {
        UniChar quote;
        Boolean hasSystemLiteral;
        Boolean havePublicID = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;

        if (extID) {
            extID->publicID = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
            havePublicID = havePublicID && _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)extID->publicID);
        } else {
            havePublicID = havePublicID && _inputStreamScanQuotedString(&parser->input, NULL);
        }
        if (!havePublicID) {
            return false;
        }

        /* Remember this position so a missing system literal can be undone. */
        if (alsoAcceptPublicID) {
            _inputStreamSetMark(&parser->input);
        }
        hasSystemLiteral = _inputStreamSkipWhitespace(&parser->input, NULL) != 0
            && _inputStreamPeekCharacter(&parser->input, &quote)
            && (quote == '\'' || quote == '\"')
            && parseSystemLiteral(parser, extID);
        if (!hasSystemLiteral && alsoAcceptPublicID) {
            _inputStreamBackUpToMark(&parser->input);
        }
        if (alsoAcceptPublicID) {
            _inputStreamClearMark(&parser->input);
        }
        return hasSystemLiteral ? true : alsoAcceptPublicID;
    }

    if (_inputStreamMatchString(&parser->input, systemString, 6)) {
        return _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && parseSystemLiteral(parser, extID);
    }

    return false;
}
/*
 * Parses a notation declaration: "NOTATION", a name, an external ID (a bare
 * public ID is accepted too) and the closing '>'.
 *
 * Unless kCFXMLParserSkipMetaData is set (or the current top-of-stack structure
 * is NULL), the declaration is reported as a kCFXMLNodeTypeNotation leaf whose
 * additional data is the parsed external ID.
 *
 * Returns false after reporting an error; true on success.
 */
static Boolean parseNotationDeclaration(CFXMLParserRef parser) {
    static UniChar notationString[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
    Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
    CFXMLNotationInfo notationData = {{NULL, NULL}};
    CFStringRef name;
    Boolean success =
        _inputStreamMatchString(&parser->input, notationString, 8) &&
        _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
        _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) &&
        _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
        parseExternalID(parser, true, report ? &(notationData.externalID) : NULL);

    if (success) {
        UniChar ch;
        _inputStreamSkipWhitespace(&parser->input, NULL);
        success = (_inputStreamGetCharacter(&parser->input, &ch) && ch == '>');
    }

    if (!success) {
        if (_inputStreamAtEOF(&parser->input)) {
            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
        } else {
            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
        }
    } else if (report) {
        /* Temporarily swap the scratch node's string for the notation name, then restore it. */
        CFStringRef tmp = parser->node->dataString;
        parser->node->dataTypeID = kCFXMLNodeTypeNotation;
        parser->node->dataString = name;
        parser->node->additionalData = &notationData;
        success = reportNewLeaf(parser);
        parser->node->additionalData = NULL;
        parser->node->dataString = tmp;
    }

    /* The external ID parts are owned here regardless of the outcome. */
    if (notationData.externalID.systemID) CFRelease(notationData.externalID.systemID);
    if (notationData.externalID.publicID) CFRelease(notationData.externalID.publicID);
    return success;
}
/*
 * Parses an element-content choice or sequence up to and including its closing
 * ')': a list of content particles separated consistently by '|' or ','. Each
 * particle is a name or a nested parenthesized group, optionally followed
 * directly by an occurrence indicator ('?', '*' or '+').
 *
 * `pastParen` is true when the caller has already consumed the opening '('.
 * An occurrence indicator after the closing ')' is left for the caller.
 * Returns false on malformed input or EOF; no error is reported from here.
 */
static Boolean parseChoiceOrSequence(CFXMLParserRef parser, Boolean pastParen) {
    UniChar ch, separator;

    if (!pastParen) {
        if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '(') return false;
        _inputStreamSkipWhitespace(&parser->input, NULL);
    }

    /* First content particle. */
    if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
    if (ch == '(') {
        if (!parseChoiceOrSequence(parser, false)) return false;
    } else {
        if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
    }
    if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
    if (ch == '?' || ch == '*' || ch == '+') _inputStreamGetCharacter(&parser->input, &ch);
    _inputStreamSkipWhitespace(&parser->input, NULL);
    if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
    if (ch == ')') return true;
    if (ch != '|' && ch != ',') return false;

    /* The first separator seen fixes the kind of list; mixing '|' and ',' is rejected. */
    separator = ch;
    while (ch == separator) {
        _inputStreamSkipWhitespace(&parser->input, NULL);
        if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
        if (ch != '(') {
            if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
        } else if (!parseChoiceOrSequence(parser, false)) {
            return false;
        }
        /* Every particle, not only the first, may carry an occurrence indicator. */
        if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
        if (ch == '?' || ch == '*' || ch == '+') _inputStreamGetCharacter(&parser->input, &ch);
        _inputStreamSkipWhitespace(&parser->input, NULL);
        if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
    }
    return ch == ')';
}
/*
 * Parses mixed element content after the opening '(' has been consumed:
 *   #PCDATA )            -- optionally followed by '*'
 *   #PCDATA (| name)* )* -- the trailing '*' is mandatory once names are listed
 * Returns false on malformed input or EOF; no error is reported from here.
 */
static Boolean parseMixedElementContent(CFXMLParserRef parser) {
    static const UniChar pcdataString[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
    UniChar ch;

    if (!_inputStreamMatchString(&parser->input, pcdataString, 7)) return false;
    _inputStreamSkipWhitespace(&parser->input, NULL);

    /* Bail out on EOF before `ch` is inspected. */
    if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;

    if (ch == ')') {
        /* "(#PCDATA)*" is valid as well as "(#PCDATA)"; consume the optional '*'. */
        if (_inputStreamPeekCharacter(&parser->input, &ch) && ch == '*') {
            _inputStreamGetCharacter(&parser->input, &ch);
        }
        return true;
    }

    while (ch == '|') {
        _inputStreamSkipWhitespace(&parser->input, NULL);
        if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
        _inputStreamSkipWhitespace(&parser->input, NULL);
        if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
    }
    if (ch != ')') return false;
    if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '*') return false;
    return true;
}
/*
 * Parses the content specification of an element declaration: "EMPTY", "ANY",
 * mixed content (starting with "(#PCDATA"), or a choice/sequence group that may
 * be followed by a single occurrence indicator ('*', '?' or '+').
 * Returns false on malformed input or EOF; no error is reported from here.
 */
static Boolean parseElementContentSpec(CFXMLParserRef parser) {
    static const UniChar eltContentEmpty[5] = {'E', 'M', 'P', 'T', 'Y'};
    static const UniChar eltContentAny[3] = {'A', 'N', 'Y'};
    UniChar ch;

    if (_inputStreamMatchString(&parser->input, eltContentEmpty, 5) || _inputStreamMatchString(&parser->input, eltContentAny, 3)) {
        return true;
    }

    /* Everything else must start with '('. */
    if (!_inputStreamPeekCharacter(&parser->input, &ch) || ch != '(') {
        return false;
    }
    _inputStreamGetCharacter(&parser->input, &ch);
    _inputStreamSkipWhitespace(&parser->input, NULL);
    if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
        return false;
    }

    if (ch == '#') {
        return parseMixedElementContent(parser);
    }

    if (!parseChoiceOrSequence(parser, true)) {
        return false;
    }
    /* Optional occurrence indicator on the outermost group. */
    if (_inputStreamPeekCharacter(&parser->input, &ch) && (ch == '*' || ch == '?' || ch == '+')) {
        _inputStreamGetCharacter(&parser->input, &ch);
    }
    return true;
}
/*
 * Parses an element type declaration.  On entry the stream is positioned at
 * the "ELEMENT" keyword (the leading "<!" has been consumed by the caller).
 * When the current parent is non-NULL and metadata is not being skipped, the
 * declaration is reported through reportNewLeaf() with the element name as
 * the node's data string and the raw content specification as its
 * contentDescription.  Returns false (after reporting an error) on malformed
 * input or if the client aborted the parse.
 */
static Boolean parseElementDeclaration(CFXMLParserRef parser) {
    static const UniChar eltChars[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
    Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
    UniChar closeBracket = '>';
    CFMutableStringRef contentDesc = NULL;
    CFStringRef name;
    Boolean success = false;

    /* "ELEMENT" S Name S */
    if (_inputStreamMatchString(&parser->input, eltChars, 7)
        && _inputStreamSkipWhitespace(&parser->input, NULL) != 0
        && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL)
        && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
        /* The mark lets us capture the content spec text verbatim. */
        if (report) _inputStreamSetMark(&parser->input);
        success = parseElementContentSpec(parser);
        if (report) {
            if (success) {
                contentDesc = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
                _inputStreamGetCharactersFromMark(&parser->input, contentDesc);
            }
            _inputStreamClearMark(&parser->input);
        }
        if (success) {
            _inputStreamSkipWhitespace(&parser->input, NULL);
            success = success && _inputStreamMatchString(&parser->input, &closeBracket, 1);
        }
    }

    if (!success) {
        if (_inputStreamAtEOF(&parser->input)) {
            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
        } else {
            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
        }
    } else if (report) {
        /* Borrow the parser's scratch node for the callback, then restore it. */
        CFXMLElementTypeDeclarationInfo declInfo;
        CFStringRef savedString = parser->node->dataString;
        declInfo.contentDescription = contentDesc;
        parser->node->dataTypeID = kCFXMLNodeTypeElementTypeDeclaration;
        parser->node->dataString = name;
        parser->node->additionalData = &declInfo;
        success = reportNewLeaf(parser);
        parser->node->additionalData = NULL;
        parser->node->dataString = savedString;
    }

    if (contentDesc) CFRelease(contentDesc);
    return success;
}
/*
 * Parses an entity declaration.  On entry the stream is positioned at the
 * "ENTITY" keyword (the leading "<!" has been consumed by the caller).
 * Handles parameter entities ("%" before the name), internal entities
 * (quoted replacement text) and external entities (external ID, optionally
 * followed by an NDATA notation name for general entities).
 *
 * When the current parent is non-NULL and metadata is not being skipped, the
 * declaration is reported through reportNewLeaf().  Returns false (after
 * reporting an error) on malformed input or if the client aborted the parse.
 */
static Boolean parseEntityDeclaration(CFXMLParserRef parser) {
    const UniChar entityStr[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
    UniChar ch;
    Boolean isPEDecl = false;
    CFXMLEntityInfo entityData;
    CFStringRef name = NULL;
    Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
    Boolean success =
        _inputStreamMatchString(&parser->input, entityStr, 6) &&
        (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) &&
        _inputStreamPeekCharacter(&parser->input, &ch);

    entityData.replacementText = NULL;
    entityData.entityID.publicID = NULL;
    entityData.entityID.systemID = NULL;
    entityData.notationName = NULL;

    /* '%' marks a parameter-entity declaration. */
    if (success && ch == '%') {
        _inputStreamGetCharacter(&parser->input, &ch);
        success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
        isPEDecl = true;
    }

    success = success && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamPeekCharacter(&parser->input, &ch);

    if (success && (ch == '\"' || ch == '\'')) {
        /* Internal entity: quoted replacement text. */
        if (report) {
            entityData.replacementText = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
            success = _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)entityData.replacementText);
        } else {
            success = _inputStreamScanQuotedString(&parser->input, NULL);
        }
    } else if (success) {
        /* External entity: external ID, then optional NDATA for general entities. */
        success = parseExternalID(parser, false, report ? &(entityData.entityID) : NULL);
        if (success && !isPEDecl && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
            const UniChar nDataStr[5] = {'N', 'D', 'A', 'T', 'A'};
            if (_inputStreamMatchString(&parser->input, nDataStr, 5)) {
                /* NOTE(review): the notation name is scanned but not captured, so
                 * entityData.notationName stays NULL and kCFXMLEntityTypeUnparsed
                 * is never selected below -- confirm whether that is intended. */
                success = (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamScanXMLName(&parser->input, false, NULL);
            }
        }
    }

    if (success) {
        _inputStreamSkipWhitespace(&parser->input, NULL);
        success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
    }

    if (!success) {
        if (_inputStreamAtEOF(&parser->input)) {
            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
        } else {
            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
        }
    } else if (report) {
        /* Only report when `name` was actually captured; otherwise it is unset. */
        CFStringRef tmp = parser->node->dataString;
        if (isPEDecl) entityData.entityType = kCFXMLEntityTypeParameter;
        else if (entityData.replacementText) entityData.entityType = kCFXMLEntityTypeParsedInternal;
        else if (!entityData.notationName) entityData.entityType = kCFXMLEntityTypeParsedExternal;
        else entityData.entityType = kCFXMLEntityTypeUnparsed;
        parser->node->dataTypeID = kCFXMLNodeTypeEntity;
        parser->node->dataString = name;
        parser->node->additionalData = &entityData;
        success = reportNewLeaf(parser);
        parser->node->additionalData = NULL;
        parser->node->dataString = tmp;
    }

    /* Release everything created above on every path, including failures. */
    if (entityData.replacementText) CFRelease(entityData.replacementText);
    if (entityData.entityID.publicID) CFRelease(entityData.entityID.publicID);
    if (entityData.entityID.systemID) CFRelease(entityData.entityID.systemID);
    return success;
}
/*
 * Parses the internal DTD subset, dispatching each markup declaration to its
 * dedicated parser.  Returns true only when the closing ']' is consumed;
 * every other exit (sub-parser failure, unexpected character, EOF) returns
 * false, reporting an error where no sub-parser has already done so.
 */
static Boolean parseInlineDTD(CFXMLParserRef parser) {
Boolean success = true;
while (success && !_inputStreamAtEOF(&parser->input)) {
UniChar ch;
parseWhitespace(parser);
if (!_inputStreamGetCharacter(&parser->input, &ch)) break;
if (ch == '%') {
/* Parameter-entity reference. */
success = parsePhysicalEntityReference(parser);
} else if (ch == '<') {
if (!_inputStreamGetCharacter(&parser->input, &ch)) {
_CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
return false;
}
if (ch == '?') {
/* "<?" : processing instruction. */
success = parseProcessingInstruction(parser, true); } else if (ch == '!') {
/* "<!" : a comment or one of the declaration keywords. */
UniChar dashes[2] = {'-', '-'};
if (_inputStreamMatchString(&parser->input, dashes, 2)) {
success = parseComment(parser, true);
} else {
/* Choose the declaration parser from the first keyword letter. */
if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
_CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
return false;
} else if (ch == 'A') {
success = parseAttributeListDeclaration(parser);
} else if (ch == 'N') {
success = parseNotationDeclaration(parser);
} else if (ch == 'E') {
/* 'E' is ambiguous (ELEMENT vs ENTITY): consume it, look at the
   second letter, then push the 'E' back for the sub-parser. */
_inputStreamGetCharacter(&parser->input, &ch);
if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
_CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
return false;
}
_inputStreamReturnCharacter(&parser->input, 'E');
if (ch == 'L') {
success = parseElementDeclaration(parser);
} else if (ch == 'N') {
success = parseEntityDeclaration(parser);
} else {
_CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
return false;
}
} else {
_CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
return false;
}
}
} else {
_CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
return false;
}
} else if (ch == ']') {
/* End of the internal subset: the only successful exit. */
return true;
} else {
_CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
return false;
}
}
/* The loop ended without seeing ']'.  If no sub-parser failed, the input
   simply ran out, which has not been reported yet. */
if (success) {
_CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
}
return false;
}
/*
 * Parses the content of an element: character data, whitespace runs, entity
 * references, processing instructions, comments, CDATA sections and child
 * elements.  Returns true when a close tag is encountered; the "</" is left
 * in the stream for the caller.  Returns false after reporting an error on
 * any sub-parser failure, on an encoding error, or when the input ends first.
 */
static Boolean parseTagContent(CFXMLParserRef parser) {
while (!_inputStreamAtEOF(&parser->input)) {
UniChar ch;
CFIndex numWhitespaceCharacters;
/* Mark the start so leading whitespace can be recovered later, either as
   part of the text run or as a standalone whitespace node. */
_inputStreamSetMark(&parser->input);
numWhitespaceCharacters = _inputStreamSkipWhitespace(&parser->input, NULL);
if (!_inputStreamGetCharacter(&parser->input, &ch)) break;
/* Plain text: rewind to the mark so parsePCData sees the whitespace too. */
if (ch != '<' && ch != '&') { _inputStreamBackUpToMark(&parser->input);
_inputStreamClearMark(&parser->input);
if (!parsePCData(parser)) return false;
if(_inputStreamComposingErrorOccurred(&parser->input)) {
_CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
return false;
}
continue;
}
/* Whitespace followed by markup: report it as a whitespace node unless
   the client asked to skip whitespace or the parent is NULL. */
if (!(parser->options & kCFXMLParserSkipWhitespace) && numWhitespaceCharacters != 0 && *(parser->top)) {
/* Push the markup character back so only the whitespace is captured,
   then re-read it after the node has been reported. */
_inputStreamReturnCharacter(&parser->input, ch);
_inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
parser->node->additionalData = NULL;
if (!reportNewLeaf(parser)) return false;
_inputStreamGetCharacter(&parser->input, &ch);
}
_inputStreamClearMark(&parser->input);
if (ch == '&') {
if (!parseEntityReference(parser, true)) return false;
continue;
}
/* ch is '<' here; the following character selects the construct. */
if (!_inputStreamPeekCharacter(&parser->input, &ch)) break;
if (ch == '?') { _inputStreamGetCharacter(&parser->input, &ch);
if (!parseProcessingInstruction(parser, true))
return false;
/* "</": push the '<' back and let the caller parse the close tag. */
} else if (ch == '/') { _inputStreamReturnCharacter(&parser->input, '<'); return true;
} else if (ch != '!') { if (!parseTag(parser)) return false;
} else {
/* "<!" is either a comment ("<!--") or, otherwise, presumed CDATA. */
UniChar dashes[3] = {'!', '-', '-'};
if (_inputStreamMatchString(&parser->input, dashes, 3)) {
if (!parseComment(parser, true)) return false;
} else {
/* parseCDSect matches the whole "<![CDATA[" prefix, so restore '<'. */
_inputStreamReturnCharacter(&parser->input, '<');
if (!parseCDSect(parser)) return false;
}
}
}
/* Falling out of the loop means the input ended before a close tag. */
if(_inputStreamComposingErrorOccurred(&parser->input)) {
_CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
return false;
}
_CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing tag content");
return false;
}
/*
 * Parses a CDATA section.  The stream must be positioned at the leading
 * "<![CDATA[".  The text up to the terminating "]]>" is scanned into the
 * parser's scratch node and reported as a kCFXMLNodeTypeCDATASection leaf.
 * Returns false after reporting an error if the prefix is wrong or the
 * terminator is never found.
 */
static Boolean parseCDSect(CFXMLParserRef parser) {
    const UniChar opening[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
    const UniChar closing[3] = {']', ']', '>'};
    CFMutableStringRef text;

    if (!_inputStreamMatchString(&parser->input, opening, 9)) {
        _CFReportError(parser, kCFXMLErrorMalformedCDSect, "Encountered bad prefix to a presumed CDATA section");
        return false;
    }

    text = (CFMutableStringRef)(parser->node->dataString);
    if (!_inputStreamScanToCharacters(&parser->input, closing, 3, text)) {
        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing CDATA section");
        return false;
    }

    parser->node->additionalData = NULL;
    parser->node->dataTypeID = kCFXMLNodeTypeCDATASection;
    return reportNewLeaf(parser);
}
/*
 * Checks that str is the body of a character reference, i.e. the text
 * between '&' and ';': either "#" followed by decimal digits, or "#x"
 * followed by at least one hexadecimal digit.  Returns true only when every
 * character after the prefix is a legal digit for the chosen base.
 */
static Boolean validateCharacterReference(CFStringRef str) {
    Boolean isHex;
    CFIndex idx, len = CFStringGetLength(str);
    if (len < 2) return false;
    if (CFStringGetCharacterAtIndex(str, 0) != '#') return false;
    if (CFStringGetCharacterAtIndex(str, 1) == 'x') {
        isHex = true;
        idx = 2;
        if (len == 2) return false; /* "#x" with no digits */
    } else {
        isHex = false;
        idx = 1;
    }
    /* Reject at the first illegal character, including when it is the last
     * one; comparing a post-incremented index with len would accept it. */
    for (; idx < len; idx++) {
        UniChar ch = CFStringGetCharacterAtIndex(str, idx);
        Boolean isDecimalDigit = (ch >= '0' && ch <= '9');
        Boolean isHexLetter = isHex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'));
        if (!isDecimalDigit && !isHexLetter) return false;
    }
    return true;
}
/*
 * Parses an entity or character reference; the leading '&' has already been
 * consumed.  A reference starting with '#' is scanned up to ';' into the
 * parser's scratch string and validated as a character reference; anything
 * else must be an XML name immediately followed by ';'.
 *
 * If `report` is true the reference is delivered through reportNewLeaf() as
 * a kCFXMLNodeTypeEntityReference node; otherwise it is only validated.
 * Returns false after reporting an error on malformed input or EOF.
 */
static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report) {
    UniChar ch;
    CFXMLEntityReferenceInfo entData;
    CFStringRef name = NULL;
    CFStringRef savedString;
    Boolean delivered;

    if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
        return false;
    }

    if (ch == '#') {
        /* Character reference: collect everything up to the ';'. */
        UniChar semicolon = ';';
        if (!_inputStreamScanToCharacters(&parser->input, &semicolon, 1, (CFMutableStringRef)parser->node->dataString)) {
            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
            return false;
        }
        if (!validateCharacterReference(parser->node->dataString)) {
            _CFReportError(parser, kCFXMLErrorMalformedCharacterReference, "Encountered illegal character while parsing character reference");
            return false;
        }
        entData.entityType = kCFXMLEntityTypeCharacter;
        name = parser->node->dataString;
    } else {
        /* Named reference: Name ';' */
        Boolean wellFormed = _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL)
            && _inputStreamGetCharacter(&parser->input, &ch)
            && ch == ';';
        if (!wellFormed) {
            if (_inputStreamAtEOF(&parser->input)) {
                _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
            } else {
                _CFReportError(parser, kCFXMLErrorMalformedName, "Encountered malformed name while parsing EntityReference");
            }
            return false;
        }
        entData.entityType = kCFXMLEntityTypeParsedInternal;
    }

    if (!report) return true;

    /* Borrow the scratch node for the callback, then restore its string. */
    savedString = parser->node->dataString;
    parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
    parser->node->dataString = name;
    parser->node->additionalData = &entData;
    delivered = reportNewLeaf(parser);
    parser->node->additionalData = NULL;
    parser->node->dataString = savedString;
    return delivered;
}
#if 0
{
switch (*(parser->curr)) {
case 'l': if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
ch = '<';
parser->curr += 3;
break;
}
parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
return;
case 'g': if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
ch = '>';
parser->curr += 3;
break;
}
parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
return;
case 'a': if (len < 4) { parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
return;
}
if (*(parser->curr+1) == 'm') {
if (*(parser->curr+2) == 'p' && *(parser->curr+3) == ';') {
ch = '&';
parser->curr += 4;
break;
}
} else if (*(parser->curr+1) == 'p') {
if (len > 4 && *(parser->curr+2) == 'o' && *(parser->curr+3) == 's' && *(parser->curr+4) == ';') {
ch = '\'';
parser->curr += 5;
break;
}
}
parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
return;
case 'q': if (len >= 6 && *(parser->curr+1) == 'u' && *(parser->curr+2) == 'o' && *(parser->curr+3) == 't' && *(parser->curr+4) == 'e' && *(parser->curr+5) == ';') {
ch = '\"';
parser->curr += 6;
break;
}
parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
return;
case '#':
{
UniChar num = 0;
Boolean isHex = false;
if ( len < 4) { parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
return;
}
parser->curr ++;
if (*(parser->curr) == 'x') {
isHex = true;
parser->curr ++;
}
while (parser->curr < parser->end) {
ch = *(parser->curr);
if (ch == ';') {
CFStringAppendCharacters(string, &num, 1);
parser->curr ++;
return;
}
if (!isHex) num = num*10;
else num = num << 4;
if (ch <= '9' && ch >= '0') {
num += (ch - '0');
} else if (!isHex) {
parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
return;
} else if (ch >= 'a' && ch <= 'f') {
num += 10 + (ch - 'a');
} else if (ch >= 'A' && ch <= 'F') {
num += 10 + (ch - 'A');
} else {
parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
return;
}
}
parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
return;
}
default:
parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
return;
}
CFStringAppendCharacters(string, &ch, 1);
}
#endif
/*
 * Consumes a run of parsed character data and reports it as a
 * kCFXMLNodeTypeText leaf.  The run ends at the next '<' or '&' (which is
 * pushed back onto the stream) or when no more characters can be read.
 * The sequence "]]>" is illegal in character data and fails the parse.
 */
static Boolean parsePCData(CFXMLParserRef parser) {
    UniChar ch;

    _inputStreamSetMark(&parser->input);
    for (;;) {
        if (!_inputStreamGetCharacter(&parser->input, &ch)) break;

        if (ch == '<' || ch == '&') {
            /* Start of markup or a reference: leave it for the caller. */
            _inputStreamReturnCharacter(&parser->input, ch);
            break;
        }

        if (ch == ']') {
            /* One ']' is already consumed; "]>" completes the forbidden "]]>". */
            const UniChar endSequence[2] = {']', '>'};
            if (_inputStreamMatchString(&parser->input, endSequence, 2)) {
                _CFReportError(parser, kCFXMLErrorMalformedParsedCharacterData, "Encountered \"]]>\" in parsed character data");
                _inputStreamClearMark(&parser->input);
                return false;
            }
        }
    }

    _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
    _inputStreamClearMark(&parser->input);
    parser->node->additionalData = NULL;
    parser->node->dataTypeID = kCFXMLNodeTypeText;
    return reportNewLeaf(parser);
}
/*
 * Parses "</name>" and checks that `name` is the same string object as `tag`
 * (the comparison is by pointer).  Whitespace is permitted before the '>'.
 * On failure the parser's status and error string are set, the client's
 * handleError callback (if any) is invoked, and false is returned.
 */
static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag) {
    const UniChar beginEndTag[2] = {'<', '/'};
    CFStringRef closeTag;
    Boolean sawEOF = false, malformed = false;
    Boolean nameMatches = _inputStreamMatchString(&parser->input, beginEndTag, 2)
        && _inputStreamScanXMLName(&parser->input, false, &closeTag)
        && closeTag == tag;

    if (nameMatches) {
        UniChar ch;
        _inputStreamSkipWhitespace(&parser->input, NULL);
        if (!_inputStreamGetCharacter(&parser->input, &ch)) {
            sawEOF = true;
        } else if (ch != '>') {
            malformed = true;
        }
    } else if (_inputStreamAtEOF(&parser->input)) {
        sawEOF = true;
    } else {
        malformed = true;
    }

    if (!sawEOF && !malformed) return true;

    if (sawEOF) {
        parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag);
        parser->status = kCFXMLErrorUnexpectedEOF;
        if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorUnexpectedEOF, parser->context.info);
    } else {
        parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered malformed close tag for <%@>"), tag);
        parser->status = kCFXMLErrorMalformedCloseTag;
        if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorMalformedCloseTag, parser->context.info);
    }
    return false;
}
/*
 * Parses one element: the start tag (name and attributes), then, unless the
 * tag is empty ("/>"), its content and matching close tag.  The leading '<'
 * has already been consumed.  The element is announced through the client's
 * createXMLStructure / addChild callbacks and finished with endXMLStructure.
 * Returns false after an error has been reported or the client has changed
 * the parser status.
 */
static Boolean parseTag(CFXMLParserRef parser) {
UniChar ch;
void *tag;
CFXMLElementInfo data;
Boolean success = true;
CFStringRef tagName;
if (!_inputStreamScanXMLName(&parser->input, false, &tagName)) {
_CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
return false;
}
_inputStreamSkipWhitespace(&parser->input, NULL);
/* parseAttributes fills parser->argDict / parser->argArray. */
if (!parseAttributes(parser)) return false; data.attributes = parser->argDict;
data.attributeOrder = parser->argArray;
if (!_inputStreamGetCharacter(&parser->input, &ch)) {
_CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
return false;
}
/* "/>" marks an empty element; otherwise the tag must end with '>'. */
if (ch == '/') {
data.isEmpty = true;
if (!_inputStreamGetCharacter(&parser->input, &ch)) {
_CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
return false;
}
} else {
data.isEmpty = false;
}
if (ch != '>') {
_CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
return false;
}
/* Report the element when the parent structure is non-NULL, or when the
   stack is at its base entry. */
if (*parser->top || parser->top == parser->stack) {
/* Borrow the scratch node for the callback, then restore its string. */
CFStringRef oldStr = parser->node->dataString;
parser->node->dataTypeID = kCFXMLNodeTypeElement;
parser->node->dataString = tagName;
parser->node->additionalData = &data;
tag = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
if (tag && parser->status == kCFXMLStatusParseInProgress) {
INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, tag, parser->context.info);
}
parser->node->additionalData = NULL;
parser->node->dataString = oldStr;
/* A callback changed the status: forward it to handleError and stop. */
if (parser->status != kCFXMLStatusParseInProgress) {
_CFReportError(parser, parser->status, NULL);
return false;
}
} else {
/* The parent is NULL, so this element gets a NULL structure as well. */
tag = NULL;
}
/* The new structure (possibly NULL) becomes the parent for the content. */
pushXMLNode(parser, tag);
if (!data.isEmpty) {
success = parseTagContent(parser);
if (success) {
success = parseCloseTag(parser, tagName);
}
}
/* Pop this element off the stack. */
parser->top --;
if (success && tag) {
INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, tag, parser->context.info);
if (parser->status != kCFXMLStatusParseInProgress) {
_CFReportError(parser, parser->status, NULL);
return false;
}
}
return success;
}
/*
 * Parses a quoted attribute value.  The stream must be positioned at the
 * opening quote (single or double).  The value may not contain '<', and any
 * '&' must start a well-formed entity or character reference.
 *
 * If `str` is non-NULL it receives the raw text between the quotes.  Returns
 * true only when the matching closing quote was found; the quote itself is
 * consumed.  Scanning stops at the first error.
 */
static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str) {
    UniChar delimiter, ch;
    Boolean closed = false;
    Boolean success = _inputStreamGetCharacter(&parser->input, &delimiter);
    if (!success || (delimiter != '\'' && delimiter != '\"')) return false;
    if (str) _inputStreamSetMark(&parser->input);

    while (success && !closed) {
        if (!_inputStreamGetCharacter(&parser->input, &ch)) {
            /* Input ended (or could not be read) before the closing quote. */
            success = false;
        } else if (ch == delimiter) {
            closed = true;
        } else if (ch == '<') {
            success = false;
        } else if (ch == '&') {
            success = parseEntityReference(parser, false);
        }
    }

    if (str) {
        if (success) {
            /* Push the closing quote back so the marked range excludes it,
             * copy the value out, then consume the quote again. */
            _inputStreamReturnCharacter(&parser->input, delimiter);
            _inputStreamGetCharactersFromMark(&parser->input, str);
            _inputStreamGetCharacter(&parser->input, &ch);
        }
        _inputStreamClearMark(&parser->input);
    }
    return success;
}
/*
 * Parses the attribute list of a start tag into parser->argDict (name ->
 * value) and parser->argArray (names in document order).  Both containers
 * are created on first use and emptied on every call, so after a successful
 * return they describe exactly the attributes of the current tag.
 *
 * Parsing stops, without consuming it, at the '>' or '/' that ends the tag.
 * Returns false after reporting an error on a repeated attribute, a
 * malformed name/value pair, or EOF.
 */
Boolean parseAttributes(CFXMLParserRef parser) {
    UniChar ch;
    CFMutableDictionaryRef dict;
    CFMutableArrayRef array;
    Boolean failure = false;

    /* The former "no attributes" shortcut compared the Boolean result of
     * _inputStreamPeekCharacter() with '>' and so could never be taken.  The
     * loop below already handles that case (it does not iterate when the
     * next character is '>'), so the dead branch is dropped. */
    if (!parser->argDict) {
        parser->argDict = CFDictionaryCreateMutable(CFGetAllocator(parser), 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
        parser->argArray = CFArrayCreateMutable(CFGetAllocator(parser), 0, &kCFTypeArrayCallBacks);
    } else {
        CFDictionaryRemoveAllValues(parser->argDict);
        CFArrayRemoveAllValues(parser->argArray);
    }
    dict = parser->argDict;
    array = parser->argArray;

    while (!failure && _inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && ch != '/') {
        CFStringRef key;
        CFMutableStringRef value;
        if (!_inputStreamScanXMLName(&parser->input, false, &key)) {
            failure = true;
            break;
        }
        /* An attribute name may appear only once per tag. */
        if (CFArrayGetFirstIndexOfValue(array, CFRangeMake(0, CFArrayGetCount(array)), key) != kCFNotFound) {
            _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found repeated attribute");
            return false;
        }
        /* Name S? '=' S? Value */
        _inputStreamSkipWhitespace(&parser->input, NULL);
        if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '=') {
            failure = true;
            break;
        }
        _inputStreamSkipWhitespace(&parser->input, NULL);
        value = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
        if (!parseAttributeValue(parser, value)) {
            CFRelease(value);
            failure = true;
            break;
        }
        CFArrayAppendValue(array, key);
        CFDictionarySetValue(dict, key, value);
        CFRelease(value);
        _inputStreamSkipWhitespace(&parser->input, NULL);
    }

    if (failure) {
        _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found illegal character while parsing element tag");
        return false;
    } else if (_inputStreamAtEOF(&parser->input)) {
        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing element attributes");
        return false;
    } else {
        return true;
    }
}
/*
 * Parses the top level of a document: whitespace, comments, processing
 * instructions, at most one DTD and exactly one root element.  Returns true
 * only if every construct parsed successfully and an element was seen; all
 * failures are reported through _CFReportError or the handleError callback.
 */
static Boolean parseXML(CFXMLParserRef parser) {
Boolean success = true, sawDTD = false, sawElement = false;
UniChar ch;
while (success && _inputStreamPeekCharacter(&parser->input, &ch)) {
switch (ch) {
case ' ':
case '\n':
case '\t':
case '\r':
success = parseWhitespace(parser);
break;
case '<':
/* Consume '<' and read the character that selects the construct. */
_inputStreamGetCharacter(&parser->input, &ch);
if (!_inputStreamGetCharacter(&parser->input, &ch)) {
_CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing top-level document");
return false;
}
if (ch == '!') {
/* "<!--" is a comment; any other "<!" is handed to parseDTD. */
UniChar dashes[2] = {'-', '-'};
if (_inputStreamMatchString(&parser->input, dashes, 2)) {
success = parseComment(parser, true);
} else {
if (sawDTD) {
_CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered a second DTD");
return false;
}
success = parseDTD(parser);
if (success) sawDTD = true;
}
} else if (ch == '?') {
success = parseProcessingInstruction(parser, true);
} else {
/* Anything else starts an element; only one is allowed at top level. */
if (sawElement) {
_CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered second top-level element");
return false;
}
/* Give the first name character back: parseTag scans the name itself. */
_inputStreamReturnCharacter(&parser->input, ch);
success = parseTag(parser);
if (success) sawElement = true;
}
break;
default: {
/* Built by hand rather than through _CFReportError because the message
   is formatted with the offending character. */
parser->status = kCFXMLErrorMalformedDocument;
parser->errorString = ch < 256 ?
CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch, ch) :
CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch);
if (parser->callBacks.handleError) {
INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
}
return false;
}
}
}
if (!success) return false;
if (!sawElement) {
_CFReportError(parser, kCFXMLErrorElementlessDocument, "No element found in document");
return false;
}
return true;
}
/*
 * Records a parse error and notifies the client.
 *
 * When `str` is non-NULL, the parser's status becomes `errNum` and its error
 * string is replaced by `str`; any previously stored error string is released
 * so that repeated reports do not leak.  When `str` is NULL the status and
 * error string are left alone (callers pass NULL with parser->status as
 * errNum after a callback has changed the status).
 * In both cases the client's handleError callback, if set, is invoked with
 * `errNum`.
 */
static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str) {
    if (str) {
        CFStringRef previous = parser->errorString;
        parser->status = errNum;
        parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), str, kCFStringEncodingASCII);
        if (previous) CFRelease(previous);
    }
    if (parser->callBacks.handleError) {
        INVOKE_CALLBACK3(parser->callBacks.handleError, parser, errNum, parser->context.info);
    }
}
/*
 * Delivers the parser's scratch node to the client as a childless structure:
 * createXMLStructure, then addChild under the current parent, then
 * endXMLStructure.  Nothing is delivered when the current parent is NULL.
 * Returns false if the parser status is no longer "in progress" afterwards,
 * in which case the status is also forwarded to the error handler.
 */
static Boolean reportNewLeaf(CFXMLParserRef parser) {
    void *leaf;

    /* A NULL parent suppresses reporting of its children. */
    if (!*(parser->top)) return true;

    leaf = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
    if (leaf && parser->status == kCFXMLStatusParseInProgress) {
        INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *(parser->top), leaf, parser->context.info);
        if (parser->status == kCFXMLStatusParseInProgress) INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, leaf, parser->context.info);
    }

    if (parser->status == kCFXMLStatusParseInProgress) return true;

    _CFReportError(parser, parser->status, NULL);
    return false;
}
/*
 * Pushes `node` onto the parser's stack of open structures, doubling the
 * stack's storage when the new top would land one past the current capacity.
 */
static void pushXMLNode(CFXMLParserRef parser, void *node) {
    parser->top++;
    if ((unsigned)(parser->top - parser->stack) == parser->capacity) {
        UInt32 oldCapacity = parser->capacity;
        UInt32 newCapacity = 2 * oldCapacity;
        parser->stack = (void **)CFAllocatorReallocate(CFGetAllocator(parser), parser->stack, newCapacity * sizeof(void *), 0);
        /* The storage may have moved: recompute top relative to the new base. */
        parser->top = parser->stack + oldCapacity;
        parser->capacity = newCapacity;
    }
    *(parser->top) = node;
}
/*
 * createXMLStructure callback used by the CFXMLTree convenience functions:
 * copies the parser's node and wraps the copy in a new CFXMLTree.  The
 * `context` argument is unused.
 */
static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser, CFXMLNodeRef node, void *context) {
    CFAllocatorRef alloc = CFGetAllocator(parser);
    CFXMLNodeRef nodeCopy = CFXMLNodeCreateCopy(alloc, node);
    CFXMLTreeRef wrapped = CFXMLTreeCreateWithNode(alloc, nodeCopy);
    /* Drop our own reference to the copy now that the tree has been created from it. */
    CFRelease(nodeCopy);
    return (void *)wrapped;
}
/*
 * addChild callback used by the CFXMLTree convenience functions: appends
 * `child` to `parent`, both of which are treated as CFTrees.  `parser` and
 * `context` are unused.
 */
static void _XMLTreeAddChild(CFXMLParserRef parser, void *parent, void *child, void *context) {
CFTreeAppendChild((CFTreeRef)parent, (CFTreeRef)child);
}
/*
 * endXMLStructure callback used by the CFXMLTree convenience functions.
 * A tree that has a parent is released here; a tree without a parent is
 * left alone.  `parser` and `context` are unused.
 */
static void _XMLTreeEndXMLStructure(CFXMLParserRef parser, void *xmlType, void *context) {
    CFXMLTreeRef tree = (CFXMLTreeRef)xmlType;
    if (CFTreeGetParent(tree)) {
        CFRelease(tree);
    }
}
/*
 * Parses the XML found at `dataSource` into a CFXMLTree using the built-in
 * tree-building callbacks.  Returns the document tree on success.  On a
 * failed parse, any partially built document is released and NULL is
 * returned.
 */
CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex version) {
    CFXMLParserRef parser;
    CFXMLParserCallBacks callbacks;
    CFXMLTreeRef document;
    Boolean parsed;

    CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);

    /* Tree-building callbacks; no entity resolution or error hook. */
    callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
    callbacks.addChild = _XMLTreeAddChild;
    callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
    callbacks.resolveExternalEntity = NULL;
    callbacks.handleError = NULL;

    parser = CFXMLParserCreateWithDataFromURL(allocator, dataSource, parseOptions, version, &callbacks, NULL);
    parsed = CFXMLParserParse(parser);
    document = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
    if (!parsed) {
        /* Discard whatever was built before the failure. */
        if (document) CFRelease(document);
        document = NULL;
    }
    CFRelease(parser);
    return document;
}
/*
 * Parses `xmlData` into a CFXMLTree.  Forwards to
 * CFXMLTreeCreateFromDataWithError() with a NULL error dictionary, so no
 * error details are returned to the caller.
 */
CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion) {
return CFXMLTreeCreateFromDataWithError(allocator, xmlData, dataSource, parseOptions, parserVersion, NULL);
}
/*
 * Keys of the error dictionary filled in by
 * CFXMLTreeCreateFromDataWithError() when parsing fails: the error
 * description string, and numbers for the line number, location and parser
 * status code.
 */
CONST_STRING_DECL(kCFXMLTreeErrorDescription, "kCFXMLTreeErrorDescription");
CONST_STRING_DECL(kCFXMLTreeErrorLineNumber, "kCFXMLTreeErrorLineNumber");
CONST_STRING_DECL(kCFXMLTreeErrorLocation, "kCFXMLTreeErrorLocation");
CONST_STRING_DECL(kCFXMLTreeErrorStatusCode, "kCFXMLTreeErrorStatusCode");
CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion, CFDictionaryRef *errorDict) {
CFXMLParserRef parser;
CFXMLParserCallBacks callbacks;
CFXMLTreeRef result;
__CFGenericValidateType(xmlData, CFDataGetTypeID());
CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
callbacks.addChild = _XMLTreeAddChild;
callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
callbacks.resolveExternalEntity = NULL;
callbacks.handleError = NULL;
parser = CFXMLParserCreate(allocator, xmlData, dataSource, parseOptions, parserVersion, &callbacks, NULL);
if (CFXMLParserParse(parser)) {
result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
} else {
if (errorDict) { *errorDict = CFDictionaryCreateMutable(allocator, 4, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
if (*errorDict) {
CFIndex rawnum;
CFNumberRef cfnum;
CFStringRef errstring;
rawnum = CFXMLParserGetLocation(parser);
cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
if(cfnum) {
CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLocation, cfnum);
CFRelease(cfnum);
}
rawnum = CFXMLParserGetLineNumber(parser);
cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
if(cfnum) {
CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLineNumber, cfnum);
CFRelease(cfnum);
}
rawnum = CFXMLParserGetStatusCode(parser);
cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
if(cfnum) {
CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorStatusCode, cfnum);
CFRelease(cfnum);
}
errstring = CFXMLParserCopyErrorDescription(parser);
if(errstring) {
CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorDescription, errstring);
CFRelease(errstring);
}
}
}
result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
if (result) CFRelease(result);
result = NULL;
}
CFRelease(parser);
return result;
}
/*
 * Returns a new string in which the five XML special characters are replaced
 * by their predefined entity references:
 *     &  ->  &amp;     <  ->  &lt;     >  ->  &gt;
 *     '  ->  &apos;    "  ->  &quot;
 * All other characters are copied unchanged.  `entitiesDictionary` is
 * accepted but not consulted.  The caller owns the returned string.
 */
CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
    CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
    CFMutableStringRef newString = CFStringCreateMutable(allocator, 0);
    CFStringInlineBuffer inlineBuf;
    CFIndex idx;
    CFIndex mark = 0; /* start of the pending run of unescaped characters */
    CFIndex stringLength = CFStringGetLength(string);

    CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, stringLength));
    for (idx = 0; idx < stringLength; idx++) {
        CFStringRef replacement;
        switch (CFStringGetCharacterFromInlineBuffer(&inlineBuf, idx)) {
            case '&':
                replacement = CFSTR("&amp;");
                break;
            case '<':
                replacement = CFSTR("&lt;");
                break;
            case '>':
                replacement = CFSTR("&gt;");
                break;
            case '\'':
                replacement = CFSTR("&apos;");
                break;
            case '"':
                replacement = CFSTR("&quot;");
                break;
            default:
                replacement = NULL;
                break;
        }
        if (!replacement) continue;

        /* Flush the literal run that precedes this special character. */
        if (idx > mark) {
            CFStringRef previousSubstring = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
            if (NULL != previousSubstring) {
                CFStringAppend(newString, previousSubstring);
                CFRelease(previousSubstring);
            }
        }
        CFStringAppend(newString, replacement);
        mark = idx + 1;
    }

    /* Copy whatever follows the last special character. */
    if (mark < stringLength) {
        CFStringRef remainder = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, stringLength - mark));
        if (NULL != remainder) {
            CFStringAppend(newString, remainder);
            CFRelease(remainder);
        }
    }
    return newString;
}
CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
CFStringInlineBuffer inlineBuf;
CFStringRef sub;
CFIndex lastChunkStart, length = CFStringGetLength(string);
CFIndex i, entityStart;
UniChar uc;
UInt32 entity;
int base;
CFMutableDictionaryRef fullReplDict = entitiesDictionary ? CFDictionaryCreateMutableCopy(allocator, 0, entitiesDictionary) : CFDictionaryCreateMutable(allocator, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, length - 1));
CFMutableStringRef newString = CFStringCreateMutable(allocator, 0);
lastChunkStart = 0;
for(i = 0; i < length; ) {
uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
if(uc == '&') {
entityStart = i - 1;
entity = 0xFFFF; if(lastChunkStart < i - 1) {
sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, (i - 1) - lastChunkStart));
CFStringAppend(newString, sub);
CFRelease(sub);
}
uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; if(uc == '#') { base = 10;
entity = 0;
uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
if(uc == 'x') { base = 16;
uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
}
while(true) {
if (uc >= '0' && uc <= '9')
entity = entity * base + (uc-'0');
else if (uc >= 'a' && uc <= 'f' && base == 16)
entity = entity * base + (uc-'a'+10);
else if (uc >= 'A' && uc <= 'F' && base == 16)
entity = entity * base + (uc-'A'+10);
else break;
if (i < length) {
uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
}
else
break;
}
}
while(uc != ';' && i < length) {
uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
}
if(0xFFFF != entity) { if(entity >= 0x10000) {
UniChar characters[2] = { ((entity - 0x10000) >> 10) + 0xD800, ((entity - 0x10000) & 0x3ff) + 0xDC00 };
CFStringAppendCharacters(newString, characters, 2);
} else {
UniChar character = entity;
CFStringAppendCharacters(newString, &character, 1);
}
} else { sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart + 1, (i - entityStart - 2))); CFStringRef replacementString = (CFStringRef)CFDictionaryGetValue(fullReplDict, sub);
if(replacementString) {
CFStringAppend(newString, replacementString);
} else {
CFRelease(sub); sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart, (i - entityStart))); CFStringAppend(newString, sub); }
CFRelease(sub); }
lastChunkStart = i;
}
}
if(lastChunkStart < length) { sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, i - lastChunkStart));
CFStringAppend(newString, sub);
CFRelease(sub);
}
CFRelease(fullReplDict);
return newString;
}