// collationdatawriter.cpp
#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
#include "unicode/tblcoll.h"
#include "unicode/udata.h"
#include "unicode/uniset.h"
#include "cmemory.h"
#include "collationdata.h"
#include "collationdatabuilder.h"
#include "collationdatareader.h"
#include "collationdatawriter.h"
#include "collationfastlatin.h"
#include "collationsettings.h"
#include "collationtailoring.h"
#include "uassert.h"
#include "ucmndata.h"
U_NAMESPACE_BEGIN
uint8_t *
RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) { return NULL; }
LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000));
if(buffer.isNull()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
length = cloneBinary(buffer.getAlias(), 20000, errorCode);
if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
if(buffer.allocateInsteadAndCopy(length, 0) == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
errorCode = U_ZERO_ERROR;
length = cloneBinary(buffer.getAlias(), length, errorCode);
}
if(U_FAILURE(errorCode)) { return NULL; }
return buffer.orphan();
}
int32_t
RuleBasedCollator::cloneBinary(uint8_t *dest, int32_t capacity, UErrorCode &errorCode) const {
int32_t indexes[CollationDataReader::IX_TOTAL_SIZE + 1];
return CollationDataWriter::writeTailoring(
*tailoring, *settings, indexes, dest, capacity,
errorCode);
}
// Template UDataInfo that write() copies into the DataHeader of a serialized
// tailoring; write() then overwrites the dataVersion field with the
// caller-supplied version.
// Initializers are positional. Field names in the notes below follow the
// UDataInfo declaration in unicode/udata.h (not visible in this file):
// size, reservedWord, isBigEndian, charsetFamily, sizeofUChar, reservedByte,
// dataFormat, formatVersion, dataVersion.
static const UDataInfo dataInfo = {
sizeof(UDataInfo),
0,
U_IS_BIG_ENDIAN,
U_CHARSET_FAMILY,
U_SIZEOF_UCHAR,
0,
// dataFormat bytes 0x55 0x43 0x6f 0x6c are ASCII "UCol";
// followed by formatVersion 5.0.0.0 and a default dataVersion 6.3.0.0.
{ 0x55, 0x43, 0x6f, 0x6c }, { 5, 0, 0, 0 }, { 6, 3, 0, 0 } };
int32_t
CollationDataWriter::writeBase(const CollationData &data, const CollationSettings &settings,
const void *rootElements, int32_t rootElementsLength,
int32_t indexes[], uint8_t *dest, int32_t capacity,
UErrorCode &errorCode) {
return write(TRUE, NULL,
data, settings,
rootElements, rootElementsLength,
indexes, dest, capacity, errorCode);
}
int32_t
CollationDataWriter::writeTailoring(const CollationTailoring &t, const CollationSettings &settings,
int32_t indexes[], uint8_t *dest, int32_t capacity,
UErrorCode &errorCode) {
return write(FALSE, t.version,
*t.data, settings,
NULL, 0,
indexes, dest, capacity, errorCode);
}
// Serializes collation data and settings into dest.
//
// Output layout: [DataHeader, tailorings only] [indexes] [data items...].
// Each data item's byte offset, counted from the start of the indexes,
// is recorded in indexes[]; an item's limit is the next entry's value.
//
// isBase             TRUE for base data: no header is emitted here, and the
//                    scripts and compressible-bytes items are included.
// dataVersion        copied into the header; only read when isBase is FALSE.
// data, settings     the source of everything that is written.
// rootElements       copied verbatim; rootElementsLength counts 4-byte units.
// indexes            [out] filled in by this function, which writes entries up
//                    to indexes[IX_TOTAL_SIZE]; callers in this file pass
//                    an array of IX_TOTAL_SIZE + 1 entries.
// dest, capacity     output buffer and its size in bytes; dest may be NULL
//                    only if capacity is 0 (pure size computation).
// errorCode          [in/out] set to U_ILLEGAL_ARGUMENT_ERROR for bad
//                    dest/capacity, U_BUFFER_OVERFLOW_ERROR if the output
//                    does not fit, or an error from a serialization helper.
//
// Returns headerSize + totalSize (the full number of bytes needed), also when
// reporting U_BUFFER_OVERFLOW_ERROR; returns 0 for all other failures.
int32_t
CollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion,
const CollationData &data, const CollationSettings &settings,
const void *rootElements, int32_t rootElementsLength,
int32_t indexes[], uint8_t *dest, int32_t capacity,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return 0; }
// A negative capacity, or a non-empty buffer without a pointer, is illegal.
if(capacity < 0 || (capacity > 0 && dest == NULL)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
// Decide which data items will be written before fixing the indexes length.
// An item needs both its start and its limit offset, so the indexes length
// must be at least index-of-start-offset + 2.
int32_t indexesLength;
UBool hasMappings;
UnicodeSet unsafeBackwardSet;
const CollationData *baseData = data.base;
// The fast Latin version is stored above the low 16 bits of the options word
// (it is OR-ed into indexes[IX_OPTIONS] below); 0 if there is no table.
int32_t fastLatinVersion;
if(data.fastLatinTable != NULL) {
fastLatinVersion = (int32_t)CollationFastLatin::VERSION << 16;
} else {
fastLatinVersion = 0;
}
int32_t fastLatinTableLength = 0;
if(isBase) {
// Base data: write the complete indexes array, the whole unsafe-backward
// set and the fast Latin table. Base data must not carry reorder codes.
indexesLength = CollationDataReader::IX_TOTAL_SIZE + 1;
U_ASSERT(settings.reorderCodesLength == 0);
hasMappings = TRUE;
unsafeBackwardSet = *data.unsafeBackwardSet;
fastLatinTableLength = data.fastLatinTableLength;
} else if(baseData == NULL) {
// No base pointer: no mappings are written, only options
// (a plain value, so +1 rather than +2), or options plus
// reorder codes and the reorder table.
hasMappings = FALSE;
if(settings.reorderCodesLength == 0) {
indexesLength = CollationDataReader::IX_OPTIONS + 1; } else {
indexesLength = CollationDataReader::IX_REORDER_TABLE_OFFSET + 2;
}
} else {
// Tailoring with mappings: always write through the CE32s, then extend
// the indexes for each optional item present, in ascending index order.
hasMappings = TRUE;
indexesLength = CollationDataReader::IX_CE32S_OFFSET + 2;
if(data.contextsLength != 0) {
indexesLength = CollationDataReader::IX_CONTEXTS_OFFSET + 2;
}
// Only the code points that are not already in the base set are written.
unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->unsafeBackwardSet);
if(!unsafeBackwardSet.isEmpty()) {
indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2;
}
// The fast Latin table is written only if it differs from the base's.
if(data.fastLatinTable != baseData->fastLatinTable) {
fastLatinTableLength = data.fastLatinTableLength;
indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2;
}
}
// By default the reorder codes are written exactly as stored in the settings.
// If the reorder table has split bytes, write an extended list instead:
// makeReorderRanges() supplies the ranges (presumably appended to
// codesAndRanges -- confirm against CollationData), and the original codes
// are then inserted at the front, so the result is codes followed by ranges.
UVector32 codesAndRanges(errorCode);
const int32_t *reorderCodes = settings.reorderCodes;
int32_t reorderCodesLength = settings.reorderCodesLength;
if(settings.hasReordering() &&
CollationSettings::reorderTableHasSplitBytes(settings.reorderTable)) {
data.makeReorderRanges(reorderCodes, reorderCodesLength, codesAndRanges, errorCode);
for(int32_t i = 0; i < reorderCodesLength; ++i) {
codesAndRanges.insertElementAt(reorderCodes[i], i, errorCode);
}
if(U_FAILURE(errorCode)) { return 0; }
reorderCodes = codesAndRanges.getBuffer();
reorderCodesLength = codesAndRanges.size();
}
// Header: this function emits none for base data (headerSize stays 0);
// for a tailoring it builds a DataHeader from the dataInfo template.
int32_t headerSize;
if(isBase) {
headerSize = 0; } else {
DataHeader header;
header.dataHeader.magic1 = 0xda;
header.dataHeader.magic2 = 0x27;
uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo));
// Replace the template's data version with the caller's.
uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo));
headerSize = (int32_t)sizeof(header);
// The header must be a multiple of 4 bytes. If 8-byte CEs will be written,
// add 4 bytes of header padding whenever header + indexes + reorder codes
// is not a multiple of 8, so that the CEs start 8-aligned. The other items
// before the CEs do not disturb this: the reorder table is 256 bytes and
// the trie length is asserted below to be a multiple of 8.
U_ASSERT((headerSize & 3) == 0); if(hasMappings && data.cesLength != 0) {
int32_t sum = headerSize + (indexesLength + reorderCodesLength) * 4;
if((sum & 7) != 0) {
headerSize += 4;
}
}
header.dataHeader.headerSize = (uint16_t)headerSize;
if(headerSize <= capacity) {
// Write the header, zero-fill any padding after it, then advance
// dest/capacity so that all later offsets are relative to the indexes.
uprv_memcpy(dest, &header, sizeof(header));
uprv_memset(dest + sizeof(header), 0, headerSize - (int32_t)sizeof(header));
dest += headerSize;
capacity -= headerSize;
} else {
// Not even the header fits: continue in size-computation-only mode.
dest = NULL;
capacity = 0;
}
}
indexes[CollationDataReader::IX_INDEXES_LENGTH] = indexesLength;
// The options must fit into the low 16 bits; the bits above hold the
// fast Latin version, and data.numericPrimary is OR-ed in as well.
U_ASSERT((settings.options & ~0xffff) == 0);
indexes[CollationDataReader::IX_OPTIONS] =
data.numericPrimary | fastLatinVersion | settings.options;
indexes[CollationDataReader::IX_RESERVED2] = 0;
indexes[CollationDataReader::IX_RESERVED3] = 0;
// Running byte offset from the start of the indexes.
// headerSize is added only in the return value.
int32_t totalSize = indexesLength * 4;
// Jamo CE32s: record their start as an index into ce32s,
// or -1 if there are no mappings or they are shared with the base.
if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) {
indexes[CollationDataReader::IX_JAMO_CE32S_START] = static_cast<int32_t>(data.jamoCE32s - data.ce32s);
} else {
indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1;
}
// From here on, the pattern for each item is: record its start offset,
// then advance totalSize by the item's byte size (0 if it is not written).
// Reorder codes: 4 bytes each.
indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize;
totalSize += reorderCodesLength * 4;
// Reorder table: 256 bytes if present.
indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize;
if(settings.reorderTable != NULL) {
totalSize += 256;
}
// Trie: serialized straight into dest when there is room at its offset,
// otherwise only measured with a NULL buffer. A buffer overflow from the
// serializer is tolerated here; the final size check below reports it.
indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize;
if(hasMappings) {
UErrorCode errorCode2 = U_ZERO_ERROR;
int32_t length;
if(totalSize < capacity) {
length = utrie2_serialize(data.trie, dest + totalSize,
capacity - totalSize, &errorCode2);
} else {
length = utrie2_serialize(data.trie, NULL, 0, &errorCode2);
}
if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
errorCode = errorCode2;
return 0;
}
// The alignment computation for the CEs relies on this.
U_ASSERT((length & 7) == 0);
totalSize += length;
}
// CEs: 8 bytes each; must start 8-aligned counting from the header start.
indexes[CollationDataReader::IX_RESERVED8_OFFSET] = totalSize;
indexes[CollationDataReader::IX_CES_OFFSET] = totalSize;
if(hasMappings && data.cesLength != 0) {
U_ASSERT(((headerSize + totalSize) & 7) == 0);
totalSize += data.cesLength * 8;
}
// CE32s: 4 bytes each.
indexes[CollationDataReader::IX_RESERVED10_OFFSET] = totalSize;
indexes[CollationDataReader::IX_CE32S_OFFSET] = totalSize;
if(hasMappings) {
totalSize += data.ce32sLength * 4;
}
// Root elements: 4 bytes each (length is 0 when called via writeTailoring()).
indexes[CollationDataReader::IX_ROOT_ELEMENTS_OFFSET] = totalSize;
totalSize += rootElementsLength * 4;
// Contexts: 2 bytes each.
indexes[CollationDataReader::IX_CONTEXTS_OFFSET] = totalSize;
if(hasMappings) {
totalSize += data.contextsLength * 2;
}
// Unsafe-backward set: serialized in place as 16-bit units, or only measured
// if there is no room; serialize() takes and returns counts of 16-bit units,
// hence the /2 and *2. Overflow is again left to the final size check.
indexes[CollationDataReader::IX_UNSAFE_BWD_OFFSET] = totalSize;
if(hasMappings && !unsafeBackwardSet.isEmpty()) {
UErrorCode errorCode2 = U_ZERO_ERROR;
int32_t length;
if(totalSize < capacity) {
uint16_t *p = reinterpret_cast<uint16_t *>(dest + totalSize);
length = unsafeBackwardSet.serialize(
p, (capacity - totalSize) / 2, errorCode2);
} else {
length = unsafeBackwardSet.serialize(NULL, 0, errorCode2);
}
if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
errorCode = errorCode2;
return 0;
}
totalSize += length * 2;
}
// Fast Latin table: 2 bytes per unit (length stays 0 if it is not written).
indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize;
totalSize += fastLatinTableLength * 2;
// Scripts (base data only): assembled into one string of 16-bit units as
// numScripts, then scriptsIndex (numScripts + 16 units), then scriptStarts.
UnicodeString scripts;
indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize;
if(isBase) {
scripts.append((UChar)data.numScripts);
scripts.append(reinterpret_cast<const UChar *>(data.scriptsIndex), data.numScripts + 16);
scripts.append(reinterpret_cast<const UChar *>(data.scriptStarts), data.scriptStartsLength);
totalSize += scripts.length() * 2;
}
// Compressible bytes (base data only): 256 bytes.
indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize;
if(isBase) {
totalSize += 256;
}
indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize;
indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize;
// Does everything after the header fit? If not, report the required size.
if(totalSize > capacity) {
errorCode = U_BUFFER_OVERFLOW_ERROR;
return headerSize + totalSize;
}
// Copy the indexes and all remaining items to their recorded offsets.
// The trie and the unsafe-backward set were already serialized in place.
// copyData() does nothing for an item whose start equals its limit.
uprv_memcpy(dest, indexes, indexesLength * 4);
copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, reorderCodes, dest);
copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reorderTable, dest);
copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest);
copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest);
copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements, dest);
copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, dest);
copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fastLatinTable, dest);
copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, scripts.getBuffer(), dest);
copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.compressibleBytes, dest);
return headerSize + totalSize;
}
// Copies one data item into the output buffer.
//
// indexes     array of byte offsets; the item occupies the byte range
//             [indexes[startIndex], indexes[startIndex + 1]) of dest
// startIndex  index of the item's start offset
// src         source bytes; not read if the range is empty
// dest        start of the output that the offsets are relative to
void
CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex,
                              const void *src, uint8_t *dest) {
    const int32_t begin = indexes[startIndex];
    const int32_t end = indexes[startIndex + 1];
    // Nothing to copy for an empty (or inverted) range.
    if(!(begin < end)) { return; }
    uprv_memcpy(dest + begin, src, end - begin);
}
U_NAMESPACE_END
#endif // !UCONFIG_NO_COLLATION