// collationsettings.cpp   [plain text]


/*
*******************************************************************************
* Copyright (C) 2013-2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationsettings.cpp
*
* created on: 2013feb07
* created by: Markus W. Scherer
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/ucol.h"
#include "cmemory.h"
#include "collation.h"
#include "collationdata.h"
#include "collationsettings.h"
#include "sharedobject.h"
#include "uassert.h"
#include "umutex.h"
#include "uvectr32.h"

U_NAMESPACE_BEGIN

// Copy constructor.
// The scalar settings are copied in the initializer list. The reordering
// fields start out empty and non-owning (capacity 0), and are then filled
// by copyReorderingFrom(), which either aliases or duplicates other's arrays.
CollationSettings::CollationSettings(const CollationSettings &other)
        : SharedObject(other),
          options(other.options), variableTop(other.variableTop),
          reorderTable(NULL),
          minHighNoReorder(other.minHighNoReorder),
          reorderRanges(NULL), reorderRangesLength(0),
          reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
          fastLatinOptions(other.fastLatinOptions) {
    // The status of the reordering copy is local to this constructor
    // and is not reported to the caller.
    UErrorCode status = U_ZERO_ERROR;
    copyReorderingFrom(other, status);

    // fastLatinPrimaries is copied only for non-negative fastLatinOptions;
    // otherwise the array is left untouched.
    if(fastLatinOptions < 0) { return; }
    uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
}

// Destructor.
// A capacity of 0 means that reorderCodes is either NULL or an alias to
// memory that this object does not own, so there is nothing to release.
CollationSettings::~CollationSettings() {
    if(reorderCodesCapacity == 0) { return; }
    uprv_free(const_cast<int32_t *>(reorderCodes));
}

// Equality: compares the options, the variableTop (only when any of the
// ALTERNATE_MASK option bits is set), and the list of reorder codes.
// Other fields are not compared here.
UBool
CollationSettings::operator==(const CollationSettings &other) const {
    // Cheap scalar comparisons first.
    if(options != other.options || reorderCodesLength != other.reorderCodesLength) {
        return FALSE;
    }
    const UBool comparesVariableTop = (options & ALTERNATE_MASK) != 0;
    if(comparesVariableTop && variableTop != other.variableTop) {
        return FALSE;
    }
    // Both lists have the same length; compare them element by element.
    const int32_t *mine = reorderCodes;
    const int32_t *theirs = other.reorderCodes;
    for(int32_t remaining = reorderCodesLength; remaining > 0; --remaining) {
        if(*mine++ != *theirs++) { return FALSE; }
    }
    return TRUE;
}

// Returns a hash code that is consistent with operator==:
// it combines the options, the variableTop (only when any of the
// ALTERNATE_MASK option bits is set), and the reorder codes.
//
// The hash is accumulated in unsigned arithmetic, and the per-code shift
// count is reduced to 0..31. Left-shifting a negative int32_t, or shifting
// a 32-bit value by 32 or more bits, is undefined behavior, and the list of
// reorder codes may contain negative values or more than 32 entries.
// For non-negative codes with fewer than 32 entries the result is the same
// as with a plain signed shift.
int32_t
CollationSettings::hashCode() const {
    uint32_t h = (uint32_t)options << 8;
    if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
    h ^= (uint32_t)reorderCodesLength;
    for(int32_t i = 0; i < reorderCodesLength; ++i) {
        h ^= (uint32_t)reorderCodes[i] << (i & 0x1f);
    }
    return (int32_t)h;
}

// Turns off reordering.
// The table pointer is set to NULL (rather than to an identity permutation)
// and all lengths are set to 0.
// reorderCodes and reorderCodesCapacity are deliberately left alone
// so that an owned buffer can be reused by a later setReorderArrays().
void
CollationSettings::resetReordering() {
    reorderCodesLength = 0;
    reorderRangesLength = 0;
    minHighNoReorder = 0;
    reorderTable = NULL;
}

void
CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
                                   const uint32_t *ranges, int32_t rangesLength,
                                   const uint8_t *table, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    if(table != NULL &&
            (rangesLength == 0 ?
                    !reorderTableHasSplitBytes(table) :
                    rangesLength >= 2 &&
                    // The first offset must be 0. The last offset must not be 0.
                    (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
        // We need to release the memory before setting the alias pointer.
        if(reorderCodesCapacity != 0) {
            uprv_free(const_cast<int32_t *>(reorderCodes));
            reorderCodesCapacity = 0;
        }
        reorderTable = table;
        reorderCodes = codes;
        reorderCodesLength = length;
        // Drop ranges before the first split byte. They are reordered by the table.
        // This then speeds up reordering of the remaining ranges.
        int32_t firstSplitByteRangeIndex = 0;
        while(firstSplitByteRangeIndex < rangesLength &&
                (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
            // The second byte of the primary limit is 0.
            ++firstSplitByteRangeIndex;
        }
        if(firstSplitByteRangeIndex == rangesLength) {
            U_ASSERT(!reorderTableHasSplitBytes(table));
            minHighNoReorder = 0;
            reorderRanges = NULL;
            reorderRangesLength = 0;
        } else {
            U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
            minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
            reorderRanges = ranges + firstSplitByteRangeIndex;
            reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
        }
        return;
    }
    // Regenerate missing data.
    setReordering(data, codes, length, errorCode);
}

void
CollationSettings::setReordering(const CollationData &data,
                                 const int32_t *codes, int32_t codesLength,
                                 UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
        resetReordering();
        return;
    }
    UVector32 rangesList(errorCode);
    data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
    if(U_FAILURE(errorCode)) { return; }
    int32_t rangesLength = rangesList.size();
    if(rangesLength == 0) {
        resetReordering();
        return;
    }
    const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
    // ranges[] contains at least two (limit, offset) pairs.
    // The first offset must be 0. The last offset must not be 0.
    // Separators (at the low end) and trailing weights (at the high end)
    // are never reordered.
    U_ASSERT(rangesLength >= 2);
    U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
    minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;

    // Write the lead byte permutation table.
    // Set a 0 for each lead byte that has a range boundary in the middle.
    uint8_t table[256];
    int32_t b = 0;
    int32_t firstSplitByteRangeIndex = -1;
    for(int32_t i = 0; i < rangesLength; ++i) {
        uint32_t pair = ranges[i];
        int32_t limit1 = (int32_t)(pair >> 24);
        while(b < limit1) {
            table[b] = (uint8_t)(b + pair);
            ++b;
        }
        // Check the second byte of the limit.
        if((pair & 0xff0000) != 0) {
            table[limit1] = 0;
            b = limit1 + 1;
            if(firstSplitByteRangeIndex < 0) {
                firstSplitByteRangeIndex = i;
            }
        }
    }
    while(b <= 0xff) {
        table[b] = (uint8_t)b;
        ++b;
    }
    if(firstSplitByteRangeIndex < 0) {
        // The lead byte permutation table alone suffices for reordering.
        rangesLength = 0;
    } else {
        // Remove the ranges below the first split byte.
        ranges += firstSplitByteRangeIndex;
        rangesLength -= firstSplitByteRangeIndex;
    }
    setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
}

// Copies the codes, the ranges and the 256-byte table into one memory block
// owned by this object, and points the reordering fields into that block.
// Block layout, in int32_t units:
//   [0, codesLength)                        reorder codes
//   [codesLength, codesLength+rangesLength)  ranges
//   [reorderCodesCapacity, ...)              the 256 table bytes
// An existing owned block is reused when it is large enough.
// On allocation failure, reordering is reset and
// errorCode is set to U_MEMORY_ALLOCATION_ERROR.
void
CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
                                    const uint32_t *ranges, int32_t rangesLength,
                                    const uint8_t *table, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    const int32_t totalLength = codesLength + rangesLength;
    U_ASSERT(totalLength > 0);

    int32_t *buffer = const_cast<int32_t *>(reorderCodes);
    if(totalLength > reorderCodesCapacity) {
        // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
        const int32_t newCapacity = (totalLength + 3) & ~3;  // round up to a multiple of 4 ints
        buffer = (int32_t *)uprv_malloc(newCapacity * 4 + 256);
        if(buffer == NULL) {
            resetReordering();
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        // Replace the old owned block, if there is one.
        if(reorderCodesCapacity != 0) {
            uprv_free(const_cast<int32_t *>(reorderCodes));
        }
        reorderCodes = buffer;
        reorderCodesCapacity = newCapacity;
    }

    uint8_t *tableDest = reinterpret_cast<uint8_t *>(buffer + reorderCodesCapacity);
    int32_t *rangesDest = buffer + codesLength;
    uprv_memcpy(tableDest, table, 256);
    uprv_memcpy(buffer, codes, codesLength * 4);
    uprv_memcpy(rangesDest, ranges, rangesLength * 4);

    reorderTable = tableDest;
    reorderCodesLength = codesLength;
    reorderRanges = reinterpret_cast<uint32_t *>(rangesDest);
    reorderRangesLength = rangesLength;
}

void
CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    if(!other.hasReordering()) {
        resetReordering();
        return;
    }
    minHighNoReorder = other.minHighNoReorder;
    if(other.reorderCodesCapacity == 0) {
        // The reorder arrays are aliased to memory-mapped data.
        reorderTable = other.reorderTable;
        reorderRanges = other.reorderRanges;
        reorderRangesLength = other.reorderRangesLength;
        reorderCodes = other.reorderCodes;
        reorderCodesLength = other.reorderCodesLength;
    } else {
        setReorderArrays(other.reorderCodes, other.reorderCodesLength,
                         other.reorderRanges, other.reorderRangesLength,
                         other.reorderTable, errorCode);
    }
}

// Returns TRUE if any of table[1..255] is 0.
// table[0] is expected to be 0 (asserted) and is not counted.
UBool
CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
    U_ASSERT(table[0] == 0);
    int32_t index = 1;
    while(index < 256 && table[index] != 0) {
        ++index;
    }
    // The scan stopped early if and only if it found a 0 entry.
    return index < 256 ? TRUE : FALSE;
}

// Reorders the primary weight p using the list of (limit, offset) ranges.
// Primaries at or above minHighNoReorder are returned unchanged.
// Otherwise the low byte of the offset of the first range whose pair
// is greater than (p | 0xffff) is added to the lead byte of p.
// The scan itself has no length check: it relies on the ranges list
// containing a pair greater than (p | 0xffff) for every p that gets here.
uint32_t
CollationSettings::reorderEx(uint32_t p) const {
    if(p >= minHighNoReorder) { return p; }
    // Round up p so that its lower 16 bits are >= any offset bits.
    // Then compare q directly with (limit, offset) pairs.
    const uint32_t q = p | 0xffff;
    const uint32_t *range = reorderRanges;
    uint32_t pair = *range;
    while(q >= pair) {
        pair = *++range;
    }
    return p + (pair << 24);
}

// Sets the strength bits of the options.
// value must be one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY,
// UCOL_QUATERNARY, UCOL_IDENTICAL, or UCOL_DEFAULT; UCOL_DEFAULT takes
// the strength bits from defaultOptions.
// Any other value sets errorCode to U_ILLEGAL_ARGUMENT_ERROR
// and leaves the options unchanged.
void
CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    int32_t strengthBits;
    if(value == UCOL_DEFAULT) {
        strengthBits = defaultOptions & STRENGTH_MASK;
    } else if(value == UCOL_PRIMARY || value == UCOL_SECONDARY || value == UCOL_TERTIARY ||
            value == UCOL_QUATERNARY || value == UCOL_IDENTICAL) {
        strengthBits = value << STRENGTH_SHIFT;
    } else {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    options = (options & ~STRENGTH_MASK) | strengthBits;
}

// Sets, clears, or restores the default of the option flag(s) in bit.
// UCOL_ON sets the bit, UCOL_OFF clears it, and UCOL_DEFAULT copies it
// from defaultOptions.
// Any other value sets errorCode to U_ILLEGAL_ARGUMENT_ERROR
// and leaves the options unchanged.
void
CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
                           int32_t defaultOptions, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    if(value == UCOL_ON) {
        options |= bit;
    } else if(value == UCOL_OFF) {
        options &= ~bit;
    } else if(value == UCOL_DEFAULT) {
        options = (options & ~bit) | (defaultOptions & bit);
    } else {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    }
}

// Sets the case-first bits (CASE_FIRST_AND_UPPER_MASK) of the options.
// UCOL_OFF clears them, UCOL_LOWER_FIRST sets CASE_FIRST,
// UCOL_UPPER_FIRST sets all of CASE_FIRST_AND_UPPER_MASK, and
// UCOL_DEFAULT copies the bits from defaultOptions.
// Any other value sets errorCode to U_ILLEGAL_ARGUMENT_ERROR
// and leaves the options unchanged.
void
CollationSettings::setCaseFirst(UColAttributeValue value,
                                int32_t defaultOptions, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    int32_t caseFirstBits;
    switch(value) {
    case UCOL_OFF:
        caseFirstBits = 0;
        break;
    case UCOL_LOWER_FIRST:
        caseFirstBits = CASE_FIRST;
        break;
    case UCOL_UPPER_FIRST:
        caseFirstBits = CASE_FIRST_AND_UPPER_MASK;
        break;
    case UCOL_DEFAULT:
        caseFirstBits = defaultOptions & CASE_FIRST_AND_UPPER_MASK;
        break;
    default:
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    options = (options & ~CASE_FIRST_AND_UPPER_MASK) | caseFirstBits;
}

// Sets the alternate-handling bits (ALTERNATE_MASK) of the options.
// UCOL_NON_IGNORABLE clears them, UCOL_SHIFTED sets SHIFTED, and
// UCOL_DEFAULT copies the bits from defaultOptions.
// Any other value sets errorCode to U_ILLEGAL_ARGUMENT_ERROR
// and leaves the options unchanged.
void
CollationSettings::setAlternateHandling(UColAttributeValue value,
                                        int32_t defaultOptions, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    int32_t alternateBits;
    if(value == UCOL_NON_IGNORABLE) {
        alternateBits = 0;
    } else if(value == UCOL_SHIFTED) {
        alternateBits = SHIFTED;
    } else if(value == UCOL_DEFAULT) {
        alternateBits = defaultOptions & ALTERNATE_MASK;
    } else {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    options = (options & ~ALTERNATE_MASK) | alternateBits;
}

// Sets the max-variable bits (MAX_VARIABLE_MASK) of the options.
// value must be one of MAX_VAR_SPACE, MAX_VAR_PUNCT, MAX_VAR_SYMBOL,
// MAX_VAR_CURRENCY, or UCOL_DEFAULT; UCOL_DEFAULT takes the bits
// from defaultOptions.
// Any other value sets errorCode to U_ILLEGAL_ARGUMENT_ERROR
// and leaves the options unchanged.
void
CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    int32_t maxVariableBits;
    if(value == UCOL_DEFAULT) {
        maxVariableBits = defaultOptions & MAX_VARIABLE_MASK;
    } else if(value == MAX_VAR_SPACE || value == MAX_VAR_PUNCT ||
            value == MAX_VAR_SYMBOL || value == MAX_VAR_CURRENCY) {
        maxVariableBits = value << MAX_VARIABLE_SHIFT;
    } else {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    options = (options & ~MAX_VARIABLE_MASK) | maxVariableBits;
}

U_NAMESPACE_END

#endif  // !UCONFIG_NO_COLLATION