scriptset.cpp   [plain text]


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* scriptset.cpp
*
* created on: 2013 Jan 7
* created by: Andy Heninger
*/

#include "unicode/utypes.h"

#include "unicode/uchar.h"
#include "unicode/unistr.h"

#include "scriptset.h"
#include "uassert.h"
#include "cmemory.h"

U_NAMESPACE_BEGIN

//----------------------------------------------------------------------------
//
//  ScriptSet implementation
//
//----------------------------------------------------------------------------
ScriptSet::ScriptSet() {
    uprv_memset(bits, 0, sizeof(bits));
}

ScriptSet::~ScriptSet() {
}

ScriptSet::ScriptSet(const ScriptSet &other) {
    *this = other;
}

ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
    uprv_memcpy(bits, other.bits, sizeof(bits));
    return *this;
}

UBool ScriptSet::operator == (const ScriptSet &other) const {
    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
        if (bits[i] != other.bits[i]) {
            return FALSE;
        }
    }
    return TRUE;
}

UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return FALSE;
    }
    if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }
    uint32_t index = script / 32;
    uint32_t bit   = 1 << (script & 31);
    return ((bits[index] & bit) != 0);
}


ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return *this;
    }
    if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
    uint32_t index = script / 32;
    uint32_t bit   = 1 << (script & 31);
    bits[index] |= bit;
    return *this;
}

ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return *this;
    }
    if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
    uint32_t index = script / 32;
    uint32_t bit   = 1 << (script & 31);
    bits[index] &= ~bit;
    return *this;
}



ScriptSet &ScriptSet::Union(const ScriptSet &other) {
    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
        bits[i] |= other.bits[i];
    }
    return *this;
}

ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
        bits[i] &= other.bits[i];
    }
    return *this;
}

ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
    ScriptSet t;
    t.set(script, status);
    if (U_SUCCESS(status)) {
        this->intersect(t);
    }
    return *this;
}

UBool ScriptSet::intersects(const ScriptSet &other) const {
    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
        if ((bits[i] & other.bits[i]) != 0) {
            return true;
        }
    }
    return false;
}

UBool ScriptSet::contains(const ScriptSet &other) const {
    ScriptSet t(*this);
    t.intersect(other);
    return (t == other);
}


ScriptSet &ScriptSet::setAll() {
    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
        bits[i] = 0xffffffffu;
    }
    return *this;
}


ScriptSet &ScriptSet::resetAll() {
    uprv_memset(bits, 0, sizeof(bits));
    return *this;
}

int32_t ScriptSet::countMembers() const {
    // This bit counter is good for sparse numbers of '1's, which is
    //  very much the case that we will usually have.
    int32_t count = 0;
    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
        uint32_t x = bits[i];
        while (x > 0) {
            count++;
            x &= (x - 1);    // and off the least significant one bit.
        }
    }
    return count;
}

int32_t ScriptSet::hashCode() const {
    int32_t hash = 0;
    for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
        hash ^= bits[i];
    }
    return hash;
}

int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
    // TODO: Wants a better implementation.
    if (fromIndex < 0) {
        return -1;
    }
    UErrorCode status = U_ZERO_ERROR;
    for (int32_t scriptIndex = fromIndex; scriptIndex < SCRIPT_LIMIT; scriptIndex++) {
        if (test((UScriptCode)scriptIndex, status)) {
            return scriptIndex;
        }
    }
    return -1;
}

UBool ScriptSet::isEmpty() const {
    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
        if (bits[i] != 0) {
            return FALSE;
        }
    }
    return TRUE;
}

UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
    UBool firstTime = TRUE;
    for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
        if (!firstTime) {
            dest.append((UChar)0x20);
        }
        firstTime = FALSE;
        const char *scriptName = uscript_getShortName((UScriptCode(i)));
        dest.append(UnicodeString(scriptName, -1, US_INV));
    }
    return dest;
}

ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
    resetAll();
    if (U_FAILURE(status)) {
        return *this;
    }
    UnicodeString oneScriptName;
    for (int32_t i=0; i<scriptString.length();) {
        UChar32 c = scriptString.char32At(i);
        i = scriptString.moveIndex32(i, 1);
        if (!u_isUWhiteSpace(c)) {
            oneScriptName.append(c);
            if (i < scriptString.length()) {
                continue;
            }
        }
        if (oneScriptName.length() > 0) {
            char buf[40];
            oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
            buf[sizeof(buf)-1] = 0;
            int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
            if (sc == UCHAR_INVALID_CODE) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
            } else {
                this->set((UScriptCode)sc, status);
            }
            if (U_FAILURE(status)) {
                return *this;
            }
            oneScriptName.remove();
        }
    }
    return *this;
}

void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }
    static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 20;
    MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
    UErrorCode internalStatus = U_ZERO_ERROR;
    int32_t script_count = -1;

    while (TRUE) {
        script_count = uscript_getScriptExtensions(
            codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);
        if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {
            // Need to allocate more space
            if (scripts.resize(script_count) == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            internalStatus = U_ZERO_ERROR;
        } else {
            break;
        }
    }

    // Check if we failed for some reason other than buffer overflow
    if (U_FAILURE(internalStatus)) {
        status = internalStatus;
        return;
    }

    // Load the scripts into the ScriptSet and return
    for (int32_t i = 0; i < script_count; i++) {
        this->set(scripts[i], status);
        if (U_FAILURE(status)) { return; }
    }
}

U_NAMESPACE_END

U_CAPI UBool U_EXPORT2
uhash_equalsScriptSet(const UElement key1, const UElement key2) {
    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
    icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
    return (*s1 == *s2);
}

U_CAPI int8_t U_EXPORT2
uhash_compareScriptSet(UElement key0, UElement key1) {
    icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
    int32_t diff = s0->countMembers() - s1->countMembers();
    if (diff != 0) return static_cast<UBool>(diff);
    int32_t i0 = s0->nextSetBit(0);
    int32_t i1 = s1->nextSetBit(0);
    while ((diff = i0-i1) == 0 && i0 > 0) {
        i0 = s0->nextSetBit(i0+1);
        i1 = s1->nextSetBit(i1+1);
    }
    return (int8_t)diff;
}

U_CAPI int32_t U_EXPORT2
uhash_hashScriptSet(const UElement key) {
    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
    return s->hashCode();
}

U_CAPI void U_EXPORT2
uhash_deleteScriptSet(void *obj) {
    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
    delete s;
}