#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
#include "unicode/tblcoll.h"
#include "unicode/coleitr.h"
#include "unicode/ures.h"
#include "unicode/uset.h"
#include "ucol_imp.h"
#include "uresimp.h"
#include "uhash.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
U_NAMESPACE_BEGIN
/**
 * Copy constructor.
 *
 * The new object copies the UCollator and rule-string pointers of `that`
 * and is flagged as not owning them (dataIsOwned is FALSE), so the
 * destructor of this copy will not release the shared data.
 *
 * @param that collator whose pointers are shared
 */
RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
    : Collator(that),
      dataIsOwned(FALSE),
      isWriteThroughAlias(FALSE),
      ucollator(that.ucollator),
      urulestring(that.urulestring)
{
    // Nothing else to do: all state is set up in the initializer list.
}
/**
 * Builds a collator from a rule string, using UCOL_DEFAULT_STRENGTH and
 * UCOL_DEFAULT as the strength and decomposition arguments of construct().
 *
 * @param rules  collation rules forwarded to construct()
 * @param status in/out ICU error code, forwarded to construct()
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     UErrorCode& status)
    : dataIsOwned(FALSE)
{
    // construct() assigns the remaining members.
    construct(rules, UCOL_DEFAULT_STRENGTH, UCOL_DEFAULT, status);
}
/**
 * Builds a collator from a rule string with an explicit strength; the
 * decomposition argument passed to construct() is UCOL_DEFAULT.
 *
 * @param rules             collation rules forwarded to construct()
 * @param collationStrength strength, converted with getUCollationStrength()
 * @param status            in/out ICU error code, forwarded to construct()
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     ECollationStrength collationStrength,
                                     UErrorCode& status)
    : dataIsOwned(FALSE)
{
    const UCollationStrength cStrength = getUCollationStrength(collationStrength);
    construct(rules, cStrength, UCOL_DEFAULT, status);
}
/**
 * Builds a collator from a rule string with an explicit decomposition mode;
 * the strength argument passed to construct() is UCOL_DEFAULT_STRENGTH.
 *
 * @param rules             collation rules forwarded to construct()
 * @param decompositionMode decomposition mode forwarded to construct()
 * @param status            in/out ICU error code, forwarded to construct()
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     UColAttributeValue decompositionMode,
                                     UErrorCode& status)
    : dataIsOwned(FALSE)
{
    construct(rules, UCOL_DEFAULT_STRENGTH, decompositionMode, status);
}
/**
 * Builds a collator from a rule string with explicit strength and
 * decomposition mode.
 *
 * @param rules             collation rules forwarded to construct()
 * @param collationStrength strength, converted with getUCollationStrength()
 * @param decompositionMode decomposition mode forwarded to construct()
 * @param status            in/out ICU error code, forwarded to construct()
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     ECollationStrength collationStrength,
                                     UColAttributeValue decompositionMode,
                                     UErrorCode& status)
    : dataIsOwned(FALSE)
{
    const UCollationStrength cStrength = getUCollationStrength(collationStrength);
    construct(rules, cStrength, decompositionMode, status);
}
/**
 * Re-creates urulestring from the rules reported by ucol_getRules() for the
 * current ucollator.
 *
 * urulestring is always reset to NULL first (the previous pointer is not
 * deleted here). If status already holds a failure nothing else happens.
 *
 * @param status in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR when
 *               the new UnicodeString could not be allocated
 */
void
RuleBasedCollator::setRuleStringFromCollator(UErrorCode& status)
{
    urulestring = NULL;
    if (U_FAILURE(status)) {
        return;
    }

    int32_t ruleLength;
    const UChar *ruleText = ucol_getRules(ucollator, &ruleLength);

    // Non-empty rules use the (TRUE, buffer, length) UnicodeString
    // constructor; otherwise an empty string object is created.
    urulestring = (ruleLength > 0)
                      ? new UnicodeString(TRUE, ruleText, ruleLength)
                      : new UnicodeString();

    if (urulestring == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
void
RuleBasedCollator::construct(const UnicodeString& rules,
UColAttributeValue collationStrength,
UColAttributeValue decompositionMode,
UErrorCode& status)
{
urulestring = 0;
ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
decompositionMode, collationStrength,
NULL, &status);
dataIsOwned = TRUE; isWriteThroughAlias = FALSE;
setRuleStringFromCollator(status);
}
/**
 * Destructor. Releases the UCollator and the rule string only when this
 * object owns them (dataIsOwned); both pointers are cleared in any case.
 */
RuleBasedCollator::~RuleBasedCollator()
{
    if (dataIsOwned) {
        ucol_close(ucollator);
        delete urulestring;
    }

    urulestring = 0;
    ucollator = 0;
}
/**
 * Equality test.
 *
 * Returns TRUE if Collator::operator== already reports equality, FALSE if
 * the dynamic class IDs differ, and otherwise the result of ucol_equals()
 * on the two underlying UCollator objects.
 *
 * @param that collator to compare against
 * @return TRUE when considered equal, FALSE otherwise
 */
UBool RuleBasedCollator::operator==(const Collator& that) const
{
    if (Collator::operator==(that)) {
        return TRUE;
    }

    if (getDynamicClassID() != that.getDynamicClassID()) {
        return FALSE;
    }

    // Same dynamic class ID, so `that` is treated as a RuleBasedCollator.
    RuleBasedCollator& other = (RuleBasedCollator&)that;
    return ucol_equals(ucollator, other.ucollator);
}
/**
 * Inequality test, defined as the negation of operator==.
 *
 * @param other collator to compare against
 * @return TRUE when operator== reports FALSE, and vice versa
 */
UBool RuleBasedCollator::operator!=(const Collator& other) const
{
    const UBool isSame = (*this == other);
    return !isSame;
}
/**
 * Assignment operator.
 *
 * Releases any data this object owns, then copies the UCollator and
 * rule-string pointers of `that` and marks this object as not owning them.
 * Self-assignment is a no-op.
 *
 * @param that collator whose pointers are shared
 * @return *this
 */
RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
{
    if (this == &that) {
        return *this;
    }

    if (dataIsOwned) {
        // Drop our own collator and rule string before sharing the other's.
        ucol_close(ucollator);
        delete urulestring;
    }

    ucollator = that.ucollator;
    urulestring = that.urulestring;
    dataIsOwned = FALSE;
    isWriteThroughAlias = FALSE;

    return *this;
}
/**
 * Creates a heap-allocated copy of this collator through the copy
 * constructor.
 *
 * @return the new object; the caller receives the pointer returned by `new`
 */
Collator* RuleBasedCollator::clone() const
{
    RuleBasedCollator *copy = new RuleBasedCollator(*this);
    return copy;
}
/**
 * Creates a CollationElementIterator over a UnicodeString for this collator.
 *
 * @param source text to iterate over
 * @return the new iterator, or NULL if its construction reported a failure
 */
CollationElementIterator* RuleBasedCollator::createCollationElementIterator
                                       (const UnicodeString& source) const
{
    UErrorCode ec = U_ZERO_ERROR;
    CollationElementIterator *iter =
        new CollationElementIterator(source, this, ec);

    if (U_SUCCESS(ec)) {
        return iter;
    }

    // Construction failed: discard the iterator.
    delete iter;
    return NULL;
}
/**
 * Creates a CollationElementIterator over a CharacterIterator for this
 * collator.
 *
 * @param source character iterator supplying the text
 * @return the new iterator, or NULL if its construction reported a failure
 */
CollationElementIterator* RuleBasedCollator::createCollationElementIterator
                                       (const CharacterIterator& source) const
{
    UErrorCode ec = U_ZERO_ERROR;
    CollationElementIterator *iter =
        new CollationElementIterator(source, this, ec);

    if (U_SUCCESS(ec)) {
        return iter;
    }

    // Construction failed: discard the iterator.
    delete iter;
    return NULL;
}
/**
 * Returns the rule string held by this collator.
 *
 * Note: urulestring is dereferenced without a NULL check.
 *
 * @return reference to the stored rule string
 */
const UnicodeString& RuleBasedCollator::getRules() const
{
    const UnicodeString &stored = *urulestring;
    return stored;
}
/**
 * Copies the rules returned by ucol_getRulesEx() into `buffer`.
 *
 * The required size is queried first; the rules are then fetched into a
 * temporary heap buffer and copied into `buffer`. When the reported size is
 * not positive, or the temporary buffer cannot be allocated, `buffer` is
 * emptied instead.
 *
 * @param delta  rule option passed to ucol_getRulesEx()
 * @param buffer receives the rules
 */
void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
{
    const int32_t needed = ucol_getRulesEx(ucollator, delta, NULL, -1);
    if (needed <= 0) {
        buffer.remove();
        return;
    }

    UChar *scratch = (UChar*) uprv_malloc( sizeof(UChar) * (needed) );
    if (scratch == NULL) {
        // Allocation failed: report no rules.
        buffer.remove();
        return;
    }

    ucol_getRulesEx(ucollator, delta, scratch, needed);
    buffer.setTo(scratch, needed);
    uprv_free(scratch);
}
/**
 * Returns the result of ucol_getTailoredSet() for this collator, cast to
 * UnicodeSet*.
 *
 * @param status in/out ICU error code; on an incoming failure NULL is
 *               returned without calling the C API
 * @return the set pointer from ucol_getTailoredSet(), or NULL
 */
UnicodeSet *
RuleBasedCollator::getTailoredSet(UErrorCode &status) const
{
    if (U_SUCCESS(status)) {
        return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
    }
    return NULL;
}
/**
 * Fills `versionInfo` through ucol_getVersion().
 *
 * @param versionInfo output array; a NULL argument is ignored
 */
void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const
{
    if (versionInfo == NULL) {
        return;
    }
    ucol_getVersion(ucollator, versionInfo);
}
/**
 * Compares at most `length` code units of each string.
 *
 * Each string's length is capped with uprv_min(length, <string length>)
 * and the comparison is delegated to the UChar-pointer overload that takes
 * a UErrorCode; the result is converted with getEComparisonResult().
 *
 * @param source first string
 * @param target second string
 * @param length upper bound on the number of code units compared
 * @return comparison result as an EComparisonResult
 */
Collator::EComparisonResult RuleBasedCollator::compare(
                                               const UnicodeString& source,
                                               const UnicodeString& target,
                                               int32_t length) const
{
    UErrorCode status = U_ZERO_ERROR;
    const int32_t sourceLen = uprv_min(length, source.length());
    const int32_t targetLen = uprv_min(length, target.length());

    const UCollationResult order =
        compare(source.getBuffer(), sourceLen,
                target.getBuffer(), targetLen, status);
    return getEComparisonResult(order);
}
/**
 * Compares at most `length` code units of each string.
 *
 * Each string's length is capped with uprv_min(length, <string length>)
 * and the comparison is delegated to the UChar-pointer overload that takes
 * a UErrorCode.
 *
 * @param source first string
 * @param target second string
 * @param length upper bound on the number of code units compared
 * @param status in/out ICU error code, forwarded to the delegate
 * @return comparison result from the delegate
 */
UCollationResult RuleBasedCollator::compare(
                                               const UnicodeString& source,
                                               const UnicodeString& target,
                                               int32_t length,
                                               UErrorCode &status) const
{
    const int32_t sourceLen = uprv_min(length, source.length());
    const int32_t targetLen = uprv_min(length, target.length());

    return compare(source.getBuffer(), sourceLen,
                   target.getBuffer(), targetLen, status);
}
/**
 * Compares two UChar buffers with ucol_strcoll() and converts the result
 * with getEComparisonResult().
 *
 * @param source       first buffer
 * @param sourceLength length argument for the first buffer
 * @param target       second buffer
 * @param targetLength length argument for the second buffer
 * @return comparison result as an EComparisonResult
 */
Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
                                                       int32_t sourceLength,
                                                       const UChar* target,
                                                       int32_t targetLength)
                                                       const
{
    const UCollationResult order =
        ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
    return getEComparisonResult(order);
}
/**
 * Compares two UChar buffers with ucol_strcoll().
 *
 * @param source       first buffer
 * @param sourceLength length argument for the first buffer
 * @param target       second buffer
 * @param targetLength length argument for the second buffer
 * @param status       ICU error code; it is only read here, and when it
 *                     already holds a failure UCOL_EQUAL is returned
 *                     without comparing
 * @return result of ucol_strcoll(), or UCOL_EQUAL on an incoming failure
 */
UCollationResult RuleBasedCollator::compare(const UChar* source,
                                            int32_t sourceLength,
                                            const UChar* target,
                                            int32_t targetLength,
                                            UErrorCode &status) const
{
    if (U_FAILURE(status)) {
        return UCOL_EQUAL;
    }
    return ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
}
/**
 * Compares two complete UnicodeStrings with ucol_strcoll() and converts the
 * result with getEComparisonResult().
 *
 * @param source first string
 * @param target second string
 * @return comparison result as an EComparisonResult
 */
Collator::EComparisonResult RuleBasedCollator::compare(
                                             const UnicodeString& source,
                                             const UnicodeString& target) const
{
    const UCollationResult order =
        ucol_strcoll(ucollator,
                     source.getBuffer(), source.length(),
                     target.getBuffer(), target.length());
    return getEComparisonResult(order);
}
/**
 * Compares two complete UnicodeStrings with ucol_strcoll().
 *
 * @param source first string
 * @param target second string
 * @param status ICU error code; it is only read here, and when it already
 *               holds a failure UCOL_EQUAL is returned without comparing
 * @return result of ucol_strcoll(), or UCOL_EQUAL on an incoming failure
 */
UCollationResult RuleBasedCollator::compare(
                                             const UnicodeString& source,
                                             const UnicodeString& target,
                                             UErrorCode &status) const
{
    if (U_FAILURE(status)) {
        return UCOL_EQUAL;
    }
    return ucol_strcoll(ucollator,
                        source.getBuffer(), source.length(),
                        target.getBuffer(), target.length());
}
/**
 * Computes the collation key for a UnicodeString by delegating to the
 * UChar-pointer overload with the string's buffer and length.
 *
 * @param source  text to build the key for
 * @param sortkey key object that receives the result
 * @param status  in/out ICU error code
 * @return reference returned by the delegate
 */
CollationKey& RuleBasedCollator::getCollationKey(
                                                  const UnicodeString& source,
                                                  CollationKey& sortkey,
                                                  UErrorCode& status) const
{
    const UChar  *text    = source.getBuffer();
    const int32_t textLen = source.length();
    return getCollationKey(text, textLen, sortkey, status);
}
/**
 * Computes the collation key for a UChar buffer.
 *
 * - On an incoming failure the key is set to bogus.
 * - A NULL source or a zero length resets the key.
 * - Otherwise the bytes produced by ucol_getSortKeyWithAllocation() are
 *   handed to sortkey.adopt().
 *
 * @param source    text to build the key for
 * @param sourceLen length argument for `source`
 * @param sortkey   key object that receives the result
 * @param status    in/out ICU error code
 * @return `sortkey` (or the reference returned by setToBogus()/reset())
 */
CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
                                                  int32_t sourceLen,
                                                  CollationKey& sortkey,
                                                  UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return sortkey.setToBogus();
    }

    if (source == NULL || sourceLen == 0) {
        return sortkey.reset();
    }

    uint8_t *keyBytes;
    const int32_t keyLen = ucol_getSortKeyWithAllocation(ucollator,
                                                         source, sourceLen,
                                                         &keyBytes,
                                                         &status);
    sortkey.adopt(keyBytes, keyLen);
    return sortkey;
}
/**
 * Returns the value that the UCOL_GETMAXEXPANSION macro produces for the
 * given collation element order.
 *
 * @param order collation element order; cast to uint32_t for the macro
 * @return the 8-bit value written by the macro, widened to int32_t
 */
int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
{
// NOTE(review): `result` has no initializer; this relies on the macro
// assigning it on every path - confirm against the macro definition.
uint8_t result;
// Statement macro (defined outside this file) that stores into `result`.
UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
return result;
}
/**
 * Returns the result of ucol_cloneRuleData() for this collator.
 *
 * @param length receives the length written by ucol_cloneRuleData()
 * @param status in/out ICU error code
 * @return pointer returned by ucol_cloneRuleData()
 */
uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
                                          UErrorCode &status)
{
    uint8_t *ruleData = ucol_cloneRuleData(ucollator, &length, &status);
    return ruleData;
}
/**
 * Sets a collation attribute through ucol_setAttribute().
 *
 * checkOwned() is called first so that the modification is applied to a
 * collator this object owns (or to a write-through alias).
 *
 * @param attr   attribute to set
 * @param value  new attribute value
 * @param status in/out ICU error code; nothing happens on incoming failure
 */
void RuleBasedCollator::setAttribute(UColAttribute attr,
                                     UColAttributeValue value,
                                     UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        checkOwned();
        ucol_setAttribute(ucollator, attr, value, &status);
    }
}
/**
 * Reads a collation attribute through ucol_getAttribute().
 *
 * @param attr   attribute to read
 * @param status in/out ICU error code
 * @return the attribute value, or UCOL_DEFAULT on an incoming failure
 */
UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
                                                   UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        return ucol_getAttribute(ucollator, attr, &status);
    }
    return UCOL_DEFAULT;
}
/**
 * Sets the variable top from a UChar buffer through ucol_setVariableTop().
 * checkOwned() is called before the collator is modified.
 *
 * @param varTop text passed to ucol_setVariableTop()
 * @param len    length argument for `varTop`
 * @param status in/out ICU error code
 * @return value returned by ucol_setVariableTop()
 */
uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status)
{
    checkOwned();
    const uint32_t newTop = ucol_setVariableTop(ucollator, varTop, len, &status);
    return newTop;
}
/**
 * Sets the variable top from a UnicodeString through ucol_setVariableTop().
 * checkOwned() is called before the collator is modified.
 *
 * @param varTop text whose buffer and length are passed to the C API
 * @param status in/out ICU error code
 * @return value returned by ucol_setVariableTop()
 */
uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status)
{
    checkOwned();
    const uint32_t newTop = ucol_setVariableTop(ucollator,
                                                varTop.getBuffer(),
                                                varTop.length(),
                                                &status);
    return newTop;
}
/**
 * Sets the variable top from a numeric value through
 * ucol_restoreVariableTop(). checkOwned() is called before the collator is
 * modified.
 *
 * @param varTop value passed to ucol_restoreVariableTop()
 * @param status in/out ICU error code
 */
void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status)
{
    checkOwned();
    ucol_restoreVariableTop(ucollator, varTop, &status);
}
/**
 * Returns the variable top reported by ucol_getVariableTop().
 *
 * @param status in/out ICU error code
 * @return value returned by ucol_getVariableTop()
 */
uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const
{
    const uint32_t top = ucol_getVariableTop(ucollator, &status);
    return top;
}
/**
 * Creates an independent copy of this collator: the UCollator is duplicated
 * with ucol_safeClone() and the rule string is copied, and the returned
 * object owns both.
 *
 * Allocation results are checked the same way the rest of this file checks
 * `new` (a NULL result means out of memory). On any failure everything
 * allocated so far is released and NULL is returned, instead of
 * dereferencing a NULL pointer or leaking the cloned UCollator.
 *
 * @return the new collator, or NULL if cloning or allocation failed
 */
Collator* RuleBasedCollator::safeClone(void)
{
    UErrorCode intStatus = U_ZERO_ERROR;
    int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
    UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize,
                                     &intStatus);
    if (U_FAILURE(intStatus)) {
        return NULL;
    }

    // urulestring can be NULL (e.g. after a failed rule-string allocation);
    // fall back to an empty rule string rather than dereferencing NULL.
    UnicodeString *r = (urulestring != NULL) ? new UnicodeString(*urulestring)
                                             : new UnicodeString();
    if (r == NULL) {
        ucol_close(ucol);
        return NULL;
    }

    RuleBasedCollator *result = new RuleBasedCollator(ucol, r);
    if (result == NULL) {
        // Nothing adopted the pieces, so release them here.
        delete r;
        ucol_close(ucol);
        return NULL;
    }

    // The clone owns its collator and rule string.
    result->dataIsOwned = TRUE;
    result->isWriteThroughAlias = FALSE;
    return result;
}
/**
 * Writes the sort key for a UnicodeString into a caller-supplied buffer
 * through ucol_getSortKey().
 *
 * @param source       text to build the key for
 * @param result       destination buffer passed to ucol_getSortKey()
 * @param resultLength capacity argument for `result`
 * @return value returned by ucol_getSortKey()
 */
int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
                                      uint8_t *result, int32_t resultLength)
                                      const
{
    const UChar  *text    = source.getBuffer();
    const int32_t textLen = source.length();
    return ucol_getSortKey(ucollator, text, textLen, result, resultLength);
}
/**
 * Writes the sort key for a UChar buffer into a caller-supplied buffer
 * through ucol_getSortKey().
 *
 * @param source       text to build the key for
 * @param sourceLength length argument for `source`
 * @param result       destination buffer passed to ucol_getSortKey()
 * @param resultLength capacity argument for `result`
 * @return value returned by ucol_getSortKey()
 */
int32_t RuleBasedCollator::getSortKey(const UChar *source,
                                      int32_t sourceLength, uint8_t *result,
                                      int32_t resultLength) const
{
    const int32_t keyLength =
        ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
    return keyLength;
}
/**
 * Returns the UCOL_STRENGTH attribute of the collator, converted with
 * getECollationStrength(). The error code of the attribute query is
 * discarded.
 *
 * @return current strength as an ECollationStrength
 */
Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
{
    UErrorCode ignored = U_ZERO_ERROR;
    const UColAttributeValue raw =
        ucol_getAttribute(ucollator, UCOL_STRENGTH, &ignored);
    return getECollationStrength(raw);
}
/**
 * Sets the UCOL_STRENGTH attribute of the collator. checkOwned() is called
 * before the collator is modified; the error code of the attribute update
 * is discarded.
 *
 * @param newStrength strength, converted with getUCollationStrength()
 */
void RuleBasedCollator::setStrength(ECollationStrength newStrength)
{
    checkOwned();

    UErrorCode ignored = U_ZERO_ERROR;
    const UCollationStrength cStrength = getUCollationStrength(newStrength);
    ucol_setAttribute(ucollator, UCOL_STRENGTH, cStrength, &ignored);
}
/**
 * Computes a hash code by applying uhash_hashUCharsN() to the rule text
 * returned by ucol_getRules().
 *
 * @return hash of the collator's rule text
 */
int32_t RuleBasedCollator::hashCode() const
{
    int32_t ruleLength;
    const UChar *ruleText = ucol_getRules(ucollator, &ruleLength);
    const int32_t hash = uhash_hashUCharsN(ruleText, ruleLength);
    return hash;
}
/**
 * Returns the locale of the requested type as reported by ucol_getLocale().
 *
 * @param type   which locale to return
 * @param status in/out ICU error code, forwarded to ucol_getLocale()
 * @return a Locale built from the returned name, or a bogus Locale when
 *         ucol_getLocale() returns NULL
 */
const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const
{
    const char *localeName = ucol_getLocale(ucollator, type, &status);
    if (localeName != NULL) {
        return Locale(localeName);
    }

    // No name available: hand back a Locale explicitly marked as bogus.
    Locale bogus("");
    bogus.setToBogus();
    return bogus;
}
void
RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale) {
checkOwned();
size_t rlen = uprv_strlen(requestedLocale.getName());
char* rloc = (char *)uprv_malloc((rlen+1)*sizeof(char));
if (rloc) {
uprv_strcpy(rloc, requestedLocale.getName());
size_t vlen = uprv_strlen(validLocale.getName());
char* vloc = (char*)uprv_malloc((vlen+1)*sizeof(char));
if (vloc) {
uprv_strcpy(vloc, validLocale.getName());
ucol_setReqValidLocales(ucollator, rloc, vloc);
return;
}
uprv_free(rloc);
}
}
/**
 * Default constructor. Creates an object with no collator and no rule
 * string, owning nothing.
 */
RuleBasedCollator::RuleBasedCollator()
    : dataIsOwned(FALSE),
      isWriteThroughAlias(FALSE),
      ucollator(0),
      urulestring(0)
{
    // All members are set in the initializer list.
}
/**
 * Wraps an existing UCollator and rule string. The new object stores both
 * pointers but is flagged as not owning them (dataIsOwned is FALSE).
 *
 * @param collator collator pointer to store
 * @param rule     rule-string pointer to store
 */
RuleBasedCollator::RuleBasedCollator(UCollator *collator,
                                     UnicodeString *rule)
    : dataIsOwned(FALSE),
      isWriteThroughAlias(FALSE),
      ucollator(collator),
      urulestring(rule)
{
}
/**
 * Builds a collator for a locale.
 *
 * The collator is opened with setUCollator() for `desiredLocale`. If that
 * fails, the status is cleared and kRootLocaleName is tried instead; when
 * that attempt ends with exactly U_ZERO_ERROR the status is changed to
 * U_USING_DEFAULT_WARNING. On success the rule string is derived
 * from the opened collator.
 *
 * All four data members are initialized up front so that an early return
 * (incoming failure) never leaves isWriteThroughAlias indeterminate;
 * checkOwned() reads that flag later.
 *
 * @param desiredLocale locale to open the collator for
 * @param status        in/out ICU error code; nothing is opened when it
 *                      already holds a failure. Set to
 *                      U_MEMORY_ALLOCATION_ERROR if the rule string cannot
 *                      be allocated.
 */
RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
                                     UErrorCode& status) :
    dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(0), urulestring(0)
{
    if (U_FAILURE(status))
        return;

    setUCollator(desiredLocale, status);

    if (U_FAILURE(status))
    {
        // Retry with the root locale and report the substitution as a warning.
        status = U_ZERO_ERROR;
        setUCollator(kRootLocaleName, status);
        if (status == U_ZERO_ERROR) {
            status = U_USING_DEFAULT_WARNING;
        }
    }

    if (U_SUCCESS(status))
    {
        // Shared helper: builds urulestring from ucol_getRules() and sets
        // U_MEMORY_ALLOCATION_ERROR if the string cannot be allocated.
        setRuleStringFromCollator(status);
        if (U_SUCCESS(status)) {
            dataIsOwned = TRUE;
            isWriteThroughAlias = FALSE;
        }
    }
}
void
RuleBasedCollator::setUCollator(const char *locale,
UErrorCode &status)
{
if (U_FAILURE(status))
return;
if (ucollator && dataIsOwned)
ucol_close(ucollator);
ucollator = ucol_open_internal(locale, &status);
dataIsOwned = TRUE;
isWriteThroughAlias = FALSE;
}
void
RuleBasedCollator::checkOwned() {
if (!(dataIsOwned || isWriteThroughAlias)) {
UErrorCode status = U_ZERO_ERROR;
ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
setRuleStringFromCollator(status);
dataIsOwned = TRUE;
isWriteThroughAlias = FALSE;
}
}
// Out-of-class definitions of the static int32_t constants declared on
// RuleBasedCollator. None of them is referenced elsewhere in this file.
// NOTE(review): the per-constant descriptions below are read off the names
// and bit patterns only - confirm against the class declaration.

// Index-type markers (by name); each sets only high-order bits.
const int32_t RuleBasedCollator::CHARINDEX = 0x70000000;
const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000;
const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000;
// NOTE(review): 0xFFFFFFFF is outside the positive int32_t range; the stored
// value is presumably -1 after conversion - confirm on supported compilers.
const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF;
// Unit increments: bit 16, bit 8 and bit 0 respectively.
const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000;
const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100;
const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001;
// Masks: upper 16 bits, bits 8-15, and lower 8 bits respectively.
const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000;
const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00;
const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff;
// Lower 16 bits (the bits outside PRIMARYORDERMASK).
const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff;
// Same bit pattern as PRIMARYORDERMASK; upper 24 bits for the secondary one.
const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000;
const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00;
// Shift counts matching the masks above (16 and 8 bits).
const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16;
const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8;
const int32_t RuleBasedCollator::COLELEMENTSTART = 0x02020202;
const int32_t RuleBasedCollator::PRIMARYLOWZEROMASK = 0x00FF0000;
const int32_t RuleBasedCollator::RESETSECONDARYTERTIARY = 0x00000202;
const int32_t RuleBasedCollator::RESETTERTIARY = 0x00000002;
const int32_t RuleBasedCollator::PRIMIGNORABLE = 0x0202;
// ICU macro defined outside this file; presumably supplies the class-ID
// (RTTI) functions such as the getDynamicClassID() used by operator== above.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
U_NAMESPACE_END
#endif