#include "config.h"
#include "IntlCollator.h"
#include "IntlObjectInlines.h"
#include "JSBoundFunction.h"
#include "JSCInlines.h"
#include "ObjectConstructor.h"
#include <wtf/HexNumber.h>
namespace JSC {
// Class metadata for IntlCollator. The initializer supplies, in order: the name string "Object",
// a pointer to the base class's s_info, two null entries, and the method table generated by
// CREATE_METHOD_TABLE for IntlCollator.
const ClassInfo IntlCollator::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlCollator) };
// File-private settings for the collator implementation.
namespace IntlCollatorInternal {
// Compile-time switch handed to dataLogLnIf() in this file; it is false by default.
constexpr bool verbose = false;
}
// Allocates a cell for an IntlCollator from the VM's heap, constructs the object in place
// with the supplied structure, runs finishCreation() on it and returns it.
IntlCollator* IntlCollator::create(VM& vm, Structure* structure)
{
    auto* collator = new (NotNull, allocateCell<IntlCollator>(vm.heap)) IntlCollator(vm, structure);
    collator->finishCreation(vm);
    return collator;
}
// Builds the Structure used by IntlCollator instances: an ObjectType cell carrying this
// class's StructureFlags and ClassInfo, attached to the given global object and prototype.
Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
{
    TypeInfo typeInfo(ObjectType, StructureFlags);
    return Structure::create(vm, globalObject, prototype, typeInfo, info());
}
// Forwards the VM and structure to the base class; no other work is done here.
IntlCollator::IntlCollator(VM& vm, Structure* structure)
    : Base(vm, structure)
{
}
// Second construction phase: delegates to the base class and then asserts that the
// finished object inherits from IntlCollator's ClassInfo.
void IntlCollator::finishCreation(VM& vm)
{
    Base::finishCreation(vm);

    ASSERT(inherits(vm, info()));
}
// GC tracing hook: visits the base class's children and then reports the m_boundCompare
// reference held by the collator to the visitor.
void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor)
{
    auto* collator = jsCast<IntlCollator*>(cell);
    ASSERT_GC_OBJECT_INHERITS(collator, info());

    Base::visitChildren(collator, visitor);

    visitor.append(collator->m_boundCompare);
}
// Produces the list of values for one relevant extension key when the collator is used
// for sorting.
//   Co: a null string followed by the collation keyword values ICU reports for `locale`,
//       omitting "standard" and "search" and substituting short names for four of the
//       long ICU names (dictionary, gb2312han, phonebook, traditional).
//   Kf: "false", "lower", "upper".
//   Kn: "false", "true".
// Any other key trips ASSERT_NOT_REACHED and yields an empty list.
Vector<String> IntlCollator::sortLocaleData(const String& locale, RelevantExtensionKey key)
{
    Vector<String> values;

    switch (key) {
    case RelevantExtensionKey::Co: {
        // The null string always leads the list.
        values.append({ });

        UErrorCode errorCode = U_ZERO_ERROR;
        auto keywords = std::unique_ptr<UEnumeration, ICUDeleter<uenum_close>>(ucol_getKeywordValuesForLocale("collation", locale.utf8().data(), false, &errorCode));
        if (!U_SUCCESS(errorCode))
            break;

        for (;;) {
            const char* name = uenum_next(keywords.get(), nullptr, &errorCode);
            // Stop at the end of the enumeration or on the first ICU error.
            if (!name || !U_SUCCESS(errorCode))
                break;

            auto isNamed = [name](const char* candidate) {
                return !strcmp(name, candidate);
            };

            // These two keyword values are never listed.
            if (isNamed("standard") || isNamed("search"))
                continue;

            if (isNamed("dictionary"))
                values.append("dict"_s);
            else if (isNamed("gb2312han"))
                values.append("gb2312"_s);
            else if (isNamed("phonebook"))
                values.append("phonebk"_s);
            else if (isNamed("traditional"))
                values.append("trad"_s);
            else
                values.append(name);
        }
        break;
    }

    case RelevantExtensionKey::Kf:
        values.reserveInitialCapacity(3);
        values.uncheckedAppend("false"_s);
        values.uncheckedAppend("lower"_s);
        values.uncheckedAppend("upper"_s);
        break;

    case RelevantExtensionKey::Kn:
        values.reserveInitialCapacity(2);
        values.uncheckedAppend("false"_s);
        values.uncheckedAppend("true"_s);
        break;

    default:
        ASSERT_NOT_REACHED();
    }

    return values;
}
// Produces the list of values for one relevant extension key when the collator is used
// for searching. The locale argument is ignored.
//   Co: only the null string.
//   Kf: "false", "lower", "upper".
//   Kn: "false", "true".
// Any other key trips ASSERT_NOT_REACHED and yields an empty list.
Vector<String> IntlCollator::searchLocaleData(const String&, RelevantExtensionKey key)
{
    Vector<String> values;

    switch (key) {
    case RelevantExtensionKey::Co:
        values.reserveInitialCapacity(1);
        values.append({ });
        break;

    case RelevantExtensionKey::Kf:
        values.reserveInitialCapacity(3);
        values.uncheckedAppend("false"_s);
        values.uncheckedAppend("lower"_s);
        values.uncheckedAppend("upper"_s);
        break;

    case RelevantExtensionKey::Kn:
        values.reserveInitialCapacity(2);
        values.uncheckedAppend("false"_s);
        values.uncheckedAppend("true"_s);
        break;

    default:
        ASSERT_NOT_REACHED();
    }

    return values;
}
// Configures this collator from the `locales` and `optionsValue` arguments.
// In order, it: canonicalizes the requested locales, reads the usage, localeMatcher,
// collation, numeric and caseFirst options, resolves the locale, reads the sensitivity and
// ignorePunctuation options, opens an ICU collator for the resolved data locale and applies
// the collected settings to it as ICU attributes.
// On any failure a JS exception is thrown on `globalObject` and the function returns early;
// every option read is followed by an exception check.
void IntlCollator::initializeCollator(JSGlobalObject* globalObject, JSValue locales, JSValue optionsValue)
{
VM& vm = globalObject->vm();
auto scope = DECLARE_THROW_SCOPE(vm);
auto requestedLocales = canonicalizeLocaleList(globalObject, locales);
RETURN_IF_EXCEPTION(scope, void());
// An undefined options value is kept as-is; any other value is converted with toObject().
JSValue options = optionsValue;
if (!optionsValue.isUndefined()) {
options = optionsValue.toObject(globalObject);
RETURN_IF_EXCEPTION(scope, void());
}
// "usage" accepts "sort" or "search"; Usage::Sort is passed as the final (presumably fallback) argument.
m_usage = intlOption<Usage>(globalObject, options, vm.propertyNames->usage, { { "sort"_s, Usage::Sort }, { "search"_s, Usage::Search } }, "usage must be either \"sort\" or \"search\""_s, Usage::Sort);
RETURN_IF_EXCEPTION(scope, void());
// The usage decides which locale-data provider is handed to resolveLocale() below.
auto localeData = (m_usage == Usage::Sort) ? sortLocaleData : searchLocaleData;
ResolveLocaleOptions localeOptions;
LocaleMatcher localeMatcher = intlOption<LocaleMatcher>(globalObject, options, vm.propertyNames->localeMatcher, { { "lookup"_s, LocaleMatcher::Lookup }, { "best fit"_s, LocaleMatcher::BestFit } }, "localeMatcher must be either \"lookup\" or \"best fit\""_s, LocaleMatcher::BestFit);
RETURN_IF_EXCEPTION(scope, void());
{
// "collation" is free-form but must pass isUnicodeLocaleIdentifierType(); otherwise a RangeError is thrown.
// A non-null value is recorded under the Co key. This `collation` local is confined to this scope.
String collation = intlStringOption(globalObject, options, vm.propertyNames->collation, { }, nullptr, nullptr);
RETURN_IF_EXCEPTION(scope, void());
if (!collation.isNull()) {
if (!isUnicodeLocaleIdentifierType(collation)) {
throwRangeError(globalObject, scope, "collation is not a well-formed collation value"_s);
return;
}
localeOptions[static_cast<unsigned>(RelevantExtensionKey::Co)] = WTFMove(collation);
}
}
// "numeric" is tri-state: only a definite true/false is recorded under the Kn key.
TriState numeric = intlBooleanOption(globalObject, options, vm.propertyNames->numeric);
RETURN_IF_EXCEPTION(scope, void());
if (numeric != TriState::Indeterminate)
localeOptions[static_cast<unsigned>(RelevantExtensionKey::Kn)] = String(numeric == TriState::True ? "true"_s : "false"_s);
// "caseFirst" is restricted to "upper", "lower" or "false"; a non-null value is recorded under the Kf key.
String caseFirstOption = intlStringOption(globalObject, options, vm.propertyNames->caseFirst, { "upper", "lower", "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"", nullptr);
RETURN_IF_EXCEPTION(scope, void());
if (!caseFirstOption.isNull())
localeOptions[static_cast<unsigned>(RelevantExtensionKey::Kf)] = caseFirstOption;
// Resolve the locale against the collator's available locales, considering the Co, Kf and Kn keys.
auto& availableLocales = intlCollatorAvailableLocales();
auto resolved = resolveLocale(globalObject, availableLocales, requestedLocales, localeMatcher, localeOptions, { RelevantExtensionKey::Co, RelevantExtensionKey::Kf, RelevantExtensionKey::Kn }, localeData);
m_locale = resolved.locale;
// An empty resolved locale is reported as a TypeError.
if (m_locale.isEmpty()) {
throwTypeError(globalObject, scope, "failed to initialize Collator due to invalid locale"_s);
return;
}
// Copy the resolved extension values into members. A null resolved collation is stored as "default".
const String& collation = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Co)];
m_collation = collation.isNull() ? "default"_s : collation;
m_numeric = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Kn)] == "true"_s;
// Any resolved Kf value other than "lower" or "upper" maps to CaseFirst::False.
const String& caseFirstString = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Kf)];
if (caseFirstString == "lower")
m_caseFirst = CaseFirst::Lower;
else if (caseFirstString == "upper")
m_caseFirst = CaseFirst::Upper;
else
m_caseFirst = CaseFirst::False;
// "sensitivity" and "ignorePunctuation" are read only after locale resolution.
m_sensitivity = intlOption<Sensitivity>(globalObject, options, vm.propertyNames->sensitivity, { { "base"_s, Sensitivity::Base }, { "accent"_s, Sensitivity::Accent }, { "case"_s, Sensitivity::Case }, { "variant"_s, Sensitivity::Variant } }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\""_s, Sensitivity::Variant);
RETURN_IF_EXCEPTION(scope, void());
TriState ignorePunctuation = intlBooleanOption(globalObject, options, vm.propertyNames->ignorePunctuation);
RETURN_IF_EXCEPTION(scope, void());
// Only an explicit true enables ignorePunctuation; false and indeterminate both leave it off.
m_ignorePunctuation = (ignorePunctuation == TriState::True);
// Build the locale string given to ICU. For sorting, "-u-co-<collation>" is appended only when a
// collation was resolved; for searching, "-u-co-search" is always appended.
CString dataLocaleWithExtensions;
switch (m_usage) {
case Usage::Sort:
if (collation.isNull())
dataLocaleWithExtensions = resolved.dataLocale.utf8();
else
dataLocaleWithExtensions = makeString(resolved.dataLocale, "-u-co-", m_collation).utf8();
break;
case Usage::Search:
dataLocaleWithExtensions = makeString(resolved.dataLocale, "-u-co-search").utf8();
break;
}
dataLogLnIf(IntlCollatorInternal::verbose, "locale:(", resolved.locale, "),dataLocaleWithExtensions:(", dataLocaleWithExtensions, ")");
// Open the ICU collator; a failure status is reported as a TypeError.
UErrorCode status = U_ZERO_ERROR;
m_collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(dataLocaleWithExtensions.data(), &status));
if (U_FAILURE(status)) {
throwTypeError(globalObject, scope, "failed to initialize Collator"_s);
return;
}
// Translate sensitivity into ICU strength / case level:
//   Base    -> primary strength, case level off
//   Accent  -> secondary strength, case level off
//   Case    -> primary strength, case level on
//   Variant -> tertiary strength, case level off
UColAttributeValue strength = UCOL_PRIMARY;
UColAttributeValue caseLevel = UCOL_OFF;
UColAttributeValue caseFirst = UCOL_OFF;
switch (m_sensitivity) {
case Sensitivity::Base:
break;
case Sensitivity::Accent:
strength = UCOL_SECONDARY;
break;
case Sensitivity::Case:
caseLevel = UCOL_ON;
break;
case Sensitivity::Variant:
strength = UCOL_TERTIARY;
break;
}
// Translate caseFirst into the ICU case-first attribute value (off unless lower/upper was chosen).
switch (m_caseFirst) {
case CaseFirst::False:
break;
case CaseFirst::Lower:
caseFirst = UCOL_LOWER_FIRST;
break;
case CaseFirst::Upper:
caseFirst = UCOL_UPPER_FIRST;
break;
}
// Apply all attributes. The calls share one status variable, which is checked only by the
// ASSERT at the end. Normalization mode is always switched on.
ucol_setAttribute(m_collator.get(), UCOL_STRENGTH, strength, &status);
ucol_setAttribute(m_collator.get(), UCOL_CASE_LEVEL, caseLevel, &status);
ucol_setAttribute(m_collator.get(), UCOL_CASE_FIRST, caseFirst, &status);
ucol_setAttribute(m_collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status);
ucol_setAttribute(m_collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status);
ucol_setAttribute(m_collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
ASSERT(U_SUCCESS(status));
}
// Compares `x` with `y` using this collator and returns the collation result as a JS number.
// When both strings consist solely of characters accepted by canUseASCIIUCADUCETComparison,
// a cheaper route is taken: the built-in compareASCIIWithUCADUCET comparison if
// canDoASCIIUCADUCETComparison() allows it, otherwise ucol_strcollUTF8 when both strings are
// 8-bit. Every other case goes through ucol_strcoll on up-converted characters.
// Throws a JS Error when ICU reports a failure status.
JSValue IntlCollator::compareStrings(JSGlobalObject* globalObject, StringView x, StringView y) const
{
    ASSERT(m_collator);

    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    UErrorCode icuStatus = U_ZERO_ERROR;

    auto collate = [&]() -> UCollationResult {
        bool bothEligibleForShortcut = x.isAllSpecialCharacters<canUseASCIIUCADUCETComparison>() && y.isAllSpecialCharacters<canUseASCIIUCADUCETComparison>();
        if (bothEligibleForShortcut) {
            if (canDoASCIIUCADUCETComparison()) {
                // Dispatch on the character width of each operand.
                if (x.is8Bit()) {
                    if (y.is8Bit())
                        return compareASCIIWithUCADUCET(x.characters8(), x.length(), y.characters8(), y.length());
                    return compareASCIIWithUCADUCET(x.characters8(), x.length(), y.characters16(), y.length());
                }
                if (y.is8Bit())
                    return compareASCIIWithUCADUCET(x.characters16(), x.length(), y.characters8(), y.length());
                return compareASCIIWithUCADUCET(x.characters16(), x.length(), y.characters16(), y.length());
            }

            if (x.is8Bit() && y.is8Bit())
                return ucol_strcollUTF8(m_collator.get(), bitwise_cast<const char*>(x.characters8()), x.length(), bitwise_cast<const char*>(y.characters8()), y.length(), &icuStatus);
        }

        return ucol_strcoll(m_collator.get(), x.upconvertedCharacters(), x.length(), y.upconvertedCharacters(), y.length());
    };

    UCollationResult result = collate();
    if (U_FAILURE(icuStatus))
        return throwException(globalObject, scope, createError(globalObject, "Failed to compare strings."_s));

    return jsNumber(result);
}
// Maps a Usage value to its option string ("sort" or "search").
// An unexpected value trips ASSERT_NOT_REACHED and yields a null literal.
ASCIILiteral IntlCollator::usageString(Usage usage)
{
    switch (usage) {
    case Usage::Sort:
        return "sort"_s;
    case Usage::Search:
        return "search"_s;
    }

    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}
// Maps a Sensitivity value to its option string ("base", "accent", "case" or "variant").
// An unexpected value trips ASSERT_NOT_REACHED and yields a null literal.
ASCIILiteral IntlCollator::sensitivityString(Sensitivity sensitivity)
{
    switch (sensitivity) {
    case Sensitivity::Base:
        return "base"_s;
    case Sensitivity::Accent:
        return "accent"_s;
    case Sensitivity::Case:
        return "case"_s;
    case Sensitivity::Variant:
        return "variant"_s;
    }

    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}
// Maps a CaseFirst value to its option string ("false", "lower" or "upper").
// An unexpected value trips ASSERT_NOT_REACHED and yields a null literal.
ASCIILiteral IntlCollator::caseFirstString(CaseFirst caseFirst)
{
    switch (caseFirst) {
    case CaseFirst::False:
        return "false"_s;
    case CaseFirst::Lower:
        return "lower"_s;
    case CaseFirst::Upper:
        return "upper"_s;
    }

    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}
// Creates a fresh empty object and fills it with the collator's current settings.
// Properties are added in this order: locale, usage, sensitivity, ignorePunctuation,
// collation, numeric, caseFirst.
JSObject* IntlCollator::resolvedOptions(JSGlobalObject* globalObject) const
{
    VM& vm = globalObject->vm();
    const auto& names = *vm.propertyNames;

    JSObject* result = constructEmptyObject(globalObject);
    result->putDirect(vm, names.locale, jsString(vm, m_locale));
    result->putDirect(vm, names.usage, jsNontrivialString(vm, usageString(m_usage)));
    result->putDirect(vm, names.sensitivity, jsNontrivialString(vm, sensitivityString(m_sensitivity)));
    result->putDirect(vm, names.ignorePunctuation, jsBoolean(m_ignorePunctuation));
    result->putDirect(vm, names.collation, jsString(vm, m_collation));
    result->putDirect(vm, names.numeric, jsBoolean(m_numeric));
    result->putDirect(vm, names.caseFirst, jsNontrivialString(vm, caseFirstString(m_caseFirst)));
    return result;
}
// Stores the given bound function in m_boundCompare, with this collator as the owner
// passed to the setter.
void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format)
{
    m_boundCompare.set(vm, this, format);
}
// Reports whether the given ICU collator is configured so that the built-in ASCII comparison
// may stand in for it: each attribute listed below must hold the expected value, and the
// collator must report a zero-length rule string.
static bool canDoASCIIUCADUCETComparisonWithUCollator(UCollator& collator)
{
    static constexpr std::pair<UColAttribute, UColAttributeValue> expectedAttributes[] = {
        { UCOL_FRENCH_COLLATION, UCOL_OFF },
        { UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE },
        { UCOL_STRENGTH, UCOL_TERTIARY },
        { UCOL_CASE_LEVEL, UCOL_OFF },
        { UCOL_CASE_FIRST, UCOL_OFF },
        { UCOL_NUMERIC_COLLATION, UCOL_OFF },
    };

    for (auto& expectation : expectedAttributes) {
        UErrorCode errorCode = U_ZERO_ERROR;
        auto actualValue = ucol_getAttribute(&collator, expectation.first, &errorCode);
        ASSERT(U_SUCCESS(errorCode));
        if (actualValue != expectation.second)
            return false;
    }

    // A non-empty rule string disqualifies the collator.
    int32_t rulesLength = 0;
    ucol_getRules(&collator, &rulesLength);
    return !rulesLength;
}
// Recomputes whether the built-in ASCII comparison may be used for this collator, stores the
// answer in m_canDoASCIIUCADUCETComparison as a TriState and returns it.
// The answer is true only for a sort collator with "default" collation, variant sensitivity,
// caseFirst false, numeric off and ignorePunctuation off whose ICU collator also passes
// canDoASCIIUCADUCETComparisonWithUCollator(). The ICU check runs last and only when every
// member check has passed.
bool IntlCollator::updateCanDoASCIIUCADUCETComparison() const
{
    ASSERT(m_collator);

    bool compatible = m_usage == Usage::Sort
        && m_collation == "default"_s
        && m_sensitivity == Sensitivity::Variant
        && m_caseFirst == CaseFirst::False
        && !m_numeric
        && !m_ignorePunctuation
        && canDoASCIIUCADUCETComparisonWithUCollator(*m_collator);

    m_canDoASCIIUCADUCETComparison = triState(compatible);
    return compatible;
}
#if ASSERT_ENABLED
// Assertion-build self-check for the built-in ASCII comparison.
// For every locale in `locales`, opens an ICU collator and sets tertiary strength, case level
// off, case-first off, numeric collation off, default alternate handling and normalization
// on. Locales whose collator fails
// canDoASCIIUCADUCETComparisonWithUCollator() are skipped. For the rest it verifies that:
//   - the collator has no reorder codes,
//   - its contractions, expansions and tailored set contain no ASCII entries (crashes otherwise),
//   - ICU and compareASCIIWithUCADUCET agree on every pair of eligible ASCII code points.
void IntlCollator::checkICULocaleInvariants(const HashSet<String>& locales)
{
    for (auto& locale : locales) {
        // Compares each pair of ASCII code points accepted by canUseASCIIUCADUCETComparison with
        // both ICU and the built-in comparison, logging every disagreement.
        // Returns true when the two always agree.
        auto checkASCIIOrderingWithDUCET = [](const String& locale, UCollator& collator) {
            bool allAreGood = true;
            for (unsigned x = 0; x < 128; ++x) {
                for (unsigned y = 0; y < 128; ++y) {
                    if (canUseASCIIUCADUCETComparison(x) && canUseASCIIUCADUCETComparison(y)) {
                        UChar xstring[] = { static_cast<UChar>(x), 0 };
                        UChar ystring[] = { static_cast<UChar>(y), 0 };
                        // ucol_strcoll takes no error out-parameter, so there is no status to verify here.
                        auto resultICU = ucol_strcoll(&collator, xstring, 1, ystring, 1);
                        auto resultJSC = compareASCIIWithUCADUCET(xstring, 1, ystring, 1);
                        if (resultICU != resultJSC) {
                            dataLogLn("BAD ", locale, " ", makeString(hex(x)), "(", StringView(xstring, 1), ") <=> ", makeString(hex(y)), "(", StringView(ystring, 1), ") ICU:(", static_cast<int32_t>(resultICU), "),JSC:(", static_cast<int32_t>(resultJSC), ")");
                            allAreGood = false;
                        }
                    }
                }
            }
            return allAreGood;
        };

        UErrorCode status = U_ZERO_ERROR;
        auto collator = std::unique_ptr<UCollator, ICUDeleter<ucol_close>>(ucol_open(locale.ascii().data(), &status));
        ASSERT(U_SUCCESS(status));

        // Set the collator attributes explicitly before checking eligibility.
        ucol_setAttribute(collator.get(), UCOL_STRENGTH, UCOL_TERTIARY, &status);
        ASSERT(U_SUCCESS(status));
        ucol_setAttribute(collator.get(), UCOL_CASE_LEVEL, UCOL_OFF, &status);
        ASSERT(U_SUCCESS(status));
        ucol_setAttribute(collator.get(), UCOL_CASE_FIRST, UCOL_OFF, &status);
        ASSERT(U_SUCCESS(status));
        ucol_setAttribute(collator.get(), UCOL_NUMERIC_COLLATION, UCOL_OFF, &status);
        ASSERT(U_SUCCESS(status));
        ucol_setAttribute(collator.get(), UCOL_ALTERNATE_HANDLING, UCOL_DEFAULT, &status);
        ASSERT(U_SUCCESS(status));
        ucol_setAttribute(collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
        ASSERT(U_SUCCESS(status));

        // Locales that are not eligible for the built-in comparison need no further checks.
        if (!canDoASCIIUCADUCETComparisonWithUCollator(*collator))
            continue;

        // An eligible collator must not reorder scripts.
        int32_t length = ucol_getReorderCodes(collator.get(), nullptr, 0, &status);
        ASSERT(U_SUCCESS(status));
        ASSERT(!length);

        // Walks every item of `set` and crashes if it finds a range starting in ASCII or a
        // string made entirely of ASCII characters.
        auto ensureNotIncludingASCII = [&](USet& set) {
            Vector<UChar, 32> buffer;
            for (int32_t index = 0, count = uset_getItemCount(&set); index < count; ++index) {
                UChar32 start = 0;
                UChar32 end = 0;
                auto status = callBufferProducingFunction(uset_getItem, &set, index, &start, &end, buffer);
                ASSERT(U_SUCCESS(status));
                if (buffer.isEmpty()) {
                    // Empty buffer: the item is treated as a code point range.
                    if (isASCII(start)) {
                        dataLogLn("BAD ", locale, " including ASCII tailored characters");
                        CRASH();
                    }
                } else {
                    // Non-empty buffer: the item is a string.
                    if (StringView(buffer.data(), buffer.size()).isAllASCII()) {
                        dataLogLn("BAD ", locale, " ", String(buffer.data(), buffer.size()), " including ASCII tailored characters");
                        CRASH();
                    }
                }
            }
        };

        auto contractions = std::unique_ptr<USet, ICUDeleter<uset_close>>(uset_openEmpty());
        auto expansions = std::unique_ptr<USet, ICUDeleter<uset_close>>(uset_openEmpty());
        ucol_getContractionsAndExpansions(collator.get(), contractions.get(), expansions.get(), true, &status);
        ASSERT(U_SUCCESS(status));
        ensureNotIncludingASCII(*contractions);
        ensureNotIncludingASCII(*expansions);

        auto tailored = std::unique_ptr<USet, ICUDeleter<uset_close>>(ucol_getTailoredSet(collator.get(), &status));
        // Verify the call succeeded before dereferencing the returned set.
        ASSERT(U_SUCCESS(status));
        ensureNotIncludingASCII(*tailored);

        dataLogLnIf(IntlCollatorInternal::verbose, "LOCALE ", locale);
        ASSERT(checkASCIIOrderingWithDUCET(locale, *collator));
    }
}
#endif
}