IntlCollator.cpp   [plain text]


/*
 * Copyright (C) 2015 Andy VanWagoner (andy@vanwagoner.family)
 * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com)
 * Copyright (C) 2016-2020 Apple Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "IntlCollator.h"

#include "IntlObjectInlines.h"
#include "JSBoundFunction.h"
#include "JSCInlines.h"
#include "ObjectConstructor.h"
#include <wtf/HexNumber.h>

namespace JSC {

const ClassInfo IntlCollator::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlCollator) };

namespace IntlCollatorInternal {
constexpr bool verbose = false;
}

IntlCollator* IntlCollator::create(VM& vm, Structure* structure)
{
    IntlCollator* format = new (NotNull, allocateCell<IntlCollator>(vm.heap)) IntlCollator(vm, structure);
    format->finishCreation(vm);
    return format;
}

Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
{
    return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
}

IntlCollator::IntlCollator(VM& vm, Structure* structure)
    : Base(vm, structure)
{
}

void IntlCollator::finishCreation(VM& vm)
{
    Base::finishCreation(vm);
    ASSERT(inherits(vm, info()));
}

void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor)
{
    IntlCollator* thisObject = jsCast<IntlCollator*>(cell);
    ASSERT_GC_OBJECT_INHERITS(thisObject, info());

    Base::visitChildren(thisObject, visitor);

    visitor.append(thisObject->m_boundCompare);
}

Vector<String> IntlCollator::sortLocaleData(const String& locale, RelevantExtensionKey key)
{
    // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0)
    Vector<String> keyLocaleData;
    switch (key) {
    case RelevantExtensionKey::Co: {
        // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values."
        keyLocaleData.append({ });

        UErrorCode status = U_ZERO_ERROR;
        auto enumeration = std::unique_ptr<UEnumeration, ICUDeleter<uenum_close>>(ucol_getKeywordValuesForLocale("collation", locale.utf8().data(), false, &status));
        if (U_SUCCESS(status)) {
            const char* collation;
            while ((collation = uenum_next(enumeration.get(), nullptr, &status)) && U_SUCCESS(status)) {
                // 10.2.3 "The values "standard" and "search" must not be used as elements in any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co array."
                if (!strcmp(collation, "standard") || !strcmp(collation, "search"))
                    continue;

                // Map keyword values to BCP 47 equivalents.
                if (!strcmp(collation, "dictionary"))
                    keyLocaleData.append("dict"_s);
                else if (!strcmp(collation, "gb2312han"))
                    keyLocaleData.append("gb2312"_s);
                else if (!strcmp(collation, "phonebook"))
                    keyLocaleData.append("phonebk"_s);
                else if (!strcmp(collation, "traditional"))
                    keyLocaleData.append("trad"_s);
                else
                    keyLocaleData.append(collation);
            }
        }
        break;
    }
    case RelevantExtensionKey::Kf:
        keyLocaleData.reserveInitialCapacity(3);
        keyLocaleData.uncheckedAppend("false"_s);
        keyLocaleData.uncheckedAppend("lower"_s);
        keyLocaleData.uncheckedAppend("upper"_s);
        break;
    case RelevantExtensionKey::Kn:
        keyLocaleData.reserveInitialCapacity(2);
        keyLocaleData.uncheckedAppend("false"_s);
        keyLocaleData.uncheckedAppend("true"_s);
        break;
    default:
        ASSERT_NOT_REACHED();
    }
    return keyLocaleData;
}

Vector<String> IntlCollator::searchLocaleData(const String&, RelevantExtensionKey key)
{
    // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0)
    Vector<String> keyLocaleData;
    switch (key) {
    case RelevantExtensionKey::Co:
        // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values."
        keyLocaleData.reserveInitialCapacity(1);
        keyLocaleData.append({ });
        break;
    case RelevantExtensionKey::Kf:
        keyLocaleData.reserveInitialCapacity(3);
        keyLocaleData.uncheckedAppend("false"_s);
        keyLocaleData.uncheckedAppend("lower"_s);
        keyLocaleData.uncheckedAppend("upper"_s);
        break;
    case RelevantExtensionKey::Kn:
        keyLocaleData.reserveInitialCapacity(2);
        keyLocaleData.uncheckedAppend("false"_s);
        keyLocaleData.uncheckedAppend("true"_s);
        break;
    default:
        ASSERT_NOT_REACHED();
    }
    return keyLocaleData;
}

// https://tc39.github.io/ecma402/#sec-initializecollator
void IntlCollator::initializeCollator(JSGlobalObject* globalObject, JSValue locales, JSValue optionsValue)
{
    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    auto requestedLocales = canonicalizeLocaleList(globalObject, locales);
    RETURN_IF_EXCEPTION(scope, void());

    JSValue options = optionsValue;
    if (!optionsValue.isUndefined()) {
        options = optionsValue.toObject(globalObject);
        RETURN_IF_EXCEPTION(scope, void());
    }

    m_usage = intlOption<Usage>(globalObject, options, vm.propertyNames->usage, { { "sort"_s, Usage::Sort }, { "search"_s, Usage::Search } }, "usage must be either \"sort\" or \"search\""_s, Usage::Sort);
    RETURN_IF_EXCEPTION(scope, void());

    auto localeData = (m_usage == Usage::Sort) ? sortLocaleData : searchLocaleData;

    ResolveLocaleOptions localeOptions;

    LocaleMatcher localeMatcher = intlOption<LocaleMatcher>(globalObject, options, vm.propertyNames->localeMatcher, { { "lookup"_s, LocaleMatcher::Lookup }, { "best fit"_s, LocaleMatcher::BestFit } }, "localeMatcher must be either \"lookup\" or \"best fit\""_s, LocaleMatcher::BestFit);
    RETURN_IF_EXCEPTION(scope, void());

    {
        String collation = intlStringOption(globalObject, options, vm.propertyNames->collation, { }, nullptr, nullptr);
        RETURN_IF_EXCEPTION(scope, void());
        if (!collation.isNull()) {
            if (!isUnicodeLocaleIdentifierType(collation)) {
                throwRangeError(globalObject, scope, "collation is not a well-formed collation value"_s);
                return;
            }
            localeOptions[static_cast<unsigned>(RelevantExtensionKey::Co)] = WTFMove(collation);
        }
    }

    TriState numeric = intlBooleanOption(globalObject, options, vm.propertyNames->numeric);
    RETURN_IF_EXCEPTION(scope, void());
    if (numeric != TriState::Indeterminate)
        localeOptions[static_cast<unsigned>(RelevantExtensionKey::Kn)] = String(numeric == TriState::True ? "true"_s : "false"_s);

    String caseFirstOption = intlStringOption(globalObject, options, vm.propertyNames->caseFirst, { "upper", "lower", "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"", nullptr);
    RETURN_IF_EXCEPTION(scope, void());
    if (!caseFirstOption.isNull())
        localeOptions[static_cast<unsigned>(RelevantExtensionKey::Kf)] = caseFirstOption;

    auto& availableLocales = intlCollatorAvailableLocales();
    auto resolved = resolveLocale(globalObject, availableLocales, requestedLocales, localeMatcher, localeOptions, { RelevantExtensionKey::Co, RelevantExtensionKey::Kf, RelevantExtensionKey::Kn }, localeData);

    m_locale = resolved.locale;
    if (m_locale.isEmpty()) {
        throwTypeError(globalObject, scope, "failed to initialize Collator due to invalid locale"_s);
        return;
    }

    const String& collation = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Co)];
    m_collation = collation.isNull() ? "default"_s : collation;
    m_numeric = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Kn)] == "true"_s;

    const String& caseFirstString = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Kf)];
    if (caseFirstString == "lower")
        m_caseFirst = CaseFirst::Lower;
    else if (caseFirstString == "upper")
        m_caseFirst = CaseFirst::Upper;
    else
        m_caseFirst = CaseFirst::False;

    m_sensitivity = intlOption<Sensitivity>(globalObject, options, vm.propertyNames->sensitivity, { { "base"_s, Sensitivity::Base }, { "accent"_s, Sensitivity::Accent }, { "case"_s, Sensitivity::Case }, { "variant"_s, Sensitivity::Variant } }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\""_s, Sensitivity::Variant);
    RETURN_IF_EXCEPTION(scope, void());

    TriState ignorePunctuation = intlBooleanOption(globalObject, options, vm.propertyNames->ignorePunctuation);
    RETURN_IF_EXCEPTION(scope, void());
    m_ignorePunctuation = (ignorePunctuation == TriState::True);

    // UCollator does not offer an option to configure "usage" via ucol_setAttribute. So we need to pass this option via locale.
    CString dataLocaleWithExtensions;
    switch (m_usage) {
    case Usage::Sort:
        if (collation.isNull())
            dataLocaleWithExtensions = resolved.dataLocale.utf8();
        else
            dataLocaleWithExtensions = makeString(resolved.dataLocale, "-u-co-", m_collation).utf8();
        break;
    case Usage::Search:
        // searchLocaleData filters out "co" unicode extension. However, we need to pass "co" to ICU when Usage::Search is specified.
        // So we need to pass "co" unicode extension through locale. Since the other relevant extensions are handled via ucol_setAttribute,
        // we can just use dataLocale
        // Since searchLocaleData filters out "co" unicode extension, "collation" option is just ignored.
        dataLocaleWithExtensions = makeString(resolved.dataLocale, "-u-co-search").utf8();
        break;
    }
    dataLogLnIf(IntlCollatorInternal::verbose, "locale:(", resolved.locale, "),dataLocaleWithExtensions:(", dataLocaleWithExtensions, ")");

    UErrorCode status = U_ZERO_ERROR;
    m_collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(dataLocaleWithExtensions.data(), &status));
    if (U_FAILURE(status)) {
        throwTypeError(globalObject, scope, "failed to initialize Collator"_s);
        return;
    }

    UColAttributeValue strength = UCOL_PRIMARY;
    UColAttributeValue caseLevel = UCOL_OFF;
    UColAttributeValue caseFirst = UCOL_OFF;
    switch (m_sensitivity) {
    case Sensitivity::Base:
        break;
    case Sensitivity::Accent:
        strength = UCOL_SECONDARY;
        break;
    case Sensitivity::Case:
        caseLevel = UCOL_ON;
        break;
    case Sensitivity::Variant:
        strength = UCOL_TERTIARY;
        break;
    }
    switch (m_caseFirst) {
    case CaseFirst::False:
        break;
    case CaseFirst::Lower:
        caseFirst = UCOL_LOWER_FIRST;
        break;
    case CaseFirst::Upper:
        caseFirst = UCOL_UPPER_FIRST;
        break;
    }

    // Keep in sync with canDoASCIIUCADUCETComparisonSlow about used attributes.
    ucol_setAttribute(m_collator.get(), UCOL_STRENGTH, strength, &status);
    ucol_setAttribute(m_collator.get(), UCOL_CASE_LEVEL, caseLevel, &status);
    ucol_setAttribute(m_collator.get(), UCOL_CASE_FIRST, caseFirst, &status);
    ucol_setAttribute(m_collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status);

    // FIXME: Setting UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED causes punctuation and whitespace to be
    // ignored. There is currently no way to ignore only punctuation.
    ucol_setAttribute(m_collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status);

    // "The method is required to return 0 when comparing Strings that are considered canonically
    // equivalent by the Unicode standard."
    ucol_setAttribute(m_collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    ASSERT(U_SUCCESS(status));
}

// https://tc39.es/ecma402/#sec-collator-comparestrings
JSValue IntlCollator::compareStrings(JSGlobalObject* globalObject, StringView x, StringView y) const
{
    ASSERT(m_collator);

    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    UErrorCode status = U_ZERO_ERROR;
    UCollationResult result = ([&]() -> UCollationResult {
        if (x.isAllSpecialCharacters<canUseASCIIUCADUCETComparison>() && y.isAllSpecialCharacters<canUseASCIIUCADUCETComparison>()) {
            if (canDoASCIIUCADUCETComparison()) {
                if (x.is8Bit() && y.is8Bit())
                    return compareASCIIWithUCADUCET(x.characters8(), x.length(), y.characters8(), y.length());
                if (x.is8Bit())
                    return compareASCIIWithUCADUCET(x.characters8(), x.length(), y.characters16(), y.length());
                if (y.is8Bit())
                    return compareASCIIWithUCADUCET(x.characters16(), x.length(), y.characters8(), y.length());
                return compareASCIIWithUCADUCET(x.characters16(), x.length(), y.characters16(), y.length());
            }

            if (x.is8Bit() && y.is8Bit())
                return ucol_strcollUTF8(m_collator.get(), bitwise_cast<const char*>(x.characters8()), x.length(), bitwise_cast<const char*>(y.characters8()), y.length(), &status);
        }
        return ucol_strcoll(m_collator.get(), x.upconvertedCharacters(), x.length(), y.upconvertedCharacters(), y.length());
    }());
    if (U_FAILURE(status))
        return throwException(globalObject, scope, createError(globalObject, "Failed to compare strings."_s));
    return jsNumber(result);
}

ASCIILiteral IntlCollator::usageString(Usage usage)
{
    switch (usage) {
    case Usage::Sort:
        return "sort"_s;
    case Usage::Search:
        return "search"_s;
    }
    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}

ASCIILiteral IntlCollator::sensitivityString(Sensitivity sensitivity)
{
    switch (sensitivity) {
    case Sensitivity::Base:
        return "base"_s;
    case Sensitivity::Accent:
        return "accent"_s;
    case Sensitivity::Case:
        return "case"_s;
    case Sensitivity::Variant:
        return "variant"_s;
    }
    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}

ASCIILiteral IntlCollator::caseFirstString(CaseFirst caseFirst)
{
    switch (caseFirst) {
    case CaseFirst::False:
        return "false"_s;
    case CaseFirst::Lower:
        return "lower"_s;
    case CaseFirst::Upper:
        return "upper"_s;
    }
    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}

// https://tc39.es/ecma402/#sec-intl.collator.prototype.resolvedoptions
JSObject* IntlCollator::resolvedOptions(JSGlobalObject* globalObject) const
{
    VM& vm = globalObject->vm();
    JSObject* options = constructEmptyObject(globalObject);
    options->putDirect(vm, vm.propertyNames->locale, jsString(vm, m_locale));
    options->putDirect(vm, vm.propertyNames->usage, jsNontrivialString(vm, usageString(m_usage)));
    options->putDirect(vm, vm.propertyNames->sensitivity, jsNontrivialString(vm, sensitivityString(m_sensitivity)));
    options->putDirect(vm, vm.propertyNames->ignorePunctuation, jsBoolean(m_ignorePunctuation));
    options->putDirect(vm, vm.propertyNames->collation, jsString(vm, m_collation));
    options->putDirect(vm, vm.propertyNames->numeric, jsBoolean(m_numeric));
    options->putDirect(vm, vm.propertyNames->caseFirst, jsNontrivialString(vm, caseFirstString(m_caseFirst)));
    return options;
}

void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format)
{
    m_boundCompare.set(vm, this, format);
}

static bool canDoASCIIUCADUCETComparisonWithUCollator(UCollator& collator)
{
    // Attributes are default ones unless we set. So, non-configured attributes are default ones.
    static constexpr std::pair<UColAttribute, UColAttributeValue> attributes[] = {
        { UCOL_FRENCH_COLLATION, UCOL_OFF },
        { UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE },
        { UCOL_STRENGTH, UCOL_TERTIARY },
        { UCOL_CASE_LEVEL, UCOL_OFF },
        { UCOL_CASE_FIRST, UCOL_OFF },
        { UCOL_NUMERIC_COLLATION, UCOL_OFF },
        // We do not check UCOL_NORMALIZATION_MODE status since FCD normalization does nothing for ASCII strings.
    };

    for (auto& pair : attributes) {
        UErrorCode status = U_ZERO_ERROR;
        auto result = ucol_getAttribute(&collator, pair.first, &status);
        ASSERT(U_SUCCESS(status));
        if (result != pair.second)
            return false;
    }

    // Check existence of tailoring rules. If they do not exist, collation algorithm is UCA DUCET.
    int32_t length = 0;
    ucol_getRules(&collator, &length);
    return !length;
}

bool IntlCollator::updateCanDoASCIIUCADUCETComparison() const
{
    // ICU uses the CLDR root collation order as a default starting point for ordering. (The CLDR root collation is based on the UCA DUCET.)
    // And customizes this root collation via rules.
    // The root collation is UCA DUCET and it is code-point comparison if the characters are all ASCII.
    // http://www.unicode.org/reports/tr10/
    ASSERT(m_collator);
    auto checkASCIIUCADUCETComparisonCompatibility = [&] {
        if (m_usage != Usage::Sort)
            return false;
        if (m_collation != "default"_s)
            return false;
        if (m_sensitivity != Sensitivity::Variant)
            return false;
        if (m_caseFirst != CaseFirst::False)
            return false;
        if (m_numeric)
            return false;
        if (m_ignorePunctuation)
            return false;
        return canDoASCIIUCADUCETComparisonWithUCollator(*m_collator);
    };
    bool result = checkASCIIUCADUCETComparisonCompatibility();
    m_canDoASCIIUCADUCETComparison = triState(result);
    return result;
}

#if ASSERT_ENABLED
void IntlCollator::checkICULocaleInvariants(const HashSet<String>& locales)
{
    for (auto& locale : locales) {
        auto checkASCIIOrderingWithDUCET = [](const String& locale, UCollator& collator) {
            bool allAreGood = true;
            for (unsigned x = 0; x < 128; ++x) {
                for (unsigned y = 0; y < 128; ++y) {
                    if (canUseASCIIUCADUCETComparison(x) && canUseASCIIUCADUCETComparison(y)) {
                        UErrorCode status = U_ZERO_ERROR;
                        UChar xstring[] = { static_cast<UChar>(x), 0 };
                        UChar ystring[] = { static_cast<UChar>(y), 0 };
                        auto resultICU = ucol_strcoll(&collator, xstring, 1, ystring, 1);
                        ASSERT(U_SUCCESS(status));
                        auto resultJSC = compareASCIIWithUCADUCET(xstring, 1, ystring, 1);
                        if (resultICU != resultJSC) {
                            dataLogLn("BAD ", locale, " ", makeString(hex(x)), "(", StringView(xstring, 1), ") <=> ", makeString(hex(y)), "(", StringView(ystring, 1), ") ICU:(", static_cast<int32_t>(resultICU), "),JSC:(", static_cast<int32_t>(resultJSC), ")");
                            allAreGood = false;
                        }
                    }
                }
            }
            return allAreGood;
        };

        UErrorCode status = U_ZERO_ERROR;
        auto collator = std::unique_ptr<UCollator, ICUDeleter<ucol_close>>(ucol_open(locale.ascii().data(), &status));

        ASSERT(U_SUCCESS(status));
        ucol_setAttribute(collator.get(), UCOL_STRENGTH, UCOL_TERTIARY, &status);
        ASSERT(U_SUCCESS(status));
        ucol_setAttribute(collator.get(), UCOL_CASE_LEVEL, UCOL_OFF, &status);
        ASSERT(U_SUCCESS(status));
        ucol_setAttribute(collator.get(), UCOL_CASE_FIRST, UCOL_OFF, &status);
        ASSERT(U_SUCCESS(status));
        ucol_setAttribute(collator.get(), UCOL_NUMERIC_COLLATION, UCOL_OFF, &status);
        ASSERT(U_SUCCESS(status));
        ucol_setAttribute(collator.get(), UCOL_ALTERNATE_HANDLING, UCOL_DEFAULT, &status);
        ASSERT(U_SUCCESS(status));
        ucol_setAttribute(collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
        ASSERT(U_SUCCESS(status));

        if (!canDoASCIIUCADUCETComparisonWithUCollator(*collator))
            continue;

        // This should not have reorder.
        int32_t length = ucol_getReorderCodes(collator.get(), nullptr, 0, &status);
        ASSERT(U_SUCCESS(status));
        ASSERT(!length);

        // Contractions and Expansions are defined as a rule. If there is no tailoring rule, then they should be UCA DUCET's default.

        auto ensureNotIncludingASCII = [&](USet& set) {
            Vector<UChar, 32> buffer;
            for (int32_t index = 0, count = uset_getItemCount(&set); index < count; ++index) {
                // start and end are inclusive.
                UChar32 start = 0;
                UChar32 end = 0;
                auto status = callBufferProducingFunction(uset_getItem, &set, index, &start, &end, buffer);
                ASSERT(U_SUCCESS(status));
                if (buffer.isEmpty()) {
                    if (isASCII(start)) {
                        dataLogLn("BAD ", locale, " including ASCII tailored characters");
                        CRASH();
                    }
                } else {
                    if (StringView(buffer.data(), buffer.size()).isAllASCII()) {
                        dataLogLn("BAD ", locale, " ", String(buffer.data(), buffer.size()), " including ASCII tailored characters");
                        CRASH();
                    }
                }
            }
        };

        auto contractions = std::unique_ptr<USet, ICUDeleter<uset_close>>(uset_openEmpty());
        auto expansions = std::unique_ptr<USet, ICUDeleter<uset_close>>(uset_openEmpty());
        ucol_getContractionsAndExpansions(collator.get(), contractions.get(), expansions.get(), true, &status);
        ASSERT(U_SUCCESS(status));

        ensureNotIncludingASCII(*contractions);
        ensureNotIncludingASCII(*expansions);

        // This locale should not have tailoring.
        auto tailored = std::unique_ptr<USet, ICUDeleter<uset_close>>(ucol_getTailoredSet(collator.get(), &status));
        ensureNotIncludingASCII(*tailored);

        dataLogLnIf(IntlCollatorInternal::verbose, "LOCALE ", locale);

        ASSERT(checkASCIIOrderingWithDUCET(locale, *collator));
    }
}
#endif

} // namespace JSC