IntlObject.cpp   [plain text]


/*
 * Copyright (C) 2015 Andy VanWagoner (andy@vanwagoner.family)
 * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com)
 * Copyright (C) 2016-2020 Apple Inc. All rights reserved.
 * Copyright (C) 2020 Sony Interactive Entertainment Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "IntlObject.h"

#include "Error.h"
#include "FunctionPrototype.h"
#include "IntlCollator.h"
#include "IntlCollatorConstructor.h"
#include "IntlCollatorPrototype.h"
#include "IntlDateTimeFormatConstructor.h"
#include "IntlDateTimeFormatPrototype.h"
#include "IntlDisplayNames.h"
#include "IntlDisplayNamesConstructor.h"
#include "IntlDisplayNamesPrototype.h"
#include "IntlLocale.h"
#include "IntlLocaleConstructor.h"
#include "IntlLocalePrototype.h"
#include "IntlNumberFormatConstructor.h"
#include "IntlNumberFormatPrototype.h"
#include "IntlObjectInlines.h"
#include "IntlPluralRulesConstructor.h"
#include "IntlPluralRulesPrototype.h"
#include "IntlRelativeTimeFormatConstructor.h"
#include "IntlRelativeTimeFormatPrototype.h"
#include "JSCInlines.h"
#include "Options.h"
#include <unicode/ucol.h>
#include <unicode/ufieldpositer.h>
#include <unicode/uloc.h>
#include <unicode/unumsys.h>
#include <wtf/Assertions.h>
#include <wtf/Language.h>
#include <wtf/NeverDestroyed.h>
#include <wtf/text/StringBuilder.h>
#include <wtf/text/StringImpl.h>
#include <wtf/unicode/icu/ICUHelpers.h>

namespace JSC {

STATIC_ASSERT_IS_TRIVIALLY_DESTRUCTIBLE(IntlObject);

static EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(JSGlobalObject*, CallFrame*);

static JSValue createCollatorConstructor(VM& vm, JSObject* object)
{
    IntlObject* intlObject = jsCast<IntlObject*>(object);
    JSGlobalObject* globalObject = intlObject->globalObject(vm);
    return IntlCollatorConstructor::create(vm, IntlCollatorConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlCollatorPrototype*>(globalObject->collatorStructure()->storedPrototypeObject()));
}

static JSValue createDateTimeFormatConstructor(VM& vm, JSObject* object)
{
    IntlObject* intlObject = jsCast<IntlObject*>(object);
    JSGlobalObject* globalObject = intlObject->globalObject(vm);
    return IntlDateTimeFormatConstructor::create(vm, IntlDateTimeFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlDateTimeFormatPrototype*>(globalObject->dateTimeFormatStructure()->storedPrototypeObject()));
}

static JSValue createDisplayNamesConstructor(VM& vm, JSObject* object)
{
    IntlObject* intlObject = jsCast<IntlObject*>(object);
    JSGlobalObject* globalObject = intlObject->globalObject(vm);
    return IntlDisplayNamesConstructor::create(vm, IntlDisplayNamesConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlDisplayNamesPrototype*>(globalObject->displayNamesStructure()->storedPrototypeObject()));
}

static JSValue createLocaleConstructor(VM& vm, JSObject* object)
{
    IntlObject* intlObject = jsCast<IntlObject*>(object);
    JSGlobalObject* globalObject = intlObject->globalObject(vm);
    return IntlLocaleConstructor::create(vm, IntlLocaleConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlLocalePrototype*>(globalObject->localeStructure()->storedPrototypeObject()));
}

static JSValue createNumberFormatConstructor(VM& vm, JSObject* object)
{
    IntlObject* intlObject = jsCast<IntlObject*>(object);
    JSGlobalObject* globalObject = intlObject->globalObject(vm);
    return IntlNumberFormatConstructor::create(vm, IntlNumberFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlNumberFormatPrototype*>(globalObject->numberFormatStructure()->storedPrototypeObject()));
}

static JSValue createPluralRulesConstructor(VM& vm, JSObject* object)
{
    IntlObject* intlObject = jsCast<IntlObject*>(object);
    JSGlobalObject* globalObject = intlObject->globalObject(vm);
    return IntlPluralRulesConstructor::create(vm, IntlPluralRulesConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlPluralRulesPrototype*>(globalObject->pluralRulesStructure()->storedPrototypeObject()));
}

static JSValue createRelativeTimeFormatConstructor(VM& vm, JSObject* object)
{
    IntlObject* intlObject = jsCast<IntlObject*>(object);
    JSGlobalObject* globalObject = intlObject->globalObject(vm);
    return IntlRelativeTimeFormatConstructor::create(vm, IntlRelativeTimeFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlRelativeTimeFormatPrototype*>(globalObject->relativeTimeFormatStructure()->storedPrototypeObject()));
}

}

#include "IntlObject.lut.h"

namespace JSC {

/* Source for IntlObject.lut.h
@begin intlObjectTable
  getCanonicalLocales   intlObjectFuncGetCanonicalLocales            DontEnum|Function 1
  Collator              createCollatorConstructor                    DontEnum|PropertyCallback
  DateTimeFormat        createDateTimeFormatConstructor              DontEnum|PropertyCallback
  Locale                createLocaleConstructor                      DontEnum|PropertyCallback
  NumberFormat          createNumberFormatConstructor                DontEnum|PropertyCallback
  PluralRules           createPluralRulesConstructor                 DontEnum|PropertyCallback
  RelativeTimeFormat    createRelativeTimeFormatConstructor          DontEnum|PropertyCallback
@end
*/

struct MatcherResult {
    String locale;
    String extension;
    size_t extensionIndex { 0 };
};

const ClassInfo IntlObject::s_info = { "Intl", &Base::s_info, &intlObjectTable, nullptr, CREATE_METHOD_TABLE(IntlObject) };

void UFieldPositionIteratorDeleter::operator()(UFieldPositionIterator* iterator) const
{
    if (iterator)
        ufieldpositer_close(iterator);
}

IntlObject::IntlObject(VM& vm, Structure* structure)
    : Base(vm, structure)
{
}

IntlObject* IntlObject::create(VM& vm, JSGlobalObject* globalObject, Structure* structure)
{
    IntlObject* object = new (NotNull, allocateCell<IntlObject>(vm.heap)) IntlObject(vm, structure);
    object->finishCreation(vm, globalObject);
    return object;
}

void IntlObject::finishCreation(VM& vm, JSGlobalObject*)
{
    Base::finishCreation(vm);
    ASSERT(inherits(vm, info()));
    JSC_TO_STRING_TAG_WITHOUT_TRANSITION();
#if HAVE(ICU_U_LOCALE_DISPLAY_NAMES)
    if (Options::useIntlDisplayNames())
        putDirectWithoutTransition(vm, vm.propertyNames->DisplayNames, createDisplayNamesConstructor(vm, this), static_cast<unsigned>(PropertyAttribute::DontEnum));
#else
    UNUSED_PARAM(createDisplayNamesConstructor);
#endif
}

Structure* IntlObject::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
{
    return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
}

Vector<char, 32> localeIDBufferForLanguageTag(const CString& tag)
{
    if (!tag.length())
        return { };

    UErrorCode status = U_ZERO_ERROR;
    Vector<char, 32> buffer(32);
    int32_t parsedLength;
    auto bufferLength = uloc_forLanguageTag(tag.data(), buffer.data(), buffer.size(), &parsedLength, &status);
    if (needsToGrowToProduceCString(status)) {
        // Before ICU 64, there's a chance uloc_forLanguageTag will "buffer overflow" while requesting a *smaller* size.
        buffer.resize(bufferLength + 1);
        status = U_ZERO_ERROR;
        uloc_forLanguageTag(tag.data(), buffer.data(), bufferLength + 1, &parsedLength, &status);
    }
    if (U_FAILURE(status) || parsedLength != static_cast<int32_t>(tag.length()))
        return { };

    ASSERT(buffer.contains('\0'));
    return buffer;
}

String languageTagForLocaleID(const char* localeID, bool isImmortal)
{
    Vector<char, 32> buffer;
    auto status = callBufferProducingFunction(uloc_toLanguageTag, localeID, buffer, false);
    if (U_FAILURE(status))
        return String();

    // This is used to store into static variables that may be shared across JSC execution threads.
    // This must be immortal to make concurrent ref/deref safe.
    if (isImmortal)
        return StringImpl::createStaticStringImpl(buffer.data(), buffer.size());

    return String(buffer.data(), buffer.size());
}

// Ensure we have xx-ZZ whenever we have xx-Yyyy-ZZ.
static void addScriptlessLocaleIfNeeded(HashSet<String>& availableLocales, StringView locale)
{
    if (locale.length() < 10)
        return;

    Vector<StringView, 3> subtags;
    for (auto subtag : locale.split('-')) {
        if (subtags.size() == 3)
            return;
        subtags.append(subtag);
    }

    if (subtags.size() != 3 || subtags[1].length() != 4 || subtags[2].length() > 3)
        return;

    Vector<char, 12> buffer;
    ASSERT(subtags[0].is8Bit() && subtags[0].isAllASCII());
    buffer.append(reinterpret_cast<const char*>(subtags[0].characters8()), subtags[0].length());
    buffer.append('-');
    ASSERT(subtags[2].is8Bit() && subtags[2].isAllASCII());
    buffer.append(reinterpret_cast<const char*>(subtags[2].characters8()), subtags[2].length());

    availableLocales.add(StringImpl::createStaticStringImpl(buffer.data(), buffer.size()));
}

const HashSet<String>& intlAvailableLocales()
{
    static LazyNeverDestroyed<HashSet<String>> availableLocales;
    static std::once_flag initializeOnce;
    std::call_once(initializeOnce, [&] {
        availableLocales.construct();
        ASSERT(availableLocales->isEmpty());
        constexpr bool isImmortal = true;
        int32_t count = uloc_countAvailable();
        for (int32_t i = 0; i < count; ++i) {
            String locale = languageTagForLocaleID(uloc_getAvailable(i), isImmortal);
            if (locale.isEmpty())
                continue;
            availableLocales->add(locale);
            addScriptlessLocaleIfNeeded(availableLocales.get(), locale);
        }
    });
    return availableLocales;
}

// This table is total ordering indexes for ASCII characters in UCA DUCET.
// It is generated from CLDR common/uca/allkeys_DUCET.txt.
//
// Rough overview of UCA is the followings.
// https://unicode.org/reports/tr10/#Main_Algorithm
//
//     1. Normalize each input string.
//
//     2. Produce an array of collation elements for each string.
//
//         There are 3 (or 4) levels. And each character has 4 weights. We concatenate them into one sequence called collation elements.
//         For example, "c" has `[.0706.0020.0002]`. And "ca◌́b" becomes `[.0706.0020.0002], [.06D9.0020.0002], [.0000.0021.0002], [.06EE.0020.0002]`
//         We need to consider variable weighting (https://unicode.org/reports/tr10/#Variable_Weighting), but if it is Non-ignorable, we can just use
//         the collation elements defined in the table.
//
//     3. Produce a sort key for each string from the arrays of collation elements.
//
//         Generate sort key from collation elements. From lower levels to higher levels, we collect weights. But 0000 weight is skipped.
//         Between levels, we insert 0000 weight if the boundary.
//
//             string: "ca◌́b"
//             collation elements: `[.0706.0020.0002], [.06D9.0020.0002], [.0000.0021.0002], [.06EE.0020.0002]`
//             sort key: `0706 06D9 06EE 0000 0020 0020 0021 0020 0000 0002 0002 0002 0002`
//                                        ^                        ^
//                                        level boundary           level boundary
//
//     4. Compare the two sort keys with a binary comparison operation.
//
// Key observations are the followings.
//
//     1. If an input is an ASCII string, UCA step-1 normalization does nothing.
//     2. If an input is an ASCII string, non-starters (https://unicode.org/reports/tr10/#UTS10-D33) does not exist. So no special handling in UCA step-2 is required.
//     3. If an input is an ASCII string, no multiple character collation elements exist. So no special handling in UCA step-2 is required. For example, "L·" is not ASCII.
//     4. UCA step-3 handles 0000 weighted characters specially. And ASCII contains these characters. But 0000 elements are used only for rare control characters.
//        We can ignore this special handling if ASCII strings do not include control characters.
//     5. Except 0000 cases, all characters' level-1 weights are different. And level-2 weights are always 0020, which is lower than any level-1 weights.
//        This means that binary comparison in UCA step-4 do not need to check level 2~ weights.
//
//  Based on the above observation, our fast path handles ASCII strings excluding control characters. The following weight is recomputed weights from level-1 weights.
const uint8_t ducetWeights[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    6, 12, 16, 28, 38, 29, 27, 15,
    17, 18, 24, 32, 9, 8, 14, 25,
    39, 40, 41, 42, 43, 44, 45, 46,
    47, 48, 11, 10, 33, 34, 35, 13,
    23, 50, 52, 54, 56, 58, 60, 62,
    64, 66, 68, 70, 72, 74, 76, 78,
    80, 82, 84, 86, 88, 90, 92, 94,
    96, 98, 100, 19, 26, 20, 31, 7,
    30, 49, 51, 53, 55, 57, 59, 61,
    63, 65, 67, 69, 71, 73, 75, 77,
    79, 81, 83, 85, 87, 89, 91, 93,
    95, 97, 99, 21, 36, 22, 37, 0,
};

const HashSet<String>& intlCollatorAvailableLocales()
{
    static LazyNeverDestroyed<HashSet<String>> availableLocales;
    static std::once_flag initializeOnce;
    std::call_once(initializeOnce, [&] {
        availableLocales.construct();
        ASSERT(availableLocales->isEmpty());
        constexpr bool isImmortal = true;
        int32_t count = ucol_countAvailable();
        for (int32_t i = 0; i < count; ++i) {
            String locale = languageTagForLocaleID(ucol_getAvailable(i), isImmortal);
            if (locale.isEmpty())
                continue;
            availableLocales->add(locale);
            addScriptlessLocaleIfNeeded(availableLocales.get(), locale);
        }
        IntlCollator::checkICULocaleInvariants(availableLocales.get());
    });
    return availableLocales;
}

// https://tc39.es/ecma402/#sec-getoption
TriState intlBooleanOption(JSGlobalObject* globalObject, JSValue options, PropertyName property)
{
    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    if (options.isUndefined())
        return TriState::Indeterminate;

    JSObject* opts = options.toObject(globalObject);
    RETURN_IF_EXCEPTION(scope, TriState::Indeterminate);

    JSValue value = opts->get(globalObject, property);
    RETURN_IF_EXCEPTION(scope, TriState::Indeterminate);

    if (value.isUndefined())
        return TriState::Indeterminate;

    return triState(value.toBoolean(globalObject));
}

String intlStringOption(JSGlobalObject* globalObject, JSValue options, PropertyName property, std::initializer_list<const char*> values, const char* notFound, const char* fallback)
{
    // GetOption (options, property, type="string", values, fallback)
    // https://tc39.github.io/ecma402/#sec-getoption

    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    if (options.isUndefined())
        return fallback;

    JSObject* opts = options.toObject(globalObject);
    RETURN_IF_EXCEPTION(scope, String());

    JSValue value = opts->get(globalObject, property);
    RETURN_IF_EXCEPTION(scope, String());

    if (!value.isUndefined()) {
        String stringValue = value.toWTFString(globalObject);
        RETURN_IF_EXCEPTION(scope, String());

        if (values.size() && std::find(values.begin(), values.end(), stringValue) == values.end()) {
            throwException(globalObject, scope, createRangeError(globalObject, notFound));
            return { };
        }
        return stringValue;
    }

    return fallback;
}

unsigned intlNumberOption(JSGlobalObject* globalObject, JSValue options, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback)
{
    // GetNumberOption (options, property, minimum, maximum, fallback)
    // https://tc39.github.io/ecma402/#sec-getnumberoption

    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    if (options.isUndefined())
        return fallback;

    JSObject* opts = options.toObject(globalObject);
    RETURN_IF_EXCEPTION(scope, 0);

    JSValue value = opts->get(globalObject, property);
    RETURN_IF_EXCEPTION(scope, 0);

    RELEASE_AND_RETURN(scope, intlDefaultNumberOption(globalObject, value, property, minimum, maximum, fallback));
}

unsigned intlDefaultNumberOption(JSGlobalObject* globalObject, JSValue value, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback)
{
    // DefaultNumberOption (value, minimum, maximum, fallback)
    // https://tc39.github.io/ecma402/#sec-defaultnumberoption

    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    if (!value.isUndefined()) {
        double doubleValue = value.toNumber(globalObject);
        RETURN_IF_EXCEPTION(scope, 0);

        if (!(doubleValue >= minimum && doubleValue <= maximum)) {
            throwException(globalObject, scope, createRangeError(globalObject, *property.publicName() + " is out of range"));
            return 0;
        }
        return static_cast<unsigned>(doubleValue);
    }
    return fallback;
}

// http://www.unicode.org/reports/tr35/#Unicode_locale_identifier
bool isUnicodeLocaleIdentifierType(StringView string)
{
    ASSERT(!string.isNull());

    for (auto part : string.splitAllowingEmptyEntries('-')) {
        auto length = part.length();
        if (length < 3 || length > 8)
            return false;

        for (auto character : part.codeUnits()) {
            if (!isASCIIAlphanumeric(character))
                return false;
        }
    }

    return true;
}

// https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag
// https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid
static String canonicalizeLanguageTag(const CString& tag)
{
    auto buffer = localeIDBufferForLanguageTag(tag);
    if (buffer.isEmpty())
        return String();

    return languageTagForLocaleID(buffer.data());
}

Vector<String> canonicalizeLocaleList(JSGlobalObject* globalObject, JSValue locales)
{
    // CanonicalizeLocaleList (locales)
    // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist

    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    Vector<String> seen;

    if (locales.isUndefined())
        return seen;

    JSObject* localesObject;
    if (locales.isString() || locales.inherits<IntlLocale>(vm)) {
        JSArray* localesArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous));
        if (!localesArray) {
            throwOutOfMemoryError(globalObject, scope);
            return { };
        }
        localesArray->push(globalObject, locales);
        RETURN_IF_EXCEPTION(scope, Vector<String>());

        localesObject = localesArray;
    } else {
        localesObject = locales.toObject(globalObject);
        RETURN_IF_EXCEPTION(scope, Vector<String>());
    }

    // 6. Let len be ToLength(Get(O, "length")).
    JSValue lengthProperty = localesObject->get(globalObject, vm.propertyNames->length);
    RETURN_IF_EXCEPTION(scope, Vector<String>());

    uint64_t length = static_cast<uint64_t>(lengthProperty.toLength(globalObject));
    RETURN_IF_EXCEPTION(scope, Vector<String>());

    HashSet<String> seenSet;
    for (uint64_t k = 0; k < length; ++k) {
        bool kPresent = localesObject->hasProperty(globalObject, k);
        RETURN_IF_EXCEPTION(scope, Vector<String>());

        if (kPresent) {
            JSValue kValue = localesObject->get(globalObject, k);
            RETURN_IF_EXCEPTION(scope, Vector<String>());

            if (!kValue.isString() && !kValue.isObject()) {
                throwTypeError(globalObject, scope, "locale value must be a string or object"_s);
                return { };
            }

            Expected<CString, UTF8ConversionError> rawTag;
            if (kValue.inherits<IntlLocale>(vm))
                rawTag = jsCast<IntlLocale*>(kValue)->toString().tryGetUtf8();
            else {
                JSString* tag = kValue.toString(globalObject);
                RETURN_IF_EXCEPTION(scope, Vector<String>());

                auto tagValue = tag->value(globalObject);
                RETURN_IF_EXCEPTION(scope, Vector<String>());

                rawTag = tagValue.tryGetUtf8();
            }
            if (!rawTag) {
                if (rawTag.error() == UTF8ConversionError::OutOfMemory)
                    throwOutOfMemoryError(globalObject, scope);
                return { };
            }

            String canonicalizedTag = canonicalizeLanguageTag(rawTag.value());
            if (canonicalizedTag.isNull()) {
                String errorMessage = tryMakeString("invalid language tag: ", rawTag->data());
                if (UNLIKELY(!errorMessage)) {
                    throwException(globalObject, scope, createOutOfMemoryError(globalObject));
                    return { };
                }
                throwException(globalObject, scope, createRangeError(globalObject, errorMessage));
                return { };
            }

            if (seenSet.add(canonicalizedTag).isNewEntry)
                seen.append(canonicalizedTag);
        }
    }

    return seen;
}

String bestAvailableLocale(const HashSet<String>& availableLocales, const String& locale)
{
    return bestAvailableLocale(locale, [&](const String& candidate) {
        return availableLocales.contains(candidate);
    });
}

String defaultLocale(JSGlobalObject* globalObject)
{
    // DefaultLocale ()
    // https://tc39.github.io/ecma402/#sec-defaultlocale

    // WebCore's global objects will have their own ideas of how to determine the language. It may
    // be determined by WebCore-specific logic like some WK settings. Usually this will return the
    // same thing as userPreferredLanguages()[0].
    if (auto defaultLanguage = globalObject->globalObjectMethodTable()->defaultLanguage) {
        String locale = canonicalizeLanguageTag(defaultLanguage().utf8());
        if (!locale.isEmpty())
            return locale;
    }

    Vector<String> languages = userPreferredLanguages();
    for (const auto& language : languages) {
        String locale = canonicalizeLanguageTag(language.utf8());
        if (!locale.isEmpty())
            return locale;
    }

    // If all else fails, ask ICU. It will probably say something bogus like en_us even if the user
    // has configured some other language, but being wrong is better than crashing.
    static LazyNeverDestroyed<String> icuDefaultLocalString;
    static std::once_flag initializeOnce;
    std::call_once(initializeOnce, [&] {
        constexpr bool isImmortal = true;
        icuDefaultLocalString.construct(languageTagForLocaleID(uloc_getDefault(), isImmortal));
    });
    if (!icuDefaultLocalString->isEmpty())
        return icuDefaultLocalString.get();

    return "en"_s;
}

String removeUnicodeLocaleExtension(const String& locale)
{
    Vector<String> parts = locale.split('-');
    StringBuilder builder;
    size_t partsSize = parts.size();
    bool atPrivate = false;
    if (partsSize > 0)
        builder.append(parts[0]);
    for (size_t p = 1; p < partsSize; ++p) {
        if (parts[p] == "x")
            atPrivate = true;
        if (!atPrivate && parts[p] == "u" && p + 1 < partsSize) {
            // Skip the u- and anything that follows until another singleton.
            // While the next part is part of the unicode extension, skip it.
            while (p + 1 < partsSize && parts[p + 1].length() > 1)
                ++p;
        } else {
            builder.append('-', parts[p]);
        }
    }
    return builder.toString();
}

static MatcherResult lookupMatcher(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
{
    // LookupMatcher (availableLocales, requestedLocales)
    // https://tc39.github.io/ecma402/#sec-lookupmatcher

    String locale;
    String noExtensionsLocale;
    String availableLocale;
    for (size_t i = 0; i < requestedLocales.size() && availableLocale.isNull(); ++i) {
        locale = requestedLocales[i];
        noExtensionsLocale = removeUnicodeLocaleExtension(locale);
        availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
    }

    MatcherResult result;
    if (!availableLocale.isEmpty()) {
        result.locale = availableLocale;
        if (locale != noExtensionsLocale) {
            size_t extensionIndex = locale.find("-u-");
            RELEASE_ASSERT(extensionIndex != notFound);

            size_t extensionLength = locale.length() - extensionIndex;
            size_t end = extensionIndex + 3;
            while (end < locale.length()) {
                end = locale.find('-', end);
                if (end == notFound)
                    break;
                if (end + 2 < locale.length() && locale[end + 2] == '-') {
                    extensionLength = end - extensionIndex;
                    break;
                }
                end++;
            }
            result.extension = locale.substring(extensionIndex, extensionLength);
            result.extensionIndex = extensionIndex;
        }
    } else
        result.locale = defaultLocale(globalObject);
    return result;
}

static MatcherResult bestFitMatcher(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
{
    // BestFitMatcher (availableLocales, requestedLocales)
    // https://tc39.github.io/ecma402/#sec-bestfitmatcher

    // FIXME: Implement something better than lookup.
    return lookupMatcher(globalObject, availableLocales, requestedLocales);
}

static void unicodeExtensionSubTags(const String& extension, Vector<String>& subtags)
{
    // UnicodeExtensionSubtags (extension)
    // https://tc39.github.io/ecma402/#sec-unicodeextensionsubtags

    auto extensionLength = extension.length();
    if (extensionLength < 3)
        return;

    size_t subtagStart = 3; // Skip initial -u-.
    size_t valueStart = 3;
    bool isLeading = true;
    for (size_t index = subtagStart; index < extensionLength; ++index) {
        if (extension[index] == '-') {
            if (index - subtagStart == 2) {
                // Tag is a key, first append prior key's value if there is one.
                if (subtagStart - valueStart > 1)
                    subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
                subtags.append(extension.substring(subtagStart, index - subtagStart));
                valueStart = index + 1;
                isLeading = false;
            } else if (isLeading) {
                // Leading subtags before first key.
                subtags.append(extension.substring(subtagStart, index - subtagStart));
                valueStart = index + 1;
            }
            subtagStart = index + 1;
        }
    }
    if (extensionLength - subtagStart == 2) {
        // Trailing an extension key, first append prior key's value if there is one.
        if (subtagStart - valueStart > 1)
            subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
        valueStart = subtagStart;
    }
    // Append final key's value.
    subtags.append(extension.substring(valueStart, extensionLength - valueStart));
}

constexpr ASCIILiteral relevantExtensionKeyString(RelevantExtensionKey key)
{
    switch (key) {
#define JSC_RETURN_INTL_RELEVANT_EXTENSION_KEYS(lowerName, capitalizedName) \
    case RelevantExtensionKey::capitalizedName: \
        return #lowerName ""_s;
    JSC_INTL_RELEVANT_EXTENSION_KEYS(JSC_RETURN_INTL_RELEVANT_EXTENSION_KEYS)
#undef JSC_RETURN_INTL_RELEVANT_EXTENSION_KEYS
    }
    return ASCIILiteral::null();
}

ResolvedLocale resolveLocale(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, LocaleMatcher localeMatcher, const ResolveLocaleOptions& options, std::initializer_list<RelevantExtensionKey> relevantExtensionKeys, Vector<String> (*localeData)(const String&, RelevantExtensionKey))
{
    // ResolveLocale (availableLocales, requestedLocales, options, relevantExtensionKeys, localeData)
    // https://tc39.github.io/ecma402/#sec-resolvelocale

    MatcherResult matcherResult = localeMatcher == LocaleMatcher::Lookup
        ? lookupMatcher(globalObject, availableLocales, requestedLocales)
        : bestFitMatcher(globalObject, availableLocales, requestedLocales);

    String foundLocale = matcherResult.locale;

    Vector<String> extensionSubtags;
    if (!matcherResult.extension.isNull())
        unicodeExtensionSubTags(matcherResult.extension, extensionSubtags);

    ResolvedLocale resolved;
    resolved.dataLocale = foundLocale;

    String supportedExtension = "-u"_s;
    for (RelevantExtensionKey key : relevantExtensionKeys) {
        ASCIILiteral keyString = relevantExtensionKeyString(key);
        Vector<String> keyLocaleData = localeData(foundLocale, key);
        ASSERT(!keyLocaleData.isEmpty());

        String value = keyLocaleData[0];
        String supportedExtensionAddition;

        if (!extensionSubtags.isEmpty()) {
            size_t keyPos = extensionSubtags.find(keyString);
            if (keyPos != notFound) {
                if (keyPos + 1 < extensionSubtags.size() && extensionSubtags[keyPos + 1].length() > 2) {
                    const String& requestedValue = extensionSubtags[keyPos + 1];
                    if (keyLocaleData.contains(requestedValue)) {
                        value = requestedValue;
                        supportedExtensionAddition = makeString('-', keyString, '-', value);
                    }
                } else if (keyLocaleData.contains("true"_s)) {
                    value = "true"_s;
                    supportedExtensionAddition = makeString('-', keyString);
                }
            }
        }

        if (auto optionsValue = options[static_cast<unsigned>(key)]) {
            // Undefined should not get added to the options, it won't displace the extension.
            // Null will remove the extension.
            if ((optionsValue->isNull() || keyLocaleData.contains(*optionsValue)) && *optionsValue != value) {
                value = optionsValue.value();
                supportedExtensionAddition = String();
            }
        }
        resolved.extensions[static_cast<unsigned>(key)] = value;
        supportedExtension.append(supportedExtensionAddition);
    }

    if (supportedExtension.length() > 2) {
        StringView foundLocaleView(foundLocale);
        foundLocale = makeString(foundLocaleView.substring(0, matcherResult.extensionIndex), supportedExtension, foundLocaleView.substring(matcherResult.extensionIndex));
    }

    resolved.locale = WTFMove(foundLocale);
    return resolved;
}

static JSArray* lookupSupportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
{
    // LookupSupportedLocales (availableLocales, requestedLocales)
    // https://tc39.github.io/ecma402/#sec-lookupsupportedlocales

    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    size_t len = requestedLocales.size();
    JSArray* subset = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithUndecided), 0);
    if (!subset) {
        throwOutOfMemoryError(globalObject, scope);
        return nullptr;
    }

    unsigned index = 0;
    for (size_t k = 0; k < len; ++k) {
        const String& locale = requestedLocales[k];
        String noExtensionsLocale = removeUnicodeLocaleExtension(locale);
        String availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
        if (!availableLocale.isNull()) {
            subset->putDirectIndex(globalObject, index++, jsString(vm, locale));
            RETURN_IF_EXCEPTION(scope, nullptr);
        }
    }

    return subset;
}

static JSArray* bestFitSupportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
{
    // BestFitSupportedLocales (availableLocales, requestedLocales)
    // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales

    // FIXME: Implement something better than lookup.
    return lookupSupportedLocales(globalObject, availableLocales, requestedLocales);
}

JSValue supportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, JSValue options)
{
    // SupportedLocales (availableLocales, requestedLocales, options)
    // https://tc39.github.io/ecma402/#sec-supportedlocales

    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);
    String matcher;

    LocaleMatcher localeMatcher = intlOption<LocaleMatcher>(globalObject, options, vm.propertyNames->localeMatcher, { { "lookup"_s, LocaleMatcher::Lookup }, { "best fit"_s, LocaleMatcher::BestFit } }, "localeMatcher must be either \"lookup\" or \"best fit\""_s, LocaleMatcher::BestFit);
    RETURN_IF_EXCEPTION(scope, JSValue());

    if (localeMatcher == LocaleMatcher::BestFit)
        RELEASE_AND_RETURN(scope, bestFitSupportedLocales(globalObject, availableLocales, requestedLocales));
    RELEASE_AND_RETURN(scope, lookupSupportedLocales(globalObject, availableLocales, requestedLocales));
}

Vector<String> numberingSystemsForLocale(const String& locale)
{
    static LazyNeverDestroyed<Vector<String>> availableNumberingSystems;
    static std::once_flag initializeOnce;
    std::call_once(initializeOnce, [&] {
        availableNumberingSystems.construct();
        ASSERT(availableNumberingSystems->isEmpty());
        UErrorCode status = U_ZERO_ERROR;
        UEnumeration* numberingSystemNames = unumsys_openAvailableNames(&status);
        ASSERT(U_SUCCESS(status));

        int32_t resultLength;
        // Numbering system names are always ASCII, so use char[].
        while (const char* result = uenum_next(numberingSystemNames, &resultLength, &status)) {
            ASSERT(U_SUCCESS(status));
            auto numsys = unumsys_openByName(result, &status);
            ASSERT(U_SUCCESS(status));
            // Only support algorithmic if it is the default fot the locale, handled below.
            if (!unumsys_isAlgorithmic(numsys))
                availableNumberingSystems->append(String(StringImpl::createStaticStringImpl(result, resultLength)));
            unumsys_close(numsys);
        }
        uenum_close(numberingSystemNames);
    });

    UErrorCode status = U_ZERO_ERROR;
    UNumberingSystem* defaultSystem = unumsys_open(locale.utf8().data(), &status);
    ASSERT(U_SUCCESS(status));
    String defaultSystemName(unumsys_getName(defaultSystem));
    unumsys_close(defaultSystem);

    Vector<String> numberingSystems({ defaultSystemName });
    numberingSystems.appendVector(availableNumberingSystems.get());
    return numberingSystems;
}

// unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
bool isUnicodeLanguageSubtag(StringView string)
{
    auto length = string.length();
    return length >= 2 && length <= 8 && length != 4 && string.isAllSpecialCharacters<isASCIIAlpha>();
}

// unicode_script_subtag = alpha{4} ;
bool isUnicodeScriptSubtag(StringView string)
{
    return string.length() == 4 && string.isAllSpecialCharacters<isASCIIAlpha>();
}

// unicode_region_subtag = alpha{2} | digit{3} ;
bool isUnicodeRegionSubtag(StringView string)
{
    auto length = string.length();
    return (length == 2 && string.isAllSpecialCharacters<isASCIIAlpha>())
        || (length == 3 && string.isAllSpecialCharacters<isASCIIDigit>());
}

// unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
bool isUnicodeVariantSubtag(StringView string)
{
    auto length = string.length();
    if (length >= 5 && length <= 8)
        return string.isAllSpecialCharacters<isASCIIAlphanumeric>();
    return length == 4 && isASCIIDigit(string[0]) && string.substring(1).isAllSpecialCharacters<isASCIIAlphanumeric>();
}

// unicode_language_id, but intersection of BCP47 and UTS35.
// unicode_language_id =
//     | unicode_language_subtag (sep unicode_script_subtag)? (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
// https://github.com/tc39/proposal-intl-displaynames/issues/79
bool isUnicodeLanguageId(StringView string)
{
    Vector<StringView, 4> subtags;
    for (auto subtag : string.splitAllowingEmptyEntries('-'))
        subtags.append(subtag);

    ASSERT(subtags.size() >= 1);
    unsigned cursor = 0;
    if (!isUnicodeLanguageSubtag(subtags[cursor]))
        return false;
    ++cursor;

    if (cursor == subtags.size())
        return true;
    if (isUnicodeScriptSubtag(subtags[cursor]))
        ++cursor;

    if (cursor == subtags.size())
        return true;
    if (isUnicodeRegionSubtag(subtags[cursor]))
        ++cursor;

    while (true) {
        if (cursor == subtags.size())
            return true;
        if (!isUnicodeVariantSubtag(subtags[cursor]))
            return false;
        ++cursor;
    }
}

bool isWellFormedCurrencyCode(StringView currency)
{
    return currency.length() == 3 && currency.isAllSpecialCharacters<isASCIIAlpha>();
}

EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(JSGlobalObject* globalObject, CallFrame* callFrame)
{
    // Intl.getCanonicalLocales(locales)
    // https://tc39.github.io/ecma402/#sec-intl.getcanonicallocales

    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    Vector<String> localeList = canonicalizeLocaleList(globalObject, callFrame->argument(0));
    RETURN_IF_EXCEPTION(scope, encodedJSValue());
    auto length = localeList.size();

    JSArray* localeArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous), length);
    if (!localeArray) {
        throwOutOfMemoryError(globalObject, scope);
        return encodedJSValue();
    }

    for (size_t i = 0; i < length; ++i) {
        localeArray->putDirectIndex(globalObject, i, jsString(vm, localeList[i]));
        RETURN_IF_EXCEPTION(scope, encodedJSValue());
    }
    return JSValue::encode(localeArray);
}

} // namespace JSC