// localebuildertest.cpp


// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include <memory>

#include "cmemory.h"
#include "cstring.h"
#include "localebuildertest.h"
#include "unicode/localebuilder.h"
#include "unicode/strenum.h"

// Constructor: performs no initialization of its own.
LocaleBuilderTest::LocaleBuilderTest() {}

// Destructor: performs no cleanup of its own.
LocaleBuilderTest::~LocaleBuilderTest() {}

// Test-suite dispatch entry point: lists every test method of this class
// between TESTCASE_AUTO_BEGIN and TESTCASE_AUTO_END.
// NOTE(review): the TESTCASE_AUTO* macros are defined outside this file
// (presumably by the intltest framework); confirm there how `index`, `exec`
// and `name` are consumed. The fourth parameter is unnamed and unused here.
void LocaleBuilderTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
{
    TESTCASE_AUTO_BEGIN;
    // One entry per test method, kept in alphabetical order.
    TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute);
    TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed);
    TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed);
    TESTCASE_AUTO(TestLocaleBuilder);
    TESTCASE_AUTO(TestLocaleBuilderBasic);
    TESTCASE_AUTO(TestPosixCases);
    TESTCASE_AUTO(TestSetExtensionOthers);
    TESTCASE_AUTO(TestSetExtensionPU);
    TESTCASE_AUTO(TestSetExtensionT);
    TESTCASE_AUTO(TestSetExtensionU);
    TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed);
    TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed);
    TESTCASE_AUTO(TestSetExtensionValidateTIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidateTWellFormed);
    TESTCASE_AUTO(TestSetExtensionValidateUIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidateUWellFormed);
    TESTCASE_AUTO(TestSetLanguageIllFormed);
    TESTCASE_AUTO(TestSetLanguageWellFormed);
    TESTCASE_AUTO(TestSetLocale);
    TESTCASE_AUTO(TestSetRegionIllFormed);
    TESTCASE_AUTO(TestSetRegionWellFormed);
    TESTCASE_AUTO(TestSetScriptIllFormed);
    TESTCASE_AUTO(TestSetScriptWellFormed);
    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey);
    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue);
    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed);
    TESTCASE_AUTO(TestSetVariantIllFormed);
    TESTCASE_AUTO(TestSetVariantWellFormed);
    TESTCASE_AUTO_END;
}

/**
 * Checks that `bld` currently reports no error and that the Locale it builds
 * has the language tag `expected`.
 *
 * @param bld      builder under test.
 * @param expected language tag the built Locale is expected to produce.
 * @param msg      printf-style format containing one %s; it receives the
 *                 error name when copyErrorTo() or build() reports a failure.
 */
void LocaleBuilderTest::Verify(LocaleBuilder& bld, const char* expected, const char* msg) {
    UErrorCode status = U_ZERO_ERROR;
    UErrorCode copyStatus = U_ZERO_ERROR;
    UErrorCode errorStatus = U_ILLEGAL_ARGUMENT_ERROR;
    // Starting from a clean code: a TRUE return means the builder holds an error.
    if (bld.copyErrorTo(copyStatus)) {
        errln(msg, u_errorName(copyStatus));
    }
    // Starting from a failure code: the call must keep that code untouched and
    // report failure, i.e. return TRUE.
    if (!bld.copyErrorTo(errorStatus) || errorStatus != U_ILLEGAL_ARGUMENT_ERROR) {
        errln("Should always get the previous error and return TRUE");
    }
    Locale loc = bld.build(status);
    if (U_FAILURE(status)) {
        errln(msg, u_errorName(status));
    }
    // build() and copyErrorTo() are expected to agree on the error state.
    if (status != copyStatus) {
        errln(msg, u_errorName(status));
    }
    std::string tag = loc.toLanguageTag<std::string>(status);
    if (U_FAILURE(status)) {
        errln("loc.toLanguageTag() got Error: %s\n",
              u_errorName(status));
    }
    if (tag != expected) {
        errln("should get \"%s\", but got \"%s\"\n", expected, tag.c_str());
    }
}

/**
 * Data-driven test: each row of TESTCASES is a short script of builder
 * operations followed by either "T" (expected language tag and locale ID) or
 * "X" (the preceding operation must have failed).
 */
void LocaleBuilderTest::TestLocaleBuilder() {
    // The following test data are copied from
    // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
    // "L": +1 = language
    // "S": +1 = script
    // "R": +1 = region
    // "V": +1 = variant
    // "K": +1 = Unicode locale key / +2 = Unicode locale type
    // "A": +1 = Unicode locale attribute
    // "E": +1 = extension letter / +2 = extension value
    // "P": +1 = private use
    // "U": +1 = ULocale
    // "B": +1 = BCP47 language tag
    // "C": Clear all
    // "N": Clear extensions
    // "D": +1 = Unicode locale attribute to be removed
    // "X": indicates an exception must be thrown
    // "T": +1 = expected language tag / +2 = expected locale string
    const char* TESTCASES[][14] = {
        {"L", "en", "R", "us", "T", "en-US", "en_US"},
        {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"},
        {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
        {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
        {"L", "123", "X"},
        {"R", "us", "T", "und-US", "_US"},
        {"R", "usa", "X"},
        {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"},
        {"R", "123", "L", "it", "R", "", "T", "it", "it"},
        {"R", "123", "L", "en", "T", "en-123", "en_123"},
        {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
        {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
        {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"},
        {"S", "latin", "X"},
        {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
        {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
        {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"},
        {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
        {"V", "123", "X"},
        {"U", "en_US", "T", "en-US", "en_US"},
        {"U", "en_US_WIN", "X"},
        {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
          "fr-FR-1606nict-u-ca-gregory-x-test",
          "fr_FR_1606NICT@calendar=gregorian;x=test"},
        {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
        {"B", "und-CA", "T", "und-CA", "_CA"},
        // Blocked by ICU-20327
        // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
        // "en_US_VAR@x=test"},
        {"B", "en-US-VAR", "X"},
        {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
          "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
        {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T",
          "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
        {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
          "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
          "ja_JP@attribute=attr1;calendar=gregorian"},
        {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn-true",
          "en@colnumeric=yes"},
        {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
          "th_TH@numbers=thai"},
        {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
        {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
        {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
        {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
        {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"},
        {"E", "a", "x", "X"},
        {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
        // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
        // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
        // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
        // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
        // key = alphanum alpha
        {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a-yes",
         "en@0a=yes;attribute=aaa-bbb"},
        {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
          "fr_FR@x=yoshito-icu"},
        {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
          "ja_JP@calendar=japanese"},
        {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
          "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
        {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
        {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
          "th@calendar=gregorian;numbers=thai"},
        {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
          "en_US@timezone=America/New_York"},
        {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
          "true", "T", "de-u-co-phonebk-kk-true-ks-level1",
          "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
        {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
          "en_US@calendar=gregorian"},
        {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
        {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
        {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn-true",
          "en_US@colnumeric=yes"},
        {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
        {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
        {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
          "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
        {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
          "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
        {"L", "en", "A", "aa", "X"},
        {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
    };
    LocaleBuilder bld;
    for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) {
        const char* (&testCase)[14] = TESTCASES[tidx];
        const int width = UPRV_LENGTHOF(testCase);

        // Build a readable description of the row for failure messages.
        // Unused trailing slots are nullptr, but nullptr is also a legitimate
        // argument in the middle of a row, so stop at the last non-null slot
        // rather than at the first nullptr.
        int used = width;
        while (used > 0 && testCase[used - 1] == nullptr) {
            --used;
        }
        std::string actions;
        for (int p = 0; p < used; p++) {
            if (p > 0) actions += " ";
            actions += (testCase[p] != nullptr) ? testCase[p] : "(nullptr)";
        }

        int i = 0;
        // Returns the next slot of the row, or nullptr once the row is exhausted,
        // so malformed data can never index past the end of the array.
        auto nextArg = [&]() -> const char* {
            return (i < width) ? testCase[i++] : nullptr;
        };

        bld.clear();
        while (true) {
            const char* method = nextArg();
            if (method == nullptr) {
                // A row must end with "T" or "X"; never hand nullptr to strcmp().
                errln("Test case data has no \"T\" or \"X\" terminator: %s",
                      actions.c_str());
                break;
            }
            UErrorCode status = U_ZERO_ERROR;
            UErrorCode copyStatus = U_ZERO_ERROR;

            // result
            if (strcmp("X", method) == 0) {
                // An expected failure ends the row early (see below), so getting
                // here means the preceding operation did not fail.
                errln("FAIL: No error return - test case: %s", actions.c_str());
                break;
            }
            if (strcmp("T", method) == 0) {
                const char* expectedTag = nextArg();
                const char* expectedName = nextArg();
                if (expectedTag == nullptr || expectedName == nullptr) {
                    errln("Test case data is missing the expected values after \"T\": %s",
                          actions.c_str());
                    break;
                }
                bld.copyErrorTo(copyStatus);
                Locale loc = bld.build(status);
                if (status != copyStatus) {
                    errln("copyErrorTo not matching");
                }
                if (U_FAILURE(status) ||
                    strcmp(loc.getName(), expectedName) != 0) {
                    errln("FAIL: Wrong locale ID - %s %s %s", loc.getName(),
                            " for test case: ", actions.c_str());
                }
                std::string langtag = loc.toLanguageTag<std::string>(status);
                if (U_FAILURE(status) || langtag != expectedTag) {
                    errln("FAIL: Wrong language tag - %s %s %s", langtag.c_str(),
                            " for test case: ", actions.c_str());
                }
                break;
            }

            // builder operations
            if (strcmp("L", method) == 0) {
                bld.setLanguage(nextArg());
            } else if (strcmp("S", method) == 0) {
                bld.setScript(nextArg());
            } else if (strcmp("R", method) == 0) {
                bld.setRegion(nextArg());
            } else if (strcmp("V", method) == 0) {
                bld.setVariant(nextArg());
            } else if (strcmp("K", method) == 0) {
                const char* key = nextArg();
                const char* type = nextArg();
                bld.setUnicodeLocaleKeyword(key, type);
            } else if (strcmp("A", method) == 0) {
                bld.addUnicodeLocaleAttribute(nextArg());
            } else if (strcmp("E", method) == 0) {
                const char* key = nextArg();
                const char* value = nextArg();
                if (key == nullptr) {
                    errln("Test case data is missing the extension letter after \"E\": %s",
                          actions.c_str());
                    break;
                }
                bld.setExtension(key[0], value);
            } else if (strcmp("P", method) == 0) {
                bld.setExtension('x', nextArg());
            } else if (strcmp("U", method) == 0) {
                bld.setLocale(Locale(nextArg()));
            } else if (strcmp("B", method) == 0) {
                bld.setLanguageTag(nextArg());
            }
            // clear / remove
            else if (strcmp("C", method) == 0) {
                bld.clear();
            } else if (strcmp("N", method) == 0) {
                bld.clearExtensions();
            } else if (strcmp("D", method) == 0) {
                bld.removeUnicodeLocaleAttribute(nextArg());
            } else {
                // Unknown test method
                errln("Unknown test case method: There is an error in the test case data.");
                break;
            }

            // After every operation, copyErrorTo() and build() must agree.
            bld.copyErrorTo(copyStatus);
            bld.build(status);
            if (status != copyStatus) {
                errln("copyErrorTo not matching");
            }
            if (U_FAILURE(status)) {
                const char* following = (i < width) ? testCase[i] : nullptr;
                if (following != nullptr && strcmp("X", following) == 0) {
                    // This failure is expected
                    break;
                } else {
                    errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i,
                          " in test case: ", actions.c_str());
                    break;
                }
            }
        }  // while(true)
    }  // for TESTCASES
}

/**
 * Walks one builder through a sequence of set/replace/remove operations and
 * checks the resulting language tag after each step via Verify().
 * Each message passed to Verify() names the call that was just made.
 */
void LocaleBuilderTest::TestLocaleBuilderBasic() {
    LocaleBuilder bld;
    bld.setLanguage("zh");
    Verify(bld, "zh", "setLanguage('zh') got Error: %s\n");

    bld.setScript("Hant");
    Verify(bld, "zh-Hant", "setScript('Hant') got Error: %s\n");

    bld.setRegion("SG");
    Verify(bld, "zh-Hant-SG", "setRegion('SG') got Error: %s\n");

    // Replacing existing subtags.
    bld.setRegion("HK");
    bld.setScript("Hans");
    Verify(bld, "zh-Hans-HK",
           "setRegion('HK') and setScript('Hans') got Error: %s\n");

    bld.setVariant("revised");
    Verify(bld, "zh-Hans-HK-revised",
           "setVariant('revised') got Error: %s\n");

    bld.setUnicodeLocaleKeyword("nu", "thai");
    Verify(bld, "zh-Hans-HK-revised-u-nu-thai",
           "setUnicodeLocaleKeyword('nu', 'thai') got Error: %s\n");

    bld.setUnicodeLocaleKeyword("co", "pinyin");
    Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
           "setUnicodeLocaleKeyword('co', 'pinyin') got Error: %s\n");

    bld.setUnicodeLocaleKeyword("nu", "latn");
    Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
           "setUnicodeLocaleKeyword('nu', 'latn') got Error: %s\n");

    // A nullptr type removes the keyword.
    bld.setUnicodeLocaleKeyword("nu", nullptr);
    Verify(bld, "zh-Hans-HK-revised-u-co-pinyin",
           "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n");

    bld.setUnicodeLocaleKeyword("co", nullptr);
    Verify(bld, "zh-Hans-HK-revised",
           "setUnicodeLocaleKeyword('co', nullptr) got Error: %s\n");

    // An empty string removes the corresponding subtag.
    bld.setScript("");
    Verify(bld, "zh-HK-revised",
           "setScript('') got Error: %s\n");

    bld.setVariant("");
    Verify(bld, "zh-HK",
           "setVariant('') got Error: %s\n");

    bld.setRegion("");
    Verify(bld, "zh",
           "setRegion('') got Error: %s\n");
}

void LocaleBuilderTest::TestSetLanguageWellFormed() {
    // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
    // unicode_language_subtag = alpha{2,3} | alpha{5,8};
    // ICUTC decided also support alpha{4}
    static const char* wellFormedLanguages[] = {
        "",

        // alpha{2}
        "en",
        "NE",
        "eN",
        "Ne",

        // alpha{3}
        "aNe",
        "zzz",
        "AAA",

        // alpha{4}
        "ABCD",
        "abcd",

        // alpha{5}
        "efgij",
        "AbCAD",
        "ZAASD",

        // alpha{6}
        "efgijk",
        "AADGFE",
        "AkDfFz",

        // alpha{7}
        "asdfads",
        "ADSFADF",
        "piSFkDk",

        // alpha{8}
        "oieradfz",
        "IADSFJKR",
        "kkDSFJkR",
    };
    for (const char* lang : wellFormedLanguages) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setLanguage(lang);
        Locale loc = bld.build(status);
        if (U_FAILURE(status)) {
            errln("setLanguage(\"%s\") got Error: %s\n",
                  lang, u_errorName(status));
        }
    }
}

// Every entry below must be rejected by setLanguage(): building a fresh
// LocaleBuilder after the call has to yield U_ILLEGAL_ARGUMENT_ERROR.
void LocaleBuilderTest::TestSetLanguageIllFormed() {
    static const char* illFormed[] = {
        "a",
        "z",
        "A",
        "F",
        "2",
        "0",
        // The comma after "9" matters: without it the adjacent literals are
        // concatenated into the single entry "9{".
        "9",
        "{",
        ".",
        "[",
        "]",
        "\\",

        "e1",
        "N2",
        "3N",
        "4e",
        "e:",
        "43",
        "a9",

        "aN0",
        "z1z",
        "2zz",
        "3A3",
        "456",
        "af)",

        // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
        // "latn",
        // "Arab",
        // "LATN",

        "e)gij",
        "Ab3AD",
        "ZAAS8",

        "efgi[]",
        "AA9GFE",
        "7kD3Fz",
        "as8fads",
        "0DSFADF",
        "'iSFkDk",

        "oieradf+",
        "IADSFJK-",
        "kkDSFJk0",

        // alpha{9}
        "oieradfab",
        "IADSFJKDE",
        "kkDSFJkzf",
    };
    for (const char* ill : illFormed) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setLanguage(ill);
        Locale loc = bld.build(status);
        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
            errln("setLanguage(\"%s\") should fail but has no Error\n", ill);
        }
    }
}

void LocaleBuilderTest::TestSetScriptWellFormed() {
    // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
    // unicode_script_subtag = alpha{4} ;
    static const char* wellFormedScripts[] = {
        "",

        "Latn",
        "latn",
        "lATN",
        "laTN",
        "arBN",
        "ARbn",
        "adsf",
        "aADF",
        "BSVS",
        "LATn",
    };
    for (const char* script : wellFormedScripts) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setScript(script);
        Locale loc = bld.build(status);
        if (U_FAILURE(status)) {
            errln("setScript(\"%s\") got Error: %s\n",
                  script, u_errorName(status));
        }
    }
}

// Every entry below must be rejected by setScript(): building a fresh
// LocaleBuilder after the call has to yield U_ILLEGAL_ARGUMENT_ERROR.
void LocaleBuilderTest::TestSetScriptIllFormed() {
    static const char* illFormed[] = {
        "a",
        "z",
        "A",
        "F",
        "2",
        "0",
        // The comma after "9" matters: without it the adjacent literals are
        // concatenated into the single entry "9{".
        "9",
        "{",
        ".",
        "[",
        "]",
        "\\",

        "e1",
        "N2",
        "3N",
        "4e",
        "e:",
        "43",
        "a9",

        "aN0",
        "z1z",
        "2zz",
        "3A3",
        "456",
        "af)",

        "0atn",
        "l1tn",
        "lA2N",
        "la4N",
        "arB5",
        "1234",

        "e)gij",
        "Ab3AD",
        "ZAAS8",

        "efgi[]",
        "AA9GFE",
        "7kD3Fz",

        "as8fads",
        "0DSFADF",
        "'iSFkDk",

        "oieradf+",
        "IADSFJK-",
        "kkDSFJk0",

        // alpha{9}
        "oieradfab",
        "IADSFJKDE",
        "kkDSFJkzf",
    };
    for (const char* ill : illFormed) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setScript(ill);
        Locale loc = bld.build(status);
        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
            errln("setScript(\"%s\") should fail but has no Error\n", ill);
        }
    }
}

void LocaleBuilderTest::TestSetRegionWellFormed() {
    // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
    // unicode_region_subtag = (alpha{2} | digit{3})
    static const char* wellFormedRegions[] = {
        "",

        // alpha{2}
        "en",
        "NE",
        "eN",
        "Ne",

        // digit{3}
        "000",
        "999",
        "123",
        "987"
    };
    for (const char* region : wellFormedRegions) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setRegion(region);
        Locale loc = bld.build(status);
        if (U_FAILURE(status)) {
            errln("setRegion(\"%s\") got Error: %s\n",
                  region, u_errorName(status));
        }
    }
}

// Every entry below must be rejected by setRegion(): building a fresh
// LocaleBuilder after the call has to yield U_ILLEGAL_ARGUMENT_ERROR.
void LocaleBuilderTest::TestSetRegionIllFormed() {
    static const char* illFormed[] = {
        "a",
        "z",
        "A",
        "F",
        "2",
        "0",
        // The comma after "9" matters: without it the adjacent literals are
        // concatenated into the single entry "9{".
        "9",
        "{",
        ".",
        "[",
        "]",
        "\\",

        "e1",
        "N2",
        "3N",
        "4e",
        "e:",
        "43",
        "a9",

        "aN0",
        "z1z",
        "2zz",
        "3A3",
        "4.6",
        "af)",

        "0atn",
        "l1tn",
        "lA2N",
        "la4N",
        "arB5",
        "1234",

        "e)gij",
        "Ab3AD",
        "ZAAS8",

        "efgi[]",
        "AA9GFE",
        "7kD3Fz",

        "as8fads",
        "0DSFADF",
        "'iSFkDk",

        "oieradf+",
        "IADSFJK-",
        "kkDSFJk0",

        // alpha{9}
        "oieradfab",
        "IADSFJKDE",
        "kkDSFJkzf",
    };
    for (const char* ill : illFormed) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setRegion(ill);
        Locale loc = bld.build(status);
        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
            errln("setRegion(\"%s\") should fail but has no Error\n", ill);
        }
    }
}

void LocaleBuilderTest::TestSetVariantWellFormed() {
    // http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
    // (sep unicode_variant_subtag)*
    // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
    static const char* wellFormedVariants[] = {
        "",

        // alphanum{5}
        "efgij",
        "AbCAD",
        "ZAASD",
        "0AASD",
        "A1CAD",
        "ef2ij",
        "ads3X",
        "owqF4",

        // alphanum{6}
        "efgijk",
        "AADGFE",
        "AkDfFz",
        "0ADGFE",
        "A9DfFz",
        "AADG7E",

        // alphanum{7}
        "asdfads",
        "ADSFADF",
        "piSFkDk",
        "a0dfads",
        "ADSF3DF",
        "piSFkD9",

        // alphanum{8}
        "oieradfz",
        "IADSFJKR",
        "kkDSFJkR",
        "0ADSFJKR",
        "12345679",

        // digit alphanum{3}
        "0123",
        "1abc",
        "20EF",
        "30EF",
        "8A03",
        "3Ax3",
        "9Axy",

        // (sep unicode_variant_subtag)*
        "0123-4567",
        "0ab3-ABCDE",
        "9ax3-xByD9",
        "9ax3-xByD9-adfk934a",

        "0123_4567",
        "0ab3_ABCDE",
        "9ax3_xByD9",
        "9ax3_xByD9_adfk934a",

        "9ax3-xByD9_adfk934a",
        "9ax3_xByD9-adfk934a",
    };
    for (const char* variant : wellFormedVariants) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setVariant(variant);
        Locale loc = bld.build(status);
        if (U_FAILURE(status)) {
            errln("setVariant(\"%s\") got Error: %s\n",
                  variant, u_errorName(status));
        }
    }
}

// Every entry below must be rejected by setVariant(): building a fresh
// LocaleBuilder after the call has to yield U_ILLEGAL_ARGUMENT_ERROR.
void LocaleBuilderTest::TestSetVariantIllFormed() {
    static const char* illFormed[] = {
        "a",
        "z",
        "A",
        "F",
        "2",
        "0",
        // The comma after "9" matters: without it the adjacent literals are
        // concatenated into the single entry "9{".
        "9",
        "{",
        ".",
        "[",
        "]",
        "\\",

        "e1",
        "N2",
        "3N",
        "4e",
        "e:",
        "43",
        "a9",
        "en",
        "NE",
        "eN",
        "Ne",

        "aNe",
        "zzz",
        "AAA",
        "aN0",
        "z1z",
        "2zz",
        "3A3",
        "4.6",
        "af)",
        "345",
        "923",

        "Latn",
        "latn",
        "lATN",
        "laTN",
        "arBN",
        "ARbn",
        "adsf",
        "aADF",
        "BSVS",
        "LATn",
        "l1tn",
        "lA2N",
        "la4N",
        "arB5",
        "abc3",
        "A3BC",

        "e)gij",
        "A+3AD",
        "ZAA=8",

        "efgi[]",
        "AA9]FE",
        "7k[3Fz",

        "as8f/ds",
        "0DSFAD{",
        "'iSFkDk",

        "oieradf+",
        "IADSFJK-",
        "k}DSFJk0",

        // alpha{9}
        "oieradfab",
        "IADSFJKDE",
        "kkDSFJkzf",
        "123456789",

        "-0123",
        "-0123-4567",
        "0123-4567-",
        "-123-4567",
        "_0123",
        "_0123_4567",
        "0123_4567_",
        "_123_4567",

        "-abcde-figjk",
        "abcde-figjk-",
        "-abcde-figjk-",
        "_abcde_figjk",
        "abcde_figjk_",
        "_abcde_figjk_",
    };
    for (const char* ill : illFormed) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setVariant(ill);
        Locale loc = bld.build(status);
        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
            errln("setVariant(\"%s\") should fail but has no Error\n", ill);
        }
    }
}

void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() {
    // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
    // keyword = key (sep type)? ;
    // key = alphanum alpha ;
    // type = alphanum{3,8} (sep alphanum{3,8})* ;
    static const char* wellFormed_key_value[] = {
        "aa", "123",
        "3b", "zyzbcdef",
        "0Z", "1ZB30zk9-abc",
        "cZ", "2ck30zfZ-adsf023-234kcZ",
        "ZZ", "Lant",
        "ko", "",
    };
    for (int i = 0; i < UPRV_LENGTHOF(wellFormed_key_value); i += 2) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setUnicodeLocaleKeyword(wellFormed_key_value[i],
                                    wellFormed_key_value[i + 1]);
        Locale loc = bld.build(status);
        if (U_FAILURE(status)) {
            errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
                  wellFormed_key_value[i],
                  wellFormed_key_value[i + 1],
                  u_errorName(status));
        }
    }
}

void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() {
    static const char* illFormed[] = {
        "34",
        "ab-cde",
        "123",
        "b3",
        "zyzabcdef",
        "Z0",
    };
    for (const char* ill : illFormed) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setUnicodeLocaleKeyword(ill, "abc");
        Locale loc = bld.build(status);
        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
            errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
                  ill);
        }
    }
}

void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() {
    static const char* illFormed[] = {
        "34",
        "ab-",
        "-cd",
        "-ef-",
        "zyzabcdef",
        "ab-abc",
        "1ZB30zfk9-abc",
        "2ck30zfk9-adsf023-234kcZ",
    };
    for (const char* ill : illFormed) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setUnicodeLocaleKeyword("ab", ill);
        Locale loc = bld.build(status);
        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
            errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
                  ill);
        }
    }
}

void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() {
    LocaleBuilder bld;
    UErrorCode status = U_ZERO_ERROR;
    Locale loc = bld.setLanguage("fr")
                    .addUnicodeLocaleAttribute("abc")
                    .addUnicodeLocaleAttribute("aBc")
                    .addUnicodeLocaleAttribute("EFG")
                    .addUnicodeLocaleAttribute("efghi")
                    .addUnicodeLocaleAttribute("efgh")
                    .addUnicodeLocaleAttribute("efGhi")
                    .addUnicodeLocaleAttribute("EFg")
                    .addUnicodeLocaleAttribute("hijk")
                    .addUnicodeLocaleAttribute("EFG")
                    .addUnicodeLocaleAttribute("HiJK")
                    .addUnicodeLocaleAttribute("aBc")
                    .build(status);
    if (U_FAILURE(status)) {
        errln("addUnicodeLocaleAttribute() got Error: %s\n",
              u_errorName(status));
    }
    std::string expected("fr-u-abc-efg-efgh-efghi-hijk");
    std::string actual = loc.toLanguageTag<std::string>(status);
    if (U_FAILURE(status) || expected != actual) {
        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
    }

    // remove "efgh" in the middle with different casing.
    loc = bld.removeUnicodeLocaleAttribute("eFgH").build(status);
    if (U_FAILURE(status)) {
        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
              u_errorName(status));
    }
    expected = "fr-u-abc-efg-efghi-hijk";
    actual = loc.toLanguageTag<std::string>(status);
    if (U_FAILURE(status) || expected != actual) {
        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
    }

    // remove non-existing attributes.
    loc = bld.removeUnicodeLocaleAttribute("efgh").build(status);
    if (U_FAILURE(status)) {
        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
              u_errorName(status));
    }
    actual = loc.toLanguageTag<std::string>(status);
    if (U_FAILURE(status) || expected != actual) {
        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
    }

    // remove "abc" in the beginning with different casing.
    loc = bld.removeUnicodeLocaleAttribute("ABC").build(status);
    if (U_FAILURE(status)) {
        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
              u_errorName(status));
    }
    expected = "fr-u-efg-efghi-hijk";
    actual = loc.toLanguageTag<std::string>(status);
    if (U_FAILURE(status) || expected != actual) {
        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
    }

    // remove non-existing substring in the end.
    loc = bld.removeUnicodeLocaleAttribute("hij").build(status);
    if (U_FAILURE(status)) {
        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
              u_errorName(status));
    }
    actual = loc.toLanguageTag<std::string>(status);
    if (U_FAILURE(status) || expected != actual) {
        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
    }

    // remove "hijk" in the end with different casing.
    loc = bld.removeUnicodeLocaleAttribute("hIJK").build(status);
    if (U_FAILURE(status)) {
        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
              u_errorName(status));
    }
    expected = "fr-u-efg-efghi";
    actual = loc.toLanguageTag<std::string>(status);
    if (U_FAILURE(status) || expected != actual) {
        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
    }

    // remove "efghi" in the end with different casing.
    loc = bld.removeUnicodeLocaleAttribute("EFGhi").build(status);
    if (U_FAILURE(status)) {
        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
              u_errorName(status));
    }
    expected = "fr-u-efg";
    actual = loc.toLanguageTag<std::string>(status);
    if (U_FAILURE(status) || expected != actual) {
        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
    }

    // remove "efg" in as the only one, with different casing.
    loc = bld.removeUnicodeLocaleAttribute("EFG").build(status);
    if (U_FAILURE(status)) {
        errln("removeUnicodeLocaleAttribute() got Error: %s\n",
              u_errorName(status));
    }
    expected = "fr";
    actual = loc.toLanguageTag<std::string>(status);
    if (U_FAILURE(status) || expected != actual) {
        errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
    }

}

void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() {
    // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
    // attribute = alphanum{3,8} ;
    static const char* wellFormedAttributes[] = {
        // alphanum{3}
        "AbC",
        "ZAA",
        "0AA",
        "x3A",
        "xa8",

        // alphanum{4}
        "AbCA",
        "ZASD",
        "0ASD",
        "A3a4",
        "zK90",

        // alphanum{5}
        "efgij",
        "AbCAD",
        "ZAASD",
        "0AASD",
        "A1CAD",
        "ef2ij",
        "ads3X",
        "owqF4",

        // alphanum{6}
        "efgijk",
        "AADGFE",
        "AkDfFz",
        "0ADGFE",
        "A9DfFz",
        "AADG7E",

        // alphanum{7}
        "asdfads",
        "ADSFADF",
        "piSFkDk",
        "a0dfads",
        "ADSF3DF",
        "piSFkD9",

        // alphanum{8}
        "oieradfz",
        "IADSFJKR",
        "kkDSFJkR",
    };
    LocaleBuilder bld;
    for (int i = 0; i < UPRV_LENGTHOF(wellFormedAttributes); i++) {
        if (i % 5 == 0) {
            bld.clear();
        }
        UErrorCode status = U_ZERO_ERROR;
        bld.addUnicodeLocaleAttribute(wellFormedAttributes[i]);
        Locale loc = bld.build(status);
        if (U_FAILURE(status)) {
            errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
                  wellFormedAttributes[i], u_errorName(status));
        }
        if (i > 2) {
            bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 1]);
            loc = bld.build(status);
            if (U_FAILURE(status)) {
                errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
                      wellFormedAttributes[i - 1], u_errorName(status));
            }
            bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 3]);
            loc = bld.build(status);
            if (U_FAILURE(status)) {
                errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
                      wellFormedAttributes[i - 3], u_errorName(status));
            }
        }
    }
}

void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() {
    static const char* illFormed[] = {
        "aa",
        "34",
        "ab-",
        "-cd",
        "-ef-",
        "zyzabcdef",
        "123456789",
        "ab-abc",
        "1ZB30zfk9-abc",
        "2ck30zfk9-adsf023-234kcZ",
    };
    for (const char* ill : illFormed) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.addUnicodeLocaleAttribute(ill);
        Locale loc = bld.build(status);
        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
            errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
                  ill);
        }
    }
}

// Exercises setExtension() with the Unicode extension key, given both as 'u'
// and as 'U'. After every mutation the builder is handed to Verify() (defined
// elsewhere in this file) together with the expected language tag and the
// message to report on failure.
//
// The expected tags show that each setExtension('u'/'U', ...) call replaces
// the whole previous -u- value rather than merging with it, and that an empty
// value removes the extension.
void LocaleBuilderTest::TestSetExtensionU() {
    LocaleBuilder bld;
    bld.setLanguage("zh");
    Verify(bld, "zh",
           "setLanguage(\"zh\") got Error: %s\n");

    bld.setExtension('u', "co-stroke");
    Verify(bld, "zh-u-co-stroke",
           "setExtension('u', \"co-stroke\") got Error: %s\n");

    // Upper-case key; the previous "co-stroke" value is expected to be gone.
    bld.setExtension('U', "ca-islamic");
    Verify(bld, "zh-u-ca-islamic",
           "setExtension('U', \"ca-islamic\") got Error: %s\n");

    bld.setExtension('u', "ca-chinese");
    Verify(bld, "zh-u-ca-chinese",
           "setExtension('u', \"ca-chinese\") got Error: %s\n");

    bld.setExtension('U', "co-pinyin");
    Verify(bld, "zh-u-co-pinyin",
           "setExtension('U', \"co-pinyin\") got Error: %s\n");

    // Changing the region must leave the extension in place.
    bld.setRegion("TW");
    Verify(bld, "zh-TW-u-co-pinyin",
           "setRegion(\"TW\") got Error: %s\n");

    // An empty value is expected to drop the -u- extension entirely.
    bld.setExtension('U', "");
    Verify(bld, "zh-TW",
           "setExtension('U', \"\") got Error: %s\n");

    // Attributes ("abc", "defg") followed by a keyword ("kr-face").
    bld.setExtension('u', "abc-defg-kr-face");
    Verify(bld, "zh-TW-u-abc-defg-kr-face",
           "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");

    // Replacing again is expected to discard the attributes set just above.
    bld.setExtension('U', "ca-japanese");
    Verify(bld, "zh-TW-u-ca-japanese",
           "setExtension('U', \"ca-japanese\") got Error: %s\n");

}

void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() {
    static const char* wellFormedExtensions[] = {
        // keyword
        //   keyword = key (sep type)? ;
        //   key = alphanum alpha ;
        //   type = alphanum{3,8} (sep alphanum{3,8})* ;
        "3A",
        "ZA",
        "az-abc",
        "zz-123",
        "7z-12345678",
        "kb-A234567Z",
        // (sep keyword)+
        "1z-ZZ",
        "2z-ZZ-123",
        "3z-ZZ-123-cd",
        "0z-ZZ-123-cd-efghijkl",
        // attribute
        "abc",
        "456",
        "87654321",
        "ZABADFSD",
        // (sep attribute)+
        "abc-ZABADFSD",
        "123-ZABADFSD",
        "K2K-12345678",
        "K2K-12345678-zzz",
        // (sep attribute)+ (sep keyword)*
        "K2K-12345678-zz",
        "K2K-12345678-zz-0z",
        "K2K-12345678-9z-AZ-abc",
        "K2K-12345678-zz-9A-234",
        "K2K-12345678-zk0-abc-efg-zz-9k-234",
    };
    for (const char* extension : wellFormedExtensions) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setExtension('u', extension);
        Locale loc = bld.build(status);
        if (U_FAILURE(status)) {
            errln("setExtension('u', \"%s\") got Error: %s\n",
                  extension, u_errorName(status));
        }
    }
}

void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() {
    static const char* illFormed[] = {
        // bad key
        "-",
        "-ab",
        "ab-",
        "abc-",
        "-abc",
        "0",
        "a",
        "A0",
        "z9",
        "09",
        "90",
        // bad keyword
        "AB-A0",
        "AB-efg-A0",
        "xy-123456789",
        "AB-Aa-",
        "AB-Aac-",
        // bad attribute
        "abcdefghi",
        "abcdefgh-",
        "abcdefgh-abcdefghi",
        "abcdefgh-1",
        "abcdefgh-a",
        "abcdefgh-a2345678z",
    };
    for (const char* ill : illFormed) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setExtension('u', ill);
        Locale loc = bld.build(status);
        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
            errln("setExtension('u', \"%s\") should fail but has no Error\n",
                  ill);
        }
    }
}

// Exercises setExtension() with the transform extension key, given both as
// 't' and as 'T'. After every mutation the builder is handed to Verify()
// (defined elsewhere in this file) together with the expected language tag
// and the message to report on failure.
//
// The steps are order-dependent: each one mutates the same builder and the
// expected tag reflects everything set so far.
void LocaleBuilderTest::TestSetExtensionT() {
    LocaleBuilder bld;
    bld.setLanguage("fr");
    Verify(bld, "fr",
           "setLanguage(\"fr\") got Error: %s\n");

    // Upper-case key 'T' is expected to show up as "-t-" in the tag.
    bld.setExtension('T', "zh");
    Verify(bld, "fr-t-zh",
           "setExtension('T', \"zh\") got Error: %s\n");

    // Mixed-case input; the expected tag carries the value in lower case.
    bld.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE");
    Verify(bld, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
           "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");

    // A new value replaces the previous -t- value as a whole.
    bld.setExtension('T', "a9-123");
    Verify(bld, "fr-t-a9-123",
           "setExtension('T', \"a9-123\") got Error: %s\n");

    // Region, script and variant changes must keep the extension in place.
    bld.setRegion("MX");
    Verify(bld, "fr-MX-t-a9-123",
           "setRegion(\"MX\") got Error: %s\n");

    bld.setScript("Hans");
    Verify(bld, "fr-Hans-MX-t-a9-123",
           "setScript(\"Hans\") got Error: %s\n");

    bld.setVariant("9abc-abcde");
    Verify(bld, "fr-Hans-MX-9abc-abcde-t-a9-123",
           "setVariant(\"9abc-abcde\") got Error: %s\n");

    // An empty value is expected to remove the -t- extension.
    bld.setExtension('T', "");
    Verify(bld, "fr-Hans-MX-9abc-abcde",
           "bld.setExtension('T', \"\") got Error: %s\n");
}

void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() {
    // ((sep tlang (sep tfield)*) | (sep tfield)+)
    static const char* wellFormedExtensions[] = {
        // tlang
        //  tlang = unicode_language_subtag (sep unicode_script_subtag)?
        //          (sep unicode_region_subtag)?  (sep unicode_variant_subtag)* ;
        // unicode_language_subtag
        "en",
        "abc",
        "abcde",
        "ABCDEFGH",
        // unicode_language_subtag sep unicode_script_subtag
        "en-latn",
        "abc-arab",
        "ABCDEFGH-Thai",
        // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
        "en-latn-ME",
        "abc-arab-RU",
        "ABCDEFGH-Thai-TH",
        "en-latn-409",
        "abc-arab-123",
        "ABCDEFGH-Thai-456",
        // unicode_language_subtag sep unicode_region_subtag
        "en-ME",
        "abc-RU",
        "ABCDEFGH-TH",
        "en-409",
        "abc-123",
        "ABCDEFGH-456",
        // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
        // sep (sep unicode_variant_subtag)*
        "en-latn-ME-abcde",
        "abc-arab-RU-3abc-abcdef",
        "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
        "en-latn-409-xafsa",
        "abc-arab-123-ADASDF",
        "ABCDEFGH-Thai-456-9sdf-ADASFAS",
        // (sep tfield)+
        "A0-abcde",
        "z9-abcde123",
        "z9-abcde123-a1-abcde",
        // tlang (sep tfield)*
        "fr-A0-abcde",
        "fr-FR-A0-abcde",
        "fr-123-z9-abcde123-a1-abcde",
        "fr-Latn-FR-z9-abcde123-a1-abcde",
        "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
        "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
    };
    for (const char* extension : wellFormedExtensions) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setExtension('t', extension);
        Locale loc = bld.build(status);
        if (U_FAILURE(status)) {
            errln("setExtension('t', \"%s\") got Error: %s\n",
                  extension, u_errorName(status));
        }
    }
}

void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() {
    static const char* illFormed[] = {
        "a",
        "a-",
        "0",
        "9-",
        "-9",
        "-z",
        // "Latn", // Per 2019-01-23 ICUTC, still accept 4alpha. See ICU-20321
        "Latn-",
        "en-",
        "nob-",
        "-z9",
        "a3",
        "a3-",
        "3a",
        "0z-",
        "en-123-a1",
        "en-TH-a1",
        "gab-TH-a1",
        "gab-Thai-a1",
        "gab-Thai-TH-a1",
        "gab-Thai-TH-0bde-a1",
        "gab-Thai-TH-0bde-3b",
        "gab-Thai-TH-0bde-z9-a1",
        "gab-Thai-TH-0bde-z9-3b",
        "gab-Thai-TH-0bde-z9-abcde123-3b",
        "gab-Thai-TH-0bde-z9-abcde123-ab",
        "gab-Thai-TH-0bde-z9-abcde123-ab",
        "gab-Thai-TH-0bde-z9-abcde123-a1",
        "gab-Thai-TH-0bde-z9-abcde123-a1-",
        "gab-Thai-TH-0bde-z9-abcde123-a1-a",
        "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
    };
    for (const char* ill : illFormed) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setExtension('t', ill);
        Locale loc = bld.build(status);
        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
            errln("setExtension('t', \"%s\") should fail but has no Error\n",
                  ill);
        }
    }
}

// Exercises setExtension() with the private-use key, given both as 'x' and
// as 'X'. After every mutation the builder is handed to Verify() (defined
// elsewhere in this file) together with the expected language tag and the
// message to report on failure.
//
// The expected tags show that each call replaces the previous -x- value as a
// whole and that an empty value removes it.
void LocaleBuilderTest::TestSetExtensionPU() {
    LocaleBuilder bld;
    bld.setLanguage("ar");
    Verify(bld, "ar",
           "setLanguage(\"ar\") got Error: %s\n");

    // Upper-case key 'X' is expected to show up as "-x-" in the tag.
    bld.setExtension('X', "a-b-c-d-e");
    Verify(bld, "ar-x-a-b-c-d-e",
           "setExtension('X', \"a-b-c-d-e\") got Error: %s\n");

    bld.setExtension('x', "0-1-2-3");
    Verify(bld, "ar-x-0-1-2-3",
           "setExtension('x', \"0-1-2-3\") got Error: %s\n");

    // The value itself contains "x" subtags; they are ordinary subtags here.
    bld.setExtension('X', "0-12345678-x-x");
    Verify(bld, "ar-x-0-12345678-x-x",
           "setExtension('X', \"0-12345678-x-x\") got Error: %s\n");

    // Changing the region must leave the private-use extension in place.
    bld.setRegion("TH");
    Verify(bld, "ar-TH-x-0-12345678-x-x",
           "setRegion(\"TH\") got Error: %s\n");

    // An empty value is expected to remove the -x- extension.
    bld.setExtension('X', "");
    Verify(bld, "ar-TH",
           "setExtension('X', \"\") got Error: %s\n");
}

void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() {
    // ((sep tlang (sep tfield)*) | (sep tfield)+)
    static const char* wellFormedExtensions[] = {
        "a",  // Short subtag
        "z",  // Short subtag
        "0",  // Short subtag, digit
        "9",  // Short subtag, digit
        "a-0",  // Two short subtag, alpha and digit
        "9-z",  // Two short subtag, digit and alpha
        "ab",
        "abc",
        "abcefghi",  // Long subtag
        "87654321",
        "01",
        "234",
        "0a-ab-87654321",  // Three subtags
        "87654321-ab-00-3A",  // Four subtabs
        "a-9-87654321",  // Three subtags with short and long subtags
        "87654321-ab-0-3A",
    };
    for (const char* extension : wellFormedExtensions) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setExtension('x', extension);
        Locale loc = bld.build(status);
        if (U_FAILURE(status)) {
            errln("setExtension('x', \"%s\") got Error: %s\n",
                  extension, u_errorName(status));
        }
    }
}

void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() {
    static const char* illFormed[] = {
        "123456789",  // Too long
        "abcdefghi",  // Too long
        "ab-123456789",  // Second subtag too long
        "abcdefghi-12",  // First subtag too long
        "a-ab-987654321",  // Third subtag too long
        "987654321-a-0-3",  // First subtag too long
    };
    for (const char* ill : illFormed) {
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setExtension('x', ill);
        Locale loc = bld.build(status);
        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
            errln("setExtension('x', \"%s\") should fail but has no Error\n",
                  ill);
        }
    }
}

// Exercises setExtension() with singleton keys other than t/u/x ('z', '0',
// 'a'), in both upper and lower case. After every mutation the builder is
// handed to Verify() (defined elsewhere in this file) together with the
// expected language tag and the message to report on failure.
//
// The expected tags show the extensions ordered by key ('0' < 'a' < 'z')
// regardless of insertion order, values in lower case, and an empty value
// removing just that key's extension.
void LocaleBuilderTest::TestSetExtensionOthers() {
    LocaleBuilder bld;
    bld.setLanguage("fr");
    Verify(bld, "fr",
           "setLanguage(\"fr\") got Error: %s\n");

    bld.setExtension('Z', "ab");
    Verify(bld, "fr-z-ab",
           "setExtension('Z', \"ab\") got Error: %s\n");

    // Added after 'z' but expected before it in the tag.
    bld.setExtension('0', "xyz12345-abcdefg");
    Verify(bld, "fr-0-xyz12345-abcdefg-z-ab",
           "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");

    bld.setExtension('a', "01-12345678-ABcdef");
    Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
           "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");

    // Region and script changes must keep all extensions in place.
    bld.setRegion("TH");
    Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
           "setRegion(\"TH\") got Error: %s\n");

    bld.setScript("Arab");
    Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
           "setScript(\"Arab\") got Error: %s\n");

    // Upper-case 'A' is expected to replace the value stored under 'a'.
    bld.setExtension('A', "97");
    Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
           "setExtension('A', \"97\") got Error: %s\n");

    // Empty values remove one extension at a time.
    bld.setExtension('a', "");
    Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
           "setExtension('a', \"\") got Error: %s\n");

    bld.setExtension('0', "");
    Verify(bld, "fr-Arab-TH-z-ab",
           "setExtension('0', \"\") got Error: %s\n");
}

void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() {
    static const char* wellFormedExtensions[] = {
        "ab",
        "abc",
        "abcefghi",
        "01",
        "234",
        "87654321",
        "0a-ab-87654321",
        "87654321-ab-00-3A",
    };

    const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
    const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
    int32_t i = 0;
    for (const char* extension : wellFormedExtensions) {
        char ch = aToZ[i];
        i = (i + 1) % aToZLen;
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setExtension(ch, extension);
        Locale loc = bld.build(status);
        if (U_FAILURE(status)) {
            errln("setExtension('%c', \"%s\") got Error: %s\n",
                  ch, extension, u_errorName(status));
        }
    }

    const char* someChars =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
    const int32_t someCharsLen = static_cast<int32_t>(uprv_strlen(someChars));
    for (int32_t i = 0; i < someCharsLen; i++) {
        char ch = someChars[i];
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setExtension(ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
        Locale loc = bld.build(status);
        if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) {
            if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') {
                if (U_FAILURE(status)) {
                    errln("setExtension('%c', \"%s\") got Error: %s\n",
                          ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)], u_errorName(status));
                }
            }
        } else {
            if (status != U_ILLEGAL_ARGUMENT_ERROR) {
                errln("setExtension('%c', \"%s\") should fail but has no Error\n",
                      ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
            }
        }

    }
}

void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() {
    static const char* illFormed[] = {
        "0",  // Too short
        "a",  // Too short
        "123456789",  // Too long
        "abcdefghi",  // Too long
        "ab-123456789",  // Second subtag too long
        "abcdefghi-12",  // First subtag too long
        "a-ab-87654321",  // Third subtag too long
        "87654321-a-0-3",  // First subtag too long
    };
    const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
    const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
    int32_t i = 0;
    for (const char* ill : illFormed) {
        char ch = aToZ[i];
        i = (i + 1) % aToZLen;
        UErrorCode status = U_ZERO_ERROR;
        LocaleBuilder bld;
        bld.setExtension(ch, ill);
        Locale loc = bld.build(status);
        if (status != U_ILLEGAL_ARGUMENT_ERROR) {
            errln("setExtension('%c', \"%s\") should fail but has no Error\n",
                  ch, ill);
        }
    }
}

// Builds a locale field by field with one builder, then feeds that locale to
// a second, untouched builder through setLocale() and checks that building
// from it yields an equal locale.
void LocaleBuilderTest::TestSetLocale() {
    LocaleBuilder bld1, bld2;
    UErrorCode status = U_ZERO_ERROR;
    Locale l1 = bld1.setLanguage("en")
        .setScript("Latn")
        .setRegion("MX")
        .setVariant("3456-abcde")
        .addUnicodeLocaleAttribute("456")
        .addUnicodeLocaleAttribute("123")
        .setUnicodeLocaleKeyword("nu", "thai")
        .setUnicodeLocaleKeyword("co", "stroke")
        .setUnicodeLocaleKeyword("ca", "chinese")
        .build(status);
    if (U_FAILURE(status) || l1.isBogus()) {
        errln("build got Error: %s\n", u_errorName(status));
    }
    // Reset so a failure above does not mask the outcome of the second build.
    status = U_ZERO_ERROR;
    // Use the fresh builder: its only input is setLocale(l1), so equality
    // below cannot come from state left over in bld1.
    Locale l2 = bld2.setLocale(l1).build(status);
    if (U_FAILURE(status) || l2.isBogus()) {
        errln("build got Error: %s\n", u_errorName(status));
    }

    if (l1 != l2) {
        errln("Two locales should be the same, but one is '%s' and the other is '%s'",
              l1.getName(), l2.getName());
    }
}

void LocaleBuilderTest::TestPosixCases() {
    UErrorCode status = U_ZERO_ERROR;
    Locale l1 = Locale::forLanguageTag("en-US-u-va-posix", status);
    if (U_FAILURE(status) || l1.isBogus()) {
        errln("build got Error: %s\n", u_errorName(status));
    }
    LocaleBuilder bld;
    bld.setLanguage("en")
        .setRegion("MX")
        .setScript("Arab")
        .setUnicodeLocaleKeyword("nu", "Thai")
        .setExtension('x', "1");
    // All of above should be cleared by the setLocale call.
    Locale l2 = bld.setLocale(l1).build(status);
    if (U_FAILURE(status) || l2.isBogus()) {
        errln("build got Error: %s\n", u_errorName(status));
    }
    if (l1 != l2) {
        errln("The result locale should be the set as the setLocale %s but got %s\n",
              l1.toLanguageTag<std::string>(status).c_str(),
              l2.toLanguageTag<std::string>(status).c_str());
    }
    Locale posix("en-US-POSIX");
    if (posix != l2) {
        errln("The result locale should be the set as %s but got %s\n",
              posix.getName(), l2.getName());
    }
}