/* * Copyright (c) 2008 Apple Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ /* CFLocaleIdentifier.c Copyright (c) 2002-2007, Apple Inc. All rights reserved. Responsibility: Christopher Kane CFLocaleIdentifier.c defines - enum value kLocaleIdentifierCStringMax - structs KeyStringToResultString, SpecialCaseUpdates and provides the following data for the functions CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, CFLocaleCreateCanonicalLocaleIdentifierFromString CFLocaleCreateCanonicalLanguageIdentifierFromString 1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString; map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string 2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString; map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string 3. static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical; map old Apple string oldAppleLocaleToCanonical[n].key to canonical locale string oldAppleLocaleToCanonical[n].result for n = 0..kNumOldAppleLocaleToCanonical-1 4. 
static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical; map non-canonical language prefix (3-letter, obsolete) localeStringPrefixToCanonical[n].key to updated replacement localeStringPrefixToCanonical[n].result for n = 0..kNumLocaleStringPrefixToCanonical-1 5. static const SpecialCaseUpdates specialCases[]; various special cases for updating region codes, or for updating language codes based on region codes (no count enum; the table ends with an all-NULL terminator entry) 6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults; map locale string region tag localeStringRegionToDefaults[n].key to default substrings to delete localeStringRegionToDefaults[n].result for n = 0..kNumLocaleStringRegionToDefaults-1 7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults; map locale string initial part localeStringPrefixToDefaults[n].key to default substrings to delete localeStringPrefixToDefaults[n].result for n = 0..kNumLocaleStringPrefixToDefaults-1 8. 
static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString; map Apple locale string appleLocaleToLanguageString[].key to equivalent language string appleLocaleToLanguageString[].result for n = 0..kNumAppleLocaleToLanguageString-1 */ #include <CoreFoundation/CFString.h> #include <ctype.h> #include <string.h> #include <stdlib.h> #include <unicode/uloc.h> // Max byte length of locale identifier (ASCII) as C string, including terminating null byte enum { kLocaleIdentifierCStringMax = ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY // currently 56 + 100 }; // KeyStringToResultString struct used in data tables for CFLocaleCreateCanonicalLocaleIdentifierFromString struct KeyStringToResultString { const char * key; /* string to look up; the KeyStringToResultString tables in this file keep their entries sorted by key in strcmp order */ const char * result; /* string that key maps to: a replacement string, or the default substrings to strip */ }; typedef struct KeyStringToResultString KeyStringToResultString; // SpecialCaseUpdates struct used in data tables for CFLocaleCreateCanonicalLocaleIdentifierFromString struct SpecialCaseUpdates { const char * lang; /* language code the rule is tied to, or NULL for rules that apply regardless of language */ const char * reg1; /* first region tag to look for, written with a leading '-' (e.g. "-UK") */ const char * update1; /* replacement code used when reg1 is found (a region or a language code, depending on the rule) */ const char * reg2; /* optional second region tag to look for, or NULL */ const char * update2; /* replacement code used when reg2 is found, or NULL */ }; typedef struct SpecialCaseUpdates SpecialCaseUpdates; static const char * const regionCodeToLocaleString[] = { // map RegionCode (array index) to canonical locale string // // canon. 
string region code; language code; [comment] [ # __CFBundleLocaleAbbreviationsArray // -------- ------------ ------------------ ------------ -------- string, if different ] "en_US", // 0 verUS; 0 langEnglish; "fr_FR", // 1 verFrance; 1 langFrench; "en_GB", // 2 verBritain; 0 langEnglish; "de_DE", // 3 verGermany; 2 langGerman; "it_IT", // 4 verItaly; 3 langItalian; "nl_NL", // 5 verNetherlands; 4 langDutch; "nl_BE", // 6 verFlemish; 34 langFlemish (redundant, =Dutch); "sv_SE", // 7 verSweden; 5 langSwedish; "es_ES", // 8 verSpain; 6 langSpanish; "da_DK", // 9 verDenmark; 7 langDanish; "pt_PT", // 10 verPortugal; 8 langPortuguese; "fr_CA", // 11 verFrCanada; 1 langFrench; "nb_NO", // 12 verNorway; 9 langNorwegian (Bokmal); # "no_NO" "he_IL", // 13 verIsrael; 10 langHebrew; "ja_JP", // 14 verJapan; 11 langJapanese; "en_AU", // 15 verAustralia; 0 langEnglish; "ar", // 16 verArabic; 12 langArabic; "fi_FI", // 17 verFinland; 13 langFinnish; "fr_CH", // 18 verFrSwiss; 1 langFrench; "de_CH", // 19 verGrSwiss; 2 langGerman; "el_GR", // 20 verGreece; 14 langGreek (modern)-Grek-mono; "is_IS", // 21 verIceland; 15 langIcelandic; "mt_MT", // 22 verMalta; 16 langMaltese; "el_CY", // 23 verCyprus; 14 langGreek?; el or tr? 
guess el # "" "tr_TR", // 24 verTurkey; 17 langTurkish; "hr_HR", // 25 verYugoCroatian; 18 langCroatian; * one-way mapping -> verCroatia "nl_NL", // 26 KCHR, Netherlands; 4 langDutch; * one-way mapping "nl_BE", // 27 KCHR, verFlemish; 34 langFlemish; * one-way mapping "_CA", // 28 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" "_CA", // 29 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" "pt_PT", // 30 KCHR, Portugal; 8 langPortuguese; * one-way mapping "nb_NO", // 31 KCHR, Norway; 9 langNorwegian (Bokmal); * one-way mapping # "no_NO" "da_DK", // 32 KCHR, Denmark; 7 langDanish; * one-way mapping "hi_IN", // 33 verIndiaHindi; 21 langHindi; "ur_PK", // 34 verPakistanUrdu; 20 langUrdu; "tr_TR", // 35 verTurkishModified; 17 langTurkish; * one-way mapping "it_CH", // 36 verItalianSwiss; 3 langItalian; "en_001", // 37 verInternational; 0 langEnglish; ASCII only # "en" NULL, // 38 *unassigned; -1 none; * one-way mapping # "" "ro_RO", // 39 verRomania; 37 langRomanian; "grc", // 40 verGreekAncient; 148 langGreekAncient -Grek-poly; # "el_GR" "lt_LT", // 41 verLithuania; 24 langLithuanian; "pl_PL", // 42 verPoland; 25 langPolish; "hu_HU", // 43 verHungary; 26 langHungarian; "et_EE", // 44 verEstonia; 27 langEstonian; "lv_LV", // 45 verLatvia; 28 langLatvian; "se", // 46 verSami; 29 langSami; "fo_FO", // 47 verFaroeIsl; 30 langFaroese; "fa_IR", // 48 verIran; 31 langFarsi/Persian; "ru_RU", // 49 verRussia; 32 langRussian; "ga_IE", // 50 verIreland; 35 langIrishGaelic (no dots); "ko_KR", // 51 verKorea; 23 langKorean; "zh_CN", // 52 verChina; 33 langSimpChinese; "zh_TW", // 53 verTaiwan; 19 langTradChinese; "th_TH", // 54 verThailand; 22 langThai; "und", // 55 verScriptGeneric; -1 none; # "" // <1.9> "cs_CZ", // 56 verCzech; 38 langCzech; "sk_SK", // 57 verSlovak; 39 langSlovak; "und", // 58 verEastAsiaGeneric; -1 none; * one-way mapping # "" // <1.9> "hu_HU", // 59 verMagyar; 26 langHungarian; * one-way mapping -> verHungary "bn", // 60 verBengali; 67 
langBengali; _IN or _BD? guess generic "be_BY", // 61 verBelarus; 46 langBelorussian; "uk_UA", // 62 verUkraine; 45 langUkrainian; NULL, // 63 *unused; -1 none; * one-way mapping # "" "el_GR", // 64 verGreeceAlt; 14 langGreek (modern)-Grek-mono; * one-way mapping "sr_CS", // 65 verSerbian; 42 langSerbian -Cyrl; // <1.18> "sl_SI", // 66 verSlovenian; 40 langSlovenian; "mk_MK", // 67 verMacedonian; 43 langMacedonian; "hr_HR", // 68 verCroatia; 18 langCroatian; NULL, // 69 *unused; -1 none; * one-way mapping # "" "de-1996", // 70 verGermanReformed; 2 langGerman; 1996 orthogr. # "de_DE" "pt_BR", // 71 verBrazil; 8 langPortuguese; "bg_BG", // 72 verBulgaria; 44 langBulgarian; "ca_ES", // 73 verCatalonia; 130 langCatalan; "mul", // 74 verMultilingual; -1 none; # "" "gd", // 75 verScottishGaelic; 144 langScottishGaelic; "gv", // 76 verManxGaelic; 145 langManxGaelic; "br", // 77 verBreton; 142 langBreton; "iu_CA", // 78 verNunavut; 143 langInuktitut -Cans; "cy", // 79 verWelsh; 128 langWelsh; "_CA", // 80 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" "ga-Latg_IE", // 81 verIrishGaelicScrip; 146 langIrishGaelicScript -dots; # "ga_IE" // <xx> "en_CA", // 82 verEngCanada; 0 langEnglish; "dz_BT", // 83 verBhutan; 137 langDzongkha; "hy_AM", // 84 verArmenian; 51 langArmenian; "ka_GE", // 85 verGeorgian; 52 langGeorgian; "es_419", // 86 verSpLatinAmerica; 6 langSpanish; # "es" "es_ES", // 87 KCHR, Spain; 6 langSpanish; * one-way mapping "to_TO", // 88 verTonga; 147 langTongan; "pl_PL", // 89 KCHR, Poland; 25 langPolish; * one-way mapping "ca_ES", // 90 KCHR, Catalonia; 130 langCatalan; * one-way mapping "fr_001", // 91 verFrenchUniversal; 1 langFrench; "de_AT", // 92 verAustria; 2 langGerman; "es_419", // 93 > verSpLatinAmerica; 6 langSpanish; * one-way mapping # "es" "gu_IN", // 94 verGujarati; 69 langGujarati; "pa", // 95 verPunjabi; 70 langPunjabi; _IN or _PK? 
guess generic "ur_IN", // 96 verIndiaUrdu; 20 langUrdu; "vi_VN", // 97 verVietnam; 80 langVietnamese; "fr_BE", // 98 verFrBelgium; 1 langFrench; "uz_UZ", // 99 verUzbek; 47 langUzbek; "en_SG", // 100 verSingapore; 0 langEnglish?; en, zh, or ms? guess en # "" "nn_NO", // 101 verNynorsk; 151 langNynorsk; # "" "af_ZA", // 102 verAfrikaans; 141 langAfrikaans; "eo", // 103 verEsperanto; 94 langEsperanto; "mr_IN", // 104 verMarathi; 66 langMarathi; "bo", // 105 verTibetan; 63 langTibetan; "ne_NP", // 106 verNepal; 64 langNepali; "kl", // 107 verGreenland; 149 langGreenlandic; "en_IE", // 108 verIrelandEnglish; 0 langEnglish; # (no entry) }; enum { kNumRegionCodeToLocaleString = sizeof(regionCodeToLocaleString)/sizeof(char *) }; static const char * const langCodeToLocaleString[] = { // map LangCode (array index) to canonical locale string // // canon. string language code; [ comment] [ # __CFBundleLanguageAbbreviationsArray // -------- -------------- ---------- -------- string, if different ] "en", // 0 langEnglish; "fr", // 1 langFrench; "de", // 2 langGerman; "it", // 3 langItalian; "nl", // 4 langDutch; "sv", // 5 langSwedish; "es", // 6 langSpanish; "da", // 7 langDanish; "pt", // 8 langPortuguese; "nb", // 9 langNorwegian (Bokmal); # "no" "he", // 10 langHebrew -Hebr; "ja", // 11 langJapanese -Jpan; "ar", // 12 langArabic -Arab; "fi", // 13 langFinnish; "el", // 14 langGreek (modern)-Grek-mono; "is", // 15 langIcelandic; "mt", // 16 langMaltese -Latn; "tr", // 17 langTurkish -Latn; "hr", // 18 langCroatian; "zh-Hant", // 19 langTradChinese; # "zh" "ur", // 20 langUrdu -Arab; "hi", // 21 langHindi -Deva; "th", // 22 langThai -Thai; "ko", // 23 langKorean -Hang; "lt", // 24 langLithuanian; "pl", // 25 langPolish; "hu", // 26 langHungarian; "et", // 27 langEstonian; "lv", // 28 langLatvian; "se", // 29 langSami; "fo", // 30 langFaroese; "fa", // 31 langFarsi/Persian -Arab; "ru", // 32 langRussian -Cyrl; "zh-Hans", // 33 langSimpChinese; # "zh" "nl-BE", // 34 langFlemish 
(redundant, =Dutch); # "nl" "ga", // 35 langIrishGaelic (no dots); "sq", // 36 langAlbanian; no region codes "ro", // 37 langRomanian; "cs", // 38 langCzech; "sk", // 39 langSlovak; "sl", // 40 langSlovenian; "yi", // 41 langYiddish -Hebr; no region codes "sr", // 42 langSerbian -Cyrl; "mk", // 43 langMacedonian -Cyrl; "bg", // 44 langBulgarian -Cyrl; "uk", // 45 langUkrainian -Cyrl; "be", // 46 langBelorussian -Cyrl; "uz-Cyrl", // 47 langUzbek -Cyrl; also -Latn, -Arab "kk", // 48 langKazakh -Cyrl; no region codes; also -Latn, -Arab "az-Cyrl", // 49 langAzerbaijani -Cyrl; no region codes # "az" "az-Arab", // 50 langAzerbaijanAr -Arab; no region codes # "az" "hy", // 51 langArmenian -Armn; "ka", // 52 langGeorgian -Geor; "mo", // 53 langMoldavian -Cyrl; no region codes "ky", // 54 langKirghiz -Cyrl; no region codes; also -Latn, -Arab "tg-Cyrl", // 55 langTajiki -Cyrl; no region codes; also -Latn, -Arab "tk-Cyrl", // 56 langTurkmen -Cyrl; no region codes; also -Latn, -Arab "mn-Mong", // 57 langMongolian -Mong; no region codes # "mn" "mn-Cyrl", // 58 langMongolianCyr -Cyrl; no region codes # "mn" "ps", // 59 langPashto -Arab; no region codes "ku", // 60 langKurdish -Arab; no region codes "ks", // 61 langKashmiri -Arab; no region codes "sd", // 62 langSindhi -Arab; no region codes "bo", // 63 langTibetan -Tibt; "ne", // 64 langNepali -Deva; "sa", // 65 langSanskrit -Deva; no region codes "mr", // 66 langMarathi -Deva; "bn", // 67 langBengali -Beng; "as", // 68 langAssamese -Beng; no region codes "gu", // 69 langGujarati -Gujr; "pa", // 70 langPunjabi -Guru; "or", // 71 langOriya -Orya; no region codes "ml", // 72 langMalayalam -Mlym; no region codes "kn", // 73 langKannada -Knda; no region codes "ta", // 74 langTamil -Taml; no region codes "te", // 75 langTelugu -Telu; no region codes "si", // 76 langSinhalese -Sinh; no region codes "my", // 77 langBurmese -Mymr; no region codes "km", // 78 langKhmer -Khmr; no region codes "lo", // 79 langLao -Laoo; no region codes 
"vi", // 80 langVietnamese -Latn; "id", // 81 langIndonesian -Latn; no region codes "tl", // 82 langTagalog -Latn; no region codes "ms", // 83 langMalayRoman -Latn; no region codes # "ms" "ms-Arab", // 84 langMalayArabic -Arab; no region codes # "ms" "am", // 85 langAmharic -Ethi; no region codes "ti", // 86 langTigrinya -Ethi; no region codes "om", // 87 langOromo -Ethi; no region codes "so", // 88 langSomali -Latn; no region codes "sw", // 89 langSwahili -Latn; no region codes "rw", // 90 langKinyarwanda -Latn; no region codes "rn", // 91 langRundi -Latn; no region codes "ny", // 92 langNyanja/Chewa -Latn; no region codes # "" "mg", // 93 langMalagasy -Latn; no region codes "eo", // 94 langEsperanto -Latn; NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 95 to 105 (gap) NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 106 to 116 (gap) NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 117 to 127 (gap) "cy", // 128 langWelsh -Latn; "eu", // 129 langBasque -Latn; no region codes "ca", // 130 langCatalan -Latn; "la", // 131 langLatin -Latn; no region codes "qu", // 132 langQuechua -Latn; no region codes "gn", // 133 langGuarani -Latn; no region codes "ay", // 134 langAymara -Latn; no region codes "tt-Cyrl", // 135 langTatar -Cyrl; no region codes "ug", // 136 langUighur -Arab; no region codes "dz", // 137 langDzongkha -Tibt; "jv", // 138 langJavaneseRom -Latn; no region codes "su", // 139 langSundaneseRom -Latn; no region codes "gl", // 140 langGalician -Latn; no region codes "af", // 141 langAfrikaans -Latn; "br", // 142 langBreton -Latn; "iu", // 143 langInuktitut -Cans; "gd", // 144 langScottishGaelic; "gv", // 145 langManxGaelic -Latn; "ga-Latg", // 146 langIrishGaelicScript -Latn-dots; # "ga" // <xx> "to", // 147 langTongan -Latn; "grc", // 148 langGreekAncient -Grek-poly; # "el" "kl", // 149 langGreenlandic -Latn; "az-Latn", // 150 langAzerbaijanRoman -Latn; no region codes # "az" "nn", // 151 langNynorsk -Latn; # (no entry) }; enum 
{ kNumLangCodeToLocaleString = sizeof(langCodeToLocaleString)/sizeof(char *) }; static const KeyStringToResultString oldAppleLocaleToCanonical[] = { // Map obsolete/old-style Apple strings to canonical // Must be sorted according to how strcmp compares the strings in the first column // // non-canonical canonical [ comment ] # source/reason for non-canonical string // string string // ------------- --------- { "Afrikaans", "af" }, // # __CFBundleLanguageNamesArray { "Albanian", "sq" }, // # __CFBundleLanguageNamesArray { "Amharic", "am" }, // # __CFBundleLanguageNamesArray { "Arabic", "ar" }, // # __CFBundleLanguageNamesArray { "Armenian", "hy" }, // # __CFBundleLanguageNamesArray { "Assamese", "as" }, // # __CFBundleLanguageNamesArray { "Aymara", "ay" }, // # __CFBundleLanguageNamesArray { "Azerbaijani", "az" }, // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn") { "Basque", "eu" }, // # __CFBundleLanguageNamesArray { "Belarusian", "be" }, // # handle other names { "Belorussian", "be" }, // # handle other names { "Bengali", "bn" }, // # __CFBundleLanguageNamesArray { "Brazilian Portugese", "pt-BR" }, // # from Installer.app Info.plist IFLanguages key, misspelled { "Brazilian Portuguese", "pt-BR" }, // # correct spelling for above { "Breton", "br" }, // # __CFBundleLanguageNamesArray { "Bulgarian", "bg" }, // # __CFBundleLanguageNamesArray { "Burmese", "my" }, // # __CFBundleLanguageNamesArray { "Byelorussian", "be" }, // # __CFBundleLanguageNamesArray { "Catalan", "ca" }, // # __CFBundleLanguageNamesArray { "Chewa", "ny" }, // # handle other names { "Chichewa", "ny" }, // # handle other names { "Chinese", "zh" }, // -Hans,-Hant? 
# __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans") { "Chinese, Simplified", "zh-Hans" }, // # from Installer.app Info.plist IFLanguages key { "Chinese, Traditional", "zh-Hant" }, // # correct spelling for below { "Chinese, Tradtional", "zh-Hant" }, // # from Installer.app Info.plist IFLanguages key, misspelled { "Croatian", "hr" }, // # __CFBundleLanguageNamesArray { "Czech", "cs" }, // # __CFBundleLanguageNamesArray { "Danish", "da" }, // # __CFBundleLanguageNamesArray { "Dutch", "nl" }, // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE") { "Dzongkha", "dz" }, // # __CFBundleLanguageNamesArray { "English", "en" }, // # __CFBundleLanguageNamesArray { "Esperanto", "eo" }, // # __CFBundleLanguageNamesArray { "Estonian", "et" }, // # __CFBundleLanguageNamesArray { "Faroese", "fo" }, // # __CFBundleLanguageNamesArray { "Farsi", "fa" }, // # __CFBundleLanguageNamesArray { "Finnish", "fi" }, // # __CFBundleLanguageNamesArray { "Flemish", "nl-BE" }, // # handle other names { "French", "fr" }, // # __CFBundleLanguageNamesArray { "Galician", "gl" }, // # __CFBundleLanguageNamesArray { "Gallegan", "gl" }, // # handle other names { "Georgian", "ka" }, // # __CFBundleLanguageNamesArray { "German", "de" }, // # __CFBundleLanguageNamesArray { "Greek", "el" }, // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc") { "Greenlandic", "kl" }, // # __CFBundleLanguageNamesArray { "Guarani", "gn" }, // # __CFBundleLanguageNamesArray { "Gujarati", "gu" }, // # __CFBundleLanguageNamesArray { "Hawaiian", "haw" }, // # handle new languages { "Hebrew", "he" }, // # __CFBundleLanguageNamesArray { "Hindi", "hi" }, // # __CFBundleLanguageNamesArray { "Hungarian", "hu" }, // # __CFBundleLanguageNamesArray { "Icelandic", "is" }, // # __CFBundleLanguageNamesArray { "Indonesian", "id" }, // # __CFBundleLanguageNamesArray { "Inuktitut", "iu" }, // # __CFBundleLanguageNamesArray { "Irish", "ga" }, // # 
__CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots") { "Italian", "it" }, // # __CFBundleLanguageNamesArray { "Japanese", "ja" }, // # __CFBundleLanguageNamesArray { "Javanese", "jv" }, // # __CFBundleLanguageNamesArray { "Kalaallisut", "kl" }, // # handle other names { "Kannada", "kn" }, // # __CFBundleLanguageNamesArray { "Kashmiri", "ks" }, // # __CFBundleLanguageNamesArray { "Kazakh", "kk" }, // # __CFBundleLanguageNamesArray { "Khmer", "km" }, // # __CFBundleLanguageNamesArray { "Kinyarwanda", "rw" }, // # __CFBundleLanguageNamesArray { "Kirghiz", "ky" }, // # __CFBundleLanguageNamesArray { "Korean", "ko" }, // # __CFBundleLanguageNamesArray { "Kurdish", "ku" }, // # __CFBundleLanguageNamesArray { "Lao", "lo" }, // # __CFBundleLanguageNamesArray { "Latin", "la" }, // # __CFBundleLanguageNamesArray { "Latvian", "lv" }, // # __CFBundleLanguageNamesArray { "Lithuanian", "lt" }, // # __CFBundleLanguageNamesArray { "Macedonian", "mk" }, // # __CFBundleLanguageNamesArray { "Malagasy", "mg" }, // # __CFBundleLanguageNamesArray { "Malay", "ms" }, // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab") { "Malayalam", "ml" }, // # __CFBundleLanguageNamesArray { "Maltese", "mt" }, // # __CFBundleLanguageNamesArray { "Manx", "gv" }, // # __CFBundleLanguageNamesArray { "Marathi", "mr" }, // # __CFBundleLanguageNamesArray { "Moldavian", "mo" }, // # __CFBundleLanguageNamesArray { "Mongolian", "mn" }, // -Mong,-Cyrl? 
# __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl") { "Nepali", "ne" }, // # __CFBundleLanguageNamesArray { "Norwegian", "nb" }, // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no") { "Nyanja", "ny" }, // # __CFBundleLanguageNamesArray { "Nynorsk", "nn" }, // # handle other names (no entry in __CFBundleLanguageNamesArray) { "Oriya", "or" }, // # __CFBundleLanguageNamesArray { "Oromo", "om" }, // # __CFBundleLanguageNamesArray { "Panjabi", "pa" }, // # handle other names { "Pashto", "ps" }, // # __CFBundleLanguageNamesArray { "Persian", "fa" }, // # handle other names { "Polish", "pl" }, // # __CFBundleLanguageNamesArray { "Portuguese", "pt" }, // # __CFBundleLanguageNamesArray { "Portuguese, Brazilian", "pt-BR" }, // # handle other names { "Punjabi", "pa" }, // # __CFBundleLanguageNamesArray { "Pushto", "ps" }, // # handle other names { "Quechua", "qu" }, // # __CFBundleLanguageNamesArray { "Romanian", "ro" }, // # __CFBundleLanguageNamesArray { "Ruanda", "rw" }, // # handle other names { "Rundi", "rn" }, // # __CFBundleLanguageNamesArray { "Russian", "ru" }, // # __CFBundleLanguageNamesArray { "Sami", "se" }, // # __CFBundleLanguageNamesArray { "Sanskrit", "sa" }, // # __CFBundleLanguageNamesArray { "Scottish", "gd" }, // # __CFBundleLanguageNamesArray { "Serbian", "sr" }, // # __CFBundleLanguageNamesArray { "Simplified Chinese", "zh-Hans" }, // # handle other names { "Sindhi", "sd" }, // # __CFBundleLanguageNamesArray { "Sinhalese", "si" }, // # __CFBundleLanguageNamesArray { "Slovak", "sk" }, // # __CFBundleLanguageNamesArray { "Slovenian", "sl" }, // # __CFBundleLanguageNamesArray { "Somali", "so" }, // # __CFBundleLanguageNamesArray { "Spanish", "es" }, // # __CFBundleLanguageNamesArray { "Sundanese", "su" }, // # __CFBundleLanguageNamesArray { "Swahili", "sw" }, // # __CFBundleLanguageNamesArray { "Swedish", "sv" }, // # __CFBundleLanguageNamesArray { "Tagalog", "tl" }, // # __CFBundleLanguageNamesArray { 
"Tajik", "tg" }, // # handle other names { "Tajiki", "tg" }, // # __CFBundleLanguageNamesArray { "Tamil", "ta" }, // # __CFBundleLanguageNamesArray { "Tatar", "tt" }, // # __CFBundleLanguageNamesArray { "Telugu", "te" }, // # __CFBundleLanguageNamesArray { "Thai", "th" }, // # __CFBundleLanguageNamesArray { "Tibetan", "bo" }, // # __CFBundleLanguageNamesArray { "Tigrinya", "ti" }, // # __CFBundleLanguageNamesArray { "Tongan", "to" }, // # __CFBundleLanguageNamesArray { "Traditional Chinese", "zh-Hant" }, // # handle other names { "Turkish", "tr" }, // # __CFBundleLanguageNamesArray { "Turkmen", "tk" }, // # __CFBundleLanguageNamesArray { "Uighur", "ug" }, // # __CFBundleLanguageNamesArray { "Ukrainian", "uk" }, // # __CFBundleLanguageNamesArray { "Urdu", "ur" }, // # __CFBundleLanguageNamesArray { "Uzbek", "uz" }, // # __CFBundleLanguageNamesArray { "Vietnamese", "vi" }, // # __CFBundleLanguageNamesArray { "Welsh", "cy" }, // # __CFBundleLanguageNamesArray { "Yiddish", "yi" }, // # __CFBundleLanguageNamesArray { "ar_??", "ar" }, // # from old MapScriptInfoAndISOCodes { "az.Ar", "az-Arab" }, // # from old LocaleRefGetPartString { "az.Cy", "az-Cyrl" }, // # from old LocaleRefGetPartString { "az.La", "az-Latn" }, // # from old LocaleRefGetPartString { "be_??", "be_BY" }, // # from old MapScriptInfoAndISOCodes { "bn_??", "bn" }, // # from old LocaleRefGetPartString { "bo_??", "bo" }, // # from old MapScriptInfoAndISOCodes { "br_??", "br" }, // # from old MapScriptInfoAndISOCodes { "cy_??", "cy" }, // # from old MapScriptInfoAndISOCodes { "de-96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> { "de_96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> { "de_??", "de-1996" }, // # from old MapScriptInfoAndISOCodes { "el.El-P", "grc" }, // # from old LocaleRefGetPartString { "en-ascii", "en_001" }, // # from earlier version of tables in this file! 
{ "en_??", "en_001" }, // # from old MapScriptInfoAndISOCodes { "eo_??", "eo" }, // # from old MapScriptInfoAndISOCodes { "es_??", "es_419" }, // # from old MapScriptInfoAndISOCodes { "es_XL", "es_419" }, // # from earlier version of tables in this file! { "fr_??", "fr_001" }, // # from old MapScriptInfoAndISOCodes { "ga-dots", "ga-Latg" }, // # from earlier version of tables in this file! // <1.8> { "ga-dots_IE", "ga-Latg_IE" }, // # from earlier version of tables in this file! // <1.8> { "ga.Lg", "ga-Latg" }, // # from old LocaleRefGetPartString // <1.8> { "ga.Lg_IE", "ga-Latg_IE" }, // # from old LocaleRefGetPartString // <1.8> { "gd_??", "gd" }, // # from old MapScriptInfoAndISOCodes { "gv_??", "gv" }, // # from old MapScriptInfoAndISOCodes { "jv.La", "jv" }, // # logical extension // <1.9> { "jw.La", "jv" }, // # from old LocaleRefGetPartString { "kk.Cy", "kk" }, // # from old LocaleRefGetPartString { "kl.La", "kl" }, // # from old LocaleRefGetPartString { "kl.La_GL", "kl_GL" }, // # from old LocaleRefGetPartString // <1.9> { "lp_??", "se" }, // # from old MapScriptInfoAndISOCodes { "mk_??", "mk_MK" }, // # from old MapScriptInfoAndISOCodes { "mn.Cy", "mn-Cyrl" }, // # from old LocaleRefGetPartString { "mn.Mn", "mn-Mong" }, // # from old LocaleRefGetPartString { "ms.Ar", "ms-Arab" }, // # from old LocaleRefGetPartString { "ms.La", "ms" }, // # from old LocaleRefGetPartString { "nl-be", "nl-BE" }, // # from old LocaleRefGetPartString { "nl-be_BE", "nl_BE" }, // # from old LocaleRefGetPartString // { "no-bok_NO", "nb_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical // { "no-nyn_NO", "nn_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical // { "nya", "ny" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical { "pa_??", "pa" }, // # from old LocaleRefGetPartString { "sa.Dv", "sa" }, // # from old LocaleRefGetPartString { "sl_??", "sl_SI" }, // # from old 
MapScriptInfoAndISOCodes { "sr_??", "sr_CS" }, // # from old MapScriptInfoAndISOCodes // <1.18> { "su.La", "su" }, // # from old LocaleRefGetPartString { "yi.He", "yi" }, // # from old LocaleRefGetPartString { "zh-simp", "zh-Hans" }, // # from earlier version of tables in this file! { "zh-trad", "zh-Hant" }, // # from earlier version of tables in this file! { "zh.Ha-S", "zh-Hans" }, // # from old LocaleRefGetPartString { "zh.Ha-S_CN", "zh_CN" }, // # from old LocaleRefGetPartString { "zh.Ha-T", "zh-Hant" }, // # from old LocaleRefGetPartString { "zh.Ha-T_TW", "zh_TW" }, // # from old LocaleRefGetPartString }; enum { kNumOldAppleLocaleToCanonical = sizeof(oldAppleLocaleToCanonical)/sizeof(KeyStringToResultString) }; static const KeyStringToResultString localeStringPrefixToCanonical[] = { // Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to 2-letter ISO 639 code. // (special cases for 'sh' handled separately) // First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column. 
// // non-canonical canonical [ comment ] # source/reason for non-canonical string // prefix prefix // ------------- --------- { "afr", "af" }, // Afrikaans { "alb", "sq" }, // Albanian { "amh", "am" }, // Amharic { "ara", "ar" }, // Arabic { "arm", "hy" }, // Armenian { "asm", "as" }, // Assamese { "aym", "ay" }, // Aymara { "aze", "az" }, // Azerbaijani { "baq", "eu" }, // Basque { "bel", "be" }, // Belarusian { "ben", "bn" }, // Bengali { "bih", "bh" }, // Bihari { "bod", "bo" }, // Tibetan { "bos", "bs" }, // Bosnian { "bre", "br" }, // Breton { "bul", "bg" }, // Bulgarian { "bur", "my" }, // Burmese { "cat", "ca" }, // Catalan { "ces", "cs" }, // Czech { "che", "ce" }, // Chechen { "chi", "zh" }, // Chinese { "cor", "kw" }, // Cornish { "cos", "co" }, // Corsican { "cym", "cy" }, // Welsh { "cze", "cs" }, // Czech { "dan", "da" }, // Danish { "deu", "de" }, // German { "dut", "nl" }, // Dutch { "dzo", "dz" }, // Dzongkha { "ell", "el" }, // Greek, Modern (1453-) { "eng", "en" }, // English { "epo", "eo" }, // Esperanto { "est", "et" }, // Estonian { "eus", "eu" }, // Basque { "fao", "fo" }, // Faroese { "fas", "fa" }, // Persian { "fin", "fi" }, // Finnish { "fra", "fr" }, // French { "fre", "fr" }, // French { "geo", "ka" }, // Georgian { "ger", "de" }, // German { "gla", "gd" }, // Gaelic,Scottish { "gle", "ga" }, // Irish { "glg", "gl" }, // Gallegan { "glv", "gv" }, // Manx { "gre", "el" }, // Greek, Modern (1453-) { "grn", "gn" }, // Guarani { "guj", "gu" }, // Gujarati { "heb", "he" }, // Hebrew { "hin", "hi" }, // Hindi { "hrv", "hr" }, // Croatian { "hun", "hu" }, // Hungarian { "hye", "hy" }, // Armenian { "i-hak", "zh-hakka" }, // Hakka # deprecated RFC 3066 { "i-lux", "lb" }, // Luxembourgish # deprecated RFC 3066 { "i-navajo", "nv" }, // Navajo # deprecated RFC 3066 { "ice", "is" }, // Icelandic { "iku", "iu" }, // Inuktitut { "ile", "ie" }, // Interlingue { "in", "id" }, // Indonesian # deprecated 639 code in -> id (1989) { "ina", "ia" }, // 
Interlingua { "ind", "id" }, // Indonesian { "isl", "is" }, // Icelandic { "ita", "it" }, // Italian { "iw", "he" }, // Hebrew # deprecated 639 code iw -> he (1989) { "jav", "jv" }, // Javanese { "jaw", "jv" }, // Javanese # deprecated 639 code jaw -> jv (2001) { "ji", "yi" }, // Yiddish # deprecated 639 code ji -> yi (1989) { "jpn", "ja" }, // Japanese { "kal", "kl" }, // Kalaallisut { "kan", "kn" }, // Kannada { "kas", "ks" }, // Kashmiri { "kat", "ka" }, // Georgian { "kaz", "kk" }, // Kazakh { "khm", "km" }, // Khmer { "kin", "rw" }, // Kinyarwanda { "kir", "ky" }, // Kirghiz { "kor", "ko" }, // Korean { "kur", "ku" }, // Kurdish { "lao", "lo" }, // Lao { "lat", "la" }, // Latin { "lav", "lv" }, // Latvian { "lit", "lt" }, // Lithuanian { "ltz", "lb" }, // Letzeburgesch { "mac", "mk" }, // Macedonian { "mal", "ml" }, // Malayalam { "mar", "mr" }, // Marathi { "may", "ms" }, // Malay { "mkd", "mk" }, // Macedonian { "mlg", "mg" }, // Malagasy { "mlt", "mt" }, // Maltese { "mol", "mo" }, // Moldavian { "mon", "mn" }, // Mongolian { "msa", "ms" }, // Malay { "mya", "my" }, // Burmese { "nep", "ne" }, // Nepali { "nld", "nl" }, // Dutch { "nno", "nn" }, // Norwegian Nynorsk { "no", "nb" }, // Norwegian generic # ambiguous 639 code no -> nb { "no-bok", "nb" }, // Norwegian Bokmal # deprecated RFC 3066 tag - used in old LocaleRefGetPartString { "no-nyn", "nn" }, // Norwegian Nynorsk # deprecated RFC 3066 tag - used in old LocaleRefGetPartString { "nob", "nb" }, // Norwegian Bokmal { "nor", "nb" }, // Norwegian generic # ambiguous 639 code nor -> nb { "nya", "ny" }, // Nyanja/Chewa/Chichewa # 3-letter code used in old LocaleRefGetPartString { "oci", "oc" }, // Occitan/Provencal { "ori", "or" }, // Oriya { "orm", "om" }, // Oromo,Galla { "pan", "pa" }, // Panjabi { "per", "fa" }, // Persian { "pol", "pl" }, // Polish { "por", "pt" }, // Portuguese { "pus", "ps" }, // Pushto { "que", "qu" }, // Quechua { "roh", "rm" }, // Raeto-Romance { "ron", "ro" }, // Romanian { 
"rum", "ro" }, // Romanian { "run", "rn" }, // Rundi { "rus", "ru" }, // Russian { "san", "sa" }, // Sanskrit { "scc", "sr" }, // Serbian { "scr", "hr" }, // Croatian { "sin", "si" }, // Sinhalese { "slk", "sk" }, // Slovak { "slo", "sk" }, // Slovak { "slv", "sl" }, // Slovenian { "sme", "se" }, // Sami,Northern { "snd", "sd" }, // Sindhi { "som", "so" }, // Somali { "spa", "es" }, // Spanish { "sqi", "sq" }, // Albanian { "srp", "sr" }, // Serbian { "sun", "su" }, // Sundanese { "swa", "sw" }, // Swahili { "swe", "sv" }, // Swedish { "tam", "ta" }, // Tamil { "tat", "tt" }, // Tatar { "tel", "te" }, // Telugu { "tgk", "tg" }, // Tajik { "tgl", "tl" }, // Tagalog { "tha", "th" }, // Thai { "tib", "bo" }, // Tibetan { "tir", "ti" }, // Tigrinya { "ton", "to" }, // Tongan { "tuk", "tk" }, // Turkmen { "tur", "tr" }, // Turkish { "uig", "ug" }, // Uighur { "ukr", "uk" }, // Ukrainian { "urd", "ur" }, // Urdu { "uzb", "uz" }, // Uzbek { "vie", "vi" }, // Vietnamese { "wel", "cy" }, // Welsh { "yid", "yi" }, // Yiddish { "zho", "zh" }, // Chinese }; enum { kNumLocaleStringPrefixToCanonical = sizeof(localeStringPrefixToCanonical)/sizeof(KeyStringToResultString) }; static const SpecialCaseUpdates specialCases[] = { // Data for special cases // a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was // replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after // the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! So now CS is // ambiguous. We guess as follows: If we see CS but a language of cs or sk, we change CS to CZ or SK. // Otherwise we leave CS alone (taking it to mean Serbia and Montenegro), and we change the old code YU // to CS to match (see the -YU entry in the table below, marked <1.18>). // b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and // deprecated in 2000. 
We guess which one to map it to as follows: If there is a region tag of HR we use // hr; if there is a region tag of CS (formerly YU) we use sr; else we do not change it (not enough info). // c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the // "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB). { NULL, "-UK", "GB", NULL, NULL }, // always change UK to GB (UK is "exceptionally reserved" to mean GB) { NULL, "-TP", "TL", NULL, NULL }, // always change TP to TL (East Timor, code changed 2002-05) { "cs", "-CS", "CZ", NULL, NULL }, // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic) { "sk", "-CS", "SK", NULL, NULL }, // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia) { NULL, "-YU", "CS", NULL, NULL }, // then always change YU to CS (map old Yugoslavia code to new 2003-07 ISO code // for Serbia & Montenegro per RFC3066bis & ICU) // <1.18> // Note: do this after fixing CS for cs/sk as above. { "sh", "-HR", "hr", "-CS", "sr" }, // if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian) if we find // HR (Croatia) or to 'sr' (Serbian) if we find CS (Serbia & Montenegro, Yugoslavia). // <1.18> // Note: Do this after changing YU to CS as above. { NULL, NULL, NULL, NULL, NULL } // terminator }; static const KeyStringToResultString localeStringRegionToDefaults[] = { // For some region-code suffixes, there are default substrings to strip off for canonical string. 
// Must be sorted according to how strcmp compares the strings in the first column // // region default writing // suffix system tags, strip comment // -------- ------------- --------- { "_CN", "-Hans" }, // mainland China, default is simplified { "_HK", "-Hant" }, // Hong Kong, default is traditional { "_MO", "-Hant" }, // Macao, default is traditional { "_SG", "-Hans" }, // Singapore, default is simplified { "_TW", "-Hant" }, // Taiwan, default is traditional }; enum { kNumLocaleStringRegionToDefaults = sizeof(localeStringRegionToDefaults)/sizeof(KeyStringToResultString) }; static const KeyStringToResultString localeStringPrefixToDefaults[] = { // For some initial portions of language tag, there are default substrings to strip off for canonical string. // Must be sorted according to how strcmp compares the strings in the first column // // language default writing // tag prefix system tags, strip comment // -------- ------------- --------- { "ab-", "-Cyrl" }, // Abkhazian { "af-", "-Latn" }, // Afrikaans { "am-", "-Ethi" }, // Amharic { "ar-", "-Arab" }, // Arabic { "as-", "-Beng" }, // Assamese { "ay-", "-Latn" }, // Aymara { "be-", "-Cyrl" }, // Belarusian { "bg-", "-Cyrl" }, // Bulgarian { "bn-", "-Beng" }, // Bengali { "bo-", "-Tibt" }, // Tibetan (? not Suppress-Script) { "br-", "-Latn" }, // Breton (? 
//   not Suppress-Script)
    { "bs-", "-Latn" },     // Bosnian
    { "ca-", "-Latn" },     // Catalan
    { "cs-", "-Latn" },     // Czech
    { "cy-", "-Latn" },     // Welsh
    { "da-", "-Latn" },     // Danish
    { "de-", "-Latn -1901" },   // German, traditional orthography
    { "dv-", "-Thaa" },     // Divehi/Maldivian
    { "dz-", "-Tibt" },     // Dzongkha
    { "el-", "-Grek" },     // Greek (modern, monotonic)
    { "en-", "-Latn" },     // English
    { "eo-", "-Latn" },     // Esperanto
    { "es-", "-Latn" },     // Spanish
    { "et-", "-Latn" },     // Estonian
    { "eu-", "-Latn" },     // Basque
    { "fa-", "-Arab" },     // Farsi
    { "fi-", "-Latn" },     // Finnish
    { "fo-", "-Latn" },     // Faroese
    { "fr-", "-Latn" },     // French
    { "ga-", "-Latn" },     // Irish
    { "gd-", "-Latn" },     // Scottish Gaelic (? not Suppress-Script)
    { "gl-", "-Latn" },     // Galician
    { "gn-", "-Latn" },     // Guarani
    { "gu-", "-Gujr" },     // Gujarati
    { "gv-", "-Latn" },     // Manx
    { "haw-", "-Latn" },    // Hawaiian (? not Suppress-Script)
    { "he-", "-Hebr" },     // Hebrew
    { "hi-", "-Deva" },     // Hindi
    { "hr-", "-Latn" },     // Croatian
    { "hu-", "-Latn" },     // Hungarian
    { "hy-", "-Armn" },     // Armenian
    { "id-", "-Latn" },     // Indonesian
    { "is-", "-Latn" },     // Icelandic
    { "it-", "-Latn" },     // Italian
    { "ja-", "-Jpan" },     // Japanese
    { "ka-", "-Geor" },     // Georgian
    { "kk-", "-Cyrl" },     // Kazakh
    { "kl-", "-Latn" },     // Kalaallisut/Greenlandic
    { "km-", "-Khmr" },     // Central Khmer
    { "kn-", "-Knda" },     // Kannada
    { "ko-", "-Hang" },     // Korean (? not Suppress-Script)
    { "kok-", "-Deva" },    // Konkani
    { "la-", "-Latn" },     // Latin
    { "lb-", "-Latn" },     // Luxembourgish
    { "lo-", "-Laoo" },     // Lao
    { "lt-", "-Latn" },     // Lithuanian
    { "lv-", "-Latn" },     // Latvian
    { "mg-", "-Latn" },     // Malagasy
    { "mk-", "-Cyrl" },     // Macedonian
    { "ml-", "-Mlym" },     // Malayalam
    { "mo-", "-Latn" },     // Moldavian
    { "mr-", "-Deva" },     // Marathi
    { "ms-", "-Latn" },     // Malay
    { "mt-", "-Latn" },     // Maltese
    { "my-", "-Mymr" },     // Burmese/Myanmar
    { "nb-", "-Latn" },     // Norwegian Bokmal
    { "ne-", "-Deva" },     // Nepali
    { "nl-", "-Latn" },     // Dutch
    { "nn-", "-Latn" },     // Norwegian Nynorsk
    { "ny-", "-Latn" },     // Chichewa/Nyanja
    { "om-", "-Latn" },     // Oromo
    { "or-", "-Orya" },     // Oriya
    { "pa-", "-Guru" },     // Punjabi
    { "pl-", "-Latn" },     // Polish
    { "ps-", "-Arab" },     // Pushto
    { "pt-", "-Latn" },     // Portuguese
    { "qu-", "-Latn" },     // Quechua
    { "rn-", "-Latn" },     // Rundi
    { "ro-", "-Latn" },     // Romanian
    { "ru-", "-Cyrl" },     // Russian
    { "rw-", "-Latn" },     // Kinyarwanda
    { "sa-", "-Deva" },     // Sanskrit (? not Suppress-Script)
    { "se-", "-Latn" },     // Sami (? not Suppress-Script)
    { "si-", "-Sinh" },     // Sinhala
    { "sk-", "-Latn" },     // Slovak
    { "sl-", "-Latn" },     // Slovenian
    { "so-", "-Latn" },     // Somali
    { "sq-", "-Latn" },     // Albanian
    { "sv-", "-Latn" },     // Swedish
    { "sw-", "-Latn" },     // Swahili
    { "ta-", "-Taml" },     // Tamil
    { "te-", "-Telu" },     // Telugu
    { "th-", "-Thai" },     // Thai
    { "ti-", "-Ethi" },     // Tigrinya
    { "tl-", "-Latn" },     // Tagalog
    { "tn-", "-Latn" },     // Tswana
    { "to-", "-Latn" },     // Tonga of Tonga Islands
    { "tr-", "-Latn" },     // Turkish
    { "uk-", "-Cyrl" },     // Ukrainian
    { "ur-", "-Arab" },     // Urdu
    { "vi-", "-Latn" },     // Vietnamese
    { "wo-", "-Latn" },     // Wolof
    { "xh-", "-Latn" },     // Xhosa
    { "yi-", "-Hebr" },     // Yiddish
    { "zh-", "-Hani" },     // Chinese (? not Suppress-Script)
    { "zu-", "-Latn" },     // Zulu
};
// Number of entries in localeStringPrefixToDefaults.
enum {
    kNumLocaleStringPrefixToDefaults = sizeof(localeStringPrefixToDefaults)/sizeof(KeyStringToResultString)
};

static const KeyStringToResultString appleLocaleToLanguageString[] = {
// Map locale strings that Apple uses as language IDs to real language strings.
// Must be sorted according to how strcmp compares the strings in the first column.
// Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now
// handled in the code. <1.19>
//
//  locale          lang            [ comment ]
//  string          string
//  -------         -------
    { "en_US_POSIX", "en-US-POSIX" },   // POSIX locale, need as language string // <1.17> [3840752]
    { "zh_CN", "zh-Hans" },     // mainland China => simplified
    { "zh_HK", "zh-Hant" },     // Hong Kong => traditional, not currently used
    { "zh_MO", "zh-Hant" },     // Macao => traditional, not currently used
    { "zh_SG", "zh-Hans" },     // Singapore => simplified, not currently used
    { "zh_TW", "zh-Hant" },     // Taiwan => traditional
};
// Number of entries in appleLocaleToLanguageString.
enum {
    kNumAppleLocaleToLanguageString = sizeof(appleLocaleToLanguageString)/sizeof(KeyStringToResultString)
};

static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle[] = {
// Map locale strings that Apple uses as language IDs to real language strings.
// Must be sorted according to how strcmp compares the strings in the first column.
// Unlike appleLocaleToLanguageString, this table still lists the ll_RR -> ll-RR transforms explicitly.
//
//  locale          lang            [ comment ]
//  string          string
//  -------         -------
    { "de_AT", "de-AT" },       // Austrian German
    { "de_CH", "de-CH" },       // Swiss German
//  { "de_DE", "de-DE" },       // German for Germany (default), not currently used
    { "en_AU", "en-AU" },       // Australian English
    { "en_CA", "en-CA" },       // Canadian English
    { "en_GB", "en-GB" },       // British English
//  { "en_IE", "en-IE" },       // Irish English, not currently used
    { "en_US", "en-US" },       // U.S.
//                                 English
    { "en_US_POSIX", "en-US-POSIX" },   // POSIX locale, need as language string // <1.17> [3840752]
//  { "fr_BE", "fr-BE" },       // Belgian French, not currently used
    { "fr_CA", "fr-CA" },       // Canadian French
    { "fr_CH", "fr-CH" },       // Swiss French
//  { "fr_FR", "fr-FR" },       // French for France (default), not currently used
    { "nl_BE", "nl-BE" },       // Flemish = Vlaams, Dutch for Belgium
//  { "nl_NL", "nl-NL" },       // Dutch for Netherlands (default), not currently used
    { "pt_BR", "pt-BR" },       // Brazilian Portuguese
    { "pt_PT", "pt-PT" },       // Portuguese for Portugal
    { "zh_CN", "zh-Hans" },     // mainland China => simplified
    { "zh_HK", "zh-Hant" },     // Hong Kong => traditional, not currently used
    { "zh_MO", "zh-Hant" },     // Macao => traditional, not currently used
    { "zh_SG", "zh-Hans" },     // Singapore => simplified, not currently used
    { "zh_TW", "zh-Hant" },     // Taiwan => traditional
};
// Number of entries in appleLocaleToLanguageStringForCFBundle.
enum {
    kNumAppleLocaleToLanguageStringForCFBundle = sizeof(appleLocaleToLanguageStringForCFBundle)/sizeof(KeyStringToResultString)
};

// One row of localeToLegacyCodes: a reduced locale string together with the numeric
// region code, language code and string encoding associated with it.
struct LocaleToLegacyCodes {
    const char *        locale;     // reduced to language plus one other component (script, region, variant), separators normalized to '_'
    RegionCode          regCode;
    LangCode            langCode;
    CFStringEncoding    encoding;
};
typedef struct LocaleToLegacyCodes LocaleToLegacyCodes;

// Each number below is annotated inline with the symbolic ver*/lang*/encoding name it stands for;
// the trailing comment gives the script tag or explains the chosen default.
// NOTE(review): rows are listed in strcmp order of the locale column, and -1 is used where no
// ver*/lang* name is given -- presumably meaning "no corresponding legacy code". The lookup code is
// outside this excerpt; confirm both points there before adding rows.
static const LocaleToLegacyCodes localeToLegacyCodes[] = {
    //  locale          RegionCode                  LangCode                        CFStringEncoding
    { "af"/*ZA*/,       102/*verAfrikaans*/,        141/*langAfrikaans*/,           0/*Roman*/ },           // Latn
    { "am",             -1,                         85/*langAmharic*/,              28/*Ethiopic*/ },       // Ethi
    { "ar",             16/*verArabic*/,            12/*langArabic*/,               4/*Arabic*/ },          // Arab;
    { "as",             -1,                         68/*langAssamese*/,             13/*Bengali*/ },        // Beng;
    { "ay",             -1,                         134/*langAymara*/,              0/*Roman*/ },           // Latn;
    { "az",             -1,                         49/*langAzerbaijani*/,          7/*Cyrillic*/ },        // assume "az" defaults to -Cyrl
    { "az_Arab",        -1,                         50/*langAzerbaijanAr*/,         4/*Arabic*/ },          // Arab;
    { "az_Cyrl",        -1,                         49/*langAzerbaijani*/,          7/*Cyrillic*/ },        // Cyrl;
    { "az_Latn",        -1,                         150/*langAzerbaijanRoman*/,     0/*Roman*/ },           // Latn;
    { "be"/*BY*/,       61/*verBelarus*/,           46/*langBelorussian*/,          7/*Cyrillic*/ },        // Cyrl;
    { "bg"/*BG*/,       72/*verBulgaria*/,          44/*langBulgarian*/,            7/*Cyrillic*/ },        // Cyrl;
    { "bn",             60/*verBengali*/,           67/*langBengali*/,              13/*Bengali*/ },        // Beng;
    { "bo",             105/*verTibetan*/,          63/*langTibetan*/,              26/*Tibetan*/ },        // Tibt;
    { "br",             77/*verBreton*/,            142/*langBreton*/,              39/*Celtic*/ },         // Latn;
    { "ca"/*ES*/,       73/*verCatalonia*/,         130/*langCatalan*/,             0/*Roman*/ },           // Latn;
    { "cs"/*CZ*/,       56/*verCzech*/,             38/*langCzech*/,                29/*CentralEurRoman*/ },    // Latn;
    { "cy",             79/*verWelsh*/,             128/*langWelsh*/,               39/*Celtic*/ },         // Latn;
    { "da"/*DK*/,       9/*verDenmark*/,            7/*langDanish*/,                0/*Roman*/ },           // Latn;
    { "de",             3/*verGermany*/,            2/*langGerman*/,                0/*Roman*/ },           // assume "de" defaults to verGermany
    { "de_1996",        70/*verGermanReformed*/,    2/*langGerman*/,                0/*Roman*/ },
    { "de_AT",          92/*verAustria*/,           2/*langGerman*/,                0/*Roman*/ },
    { "de_CH",          19/*verGrSwiss*/,           2/*langGerman*/,                0/*Roman*/ },
    { "de_DE",          3/*verGermany*/,            2/*langGerman*/,                0/*Roman*/ },
    { "dz"/*BT*/,       83/*verBhutan*/,            137/*langDzongkha*/,            26/*Tibetan*/ },        // Tibt;
    { "el",             20/*verGreece*/,            14/*langGreek*/,                6/*Greek*/ },           // assume "el" defaults to verGreece
    { "el_CY",          23/*verCyprus*/,            14/*langGreek*/,                6/*Greek*/ },
    { "el_GR",          20/*verGreece*/,            14/*langGreek*/,                6/*Greek*/ },           // modern monotonic
    { "en",             0/*verUS*/,                 0/*langEnglish*/,               0/*Roman*/ },           // "en" defaults to verUS (per Chris Hansten)
    { "en_001",         37/*verInternational*/,     0/*langEnglish*/,               0/*Roman*/ },
    { "en_AU",          15/*verAustralia*/,         0/*langEnglish*/,               0/*Roman*/ },
    { "en_CA",          82/*verEngCanada*/,         0/*langEnglish*/,               0/*Roman*/ },
    { "en_GB",          2/*verBritain*/,            0/*langEnglish*/,               0/*Roman*/ },
    { "en_IE",          108/*verIrelandEnglish*/,   0/*langEnglish*/,               0/*Roman*/ },
    { "en_SG",          100/*verSingapore*/,        0/*langEnglish*/,               0/*Roman*/ },
    { "en_US",          0/*verUS*/,                 0/*langEnglish*/,               0/*Roman*/ },
    { "eo",             103/*verEsperanto*/,        94/*langEsperanto*/,            0/*Roman*/ },           // Latn;
    { "es",             8/*verSpain*/,              6/*langSpanish*/,               0/*Roman*/ },           // "es" defaults to verSpain (per Chris Hansten)
    { "es_419",         86/*verSpLatinAmerica*/,    6/*langSpanish*/,               0/*Roman*/ },           // new BCP 47 tag
    { "es_ES",          8/*verSpain*/,              6/*langSpanish*/,               0/*Roman*/ },
    { "es_MX",          86/*verSpLatinAmerica*/,    6/*langSpanish*/,               0/*Roman*/ },
    { "es_US",          86/*verSpLatinAmerica*/,    6/*langSpanish*/,               0/*Roman*/ },
    { "et"/*EE*/,       44/*verEstonia*/,           27/*langEstonian*/,             29/*CentralEurRoman*/ },
    { "eu",             -1,                         129/*langBasque*/,              0/*Roman*/ },           // Latn;
    { "fa"/*IR*/,       48/*verIran*/,              31/*langFarsi/Persian*/,        0x8C/*Farsi*/ },        // Arab;
    { "fi"/*FI*/,       17/*verFinland*/,           13/*langFinnish*/,              0/*Roman*/ },
    { "fo"/*FO*/,       47/*verFaroeIsl*/,          30/*langFaroese*/,              37/*Icelandic*/ },
    { "fr",             1/*verFrance*/,             1/*langFrench*/,                0/*Roman*/ },           // "fr" defaults to verFrance (per Chris Hansten)
    { "fr_001",         91/*verFrenchUniversal*/,   1/*langFrench*/,                0/*Roman*/ },
    { "fr_BE",          98/*verFrBelgium*/,         1/*langFrench*/,                0/*Roman*/ },
    { "fr_CA",          11/*verFrCanada*/,          1/*langFrench*/,                0/*Roman*/ },
    { "fr_CH",          18/*verFrSwiss*/,           1/*langFrench*/,                0/*Roman*/ },
    { "fr_FR",          1/*verFrance*/,             1/*langFrench*/,                0/*Roman*/ },
    { "ga"/*IE*/,       50/*verIreland*/,           35/*langIrishGaelic*/,          0/*Roman*/ },           // no dots (h after)
    { "ga_Latg"/*IE*/,  81/*verIrishGaelicScrip*/,  146/*langIrishGaelicScript*/,   40/*Gaelic*/ },         // using dots
    { "gd",             75/*verScottishGaelic*/,    144/*langScottishGaelic*/,      39/*Celtic*/ },
    { "gl",             -1,                         140/*langGalician*/,            0/*Roman*/ },           // Latn;
    { "gn",             -1,                         133/*langGuarani*/,             0/*Roman*/ },           // Latn;
    { "grc",            40/*verGreekAncient*/,      148/*langGreekAncient*/,        6/*Greek*/ },           // polytonic (MacGreek doesn't actually support it)
    { "gu"/*IN*/,       94/*verGujarati*/,          69/*langGujarati*/,             11/*Gujarati*/ },       // Gujr;
    { "gv",             76/*verManxGaelic*/,        145/*langManxGaelic*/,          39/*Celtic*/ },         // Latn;
    { "he"/*IL*/,       13/*verIsrael*/,            10/*langHebrew*/,               5/*Hebrew*/ },          // Hebr;
    { "hi"/*IN*/,       33/*verIndiaHindi*/,        21/*langHindi*/,                9/*Devanagari*/ },      // Deva;
    { "hr"/*HR*/,       68/*verCroatia*/,           18/*langCroatian*/,             36/*Croatian*/ },
    { "hu"/*HU*/,       43/*verHungary*/,           26/*langHungarian*/,            29/*CentralEurRoman*/ },
    { "hy"/*AM*/,       84/*verArmenian*/,          51/*langArmenian*/,             24/*Armenian*/ },       // Armn;
    { "id",             -1,                         81/*langIndonesian*/,           0/*Roman*/ },           // Latn;
    { "is"/*IS*/,       21/*verIceland*/,           15/*langIcelandic*/,            37/*Icelandic*/ },
    { "it",             4/*verItaly*/,              3/*langItalian*/,               0/*Roman*/ },           // "it" defaults to verItaly
    { "it_CH",          36/*verItalianSwiss*/,      3/*langItalian*/,               0/*Roman*/ },
    { "it_IT",          4/*verItaly*/,              3/*langItalian*/,               0/*Roman*/ },
    { "iu"/*CA*/,       78/*verNunavut*/,           143/*langInuktitut*/,           0xEC/*Inuit*/ },        // Cans;
    { "ja"/*JP*/,       14/*verJapan*/,             11/*langJapanese*/,             1/*Japanese*/ },        // Jpan;
    { "jv",             -1,                         138/*langJavaneseRom*/,         0/*Roman*/ },           // Latn;
    { "ka"/*GE*/,       85/*verGeorgian*/,          52/*langGeorgian*/,             23/*Georgian*/ },       // Geor;
    { "kk",             -1,                         48/*langKazakh*/,               7/*Cyrillic*/ },        // "kk" defaults to -Cyrl; also have -Latn, -Arab
    { "kl",             107/*verGreenland*/,        149/*langGreenlandic*/,         0/*Roman*/ },           // Latn;
    { "km",             -1,                         78/*langKhmer*/,                20/*Khmer*/ },          // Khmr;
    { "kn",             -1,                         73/*langKannada*/,              16/*Kannada*/ },        // Knda;
    { "ko"/*KR*/,       51/*verKorea*/,             23/*langKorean*/,               3/*Korean*/ },          // Hang;
    { "ks",             -1,                         61/*langKashmiri*/,             4/*Arabic*/ },          // Arab;
    { "ku",             -1,                         60/*langKurdish*/,              4/*Arabic*/ },          // Arab;
    { "ky",             -1,                         54/*langKirghiz*/,              7/*Cyrillic*/ },        // Cyrl; also -Latn, -Arab
    { "la",             -1,                         131/*langLatin*/,               0/*Roman*/ },           // Latn;
    { "lo",             -1,                         79/*langLao*/,                  22/*Laotian*/ },        // Laoo;
    { "lt"/*LT*/,       41/*verLithuania*/,         24/*langLithuanian*/,           29/*CentralEurRoman*/ },
    { "lv"/*LV*/,       45/*verLatvia*/,            28/*langLatvian*/,              29/*CentralEurRoman*/ },
    { "mg",             -1,                         93/*langMalagasy*/,             0/*Roman*/ },           // Latn;
    { "mk"/*MK*/,       67/*verMacedonian*/,        43/*langMacedonian*/,           7/*Cyrillic*/ },        // Cyrl;
    { "ml",             -1,                         72/*langMalayalam*/,            17/*Malayalam*/ },      // Mlym;
    { "mn",             -1,                         57/*langMongolian*/,            27/*Mongolian*/ },      // "mn" defaults to -Mong
    { "mn_Cyrl",        -1,                         58/*langMongolianCyr*/,         7/*Cyrillic*/ },        // Cyrl;
    { "mn_Mong",        -1,                         57/*langMongolian*/,            27/*Mongolian*/ },      // Mong;
    { "mo",             -1,                         53/*langMoldavian*/,            7/*Cyrillic*/ },        // Cyrl;
    { "mr"/*IN*/,       104/*verMarathi*/,          66/*langMarathi*/,              9/*Devanagari*/ },      // Deva;
    { "ms",             -1,                         83/*langMalayRoman*/,           0/*Roman*/ },           // "ms" defaults to -Latn;
    { "ms_Arab",        -1,                         84/*langMalayArabic*/,          4/*Arabic*/ },          // Arab;
    { "mt"/*MT*/,       22/*verMalta*/,             16/*langMaltese*/,              0/*Roman*/ },           // Latn;
    { "mul",            74/*verMultilingual*/,      -1,                             0 },
    { "my",             -1,                         77/*langBurmese*/,              19/*Burmese*/ },        // Mymr;
    { "nb"/*NO*/,       12/*verNorway*/,            9/*langNorwegian*/,             0/*Roman*/ },
    { "ne"/*NP*/,       106/*verNepal*/,            64/*langNepali*/,               9/*Devanagari*/ },      // Deva;
    { "nl",             5/*verNetherlands*/,        4/*langDutch*/,                 0/*Roman*/ },           // "nl" defaults to verNetherlands
    { "nl_BE",          6/*verFlemish*/,            34/*langFlemish*/,              0/*Roman*/ },
    { "nl_NL",          5/*verNetherlands*/,        4/*langDutch*/,                 0/*Roman*/ },
    { "nn"/*NO*/,       101/*verNynorsk*/,          151/*langNynorsk*/,             0/*Roman*/ },
    { "ny",             -1,                         92/*langNyanja/Chewa*/,         0/*Roman*/ },           // Latn;
    { "om",             -1,                         87/*langOromo*/,                28/*Ethiopic*/ },       // Ethi;
    { "or",             -1,                         71/*langOriya*/,                12/*Oriya*/ },          // Orya;
    { "pa",             95/*verPunjabi*/,           70/*langPunjabi*/,              10/*Gurmukhi*/ },       // Guru;
    { "pl"/*PL*/,       42/*verPoland*/,            25/*langPolish*/,               29/*CentralEurRoman*/ },
    { "ps",             -1,                         59/*langPashto*/,               0x8C/*Farsi*/ },        // Arab;
    { "pt",             71/*verBrazil*/,            8/*langPortuguese*/,            0/*Roman*/ },           // "pt" defaults to verBrazil (per Chris Hansten)
    { "pt_BR",          71/*verBrazil*/,            8/*langPortuguese*/,            0/*Roman*/ },
    { "pt_PT",          10/*verPortugal*/,          8/*langPortuguese*/,            0/*Roman*/ },
    { "qu",             -1,                         132/*langQuechua*/,             0/*Roman*/ },           // Latn;
    { "rn",             -1,                         91/*langRundi*/,                0/*Roman*/ },           // Latn;
    { "ro"/*RO*/,       39/*verRomania*/,           37/*langRomanian*/,             38/*Romanian*/ },
    { "ru"/*RU*/,       49/*verRussia*/,            32/*langRussian*/,              7/*Cyrillic*/ },        // Cyrl;
    { "rw",             -1,                         90/*langKinyarwanda*/,          0/*Roman*/ },           // Latn;
    { "sa",             -1,                         65/*langSanskrit*/,             9/*Devanagari*/ },      // Deva;
    { "sd",             -1,                         62/*langSindhi*/,               0x8C/*Farsi*/ },        // Arab;
    { "se",             46/*verSami*/,              29/*langSami*/,                 0/*Roman*/ },
    { "si",             -1,                         76/*langSinhalese*/,            18/*Sinhalese*/ },      // Sinh;
    { "sk"/*SK*/,       57/*verSlovak*/,            39/*langSlovak*/,               29/*CentralEurRoman*/ },
    { "sl"/*SI*/,       66/*verSlovenian*/,         40/*langSlovenian*/,            36/*Croatian*/ },
    { "so",             -1,                         88/*langSomali*/,               0/*Roman*/ },           // Latn;
    { "sq",             -1,                         36/*langAlbanian*/,             0/*Roman*/ },
    { "sr"/*CS,RS*/,    65/*verSerbian*/,           42/*langSerbian*/,              7/*Cyrillic*/ },        // Cyrl;
    { "su",             -1,                         139/*langSundaneseRom*/,        0/*Roman*/ },           // Latn;
    { "sv"/*SE*/,       7/*verSweden*/,             5/*langSwedish*/,               0/*Roman*/ },
    { "sw",             -1,                         89/*langSwahili*/,              0/*Roman*/ },           // Latn;
    { "ta",             -1,                         74/*langTamil*/,                14/*Tamil*/ },          // Taml;
    { "te",             -1,                         75/*langTelugu*/,               15/*Telugu*/ },         // Telu
    { "tg",             -1,                         55/*langTajiki*/,               7/*Cyrillic*/ },        // "tg" defaults to "Cyrl"
    { "tg_Cyrl",        -1,                         55/*langTajiki*/,               7/*Cyrillic*/ },        // Cyrl; also -Latn, -Arab
    { "th"/*TH*/,       54/*verThailand*/,          22/*langThai*/,                 21/*Thai*/ },           // Thai;
    { "ti",             -1,                         86/*langTigrinya*/,             28/*Ethiopic*/ },       // Ethi;
    { "tk",             -1,                         56/*langTurkmen*/,              7/*Cyrillic*/ },        // "tk" defaults to Cyrl
    { "tk_Cyrl",        -1,                         56/*langTurkmen*/,              7/*Cyrillic*/ },        // Cyrl; also -Latn, -Arab
    { "tl",             -1,                         82/*langTagalog*/,              0/*Roman*/ },           // Latn;
    { "to"/*TO*/,       88/*verTonga*/,             147/*langTongan*/,              0/*Roman*/ },           // Latn;
    { "tr"/*TR*/,       24/*verTurkey*/,            17/*langTurkish*/,              35/*Turkish*/ },        // Latn;
    { "tt",             -1,                         135/*langTatar*/,               7/*Cyrillic*/ },        // Cyrl;
    { "tt_Cyrl",        -1,                         135/*langTatar*/,               7/*Cyrillic*/ },        // Cyrl;
    { "ug",             -1,                         136/*langUighur*/,              4/*Arabic*/ },          // Arab;
    { "uk"/*UA*/,       62/*verUkraine*/,           45/*langUkrainian*/,            7/*Cyrillic*/ },        // Cyrl;
    { "und",            55/*verScriptGeneric*/,     -1,                             0 },
    { "ur",             34/*verPakistanUrdu*/,      20/*langUrdu*/,                 0x8C/*Farsi*/ },        // "ur" defaults to verPakistanUrdu
    { "ur_IN",          96/*verIndiaUrdu*/,         20/*langUrdu*/,                 0x8C/*Farsi*/ },        // Arab
    { "ur_PK",          34/*verPakistanUrdu*/,      20/*langUrdu*/,                 0x8C/*Farsi*/ },        // Arab
    { "uz"/*UZ*/,       99/*verUzbek*/,             47/*langUzbek*/,                7/*Cyrillic*/ },        // Cyrl; also -Latn, -Arab
    { "uz_Cyrl",        99/*verUzbek*/,             47/*langUzbek*/,                7/*Cyrillic*/ },
    { "vi"/*VN*/,       97/*verVietnam*/,           80/*langVietnamese*/,           30/*Vietnamese*/ },     // Latn
    { "yi",             -1,                         41/*langYiddish*/,              5/*Hebrew*/ },          // Hebr;
    { "zh",             52/*verChina*/,             33/*langSimpChinese*/,          25/*ChineseSimp*/ },    // "zh" defaults to verChina, langSimpChinese
    { "zh_CN",          52/*verChina*/,             33/*langSimpChinese*/,          25/*ChineseSimp*/ },
    { "zh_HK",          53/*verTaiwan*/,            19/*langTradChinese*/,          2/*ChineseTrad*/ },
    { "zh_Hans",        52/*verChina*/,             33/*langSimpChinese*/,          25/*ChineseSimp*/ },
    { "zh_Hant",        53/*verTaiwan*/,            19/*langTradChinese*/,          2/*ChineseTrad*/ },
    { "zh_MO",          53/*verTaiwan*/,            19/*langTradChinese*/,          2/*ChineseTrad*/ },
    { "zh_SG",          52/*verChina*/,             33/*langSimpChinese*/,          25/*ChineseSimp*/ },
    { "zh_TW",          53/*verTaiwan*/,            19/*langTradChinese*/,          2/*ChineseTrad*/ },
};
// Number of entries in localeToLegacyCodes.
enum {
    kNumLocaleToLegacyCodes = sizeof(localeToLegacyCodes)/sizeof(localeToLegacyCodes[0])
};

/*
    For reference here is a list of ICU locales with variants and how some
    of them are canonicalized with the ICU function uloc_canonicalize:

    ICU 3.0 has:
        en_US_POSIX         x   no change
        hy_AM_REVISED       x   no change
        ja_JP_TRADITIONAL   ->  ja_JP@calendar=japanese
        th_TH_TRADITIONAL   ->  th_TH@calendar=buddhist

    ICU 2.8 also had the following (now obsolete):
        ca_ES_PREEURO
        de__PHONEBOOK       ->  de@collation=phonebook
        de_AT_PREEURO
        de_DE_PREEURO
        de_LU_PREEURO
        el_GR_PREEURO
        en_BE_PREEURO
        en_GB_EURO          ->  en_GB@currency=EUR
        en_IE_PREEURO       ->  en_IE@currency=IEP
        es__TRADITIONAL     ->  es@collation=traditional
        es_ES_PREEURO
        eu_ES_PREEURO
        fi_FI_PREEURO
        fr_BE_PREEURO
        fr_FR_PREEURO       ->  fr_FR@currency=FRF
        fr_LU_PREEURO
        ga_IE_PREEURO
        gl_ES_PREEURO
        hi__DIRECT          ->  hi@collation=direct
        it_IT_PREEURO
        nl_BE_PREEURO
        nl_NL_PREEURO
        pt_PT_PREEURO
        zh__PINYIN          ->  zh@collation=pinyin
        zh_TW_STROKE        ->  zh_TW@collation=stroke
*/

// _CompareTestEntryToTableEntryKey
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// comparison function for bsearch
// Both arguments point to KeyStringToResultString entries; the result is strcmp of their keys,
// i.e. an exact whole-string match is required.
static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr)
{
    return strcmp( ((const
KeyStringToResultString *)testEntryPtr)->key, ((const KeyStringToResultString *)tableEntryKeyPtr)->key ); } // _CompareTestEntryPrefixToTableEntryKey // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) // Comparison function for bsearch. Assumes prefix IS terminated with '-' or '_'. // Do the following instead of strlen & strncmp so we don't walk tableEntry key twice. static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; while ( *testPtr == *tablePtr && *tablePtr != 0 ) { testPtr++; tablePtr++; } if ( *tablePtr != 0 ) { // strings are different, and the string in the table has not run out; // i.e. the table entry is not a prefix of the text string. return ( *testPtr < *tablePtr )? -1: 1; } return 0; } // _CompareLowerTestEntryPrefixToTableEntryKey // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) // Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'. // Lowercases the test string before comparison (the table should already have lowercased entries). static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; char lowerTestChar; while ( (lowerTestChar = tolower(*testPtr)) == *tablePtr && *tablePtr != 0 && lowerTestChar != '_' ) { // <1.9> testPtr++; tablePtr++; } if ( *tablePtr != 0 ) { // strings are different, and the string in the table has not run out; // i.e. the table entry is not a prefix of the text string. if (lowerTestChar == '_') // <1.9> return -1; // <1.9> return ( lowerTestChar < *tablePtr )? -1: 1; } // The string in the table has run out. 
If the test string char is not alnum, // then the string matches, else the test string sorts after. return ( !isalnum(lowerTestChar) )? 0: 1; } // _DeleteCharsAtPointer // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) // remove _length_ characters from the beginning of the string indicated by _stringPtr_ // (we know that the string has at least _length_ characters in it) static void _DeleteCharsAtPointer(char *stringPtr, int length) { do { *stringPtr = stringPtr[length]; } while (*stringPtr++ != 0); } // _CopyReplacementAtPointer // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) // Copy replacement string (*excluding* terminating NULL byte) to the place indicated by stringPtr static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) { while (*replacementPtr != 0) { *stringPtr++ = *replacementPtr++; } } // _CheckForTag // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) static Boolean _CheckForTag(const char *localeStringPtr, const char *tagPtr, int tagLen) { return ( strncmp(localeStringPtr, tagPtr, tagLen) == 0 && !isalnum(localeStringPtr[tagLen]) ); } // _ReplacePrefix // Move this code from _UpdateFullLocaleString into separate function // <1.10> static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) { int newPrefixLen = strlen(newPrefix); int lengthDelta = newPrefixLen - oldPrefixLen; if (lengthDelta < 0) { // replacement is shorter, delete chars by shifting tail of string _DeleteCharsAtPointer(locString + newPrefixLen, -lengthDelta); } else if (lengthDelta > 0) { // replacement is longer... 
int stringLen = strlen(locString); if (stringLen + lengthDelta < locStringMaxLen) { // make room by shifting tail of string char * tailShiftPtr = locString + stringLen; char * tailStartPtr = locString + oldPrefixLen; // pointer to tail of string to shift while (tailShiftPtr >= tailStartPtr) { tailShiftPtr[lengthDelta] = *tailShiftPtr; tailShiftPtr--; } } else { // no room, can't do substitution newPrefix = NULL; } } if (newPrefix) { // do the substitution _CopyReplacementAtPointer(locString, newPrefix); } } // _UpdateFullLocaleString // Given a locale string that uses standard codes (not a special old-style Apple string), // update all the language codes and region codes to latest versions, map 3-letter // language codes to 2-letter codes if possible, and normalize casing. If requested, return // pointers to a language-region variant subtag (if present) and a region tag (if present). // (add locStringMaxLen parameter) // <1.10> static void _UpdateFullLocaleString(char inLocaleString[], int locStringMaxLen, char **langRegSubtagRef, char **regionTagRef, char varKeyValueString[]) // <1.17> { KeyStringToResultString testEntry; KeyStringToResultString * foundEntry; const SpecialCaseUpdates * specialCasePtr; char * inLocalePtr; char * subtagPtr; char * langRegSubtag = NULL; char * regionTag = NULL; char * variantTag = NULL; Boolean subtagHasDigits, pastPrimarySubtag, hadRegion; // 1. First replace any non-canonical prefix (case insensitive) with canonical // (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.) testEntry.key = inLocaleString; foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToCanonical, kNumLocaleStringPrefixToCanonical, sizeof(KeyStringToResultString), _CompareLowerTestEntryPrefixToTableEntryKey ); if (foundEntry) { // replace key (at beginning of string) with result _ReplacePrefix(inLocaleString, locStringMaxLen, strlen(foundEntry->key), foundEntry->result); // <1.10> } // 2. 
Walk through input string, normalizing case & marking use of ISO 3166 codes inLocalePtr = inLocaleString; subtagPtr = inLocaleString; subtagHasDigits = false; pastPrimarySubtag = false; hadRegion = false; while ( true ) { if ( isalpha(*inLocalePtr) ) { // if not past a region tag, then lowercase, else uppercase *inLocalePtr = (!hadRegion)? tolower(*inLocalePtr): toupper(*inLocalePtr); } else if ( isdigit(*inLocalePtr) ) { subtagHasDigits = true; } else { if (!pastPrimarySubtag) { // may have a NULL primary subtag if (subtagHasDigits) { break; } pastPrimarySubtag = true; } else if (!hadRegion) { // We are after any primary language subtag, but not past any region tag. // This subtag is preceded by '-' or '_'. int subtagLength = inLocalePtr - subtagPtr; // includes leading '-' or '_' if (subtagLength == 3 && !subtagHasDigits) { // potential ISO 3166 code for region or language variant; if so, needs uppercasing if (*subtagPtr == '_') { regionTag = subtagPtr; hadRegion = true; subtagPtr[1] = toupper(subtagPtr[1]); subtagPtr[2] = toupper(subtagPtr[2]); } else if (langRegSubtag == NULL) { langRegSubtag = subtagPtr; subtagPtr[1] = toupper(subtagPtr[1]); subtagPtr[2] = toupper(subtagPtr[2]); } } else if (subtagLength == 4 && subtagHasDigits) { // potential UN M.49 region code if (*subtagPtr == '_') { regionTag = subtagPtr; hadRegion = true; } else if (langRegSubtag == NULL) { langRegSubtag = subtagPtr; } } else if (subtagLength == 5 && !subtagHasDigits) { // ISO 15924 script code, uppercase just the first letter subtagPtr[1] = toupper(subtagPtr[1]); } else if (subtagLength == 1 && *subtagPtr == '_') { // <1.17> hadRegion = true; } if (!hadRegion) { // convert improper '_' to '-' *subtagPtr = '-'; } } else { variantTag = subtagPtr; // <1.17> } if (*inLocalePtr == '-' || *inLocalePtr == '_') { subtagPtr = inLocalePtr; subtagHasDigits = false; } else { break; } } inLocalePtr++; } // 3 If there is a variant tag, see if ICU canonicalizes it to keywords. 
// <1.17> [3577669] // If so, copy the keywords to varKeyValueString and delete the variant tag // from the original string (but don't otherwise use the ICU canonicalization). varKeyValueString[0] = 0; if (variantTag) { UErrorCode icuStatus; int icuCanonStringLen; char * varKeyValueStringPtr = varKeyValueString; icuStatus = U_ZERO_ERROR; icuCanonStringLen = uloc_canonicalize( inLocaleString, varKeyValueString, locStringMaxLen, &icuStatus ); if ( U_SUCCESS(icuStatus) ) { char * icuCanonStringPtr = varKeyValueString; if (icuCanonStringLen >= locStringMaxLen) icuCanonStringLen = locStringMaxLen - 1; varKeyValueString[icuCanonStringLen] = 0; while (*icuCanonStringPtr != 0 && *icuCanonStringPtr != ULOC_KEYWORD_SEPARATOR) ++icuCanonStringPtr; if (*icuCanonStringPtr != 0) { // the canonicalized string has keywords // delete the variant tag in the original string (and other trailing '_' or '-') *variantTag-- = 0; while (*variantTag == '_') *variantTag-- = 0; // delete all of the canonicalized string except the keywords while (*icuCanonStringPtr != 0) *varKeyValueStringPtr++ = *icuCanonStringPtr++; } *varKeyValueStringPtr = 0; } } // 4. Handle special cases of updating region codes, or updating language codes based on // region code. for (specialCasePtr = specialCases; specialCasePtr->reg1 != NULL; specialCasePtr++) { if ( specialCasePtr->lang == NULL || _CheckForTag(inLocaleString, specialCasePtr->lang, 2) ) { // OK, we matched any language specified. Now what needs updating? 
char * foundTag; if ( isupper(specialCasePtr->update1[0]) ) { // updating a region code if ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) { _CopyReplacementAtPointer(foundTag+1, specialCasePtr->update1); } if ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) { _CopyReplacementAtPointer(regionTag+1, specialCasePtr->update1); } } else { // updating the language, there will be two choices based on region if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) || ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) ) { _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update1); } else if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg2 + 1, 2) ) || ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg2) ) && !isalnum(foundTag[3]) ) ) { _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update2); } } } } // 5. return pointers if requested. 
if (langRegSubtagRef != NULL) { *langRegSubtagRef = langRegSubtag; } if (regionTagRef != NULL) { *regionTagRef = regionTag; } } // _RemoveSubstringsIfPresent // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) // substringList is a list of space-separated substrings to strip if found in localeString static void _RemoveSubstringsIfPresent(char *localeString, const char *substringList) { while (*substringList != 0) { char currentSubstring[kLocaleIdentifierCStringMax]; int substringLength = 0; char * foundSubstring; // copy current substring & get its length while ( isgraph(*substringList) ) { currentSubstring[substringLength++] = *substringList++; } // move to next substring while ( isspace(*substringList) ) { substringList++; } // search for current substring in locale string if (substringLength == 0) continue; currentSubstring[substringLength] = 0; foundSubstring = strstr(localeString, currentSubstring); // if substring is found, delete it if (foundSubstring) { _DeleteCharsAtPointer(foundSubstring, substringLength); } } } // _GetKeyValueString // <1.10> // Removes any key-value string from inLocaleString, puts canonized version in keyValueString static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) { char * inLocalePtr = inLocaleString; while (*inLocalePtr != 0 && *inLocalePtr != ULOC_KEYWORD_SEPARATOR) { inLocalePtr++; } if (*inLocalePtr != 0) { // we found a key-value section char * keyValuePtr = keyValueString; *keyValuePtr = *inLocalePtr; *inLocalePtr = 0; do { if ( *(++inLocalePtr) != ' ' ) { *(++keyValuePtr) = *inLocalePtr; // remove "tolower() for *inLocalePtr" // <1.11> } } while (*inLocalePtr != 0); } else { keyValueString[0] = 0; } } static void _AppendKeyValueString(char inLocaleString[], int locStringMaxLen, char keyValueString[]) { if (keyValueString[0] != 0) { UErrorCode uerr = U_ZERO_ERROR; UEnumeration * uenum = uloc_openKeywords(keyValueString, &uerr); if ( uenum != NULL ) { const char * keyword; int32_t 
length; char value[ULOC_KEYWORDS_CAPACITY]; // use as max for keyword value while ( U_SUCCESS(uerr) ) { keyword = uenum_next(uenum, &length, &uerr); if ( keyword == NULL ) { break; } length = uloc_getKeywordValue( keyValueString, keyword, value, sizeof(value), &uerr ); length = uloc_setKeywordValue( keyword, value, inLocaleString, locStringMaxLen, &uerr ); } uenum_close(uenum); } } } __private_extern__ CFStringRef _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator, CFStringRef localeIdentifier) { char inLocaleString[kLocaleIdentifierCStringMax]; CFStringRef outStringRef = NULL; if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { KeyStringToResultString testEntry; KeyStringToResultString * foundEntry; char keyValueString[sizeof(inLocaleString)]; // <1.10> char varKeyValueString[sizeof(inLocaleString)]; // <1.17> _GetKeyValueString(inLocaleString, keyValueString); // <1.10> testEntry.result = NULL; // A. First check if input string matches an old-style string that has a replacement // (do this before case normalization) testEntry.key = inLocaleString; foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); if (foundEntry) { // It does match, so replace old string with new strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); varKeyValueString[0] = 0; } else { // B. No match with an old-style string, use input string but update codes, normalize case, etc. _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), NULL, NULL, varKeyValueString); // <1.10><1.17> } // C. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string // 1. 
Strip defaults in input string based on initial part of locale string // (mainly to strip default script tag for a language) testEntry.key = inLocaleString; foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); if (foundEntry) { // The input string begins with a character sequence for which // there are default substrings which should be stripped if present _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); } // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string testEntry.key = inLocaleString; foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageStringForCFBundle, kNumAppleLocaleToLanguageStringForCFBundle, sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); if (foundEntry) { // it does match strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); } else { // just delete the region tag and anything after char * inLocalePtr = inLocaleString; while (*inLocalePtr != 0 && *inLocalePtr != '_') { inLocalePtr++; } *inLocalePtr = 0; } // D. Re-append any key-value strings, now canonical // <1.10><1.17> _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); // All done, return what we came up with. 
outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); } return outStringRef; } CFStringRef CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { char inLocaleString[kLocaleIdentifierCStringMax]; CFStringRef outStringRef = NULL; if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { KeyStringToResultString testEntry; KeyStringToResultString * foundEntry; char keyValueString[sizeof(inLocaleString)]; // <1.10> char varKeyValueString[sizeof(inLocaleString)]; // <1.17> _GetKeyValueString(inLocaleString, keyValueString); // <1.10> testEntry.result = NULL; // A. First check if input string matches an old-style string that has a replacement // (do this before case normalization) testEntry.key = inLocaleString; foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); if (foundEntry) { // It does match, so replace old string with new strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); varKeyValueString[0] = 0; } else { char * langRegSubtag = NULL; char * regionTag = NULL; // B. No match with an old-style string, use input string but update codes, normalize case, etc. _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17><1.19> // if the language part already includes a regional variant, then delete any region tag. <1.19> if (langRegSubtag && regionTag) *regionTag = 0; } // C. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string // 1. 
Strip defaults in input string based on initial part of locale string // (mainly to strip default script tag for a language) testEntry.key = inLocaleString; foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); if (foundEntry) { // The input string begins with a character sequence for which // there are default substrings which should be stripped if present _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); } // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string testEntry.key = inLocaleString; foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageString, kNumAppleLocaleToLanguageString, sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); if (foundEntry) { // it does match strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); } else { // skip to any region tag or java-type variant char * inLocalePtr = inLocaleString; while (*inLocalePtr != 0 && *inLocalePtr != '_') { inLocalePtr++; } // if there is still a region tag, turn it into a language variant <1.19> if (*inLocalePtr == '_') { // handle 3-digit regions in addition to 2-letter ones char * regionTag = inLocalePtr++; long expectedLength = 0; if ( isalpha(*inLocalePtr) ) { while ( isalpha(*(++inLocalePtr)) ) ; expectedLength = 3; } else if ( isdigit(*inLocalePtr) ) { while ( isdigit(*(++inLocalePtr)) ) ; expectedLength = 4; } *regionTag = (inLocalePtr - regionTag == expectedLength)? '-': 0; } // anything else at/after '_' just gets deleted *inLocalePtr = 0; } // D. Re-append any key-value strings, now canonical // <1.10><1.17> _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); // All done, return what we came up with. 
outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); } return outStringRef; } CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { char inLocaleString[kLocaleIdentifierCStringMax]; CFStringRef outStringRef = NULL; if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { KeyStringToResultString testEntry; KeyStringToResultString * foundEntry; char keyValueString[sizeof(inLocaleString)]; // <1.10> char varKeyValueString[sizeof(inLocaleString)]; // <1.17> _GetKeyValueString(inLocaleString, keyValueString); // <1.10> testEntry.result = NULL; // A. First check if input string matches an old-style Apple string that has a replacement // (do this before case normalization) testEntry.key = inLocaleString; foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); if (foundEntry) { // It does match, so replace old string with new // <1.10> strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); varKeyValueString[0] = 0; } else { char * langRegSubtag = NULL; char * regionTag = NULL; // B. No match with an old-style string, use input string but update codes, normalize case, etc. _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17> // C. Now strip defaults that are implied by other fields. // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter. if ( langRegSubtag && regionTag && strncmp(langRegSubtag+1, regionTag+1, 2) == 0 ) { _DeleteCharsAtPointer(langRegSubtag, 3); } // 2. 
Strip defaults in input string based on final region tag in locale string // (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO) if ( regionTag ) { testEntry.key = regionTag; foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringRegionToDefaults, kNumLocaleStringRegionToDefaults, sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); if (foundEntry) { _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); } } // 3. Strip defaults in input string based on initial part of locale string // (mainly to strip default script tag for a language) testEntry.key = inLocaleString; foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); if (foundEntry) { // The input string begins with a character sequence for which // there are default substrings which should be stripped if present _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); } } // D. Re-append any key-value strings, now canonical // <1.10><1.17> _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); // Now create the CFString (even if empty!) 
outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); } return outStringRef; } // CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on // the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator, LangCode lcode, RegionCode rcode) { CFStringRef result = NULL; if (0 <= rcode && rcode < kNumRegionCodeToLocaleString) { const char *localeString = regionCodeToLocaleString[rcode]; if (localeString != NULL && *localeString != '\0') { result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); } } if (result) return result; if (0 <= lcode && lcode < kNumLangCodeToLocaleString) { const char *localeString = langCodeToLocaleString[lcode]; if (localeString != NULL && *localeString != '\0') { result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); } } return result; } CFDictionaryRef CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator, CFStringRef localeID) { char cLocaleID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; char buffer[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; CFMutableDictionaryRef working = CFDictionaryCreateMutable(allocator, 10, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); UErrorCode icuStatus = U_ZERO_ERROR; int32_t length = 0; // Extract the C string locale ID, for ICU CFIndex outBytes = 0; CFStringGetBytes(localeID, CFRangeMake(0, CFStringGetLength(localeID)), kCFStringEncodingASCII, (UInt8) '?', true, (unsigned char *)cLocaleID, sizeof(cLocaleID)/sizeof(char) - 1, &outBytes); cLocaleID[outBytes] = '\0'; // Get the components length = uloc_getLanguage(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); if (U_SUCCESS(icuStatus) && length > 0) { CFStringRef string = 
CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); CFDictionaryAddValue(working, kCFLocaleLanguageCode, string); CFRelease(string); } icuStatus = U_ZERO_ERROR; length = uloc_getScript(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); if (U_SUCCESS(icuStatus) && length > 0) { CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); CFDictionaryAddValue(working, kCFLocaleScriptCode, string); CFRelease(string); } icuStatus = U_ZERO_ERROR; length = uloc_getCountry(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); if (U_SUCCESS(icuStatus) && length > 0) { CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); CFDictionaryAddValue(working, kCFLocaleCountryCode, string); CFRelease(string); } icuStatus = U_ZERO_ERROR; length = uloc_getVariant(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); if (U_SUCCESS(icuStatus) && length > 0) { CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); CFDictionaryAddValue(working, kCFLocaleVariantCode, string); CFRelease(string); } icuStatus = U_ZERO_ERROR; // Now get the keywords; open an enumerator on them UEnumeration *iter = uloc_openKeywords(cLocaleID, &icuStatus); const char *locKey = NULL; int32_t locKeyLen = 0; while ((locKey = uenum_next(iter, &locKeyLen, &icuStatus)) && U_SUCCESS(icuStatus)) { char locValue[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // Get the value for this keyword if (uloc_getKeywordValue(cLocaleID, locKey, locValue, sizeof(locValue)/sizeof(char), &icuStatus) > 0 && U_SUCCESS(icuStatus)) { CFStringRef key = CFStringCreateWithBytes(allocator, (UInt8 *)locKey, strlen(locKey), kCFStringEncodingASCII, true); CFStringRef value = CFStringCreateWithBytes(allocator, (UInt8 *)locValue, strlen(locValue), kCFStringEncodingASCII, true); if (key && value) CFDictionaryAddValue(working, key, 
value); if (key) CFRelease(key); if (value) CFRelease(value); } } uenum_close(iter); // Convert to an immutable dictionary and return CFDictionaryRef result = CFDictionaryCreateCopy(allocator, working); CFRelease(working); return result; } typedef struct __AppendContext { char separator; CFMutableStringRef working; } __AppendContext; static void __AppendKeywords(const void *k, const void *v, void *c) { __AppendContext *context = (__AppendContext *) c; CFStringRef key = (CFStringRef) k; CFStringRef value = (CFStringRef) v; if (CFEqual(key, kCFLocaleLanguageCode) || CFEqual(key, kCFLocaleScriptCode) || CFEqual(key, kCFLocaleCountryCode) || CFEqual(key, kCFLocaleVariantCode)) return; CFStringAppendFormat(context->working, NULL, CFSTR("%c%@%c%@"), context->separator, key, ULOC_KEYWORD_ASSIGN, value); context->separator = ULOC_KEYWORD_ITEM_SEPARATOR; } CFStringRef CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator, CFDictionaryRef dictionary) { CFMutableStringRef working = CFStringCreateMutable(allocator, 0); CFStringRef value = NULL; bool country = false; __AppendContext context = {ULOC_KEYWORD_SEPARATOR, working}; if ((value = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleLanguageCode))) { CFStringAppend(working, value); } if ((value = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleScriptCode))) { CFStringAppendFormat(working, NULL, CFSTR("_%@"), value); } if ((value = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleCountryCode))) { CFStringAppendFormat(working, NULL, CFSTR("_%@"), value); country = true; } if ((value = (CFStringRef) CFDictionaryGetValue(dictionary, kCFLocaleVariantCode))) { if (!country) CFStringAppend(working, CFSTR("_")); CFStringAppendFormat(working, NULL, CFSTR("_%@"), value); } // Now iterate through any remaining entries and append as keywords CFDictionaryApplyFunction(dictionary, __AppendKeywords, &context); // Convert to immutable string and return CFStringRef result = 
(CFStringRef)CFStringCreateCopy(allocator, working); CFRelease(working); return result; }