mac-encodings.txt   [plain text]


# The items on the left are names of CFStringEncoding values (without the leading kCFStringEncoding prefix).
# The items on the right are IANA character set names. Names listed in character-sets.txt are not
# repeated here; mentioning any one character set from a group in that file pulls in all the aliases
# in that group.

MacRoman: macintosh, xmacroman
WindowsLatin1: windows-1252, winlatin1, xansi
ISOLatin1: ISO-8859-1, 88591
NextStepLatin: x-nextstep
ASCII: US-ASCII, isoir6us
Unicode: ISO-10646-UCS-2, ucs2, unicode, utf16
Unicode, BigEndian: UTF-16BE, unicodefffe
Unicode, LittleEndian: UTF-16LE, unicodefeff
UTF8: UTF-8, unicode11utf8, unicode20utf8, xunicode20utf8
NonLossyASCII

MacJapanese, IsJapanese: x-mac-japanese
MacChineseTrad: x-mac-chinesetrad, xmactradchinese
MacKorean: x-mac-korean
#MacArabic: x-mac-arabic
#MacHebrew: x-mac-hebrew
MacGreek: x-mac-greek
MacCyrillic: x-mac-cyrillic, xmacukrainian
#MacDevanagari: x-mac-devanagari
#MacGurmukhi: x-mac-gurmukhi
#MacGujarati: x-mac-gujarati
#MacOriya
#MacBengali
#MacTamil
#MacTelugu
#MacKannada
#MacMalayalam
#MacSinhalese
#MacBurmese
#MacKhmer
#MacThai: x-mac-thai
#MacLaotian
MacGeorgian
MacArmenian
MacChineseSimp: x-mac-chinesesimp, xmacsimpchinese
#MacTibetan: x-mac-tibetan
#MacMongolian
#MacEthiopic
#MacCentralEurRoman: x-mac-centraleurroman, xmacce
#MacVietnamese
#MacExtArabic
#
#MacSymbol: x-mac-symbol
#MacDingbats: x-mac-dingbats
#MacTurkish: x-mac-turkish
#MacCroatian: x-mac-croatian
#MacIcelandic: x-mac-icelandic
#MacRomanian: x-mac-romanian
#MacCeltic
#MacGaelic
#
#MacFarsi: x-mac-farsi
#
#MacUkrainian

#MacInuit
#MacVT100: x-mac-vt100
#
ISOLatin2: ISO-8859-2
ISOLatin3: ISO-8859-3
ISOLatin4: ISO-8859-4
ISOLatinCyrillic: ISO-8859-5
#ISOLatinArabic: ISO-8859-6
ISOLatinGreek: ISO-8859-7
#ISOLatinHebrew: ISO-8859-8-I, logical
#ISOLatinHebrew, VisualOrdering: ISO-8859-8, dos862, visual
ISOLatin5: ISO-8859-9
ISOLatin6: ISO-8859-10
#ISOLatinThai: ISO-8859-11
ISOLatin7: ISO-8859-13
ISOLatin8: ISO-8859-14
ISOLatin9: ISO-8859-15, csisolatin9, l9
ISOLatin10: ISO-8859-16
#
DOSLatinUS: cp437
DOSGreek: cp737, ibm737
#DOSBalticRim: cp500, cp775
DOSLatin1: cp850
DOSGreek1
DOSLatin2: cp852
DOSCyrillic
DOSTurkish: cp857
DOSPortuguese
DOSIcelandic: cp861
#DOSHebrew
DOSCanadianFrench
#DOSArabic: cp864, dos720
DOSNordic
DOSRussian: cp866
DOSGreek2: cp869
#DOSThai: cp874, dos874, tis620, windows874
DOSJapanese, IsJapanese: cp932, cswindows31j, xmscp932
DOSChineseSimplif
DOSKorean
DOSChineseTrad: cp950
WindowsLatin2: windows-1250, winlatin2, xcp1250
WindowsCyrillic: windows-1251, wincyrillic, xcp1251
WindowsGreek: windows-1253, wingreek
WindowsLatin5: windows-1254, winturkish
#WindowsHebrew: windows-1255, winhebrew
#WindowsArabic: windows-1256, cp1256, winarabic
#WindowsBalticRim: windows-1257, winbaltic
WindowsKoreanJohab: johab
#WindowsVietnamese: windows-1258, winvietnamese
#
JIS_X0201_76, IsJapanese: JIS_X0201
JIS_X0208_83, IsJapanese: JIS_X0208-1983
JIS_X0208_90, IsJapanese: JIS_X0208-1990
JIS_X0212_90, IsJapanese: JIS_X0212-1990
JIS_C6226_78, IsJapanese: JIS_C6226-1978
ShiftJIS_X0213_00, IsJapanese: Shift_JIS_X0213-2000
GB_2312_80
GBK_95
GB_18030_2000: GB18030
KSC_5601_87
KSC_5601_92_Johab
CNS_11643_92_P1
CNS_11643_92_P2
CNS_11643_92_P3
#
ISO_2022_JP, IsJapanese: ISO-2022-JP, jis7
ISO_2022_JP_2, IsJapanese: ISO-2022-JP-2
ISO_2022_JP_1, IsJapanese: ISO-2022-JP-1
ISO_2022_JP_3, IsJapanese: ISO-2022-JP-3
ISO_2022_CN: ISO-2022-CN
ISO_2022_CN_EXT: ISO-2022-CN-EXT
ISO_2022_KR: ISO-2022-KR
#
EUC_JP, IsJapanese: EUC-JP, xeuc, xeucjp
EUC_CN: EUC-CN, cngb, csgb231280, gb2312, gb231280, gbk, xeuccn, xgbk
EUC_TW: EUC-TW
EUC_KR: EUC-KR, cp949, ksc5601
#
ShiftJIS, IsJapanese: Shift_JIS, sjis, xsjis
KOI8_R: KOI8-R, koi, koi8
Big5: Big5, cnbig5, xxbig5
MacRomanLatin1: x-mac-roman-latin1
HZ_GB_2312: HZ-GB-2312
Big5_HKSCS_1999: Big5-HKSCS
KOI8_U: KOI8-U
#
EBCDIC_US
EBCDIC_CP037: cp037
#
# I found these additional character set names in the Perl module I18N-Charset-1.17,
# but could not figure out whether I should do anything with them. -Darin
#
# Adobe-Standard-Encoding === adobe-standard
# Adobe-Symbol-Encoding === adobe-symbol
# EBCDIC-ES === ebcdic-cp-es
# EBCDIC-FR === ebcdic-cp-fr
# EBCDIC-IT === ebcdic-cp-it
# EBCDIC-UK === ebcdic-cp-gb
# EBCDIC-US === ebcdic-cp-us
# EBCDIC-FI-SE === ebcdic-cp-fi
# UTF-7 === Unicode-2-0-utf-7
# ISO-10646-UCS-4 === ucs4