/// Copyright (C) 2004, 2007 Free Software Foundation, Inc. /// * /// Author: Alexander Gnauck AG-Software /// * /// This file is part of GNU Libidn. /// * /// This library is free software; you can redistribute it and/or /// modify it under the terms of the GNU Lesser General Public License /// as published by the Free Software Foundation; either version 2.1 of /// the License, or (at your option) any later version. /// * /// This library is distributed in the hope that it will be useful, but /// WITHOUT ANY WARRANTY; without even the implied warranty of /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU /// Lesser General Public License for more details. /// * /// You should have received a copy of the GNU Lesser General Public /// License along with this library; if not, write to the Free Software /// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 /// USA /// using System; using System.IO; using System.Collections; using System.Text; namespace gnu.inet.encoding.misc { public class GenerateNFKC { internal static string stripComment(string sIn) { int c = sIn.IndexOf('#'); if (c == -1) { return sIn; } else { return sIn.Substring(0, (c) - (0)); } } internal static string[] split(string sIn, char sep) { StringBuilder sb = new StringBuilder(sIn); int c = 0; for (int i = 0; i < sb.Length; i++) { if (sb[i] == sep) { c++; } } string[] sOut = new string[c + 1]; c = 0; int l = 0; for (int i = 0; i < sb.Length; i++) { if (sb[i] == sep) { if (l >= i) { sOut[c] = ""; } else { // TODO, check this sOut[c] = sb.ToString(l, i-l); } l = i + 1; c++; } } if (l < sb.Length) { sOut[c] = sb.ToString(l, sb.Length - l); } return sOut; } internal static bool isCompatibilityMapping(string sIn) { return sIn.Length > 0 && sIn[0] == '<'; } internal static string stripCompatibilityTag(string sIn) { return sIn.Substring(sIn.IndexOf('>') + 2); } internal static string toString(string sIn) { StringBuilder sOut = new StringBuilder(); string[] chars = split(sIn, ' '); for (int i = 0; i < chars.Length; i++) { if (chars[i].Equals("005C")) { sOut.Append("\\\\"); } else if (chars[i].Equals("0022")) { sOut.Append("\\\""); } else { sOut.Append("\\u"); sOut.Append(chars[i]); } } return sOut.ToString(); } internal static string decompose(string sIn, SortedList mappings) { StringBuilder sOut = new StringBuilder(); string[] c = split(sIn, ' '); for (int i = 0; i < c.Length; i++) { if (mappings.ContainsKey(c[i])) { if (sOut.Length > 0) { sOut.Append(" "); } sOut.Append(decompose((string)mappings[c[i]], mappings)); } else { if (sOut.Length > 0) { sOut.Append(" "); } sOut.Append(c[i]); } } return sOut.ToString(); } public static void Generate() { // Check if the unicode files exist { FileInfo f1 = new FileInfo("CompositionExclusions.txt"); FileInfo f2 = new FileInfo("UnicodeData.txt"); bool tmpBool; if (File.Exists(f1.FullName)) tmpBool = true; else tmpBool = Directory.Exists(f1.FullName); bool tmpBool2; if (File.Exists(f2.FullName)) tmpBool2 = true; else tmpBool2 = Directory.Exists(f2.FullName); if (!tmpBool || !tmpBool2) { Console.WriteLine("Unable to find UnicodeData.txt or CompositionExclusions.txt."); Console.WriteLine("Please download the latest version of these file from:"); Console.WriteLine("http://www.unicode.org/Public/UNIDATA/"); System.Environment.Exit(1); } } ArrayList exclusions = new ArrayList(); { StreamReader r = new StreamReader("CompositionExclusions.txt", System.Text.Encoding.Default); string line; while (null != (line = r.ReadLine())) { line = stripComment(line); line = line.Trim(); if (line.Length == 0) { // Empty line } else if (line.Length == 4) { exclusions.Add(line); } else { // Skip code points > 0xffff } } r.Close(); } // Read UnicodeData SortedList canonical = new SortedList(); SortedList compatibility = new SortedList(); SortedList combiningClasses = new SortedList(); { StreamReader r = new StreamReader("UnicodeData.txt", Encoding.Default); string line; while (null != (line = r.ReadLine())) { line = stripComment(line); line = line.Trim(); if (line.Length == 0) { // Empty line } else { string[] f = split(line, ';'); if (f[0].Length == 4) { if (!f[5].Equals("")) { if (isCompatibilityMapping(f[5])) { compatibility[f[0]] = stripCompatibilityTag(f[5]); } else { compatibility[f[0]] = f[5]; if (!exclusions.Contains(f[0])) { canonical[f[0]] = f[5]; } } } if (!f[3].Equals("0")) { //UPGRADE_TODO: Method 'java.lang.Integer.parseInt' was converted to 'System.Convert.ToInt32' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073'" combiningClasses[(int)System.Convert.ToInt32(f[0], 16)] = f[3]; } } else { // Skip code points > 0xffff } } } r.Close(); } // Recursively apply compatibility mappings while (true) { bool replaced = false; IEnumerator i = new HashSet(compatibility.Keys).GetEnumerator(); while (i.MoveNext()) { string k = (string)i.Current; string v = (string)compatibility[k]; string d = decompose(v, compatibility); if (!d.Equals(v)) { replaced = true; compatibility[k] = d; } } if (!replaced) { break; } } // Eliminate duplicate mappings SortedList compatibilityKeys = new SortedList(); ArrayList compatibilityMappings = new ArrayList(); { IEnumerator i = new HashSet(compatibility.Keys).GetEnumerator(); while (i.MoveNext()) { string k = (string)i.Current; string v = (string)compatibility[k]; int index = compatibilityMappings.IndexOf(v); if (index == -1) { index = compatibilityMappings.Count; compatibilityMappings.Add(v); } compatibilityKeys[k] = (int)index; } } // Create composition tables SortedList firstMap = new SortedList(); SortedList secondMap = new SortedList(); { IEnumerator i = new HashSet(canonical.Keys).GetEnumerator(); while (i.MoveNext()) { string k = (string)i.Current; string v = (string)canonical[k]; string[] s = split(v, ' '); if (s.Length == 2) { // If both characters have the same combining class, they // won't be combined (in the sequence AB, B is blocked from // A if both have the same combining class) string cc1 = (string)combiningClasses[(int)System.Convert.ToInt32(s[0], 16)]; string cc2 = (string)combiningClasses[(int)System.Convert.ToInt32(s[1], 16)]; if (cc1 != null || (cc1 != null && cc1.Equals(cc2))) { // Ignore this composition // TODO check this //i.remove(); canonical.Remove(k); continue; } if (firstMap.ContainsKey(s[0])) { int c = (int)firstMap[s[0]]; firstMap[s[0]] = (int)(c + 1); } else { firstMap[s[0]] = 1; } if (secondMap.ContainsKey(s[1])) { int c = (int)secondMap[s[1]]; secondMap[s[1]] = (int)(c + 1); } else { secondMap[s[1]] = 1; } } else if (s.Length > 2) { Console.WriteLine("? wrong canonical mapping for " + k); System.Environment.Exit(1); } } } SortedList singleFirstComposition = new SortedList(); SortedList singleSecondComposition = new SortedList(); SortedList complexComposition = new SortedList(); int composeLookupMax = 0; { IEnumerator i = new HashSet(canonical.Keys).GetEnumerator(); while (i.MoveNext()) { string k = (string) i.Current; string v = (string) canonical[k]; string[] s = split(v, ' '); if (s.Length == 2) { // TODO, check this int first = 0; if(firstMap.Contains(s[0])) first = (int) firstMap[s[0]]; int second = 0; if (secondMap.Contains(s[1])) second = (int) secondMap[s[1]]; // TODO, check this if (first == 1) { singleFirstComposition[s[0]] = new string[] { s[1], k }; composeLookupMax = System.Math.Max(composeLookupMax, System.Convert.ToInt32(s[0], 16)); } else if (second == 1) { singleSecondComposition[s[1]] = new string[] { s[0], k }; composeLookupMax = System.Math.Max(composeLookupMax, System.Convert.ToInt32(s[1], 16)); } else { if (complexComposition.ContainsKey(s[0])) { SortedList m = (SortedList)complexComposition[s[0]]; if (m.ContainsKey(s[1])) { Console.WriteLine("? ambiguous canonical mapping for " + s[0]); System.Environment.Exit(1); } m[s[1]] = k; } else { SortedList m = new SortedList(); m[s[1]] = k; complexComposition[s[0]] = m; } composeLookupMax = System.Math.Max(composeLookupMax, System.Convert.ToInt32(s[0], 16)); composeLookupMax = System.Math.Max(composeLookupMax, System.Convert.ToInt32(s[1], 16)); } } } } Console.WriteLine("Generating CombiningClass.cs file..."); // Dump combining classes { StreamWriter w = new StreamWriter("CombiningClass.cs", false, Encoding.Default); w.WriteLine("// Do not edit !!!"); w.WriteLine("// this file is generated automatically"); w.WriteLine(); w.WriteLine("public class CombiningClass"); w.WriteLine("{"); w.WriteLine("\tpublic static readonly int[,] c = new int[,] {"); System.Text.StringBuilder index = new System.Text.StringBuilder(); int count = 0; for (int i = 0; i < 256; i++) { bool empty = true; StringBuilder page = new StringBuilder(); page.Append(" { /* Page " + i + " */"); for (int j = 0; j < 256; j++) { int c = (int)((i << 8) + j); string cc = (string)combiningClasses[c]; if (0 == (j & 31)) { page.Append("\r\n "); } if (cc == null) { page.Append("0, "); } else { page.Append(cc + ", "); empty = false; } } page.Append("\r\n },"); index.Append(" "); if (!empty) { w.WriteLine(page.ToString()); index.Append(count++); index.Append(",\r\n"); } else { index.Append("-1,\r\n"); } } w.WriteLine(" };\r\n"); w.WriteLine("\tpublic static readonly int[] i = new int[] {"); w.Write(index.ToString()); w.WriteLine(" };"); w.WriteLine("}"); w.Close(); } //Console.WriteLine(" Ok."); Console.WriteLine("Generating DecompositionKeys.cs file..."); // Dump compatibility decomposition { StreamWriter w = new StreamWriter("DecompositionKeys.cs", false, Encoding.Default); w.WriteLine("// Do not edit !!!"); w.WriteLine("// this file is generated automatically"); w.WriteLine(); w.WriteLine("public class DecompositionKeys"); w.WriteLine("{"); w.WriteLine("\tpublic static readonly int[] k = new int[] {"); IEnumerator i = new HashSet(compatibilityKeys.Keys).GetEnumerator(); while (i.MoveNext()) { string k = (string)i.Current; int index = ((int)compatibilityKeys[k]); w.WriteLine(" '\\u" + k + "', " + index + ","); } w.WriteLine(" };"); w.WriteLine("}"); w.Close(); } //Console.WriteLine(" Ok."); Console.WriteLine("Generating DecompositionMappings.cs file..."); { StreamWriter w = new StreamWriter("DecompositionMappings.cs", false, Encoding.Default); w.WriteLine("// Do not edit !!!"); w.WriteLine("// this file is generated automatically"); w.WriteLine(); w.WriteLine("public class DecompositionMappings"); w.WriteLine("{"); w.WriteLine("\tpublic static readonly string[] m = new string[] {"); IEnumerator i = compatibilityMappings.GetEnumerator(); while (i.MoveNext()) { string m = (string)i.Current; w.WriteLine(" \"" + toString(m) + "\","); } w.WriteLine(" };"); w.WriteLine("}"); w.Close(); } //Console.WriteLine(" Ok."); Console.WriteLine("Generating Composition.cs file..."); // Dump canonical composition { StreamWriter w = new StreamWriter("Composition.cs", false, Encoding.Default); w.WriteLine("// Do not edit !!!"); w.WriteLine("// this file is generated automatically"); w.WriteLine(); w.WriteLine("public class Composition"); w.WriteLine("{"); IEnumerator i; int index = 0; SortedList indices = new SortedList(); i = new HashSet(complexComposition.Keys).GetEnumerator(); while (i.MoveNext()) { string s0 = (string)i.Current; indices[(int)System.Convert.ToInt32(s0, 16)] = (int)index; index++; } int multiSecondStart = index; w.WriteLine("\t/* jagged Array */"); w.WriteLine("\tpublic static readonly char[][] multiFirst = new char[][] {"); //w.WriteLine(" public final static char[][] multiFirst = new char[][] {"); i = new HashSet(complexComposition.Keys).GetEnumerator(); while (i.MoveNext()) { string s0 = (string)i.Current; SortedList m = (SortedList)complexComposition[s0]; SortedList line = new SortedList(); int maxIndex = 1; System.Collections.IEnumerator i2 = new HashSet(m.Keys).GetEnumerator(); while (i2.MoveNext()) { string s1 = (string)i2.Current; string k = (string)m[s1]; int s1i = (int)System.Convert.ToInt32(s1, 16); if (!indices.ContainsKey(s1i)) { indices[s1i] = (int)index; index++; } line[indices[s1i]] = k; maxIndex = System.Math.Max(maxIndex, ((int)indices[s1i])); } w.Write("\tnew char[] { "); for (int j = multiSecondStart; j <= maxIndex; j++) { if (line.ContainsKey((int)j)) { string s = (string)line[(int)j]; w.Write("'" + toString(s) + "', "); } else { //w.Write(" 0, "); w.Write("'" + toString("0000") + "', "); } } w.WriteLine("},"); } w.WriteLine(" };"); int singleFirstStart = index; w.WriteLine("\tpublic static readonly char[,] singleFirst = new char[,] {"); i = new HashSet(singleFirstComposition.Keys).GetEnumerator(); while (i.MoveNext()) { string k = (string)i.Current; string[] v = ((string[])singleFirstComposition[k]); w.WriteLine(" { '" + toString(v[0]) + "', '" + toString(v[1]) + "' },"); if (indices.ContainsKey((int)System.Convert.ToInt32(k, 16))) { Console.WriteLine(k + " already indexed!"); } indices[(int)System.Convert.ToInt32(k, 16)] = (int)index; index++; } w.WriteLine(" };"); int singleSecondStart = index; w.WriteLine("\tpublic static readonly char[,] singleSecond = new char[,] {"); i = new HashSet(singleSecondComposition.Keys).GetEnumerator(); while (i.MoveNext()) { string k = (string)i.Current; string[] v = ((string[])singleSecondComposition[k]); w.WriteLine(" { '" + toString(v[0]) + "', '" + toString(v[1]) + "' },"); indices[(int)System.Convert.ToInt32(k, 16)] = (int)index; index++; } w.WriteLine(" };"); w.WriteLine("\tpublic static readonly int multiSecondStart = " + multiSecondStart + ";"); w.WriteLine("\tpublic static readonly int singleFirstStart = " + singleFirstStart + ";"); w.WriteLine("\tpublic static readonly int singleSecondStart = " + singleSecondStart + ";"); System.Text.StringBuilder compositionPages = new System.Text.StringBuilder(); w.WriteLine("\tpublic static readonly int[] composePage = new int[] {"); int pageCount = 0; for (int j = 0; j * 256 < composeLookupMax + 255; j++) { bool empty = true; StringBuilder page = new StringBuilder(); for (int k = 0; k < 256; k++) { if (k % 16 == 0) { page.Append("\r\n "); } if (indices.ContainsKey((int)(j * 256 + k))) { page.Append(indices[(int)(j * 256 + k)]); page.Append(", "); empty = false; } else { page.Append("-1, "); } } if (empty) { w.WriteLine(" -1,"); } else { w.WriteLine(" " + pageCount + ","); compositionPages.Append("\t{"); compositionPages.Append(page); compositionPages.Append("\r\n },\r\n"); pageCount++; } } w.WriteLine(" };"); //w.WriteLine("\t/* jagged Array */"); w.WriteLine("\tpublic static readonly int[,] composeData = new int[,] {"); w.Write(compositionPages); w.WriteLine(" };"); w.WriteLine("}"); w.Close(); } //Console.WriteLine(" Ok."); Console.WriteLine("Finished!"); } } }