// TestNFKC.java [plain text]

/**
 * Copyright (C) 2004, 2005, 2006, 2007  Free Software Foundation, Inc.
 *
 * Author: Oliver Hitz
 *
 * This file is part of GNU Libidn.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

import gnu.inet.encoding.NFKC;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/**
 * Command-line test driver for {@link NFKC#normalizeNFKC(String)}.
 *
 * <p>With an argument, the argument is normalized and printed. Without
 * arguments, the file <code>NormalizationTest.txt</code> is read from the
 * current directory and, for every usable row, the NFKC form of each of
 * the first five columns is compared against the fourth column.
 */
public class TestNFKC
{
  /** Number of data columns (c1..c5) read from each test row. */
  private static final int COLUMNS = 5;

  /** Zero-based index of the column holding the expected NFKC form (c4). */
  private static final int EXPECTED = 3;

  /**
   * Removes a trailing <code>#</code> comment from a line.
   *
   * @param in the raw line
   * @return the text before the first <code>'#'</code>, or <code>in</code>
   *         unchanged if it contains none
   */
  static String stripComment(String in)
  {
    int hash = in.indexOf('#');
    return (hash == -1) ? in : in.substring(0, hash);
  }

  /**
   * Splits a string at every occurrence of a separator character.
   *
   * <p>Empty fields are preserved, and every slot of the result holds a
   * non-null string: an empty input yields one empty field, and a trailing
   * separator yields a trailing empty field.
   *
   * @param in the string to split
   * @param sep the separator character
   * @return the fields, always one more than the number of separators
   */
  static String[] split(String in, char sep)
  {
    int fields = 1;
    for (int i = 0; i < in.length(); i++) {
      if (in.charAt(i) == sep) {
        fields++;
      }
    }

    String[] out = new String[fields];
    int start = 0;
    for (int n = 0; n < fields - 1; n++) {
      // A separator is guaranteed to exist here: they were counted above.
      int end = in.indexOf(sep, start);
      out[n] = in.substring(start, end);
      start = end + 1;
    }
    // Whatever follows the last separator, possibly the empty string.
    out[fields - 1] = in.substring(start);
    return out;
  }

  /**
   * Tells whether a column contains a token that is not exactly four
   * characters long.
   *
   * <p>The check is purely on token length: any space-separated token
   * whose length differs from 4 (including an empty token) makes this
   * return true. main() uses it to skip rows it does not test.
   *
   * @param in a column of space-separated tokens
   * @return true if at least one token does not have length 4
   */
  static boolean containsHighChar(String in)
  {
    String[] tokens = split(in, ' ');
    for (int i = 0; i < tokens.length; i++) {
      if (tokens[i].length() != 4) {
        return true;
      }
    }
    return false;
  }

  /**
   * Converts a column of space-separated hexadecimal code points into the
   * string they denote.
   *
   * <p>Code points above U+FFFF are appended as a surrogate pair rather
   * than being truncated to 16 bits.
   *
   * @param in space-separated hexadecimal code points
   * @return the decoded string
   * @throws IllegalArgumentException if a token is not a hexadecimal
   *         number or is not a valid Unicode code point
   */
  static String evalUnicode(String in)
  {
    StringBuffer out = new StringBuffer();
    String[] tokens = split(in, ' ');
    for (int i = 0; i < tokens.length; i++) {
      out.appendCodePoint(Integer.parseInt(tokens[i], 16));
    }
    return out.toString();
  }

  /**
   * Formats a string as hexadecimal UTF-16 code units for diagnostics.
   *
   * @param in the string to dump
   * @return each char as unpadded lower-case hex followed by one space
   */
  static String toUnicode(String in)
  {
    StringBuffer out = new StringBuffer();
    for (int i = 0; i < in.length(); i++) {
      out.append(Integer.toHexString(in.charAt(i)));
      out.append(" ");
    }
    return out.toString();
  }

  /**
   * Checks a single non-empty, comment-free line of the test file.
   *
   * <p>Lines with fewer than five columns, and lines where any of the
   * first five columns is rejected by containsHighChar(), are not tested
   * and count as passing. On a mismatch the details are printed to
   * standard output.
   *
   * @param line the trimmed line
   * @return false if a normalization mismatch was found, true otherwise
   */
  private static boolean checkLine(String line)
  {
    String[] cols = split(line, ';');
    if (cols.length < COLUMNS) {
      // Not a data row (for example a header line without separators).
      return true;
    }
    for (int i = 0; i < COLUMNS; i++) {
      if (containsHighChar(cols[i])) {
        return true;
      }
    }

    String[] decoded = new String[COLUMNS];
    for (int i = 0; i < COLUMNS; i++) {
      decoded[i] = evalUnicode(cols[i]);
    }
    String expected = decoded[EXPECTED];

    String[] normalized = new String[COLUMNS];
    boolean matches = true;
    for (int i = 0; i < COLUMNS; i++) {
      normalized[i] = NFKC.normalizeNFKC(decoded[i]);
      if (!normalized[i].equals(expected)) {
        matches = false;
      }
    }
    if (matches) {
      return true;
    }

    System.out.println("Error at `"+line+"'");
    for (int i = 0; i < COLUMNS; i++) {
      System.out.println("NFKC(c"+(i+1)+") = "+toUnicode(normalized[i])+", should be "+toUnicode(expected));
    }
    return false;
  }

  /**
   * Program entry point.
   *
   * <p>If an argument is given, prints it and its NFKC form. Otherwise
   * runs the checks against <code>NormalizationTest.txt</code>, stopping
   * at the first mismatch. Exits with status 1 if the test file is
   * missing.
   *
   * @param args optional: a single string to normalize
   * @throws Exception if the test file cannot be read
   */
  public static void main(String[] args)
    throws Exception
  {
    if (args.length > 0) {
      System.out.println("Input: "+args[0]);
      System.out.println("Output: "+NFKC.normalizeNFKC(args[0]));
      return;
    }

    // Check if the normalization test file exists
    File f = new File("NormalizationTest.txt");
    if (!f.exists()) {
      System.err.println("Unable to find NormalizationTest.txt.");
      System.err.println("Please download the latest version of this file from:");
      System.err.println("http://www.unicode.org/Public/UNIDATA/");
      System.exit(1);
    }

    BufferedReader r = new BufferedReader(new FileReader(f));
    try {
      String line;
      while (null != (line = r.readLine())) {
        line = stripComment(line).trim();
        if (line.length() == 0) {
          // Empty line or comment-only line
          continue;
        }
        if (!checkLine(line)) {
          // Stop at the first mismatch; the process exit status is left
          // unchanged, only the report printed by checkLine() signals it.
          return;
        }
      }
    } finally {
      // Release the file handle on every path, including early return.
      r.close();
    }

    System.out.println("No errors detected!");
  }
}