use FileHandle;
use strict;
use Dumpvalue;
# When true, the intermediate hashes are printed while the script runs.
my $DEBUG = 1;
my $DUMPER = Dumpvalue->new;

# Command line: <icu_dir> [<out_file>].  The argument count is taken
# before shift() consumes @ARGV so the usage check can still see it.
my $count = scalar @ARGV;
my $ICU_DIR = shift() || '';
my $OUT_FILE = shift() || 'data.h';

# Input directories, relative to the ICU4C root.
my $HEADER_DIR = "$ICU_DIR/source/common/unicode";
my $UNIDATA_DIR = "$ICU_DIR/source/data/unidata";

# Current year, used in the generated copyright notice.
my $YEAR = 1900 + (localtime)[5];

# Counters used to make each "n/a" property / value name unique.
my $propNA = 0;
my $valueNA = 0;

# Keys of the top-level property hashes, in output order.
my @TOP = qw( _bp _ep _sp _dp _mp );

# Top-level property groups: name prefix => list of property hash keys.
my %TOP_PROPS = ( "" => [ @TOP ] );

# Property type word (as written in uchar.h comments) => property hash key.
my %PROP_TYPE = (
    Binary     => "_bp",
    Enumerated => "_ep",
    String     => "_sp",
    Double     => "_dp",
    Bitmask    => "_mp",
);

# Properties reported with "Info:" rather than "Warning:" when uchar.h
# has no enum for them.
my %UNSUPPORTED = map { $_ => 1 } qw(
    Composition_Exclusion
    Decomposition_Mapping
    Expands_On_NFC
    Expands_On_NFD
    Expands_On_NFKC
    Expands_On_NFKD
    FC_NFKC_Closure
    ID_Start_Exceptions
    Special_Case_Condition
);

# Short names of properties listed in PropertyAliases but absent from uchar.h.
my %MISSING_FROM_UCHAR;

# Long property name => extra aliases beyond the short and long names.
my %additional_property_aliases;

# Names of the C++ classes used in the generated header.
my ($STRING_CLASS, $ALIAS_CLASS, $PROPERTY_CLASS) = qw(AliasName Alias Property);
# Print the usage text and exit with status 1 unless one or two arguments
# were given and both input directories exist.
my $bad_usage = $count < 1
             || $count > 2
             || !-d $HEADER_DIR
             || !-d $UNIDATA_DIR;
if ($bad_usage) {
    # Script name without its directory part, plus padding of equal width.
    (my $me = $0) =~ s|.+[/\\]||;
    my $lm = ' ' x length($me);
    print <<"END";
$me: Reads ICU4C headers and Unicode data files and creates
$lm a C header file that is included by genpname. The header
$lm file matches constants defined in the ICU4C headers with
$lm property|value aliases in the Unicode data files.
Usage: $me <icu_dir> [<out_file>]
<icu_dir> ICU4C root directory, containing
source/common/unicode/uchar.h
source/common/unicode/uscript.h
source/data/unidata/Blocks.txt
source/data/unidata/PropertyAliases.txt
source/data/unidata/PropertyValueAliases.txt
<out_file> File name of header to be written;
default is 'data.h'.
The Unicode versions of all input files must match.
END
    exit(1);
}
# Main driver: read and merge all inputs, optionally dump the merged
# hash, then write the generated header to $OUT_FILE.
my ($h, $version) = readAndMerge($HEADER_DIR, $UNIDATA_DIR);

if ($DEBUG) {
    print "Merged hash:\n";
    for my $key (sort keys %$h) {
        my $hh = $h->{$key};
        for my $subkey (sort keys %$hh) {
            print "$key:$subkey:", $hh->{$subkey}, "\n";
        }
    }
}

my $out = FileHandle->new($OUT_FILE, 'w');
die "Error: Can't write to $OUT_FILE: $!" unless (defined $out);

# formatData() prints to the currently selected handle, so make the
# output file the default for the duration of the call.
my $save = select($out);
formatData($h, $version);
select($save);

# Buffered write errors (e.g. disk full) only surface at close time;
# do not report success if the header could not be written completely.
$out->close()
    or die "Error: Can't finish writing $OUT_FILE: $!";

exit(0);
# Return true if the given property name starts with "Other_" or is
# exactly "Non_Break" (both tests are case-insensitive).
sub isIgnoredProperty {
    my ($name) = @_;
    return $name =~ /^Other_/i || $name =~ /^Non_Break$/i;
}
# Return true if the given property name is one of the two pseudo
# properties 'qc' or 'binprop' (exact, case-sensitive match).
sub isPseudoProperty {
    my ($prop) = @_;
    return $prop eq 'qc' || $prop eq 'binprop';
}
# Print the generated C header to the currently selected output handle.
#
# Parameters:
#   $h       - merged hash: property key => { enum name => "name|name|..." }.
#              NOTE: modified in place; every name-list string is replaced
#              by the index of its name group in NAME_GROUP.
#   $version - Unicode version string, e.g. "4.1.0".
#
# Dies if a name-list string contains no names.
sub formatData {
    my $h = shift;
    my $version = shift;

    my $date = scalar localtime();
    print <<"END";
/**
* Copyright (C) 2002-$YEAR, International Business Machines Corporation and
* others. All Rights Reserved.
*
* MACHINE GENERATED FILE. !!! Do not edit manually !!!
*
* Generated from
* uchar.h
* uscript.h
* Blocks.txt
* PropertyAliases.txt
* PropertyValueAliases.txt
*
* Date: $date
* Unicode version: $version
* Script: $0
*/
END

    #------------------------------------------------------------
    # Unicode version, padded with zeros to four fields.
    print "/* Unicode version $version */\n";
    my @v = split(/\./, $version);
    push @v, '0' while (@v < 4);
    for (my $i=0; $i<@v; ++$i) {
        print "const uint8_t VERSION_$i = $v[$i];\n";
    }
    print "\n";

    #------------------------------------------------------------
    # Collect every distinct non-empty name into the string table.
    my %strings;
    for my $prop (sort keys %$h) {
        my $hh = $h->{$prop};
        for my $enum (sort keys %$hh) {
            my @a = split(/\|/, $hh->{$enum});
            for (@a) {
                $strings{$_} = 1 if (length($_));
            }
        }
    }
    my @strings = sort keys %strings;
    # The empty string always gets ID 0.
    unshift @strings, "";

    print "const int32_t STRING_COUNT = ", scalar @strings, ";\n\n";

    my %stringToID;
    print "/* to be sorted */\n";
    print "const $STRING_CLASS STRING_TABLE[] = {\n";
    for (my $i=0; $i<@strings; ++$i) {
        print " $STRING_CLASS(\"$strings[$i]\", $i),\n";
        $stringToID{$strings[$i]} = $i;
    }
    print "};\n\n";

    print "/* to be filled in */\n";
    print "int32_t REMAP[", scalar @strings, "];\n\n";

    #------------------------------------------------------------
    # Build the name groups.  Each group is a run of string IDs; the
    # last ID of a run is stored negated to mark the end of the group.
    my @nameGroups;
    my %groupToInt;    # name-list string => index of its group
    for my $prop (sort keys %$h) {
        my $hh = $h->{$prop};
        for my $enum (sort keys %$hh) {
            my $groupString = $hh->{$enum};
            my $i;
            if (exists $groupToInt{$groupString}) {
                $i = $groupToInt{$groupString};
            } else {
                my @names = split(/\|/, $groupString);
                die "Error: Wrong number of names in " . $groupString if (@names < 1);
                $i = @nameGroups;                 # index of the group being made
                $groupToInt{$groupString} = $i;   # cache for reuse
                push @nameGroups, map { $stringToID{$_} } @names;
                # Mark the end of the group; the printing loop below stops
                # at the first negative entry.
                # NOTE(review): rebuilt from a truncated source line.
                $nameGroups[-1] = -$nameGroups[-1];
            }
            # Replace the name list with a reference to its name group.
            $hh->{$enum} = $i;
        }
    }

    print "const int32_t NAME_GROUP_COUNT = ",
          scalar @nameGroups, ";\n\n";

    print "int32_t NAME_GROUP[] = {\n";
    my $max_names = 0;
    for (my $i=0; $i<@nameGroups; ) {
        my @a;
        my $line;
        my $start = $i;
        # Consume one group: entries up to and including the negated one.
        for (;;) {
            my $j = $nameGroups[$i++];
            $line .= "$j, ";
            push @a, abs($j);
            last if ($j < 0);
        }
        print " ",
              $line,
              ' 'x(20-length($line)),
              "/* ", sprintf("%3d", $start),
              ": \"", join("\", \"", map { $strings[$_] } @a), "\" */\n";
        $max_names = @a if(@a > $max_names);
    }
    print "};\n\n";

    print "#define MAX_NAMES_PER_GROUP $max_names\n\n";

    #------------------------------------------------------------
    # Value tables for every property except the top-level ('_'-prefixed)
    # property hashes.
    for my $prop (sort keys %$h) {
        next if ($prop =~ /^_/);
        my $vh = $h->{$prop};
        my $count = scalar keys %$vh;

        print "const int32_t VALUES_${prop}_COUNT = ",
              $count, ";\n\n";

        print "const $ALIAS_CLASS VALUES_${prop}\[] = {\n";
        for my $enum (sort keys %$vh) {
            print " $ALIAS_CLASS((int32_t) $enum, ", $vh->{$enum}, "),\n";
        }
        print "};\n\n";
    }

    #------------------------------------------------------------
    # Property tables.
    for my $topName (sort keys %TOP_PROPS) {
        my $a = $TOP_PROPS{$topName};
        my $count = 0;
        for my $type (@$a) {
            $count += scalar keys %{$h->{$type}};
        }

        print "const int32_t ${topName}PROPERTY_COUNT = $count;\n\n";

        print "const $PROPERTY_CLASS ${topName}PROPERTY[] = {\n";
        for my $type (@$a) {
            my $p = $h->{$type};
            for my $enum (sort keys %$p) {
                # First name of the property's name group.
                my $name = $strings[$nameGroups[$p->{$enum}]];
                my $valueRef = "0, NULL";
                if ($type eq '_bp') {
                    # All binary properties share one value table.
                    $valueRef = "VALUES_binprop_COUNT, VALUES_binprop";
                }
                elsif (exists $h->{$name}) {
                    $valueRef = "VALUES_${name}_COUNT, VALUES_$name";
                }

                print " $PROPERTY_CLASS((int32_t) $enum, ",
                      $p->{$enum}, ", $valueRef),\n";
            }
        }
        print "};\n\n";
    }

    print "/*eof*/\n";
}
# Read all input files and merge them into one hash.
#
# Parameters:
#   $headerDir  - directory containing uchar.h and uscript.h
#   $unidataDir - directory containing Blocks.txt, PropertyAliases.txt
#                 and PropertyValueAliases.txt
#
# The two Synthetic*.txt files are read relative to the current directory.
#
# Returns the list (merged hash ref, Unicode version string).
sub readAndMerge {
    my ($headerDir, $unidataDir) = @_;

    my $uchar   = read_uchar("$headerDir/uchar.h");
    my $scripts = read_uscript("$headerDir/uscript.h");
    my $blocks  = read_Blocks("$unidataDir/Blocks.txt");

    my $propAliases = {};
    for my $file ("$unidataDir/PropertyAliases.txt",
                  "SyntheticPropertyAliases.txt") {
        read_PropertyAliases($propAliases, $file);
    }

    my $valueAliases = {};
    for my $file ("$unidataDir/PropertyValueAliases.txt",
                  "SyntheticPropertyValueAliases.txt") {
        read_PropertyValueAliases($valueAliases, $file);
    }

    # Pull the family table out of the alias hash before checking versions.
    my $families = delete $propAliases->{'_family'};

    my $version = check_versions(
        [ 'uchar.h', $uchar ],
        [ 'Blocks.txt', $blocks ],
        [ 'PropertyAliases.txt', $propAliases ],
        [ 'PropertyValueAliases.txt', $valueAliases ],
    );

    check_PropertyValueAliases($propAliases, $valueAliases);

    if ($DEBUG) {
        print "Property values hash:\n";
        for my $key (sort keys %$valueAliases) {
            my $inner = $valueAliases->{$key};
            print "$key:$_:", $inner->{$_}, "\n" for sort keys %$inner;
        }

        print "Script hash:\n";
        print "$_:", $scripts->{$_}, "\n" for sort keys %$scripts;
    }

    # Scripts become the 'sc' property of the merged hash.
    $uchar->{'sc'} = $scripts;

    merge_Blocks($uchar, $blocks);
    merge_PropertyAliases($uchar, $propAliases, $families);
    merge_PropertyValueAliases($uchar, $valueAliases);

    return ($uchar, $version);
}
# Verify that every input carries the same Unicode version.
#
# Parameters: a list of [ display name, hash ref ] pairs.  Each hash must
# have a '_version' key, which is removed from the hash.
#
# Versions are padded with ".0" to at least three fields before being
# compared.  Returns the common version; dies if a hash has no version
# or if two versions differ.
sub check_versions {
    my $version = '';
    my $msg = '';
    for my $pair (@_) {
        my ($name, $h) = @$pair;

        exists $h->{'_version'} or die "Error: No version found";
        my $v = delete $h->{'_version'};

        # Pad e.g. "4" or "4.1" out to x.y.z.
        for my $pass (1 .. 2) {
            $v .= '.0' unless ($v =~ /\.\d+\./);
        }

        $msg .= "$name = $v\n";

        if (!$version) {
            $version = $v;
            next;
        }
        $version eq $v
            or die "Error: Mismatched Unicode versions\n$msg";
    }
    return $version;
}
# Warn about every property in the value-alias hash that is neither a
# value (short name) in the property-alias hash nor a pseudo property.
#
# Parameters:
#   $pa - property alias hash (long name => short name)
#   $va - property value alias hash (property => { ... })
sub check_PropertyValueAliases {
    my ($pa, $va) = @_;

    # Set of all short names present in $pa.
    my %is_short_name = map { $_ => 1 } values %$pa;

    for my $prop (keys %$va) {
        next if exists $is_short_name{$prop};
        next if isPseudoProperty($prop);
        print "Warning: Property $prop from PropertyValueAliases not listed in PropertyAliases\n";
    }
}
# Replace the values of $h->{'blk'} (keys into the Blocks.txt hash) with
# the block names found under those keys.
#
# Parameters:
#   $h - hash from uchar.h; must contain a 'blk' sub-hash
#   $b - hash from Blocks.txt; each entry that is used is removed
#
# Dies if 'blk' is missing, if a (true) key is absent from $b, or if any
# entry of $b is left over afterwards.
sub merge_Blocks {
    my ($h, $blocks) = @_;

    exists $h->{'blk'}
        or die "Error: No blocks data in uchar.h";
    my $blk = $h->{'blk'};

    for my $enum (keys %$blk) {
        my $cp = $blk->{$enum};
        die "Error: No block found at $cp in Blocks.txt"
            if ($cp && !exists $blocks->{$cp});
        # Take the name and consume the Blocks.txt entry in one step.
        $blk->{$enum} = delete $blocks->{$cp};
    }

    # Whatever is left was never referenced from uchar.h.
    my $err = join '',
              map { "Error: Block " . $blocks->{$_} . " not listed in uchar.h\n" }
              keys %$blocks;
    die $err if ($err);
}
# Replace each long property name in the top-level property hashes of $h
# with a "short|long[|additional...]" name list.
#
# Parameters:
#   $h   - hash from uchar.h; must contain every key listed in @TOP
#   $pa  - property alias hash (long name => short name); each entry
#          that is used is removed
#   $fam - hash mapping long name => family, used in the messages
#
# A short name of the form "n/a<digits>" becomes an empty name.  Entries
# left in $pa are recorded in %MISSING_FROM_UCHAR and reported as "Info"
# (unsupported) or "Warning" unless the property is ignored.
sub merge_PropertyAliases {
    my ($h, $pa, $fam) = @_;

    for my $k (@TOP) {
        exists $h->{$k}
            or die "Error: No properties data for $k in uchar.h";
    }

    for my $k (@TOP) {
        my $subh = $h->{$k};
        for my $enum (keys %$subh) {
            my $long_name = $subh->{$enum};
            exists $pa->{$long_name}
                or die "Error: Property $long_name not found (or used more than once)";

            # Consume the alias so that a second use is detected above.
            my $short_name = delete $pa->{$long_name};

            my @names = ($short_name =~ m|^n/a\d*$| ? '' : $short_name,
                         $long_name);
            push @names, $additional_property_aliases{$long_name}
                if (exists $additional_property_aliases{$long_name});

            $subh->{$enum} = join("|", @names);
        }
    }

    my @err;
    for my $name (keys %$pa) {
        $MISSING_FROM_UCHAR{$pa->{$name}} = 1;

        my $severity;
        if (exists $UNSUPPORTED{$name}) {
            $severity = "Info";
        } elsif (!isIgnoredProperty($name)) {
            $severity = "Warning";
        }
        next unless defined $severity;

        push @err, "$severity: No enum for " . $fam->{$name} . " property $name in uchar.h";
    }
    print join("\n", sort @err), "\n" if (@err);
}
# Compare two names loosely: whitespace, hyphens and underscores are
# removed from both, and the remainders are compared case-insensitively.
#
# Parameters: the two names to compare.
# Returns true if they match, false otherwise.
sub matchesLoosely {
    my ($left, $right) = @_;
    $left  =~ s/[\s\-_]//g;
    $right =~ s/[\s\-_]//g;
    # \Q...\E: the second name is data, not a pattern, so any regex
    # metacharacters in it must be matched literally.
    return $left =~ /^\Q$right\E$/i;
}
# Replace each value name in $h with a "short|long" name list taken from
# the property value alias hash.
#
# Parameters:
#   $h  - merged hash: property key => { enum name => value name }
#   $va - property value alias hash: property => { short => long };
#         entries that are used are removed, except for 'gc', whose
#         entries are shared with the 'gcm' property and only counted
#
# A value name is looked up first as an exact key, then as a
# case-insensitive match against either the key or the value, and (for
# 'blk' only) finally with matchesLoosely().  "n/a<digits>" names become
# empty.  The 'ccc' values are copied to 'ccc', 'lccc' and 'tccc', and
# the 'binprop' values to 'binprop'.  Anything left over in $va is
# reported as a warning unless the property is in %MISSING_FROM_UCHAR.
#
# Dies if a property or value cannot be found, or if the 'ccc' or
# 'binprop' data is missing.
sub merge_PropertyValueAliases {
    my ($h, $va) = @_;

    # Number of times each gc value was matched (by 'gc' and/or 'gcm').
    my %gcCount;

    for my $prop (keys %$h) {
        # Top-level property hashes have no value aliases.
        next if ($prop =~ /^_/);

        # The gc mask values share the aliases of gc itself.
        my $prop2 = ($prop eq 'gcm') ? 'gc' : $prop;
        die "Error: Can't find $prop in PropertyValueAliases.txt"
            unless (exists $va->{$prop2});

        my $pva = $va->{$prop2};
        my $hh = $h->{$prop};
        for my $enum (keys %$hh) {
            my $name = $hh->{$enum};

            # Find the key of $pva that corresponds to $name.
            my $n;
            if (exists $pva->{$name}) {
                $n = $name;
            } else {
                # The name is data, not a pattern: quote metacharacters.
                # Compiled once per name rather than once per candidate.
                my $exact = qr/^\Q$name\E$/i;
                for my $a (keys %$pva) {
                    if ($a =~ $exact ||
                        $pva->{$a} =~ $exact) {
                        $n = $a;
                        last;
                    }
                }
            }

            # Block names are written differently in Blocks.txt and in
            # PropertyValueAliases.txt, so fall back to a loose match.
            if (!$n && $prop eq 'blk') {
                for my $a (keys %$pva) {
                    if (matchesLoosely($name, $pva->{$a}) ||
                        matchesLoosely($name, $a)) {
                        $n = $a;
                        last;
                    }
                }
            }

            die "Error: Property value $prop:$name not found" unless ($n);

            my $l = $n;
            my $r = $pva->{$n};
            # Convert the uniquified "n/a" placeholders to empty names.
            $l = '' if ($l =~ m|^n/a\d*$|);
            $r = '' if ($r =~ m|^n/a\d*$|);

            $hh->{$enum} = "$l|$r";

            if ($prop2 eq 'gc') {
                # gc entries are used twice (gc and gcm): count, don't delete.
                ++$gcCount{$n};
            } else {
                delete $pva->{$n};
            }
        }
    }

    # ccc values are not enumerated in uchar.h; copy them across and
    # share them with lccc and tccc.
    die "Error: No ccc data"
        unless exists $va->{'ccc'};
    for my $ccc (keys %{$va->{'ccc'}}) {
        die "Error: Can't overwrite ccc $ccc"
            if (exists $h->{'ccc'}->{$ccc});
        $h->{'lccc'}->{$ccc} =
        $h->{'tccc'}->{$ccc} =
        $h->{'ccc'}->{$ccc} = $va->{'ccc'}->{$ccc};
    }
    delete $va->{'ccc'};

    # True/False value aliases shared by all binary properties.
    die "Error: No True/False value aliases"
        unless exists $va->{'binprop'};
    for my $bp (keys %{$va->{'binprop'}}) {
        $h->{'binprop'}->{$bp} = $va->{'binprop'}->{$bp};
    }
    delete $va->{'binprop'};

    # Report everything that was not consumed above.
    my $err = '';
    for my $prop (sort keys %$va) {
        my $hh = $va->{$prop};
        for my $subkey (sort keys %$hh) {
            if ($prop eq 'gc') {
                # A gc value matched once or twice is accounted for.
                my $n = $gcCount{$subkey} || 0;
                next if ($n >= 1 && $n <= 2);
            }
            $err .= "Warning: Enum for value $prop:$subkey not found in uchar.h\n"
                unless exists $MISSING_FROM_UCHAR{$prop};
        }
    }
    print $err if ($err);
}
# Read a PropertyAliases-style file into $hash.
#
# Parameters:
#   $hash     - hash to fill: long name => short name.  Also receives
#               '_version' (if the file names one) and '_family'
#               (long name => family heading in effect for that line).
#   $filename - file to read
#
# Each data line has the form "short ; long [; alias]".  A name that is
# literally "n/a" gets a unique three-digit suffix from $propNA.  A
# third field is stored in %additional_property_aliases.
#
# Dies if the file cannot be opened, names more than one version,
# contains a duplicate long name, or has a line that cannot be parsed.
sub read_PropertyAliases {
    my $hash = shift;
    my $filename = shift;

    # Continue an existing family table when called more than once.
    my $fam = exists $hash->{'_family'} ? $hash->{'_family'} : {};
    my $family;

    my $in = FileHandle->new($filename, 'r');
    die "Error: Cannot open $filename" if (!defined $in);

    while (<$in>) {

        # The version is embedded in a comment.
        if (/PropertyAliases-(\d+\.\d+\.\d+)/i) {
            die "Error: Multiple versions in $filename"
                if (exists $hash->{'_version'});
            $hash->{'_version'} = $1;
        }

        # Family heading, e.g. "# Binary Properties".
        # NOTE(review): this pattern was rebuilt from a truncated source
        # line -- confirm it against the heading format of the data file.
        if (/^\s*\#\s*(.+?)\s+Properties\s*$/) {
            $family = $1;
        }

        # Ignore comments and blank lines.
        # NOTE(review): rebuilt from a truncated source line.
        s/\#.*//;
        next unless (/\S/);

        if (/^\s*(.+?)\s*;/) {
            my $short = $1;
            my @fields = /;\s*([^\s;]+)/g;
            if (@fields < 1 || @fields > 2) {
                my $number = @fields;
                die "Error: Wrong number of fields ($number) in $filename at $_";
            }

            # Make "n/a" names unique.
            if ($short eq 'n/a') {
                $short .= sprintf("%03d", $propNA++);
            }
            my $long = $fields[0];
            if ($long eq 'n/a') {
                $long .= sprintf("%03d", $propNA++);
            }

            if (exists $hash->{$long}) {
                die "Error: Duplicate property $long in $filename"
            }
            $hash->{$long} = $short;
            $fam->{$long} = $family;

            # Any fields after the long name are additional aliases,
            # stored joined with '|'.
            if (@fields > 1) {
                my $value = pop @fields;
                while (@fields > 1) {
                    $value .= "|" . pop @fields;
                }
                $additional_property_aliases{$long} = $value;
            }
        } else {
            die "Error: Can't parse $_ in $filename";
        }
    }

    $in->close();

    $hash->{'_family'} = $fam;
}
# Read a PropertyValueAliases-style file into $hash.
#
# Parameters:
#   $hash     - hash to fill: property => { short name => long name(s) }.
#               Also receives '_version' if the file names one.
#   $filename - file to read
#
# Each data line has the form "prop ; short ; long [; more ...]" with two
# to five fields after the property.  Fields after the second are joined
# onto the second with '|'.  A short name that is literally "n/a" gets a
# unique three-digit suffix from $valueNA.
#
# After reading, a 'Qaac' => 'Coptic' script entry is added if neither
# 'Qaac' nor 'Copt' is present, and the 'binprop' pseudo property is
# synthesized if the file did not define it.
#
# Dies if the file cannot be opened, names more than one version, or has
# a line that cannot be parsed; addDatum() dies on duplicates.
sub read_PropertyValueAliases {
    my $hash = shift;
    my $filename = shift;

    my $in = FileHandle->new($filename, 'r');
    die "Error: Cannot open $filename" if (!defined $in);

    while (<$in>) {

        # The version is embedded in a comment.
        if (/PropertyValueAliases-(\d+\.\d+\.\d+)/i) {
            die "Error: Multiple versions in $filename"
                if (exists $hash->{'_version'});
            $hash->{'_version'} = $1;
        }

        # Ignore comments and blank lines.
        # NOTE(review): rebuilt from a truncated source line.
        s/\#.*//;
        next unless (/\S/);

        if (/^\s*(.+?)\s*;/i) {
            my $prop = $1;
            my @fields = /;\s*([^\s;]+)/g;
            die "Error: Wrong number of fields in $filename"
                if (@fields < 2 || @fields > 5);

            # Make "n/a" names unique.
            $fields[0] .= sprintf("%03d", $valueNA++) if ($fields[0] eq 'n/a');

            # Squash extra fields together so that exactly two remain,
            # as addDatum() takes a single value.
            # NOTE(review): the append below was rebuilt from a truncated
            # source line; the '|' separator matches the name lists used
            # everywhere else in this script.
            while (@fields > 2) {
                my $f = pop @fields;
                $fields[-1] .= '|' . $f;
            }
            addDatum($hash, $prop, @fields);
        }
        else {
            die "Error: Can't parse $_ in $filename";
        }
    }

    $in->close();

    # Add a Coptic script alias if the file has neither Qaac nor Copt.
    $hash->{'sc'}->{'Qaac'} = 'Coptic'
        unless (exists $hash->{'sc'}->{'Qaac'} || exists $hash->{'sc'}->{'Copt'});

    # Synthesize the True/False aliases of the 'binprop' pseudo property,
    # taking them from the binary property 'Alpha' when it has them.
    if (!exists $hash->{'binprop'}->{'0'}) {
        if (exists $hash->{'Alpha'}->{'N'}) {
            $hash->{'binprop'}->{'0'} = 'N|' . $hash->{'Alpha'}->{'N'};
            $hash->{'binprop'}->{'1'} = 'Y|' . $hash->{'Alpha'}->{'Y'};
        } elsif (exists $hash->{'Alpha'}) {
            die "Error: Unrecognized short value name for binary property 'Alpha'\n";
        } else {
            $hash->{'binprop'}->{'0'} = 'N|No|F|False';
            $hash->{'binprop'}->{'1'} = 'Y|Yes|T|True';
        }
    }
}
# Read a Blocks.txt-style file.
#
# Parameter: $filename - file to read.
#
# Returns a hash ref mapping the start code point of each range (as
# written in the file) to the block name, plus '_version' if the file
# names one and a fixed 'none' => 'No Block' entry.
#
# Dies if the file cannot be opened, names more than one version,
# repeats a range start, or has a line that cannot be parsed.
sub read_Blocks {
    my $filename = shift;

    my $hash = {};

    my $in = FileHandle->new($filename, 'r');
    die "Error: Cannot open $filename" if (!defined $in);

    while (<$in>) {

        # The version is embedded in a comment.
        if (/Blocks-(\d+\.\d+\.\d+)/i) {
            die "Error: Multiple versions in $filename"
                if (exists $hash->{'_version'});
            $hash->{'_version'} = $1;
        }

        # Ignore comments and blank lines.
        # NOTE(review): rebuilt from a truncated source line.
        s/\#.*//;
        next unless (/\S/);

        # Data line: "<start>..<end>; <block name>"
        if (/^([0-9a-f]+)\.\.[0-9a-f]+\s*;\s*(.+?)\s*$/i) {
            die "Error: Duplicate range $1 in $filename"
                if (exists $hash->{$1});
            $hash->{$1} = $2;
        }
        else {
            die "Error: Can't parse $_ in $filename";
        }
    }

    $in->close();

    # Pseudo range for code points that belong to no block.
    $hash->{'none'} = 'No Block';

    $hash;
}
# Read the script enum constants from a uscript.h-style header.
#
# Parameter: $filename - file to read.
#
# Returns a hash ref mapping each USCRIPT_* constant in the UScriptCode
# enum to the first word of the comment that follows it on the same line.
#
# Dies if the file cannot be opened, if a constant is repeated, or if
# an #ifdef appears inside the deprecated section.
sub read_uscript {
    my $filename = shift;

    my $mode = '';    # name of the enum being parsed, or 'DEPRECATED'
    my $last = '';    # text of pending backslash-continued lines

    my $hash = {};

    my $in = FileHandle->new($filename, 'r');
    die "Error: Cannot open $filename" if (!defined $in);

    while (<$in>) {
        # Join lines that end in a backslash.  Accumulate (rather than
        # overwrite) so that more than two continued lines are kept, as
        # read_uchar() does.
        if (/^(.*)\\$/) {
            $last .= $1;
            next;
        } elsif ($last) {
            $_ = $last . $_;
            $last = '';
        }

        # A closing brace ends the current enum.
        if ($mode && $mode ne 'DEPRECATED') {
            if (/^\s*\}/) {
                $mode = '';
                next;
            }
        }

        if ($mode eq 'UScriptCode') {
            # e.g. "USCRIPT_ARABIC = 2, /* Arab */"
            if (m|^\s*(USCRIPT_\w+).+?/\*\s*(\w+)|) {
                my ($enum, $code) = ($1, $2);
                die "Error: Duplicate script $enum"
                    if (exists $hash->{$enum});
                $hash->{$enum} = $code;
            }
        }

        elsif ($mode eq 'DEPRECATED') {
            # Skip everything up to the matching #endif.
            # NOTE(review): both patterns were rebuilt from truncated
            # source lines.
            if (/\s*\#ifdef/) {
                die "Error: Nested #ifdef";
            }
            elsif (/\s*\#endif/) {
                $mode = '';
            }
        }

        elsif (!$mode) {
            if (/^\s*typedef\s+enum\s+(\w+)\s*\{/ ||
                /^\s*typedef\s+enum\s+(\w+)\s*$/) {
                $mode = $1;
            }

            # NOTE(review): pattern rebuilt from a truncated source line;
            # confirm the macro that guards deprecated declarations.
            elsif (/^\s*\#ifdef\s+ICU_UCHAR_USE_DEPRECATES\b/) {
                $mode = 'DEPRECATED';
            }
        }
    }

    $in->close();

    $hash;
}
# Read property and property value enum constants from a uchar.h-style
# header.
#
# Parameter: $filename - file to read.
#
# Returns a hash ref: property key => { C constant => name }, where the
# name comes from a comment next to (or preceding) the constant.  Keys
# produced here: the %PROP_TYPE values for UProperty constants, 'gc',
# 'gcm', 'bc', 'jg', the keys listed in %bracketed below, and the four
# *_QC properties, which are added unconditionally at the end.  The hash
# also gets '_version' from the U_UNICODE_VERSION define.
#
# Dies if the file cannot be opened, if a comment tag is not followed by
# a constant before the next tag, on an unrecognized UProperty comment,
# on a second version define, or on an #ifdef inside the deprecated
# section; addDatum() dies on duplicates.
sub read_uchar {
    my $filename = shift;

    # Enums whose constants carry their name in a trailing /*[name]*/
    # comment: enum type => [ constant prefix, property key ].
    my %bracketed = (
        UBlockCode            => [ 'UBLOCK_', 'blk' ],
        UEastAsianWidth       => [ 'U_EA_',   'ea'  ],
        UDecompositionType    => [ 'U_DT_',   'dt'  ],
        UJoiningType          => [ 'U_JT_',   'jt'  ],
        UGraphemeClusterBreak => [ 'U_GCB_',  'GCB' ],
        UWordBreakValues      => [ 'U_WB_',   'WB'  ],
        USentenceBreak        => [ 'U_SB_',   'SB'  ],
        ULineBreak            => [ 'U_LB_',   'lb'  ],
        UNumericType          => [ 'U_NT_',   'nt'  ],
        UHangulSyllableType   => [ 'U_HST_',  'hst' ],
    );

    my $mode = '';       # name of the enum being parsed, or 'DEPRECATED'
    my $submode = '';    # name from the most recent comment tag, if unused
    my $last = '';       # text of pending backslash-continued lines

    my $hash = {};
    my $key;             # property key that goes with $submode

    my $in = FileHandle->new($filename, 'r');
    die "Error: Cannot open $filename" if (!defined $in);

    while (<$in>) {
        # Join lines that end in a backslash.
        if (/^(.*)\\$/) {
            $last .= $1;
            next;
        } elsif ($last) {
            $_ = $last . $_;
            $last = '';
        }

        # A closing brace ends the current enum.
        if ($mode && $mode ne 'DEPRECATED') {
            if (/^\s*\}/) {
                $mode = '';
                next;
            }
        }

        if ($mode eq 'UProperty') {
            if (/^\s*(UCHAR_\w+)\s*[,=]/ || /^\s+(UCHAR_\w+)\s*$/) {
                # A constant only counts when a comment tag precedes it.
                if ($submode) {
                    addDatum($hash, $key, $1, $submode);
                    $submode = '';
                }
            }

            # Comment tag: "/** <Type> property <Name>"
            elsif (m|^\s*/\*\*\s*(\w+)\s+property\s+(\w+)|i) {
                die "Error: Unmatched tag $submode" if ($submode);
                die "Error: Unrecognized UProperty comment: $_"
                    unless (exists $PROP_TYPE{$1});
                $key = $PROP_TYPE{$1};
                $submode = $2;
            }
        }

        elsif ($mode eq 'UCharCategory') {
            if (/^\s*(U_\w+)\s*=/) {
                if ($submode) {
                    addDatum($hash, 'gc', $1, $submode);
                    $submode = '';
                }
            }

            # Comment tag: "/** Xx " (two-letter category)
            elsif (m|^\s*/\*\*\s*([A-Z][a-z])\s|) {
                die "Error: Unmatched tag $submode" if ($submode);
                $submode = $1;
            }
        }

        elsif ($mode eq 'UCharDirection') {
            if (/^\s*(U_\w+)\s*[,=]/ || /^\s+(U_\w+)\s*$/) {
                if ($submode) {
                    addDatum($hash, $key, $1, $submode);
                    $submode = '';
                }
            }

            # Comment tag: "/** XX " (upper-case direction code)
            elsif (m|/\*\*\s*([A-Z]+)\s|) {
                die "Error: Unmatched tag $submode" if ($submode);
                $key = 'bc';
                $submode = $1;
            }
        }

        elsif (exists $bracketed{$mode}) {
            # e.g. "UBLOCK_BASIC_LATIN = 1, /*[0000]*/"
            my ($prefix, $prop) = @{ $bracketed{$mode} };
            if (m|^\s*(${prefix}\w+).+?/\*\[(.+?)\]\*/|) {
                addDatum($hash, $prop, $1, $2);
            }
        }

        elsif ($mode eq 'UJoiningGroup') {
            # The name is the constant itself minus its prefix.
            if (/^\s*(U_JG_(\w+))/) {
                addDatum($hash, 'jg', $1, $2) unless ($2 eq 'COUNT');
            }
        }

        elsif ($mode eq 'DEPRECATED') {
            # Skip everything up to the matching #endif.
            # NOTE(review): both patterns were rebuilt from truncated
            # source lines.
            if (/\s*\#ifdef/) {
                die "Error: Nested #ifdef";
            }
            elsif (/\s*\#endif/) {
                $mode = '';
            }
        }

        elsif (!$mode) {
            # "#define <left> <right>"
            # NOTE(review): pattern rebuilt from a truncated source line.
            if (/^\s*\#define\s+(\w+)\s+(.+)/) {
                my ($left, $right) = ($1, $2);
                if ($left eq 'U_UNICODE_VERSION') {
                    my $version = $right;
                    # Strip surrounding double quotes, if any.
                    $version = $1 if ($version =~ /^\"(.*)\"/);
                    die "Error: Multiple versions in $filename"
                        if (defined $hash->{'_version'});
                    $hash->{'_version'} = $version;
                }
                elsif ($left =~ /U_GC_(\w+?)_MASK/) {
                    addDatum($hash, 'gcm', $left, $1);
                }
            }

            elsif (/^\s*typedef\s+enum\s+(\w+)\s*\{/ ||
                   /^\s*typedef\s+enum\s+(\w+)\s*$/) {
                $mode = $1;
            }

            elsif (/^\s*enum\s+(\w+)\s*\{/ ||
                   /^\s*enum\s+(\w+)\s*$/) {
                $mode = $1;
            }

            # NOTE(review): pattern rebuilt from a truncated source line;
            # confirm the macro that guards deprecated declarations.
            elsif (/^\s*\#ifdef\s+ICU_UCHAR_USE_DEPRECATES\b/) {
                $mode = 'DEPRECATED';
            }
        }
    }

    $in->close();

    # Quick-check values are not parsed from the header; they are fixed
    # here.  NFD_QC and NFKD_QC have no "maybe" value.
    for my $qc (qw(NFC_QC NFKC_QC)) {
        addDatum($hash, $qc, 'UNORM_NO', 'N');
        addDatum($hash, $qc, 'UNORM_YES', 'Y');
        addDatum($hash, $qc, 'UNORM_MAYBE', 'M');
    }
    for my $qc (qw(NFD_QC NFKD_QC)) {
        addDatum($hash, $qc, 'UNORM_NO', 'N');
        addDatum($hash, $qc, 'UNORM_YES', 'Y');
    }

    $hash;
}
# Store $v at $h->{$k1}->{$k2}, refusing to overwrite an existing entry.
#
# Parameters:
#   $h  - two-level hash ref to modify
#   $k1 - first-level key
#   $k2 - second-level key
#   $v  - value to store
#
# Dies if $h->{$k1}->{$k2} already exists.
sub addDatum {
    my ($h, $k1, $k2, $v) = @_;

    die "Error: $k1:$k2 already set to " .
        $h->{$k1}->{$k2} . ", cannot set to " . $v
        if (exists $h->{$k1}->{$k2});

    return $h->{$k1}->{$k2} = $v;
}