mk_emoji_tbl.pl   [plain text]


#!/usr/bin/perl
# script to generate Shift_JIS encoded Emoji to/from Unicode conversion table. 
# Rui Hirokawa <hirokawa@php.net>
#
# usage: mktbl.pl EmojiSources.txt
#
# Unicoe;DoCoMo;KDDI;SoftBank

@docomo = ();
@kddi = ();
@softbank = ();

@to_docomo = ();
@to_kddi = ();
@to_sb = ();

$fname = "emoji2uni.h";
open(OUT,">$fname") or die $!;

sub sjis2code {
  my @c = unpack("C*", pack("H4", $_[0]));

  # Shift_JIS -> JIS
  $c[0] = (($c[0]-($c[0]<160?112:176))<<1)-($c[1]<159?1:0);
  $c[1] -= ($c[1]<159?($c[1]>127?32:31):126); 

  $s = ($c[0] - 0x21)*94 + $c[1]-0x21;

  return $s;
}

sub show_code {
    my @c = @_;
    $s = "\t";
    for ($i=0; $i<=$#c; $i++) {#
	if ($c[$i]) {
	    @v = split(' ',$c[$i]);
	    $s .= "0x$v[0], \t";
	    if ($#v > 0) {
		print "$i $v[0] $v[1]\n";
	    }
	} else {
	    $s .= "0x0000, \t";
	}
	if ($i % 4 == 3) {
	    $s .= "\n\t";
	}
    }
    return $s;
}

while(<>) {
    if ($_ =~ /^\d+/) {
	@v = split(/;/,$_);
	if ($v[1] =~ /[\dA-F]+/) {
	    $code = &sjis2code($v[1]);
	    $docomo{$code} = $v[0];
	    $to_docomo{$v[0]} = $code;
	}
	if ($v[2] =~ /[\dA-F]+/) {
	    $code = &sjis2code($v[2]);
	    $kddi{$code} = $v[0];
	    $to_kddi{$v[0]} = $code;
	}
	if ($v[3] =~ /[\dA-F]+/) {
	    $code = &sjis2code($v[3]);
	    $softbank{$code} = $v[0];
	    $to_sb{$v[0]} = $code;
	}
    }
}

print "DoCoMo\n";

$docomo_min = 10434;
$docomo_max = 10434+281;
@docomo_v = ();

foreach $key (sort {hex($a) <=> hex($b)} keys(%docomo)) {
    $s = $key;
    $pos = $s % 94;
    $ku = ($s - $pos)/94;
    $v = $key - $docomo_min;
    #print "$ku:$pos - ". $v ."=> $docomo{$key}\n";
    $docomo_v[$key-$docomo_min] = $docomo{$key};
}

$to_docomo_min = 10434;

$to_docomo_min1 = 0x0023;
$to_docomo_max1 = 0x00AE;
$to_docomo_min2 = 0x203C;
$to_docomo_max2 = 0x3299;
$to_docomo_min3 = 0x1F17F;
$to_docomo_max3 = 0x1F6BB;

@r_docomo1_key = ();
@r_docomo1_val = ();

@r_docomo2_key = ();
@r_docomo2_val = ();

@r_docomo3_key = ();
@r_docomo3_val = ();


foreach $key (sort {hex($a) <=> hex($b)} keys(%to_docomo)) {
    $s = $to_docomo{$key};

    $pos = $s % 94;
    $ku = ($s - $pos)/94;
    $v = $to_docomo{$key} - $to_docomo_min;
    $h = sprintf("%x",$s);
    #print "$ku:$pos = $h ($v) <= $key\n";
    if (hex($key) <= $to_docomo_max1) {
	push(@r_docomo1_key, $key);
	push(@r_docomo1_val, sprintf("%x", $to_docomo{$key}));
    } elsif (hex($key) <= $to_docomo_max2) {
	push(@r_docomo2_key, $key);
	push(@r_docomo2_val, $h);
    } elsif (hex($key) >= $to_docomo_max3) {
	push(@r_docomo3_key, $key);
	push(@r_docomo3_val, $h);
    }
}

push(@r_docomo1_key, 0x00);
push(@r_docomo1_val, 0x00);
push(@r_docomo2_key, 0x00);
push(@r_docomo2_val, 0x00);
push(@r_docomo3_key, 0x00);
push(@r_docomo3_val, 0x00);

print OUT "int mb_tbl_code2uni_docomo_min = $docomo_min;\n"; 
print OUT "int mb_tbl_code2uni_docomo_max = $docomo_max;\n\n"; 

print OUT "int mb_tbl_code2uni_docomo[] = {\n";
print OUT &show_code(@docomo_v);
print OUT "};\n\n";

print OUT "int mb_tbl_uni_docomo2code_min1 = $to_docomo_min1;\n"; 
print OUT "int mb_tbl_uni_docomo2code_max1 = $to_docomo_max1;\n"; 
print OUT "int mb_tbl_uni_docomo2code_min2 = $to_docomo_min2;\n"; 
print OUT "int mb_tbl_uni_docomo2code_max2 = $to_docomo_max2;\n"; 
print OUT "int mb_tbl_uni_docomo2code_min3 = $to_docomo_min3;\n"; 
print OUT "int mb_tbl_uni_docomo2code_max3 = $to_docomo_max3;\n\n"; 

#print "DOCOMO reverse 1\n";

print OUT "int mb_tbl_uni_docomo2code_key1[] = {\n";
print OUT &show_code(@r_docomo1_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_docomo2code_val1[] = {\n";
print OUT &show_code(@r_docomo1_val),"\n";
print OUT "};\n\n";

#print "DOCOMO reverse 2\n";

print OUT "int mb_tbl_uni_docomo2code_key2[] = {\n";
print OUT &show_code(@r_docomo2_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_docomo2code_val2[] = {\n";
print OUT &show_code(@r_docomo2_val),"\n";
print OUT "};\n\n";

print "DOCOMO reverse 3\n";

print OUT "int mb_tbl_uni_docomo2code_key3[] = {\n";
print OUT &show_code(@r_docomo3_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_docomo2code_val3[] = {\n";
print OUT &show_code(@r_docomo3_val),"\n";
print OUT "};\n\n";

#print "DOCOMO reverse end \n";

$kddi_min1 = 9400;
$kddi_max1 = 9400+264;
$kddi_min2 = 9400+564;
$kddi_max2 = 9400+939;

@kddi_v1 = ();
@kddi_v2 = ();

#print "KDDI\n";

foreach $key (sort {hex($a) <=> hex($b)} keys(%kddi)) {
    $s = $key;
    $pos = $s % 94;
    $ku = ($s - $pos)/94;
    $v = $key - $kddi_min1;
    $h = sprintf("%x",$key);
    #print "$ku:$pos :: $v ($h) => $kddi{$key}\n";

    if ($key <= $kddi_max1) {
	$kddi_v1[$key-$kddi_min1] = $kddi{$key};
    } elsif ($key <= $kddi_max2) {
	$kddi_v2[$key-$kddi_min2] = $kddi{$key};
    }
}

$to_kddi_min = 9660;
$to_kddi_min1 = 0x0030;
$to_kddi_max1 = 0x00AE;
$to_kddi_min2 = 0x2002;
$to_kddi_max2 = 0x3299;
$to_kddi_min3 = 0x1F004;
$to_kddi_max3 = 0x1F6C0;

@r_kddi1_key = (); @r_kddi1_val = ();
@r_kddi2_key = (); @r_kddi2_val = ();
@r_kddi3_key = (); @r_kddi3_val = ();

foreach $key (sort {hex($a) <=> hex($b)} keys(%to_kddi)) {
    $s = $to_kddi{$key};

    $pos = $s % 94;
    $ku = ($s - $pos)/94;
    $v = $to_kddi{$key} - $to_kddi_min;
    $h = sprintf("%x",$s);
    #print "$ku:$pos = $h ($v) <= $key\n";
    if (hex($key) <= $to_kddi_max1) {
	push(@r_kddi1_key, $key);
	push(@r_kddi1_val, $h);
    } elsif (hex($key) <= $to_kddi_max2) {
	push(@r_kddi2_key, $key);
	push(@r_kddi2_val, $h);
    } else {
	push(@r_kddi3_key, $key);
	push(@r_kddi3_val, $h);
    }
}

push(@r_kddi1_key, 0x00);
push(@r_kddi1_val, 0x00);
push(@r_kddi2_key, 0x00);
push(@r_kddi2_val, 0x00);
push(@r_kddi3_key, 0x00);
push(@r_kddi3_val, 0x00);

print OUT "int mb_tbl_code2uni_kddi1_min = $kddi_min1;\n"; 
print OUT "int mb_tbl_code2uni_kddi1_max = $kddi_max1;\n"; 
print OUT "int mb_tbl_code2uni_kddi2_min = $kddi_min2;\n"; 
print OUT "int mb_tbl_code2uni_kddi2_max = $kddi_max2;\n\n"; 

#print "KDDI 1\n";

print OUT "int mb_tbl_code2uni_kddi1[] = {\n";
print OUT &show_code(@kddi_v1);
print OUT "};\n\n";

#print "KDDI 2\n";

print OUT "int mb_tbl_code2uni_kddi2[] = {\n";
print OUT &show_code(@kddi_v2);
print OUT "};\n\n";

print OUT "int mb_tbl_uni_kddi2code_min1 = $to_kddi_min1;\n"; 
print OUT "int mb_tbl_uni_kddi2code_max1 = $to_kddi_max1;\n"; 
print OUT "int mb_tbl_uni_kddi2code_min2 = $to_kddi_min2;\n"; 
print OUT "int mb_tbl_uni_kddi2code_max2 = $to_kddi_max2;\n"; 
print OUT "int mb_tbl_uni_kddi2code_min3 = $to_kddi_min3;\n"; 
print OUT "int mb_tbl_uni_kddi2code_max3 = $to_kddi_max3;\n\n"; 

#print "KDDI reverse 1\n";

print OUT "int mb_tbl_uni_kddi2code_key1[] = {\n";
print OUT &show_code(@r_kddi1_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_kddi2code_val1[] = {\n";
print OUT &show_code(@r_kddi1_val),"\n";
print OUT "};\n\n";

#print "KDDI reverse 1\n";

print OUT "int mb_tbl_uni_kddi2code_key2[] = {\n";
print OUT &show_code(@r_kddi2_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_kddi2code_val2[] = {\n";
print OUT &show_code(@r_kddi2_val),"\n";
print OUT "};\n\n";

#print "KDDI reverse 3\n";

print OUT "int mb_tbl_uni_kddi2code_key3[] = {\n";
print OUT &show_code(@r_kddi3_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_kddi2code_val3[] = {\n";
print OUT &show_code(@r_kddi3_val),"\n";
print OUT "};\n\n";


$sb_min1 = 10153;
$sb_max1 = 10153+177;
$sb_min2 = 10153+376;
$sb_max2 = 10153+547;
$sb_min3 = 10153+752;
$sb_max3 = 10153+901;

@sb_v1 = ();
@sb_v2 = ();
@sb_v3 = ();

if (1) {
    print "SoftBank\n";
    
    foreach $key (sort {hex($a) <=> hex($b)} keys(%softbank)) {
	$s = $key;
	$pos = $s % 94;
	$ku = ($s - $pos)/94;
	$v = $key - $sb_min1;
	$h = sprintf("%x",$key);
	#print "$ku:$pos :: $v ($h) => $softbank{$key}\n";
	if ($key <= $sb_max1) {
	    $sb_v1[$key-$sb_min1] = $softbank{$key};
	} elsif ($key <= $sb_max2) {
	    $sb_v2[$key-$sb_min2] = $softbank{$key};
	} elsif ($key <= $sb_max3) {
	    $sb_v3[$key-$sb_min3] = $softbank{$key};
	}
    }
   
}

$to_sb_min = 10263;
$to_sb_min1 = 0x0023;
$to_sb_max1 = 0x00AE;
$to_sb_min2 = 0x2122;
$to_sb_max2 = 0x3299;
$to_sb_min3 = 0x1F004;
$to_sb_max3 = 0x1F6C0;

@r_sb1_key = (); @r_sb1_val = ();
@r_sb2_key = (); @r_sb2_val = ();
@r_sb3_key = (); @r_sb3_val = ();

foreach $key (sort {hex($a) <=> hex($b)} keys(%to_sb)) {
    $s = $to_sb{$key};

    $pos = $s % 94;
    $ku = ($s - $pos)/94;
    $v = $to_sb{$key} - $to_sb_min;
    $h = sprintf("%x",$s);
    #print "$ku:$pos = $h ($v) <= $key\n";
    if (hex($key) <= $to_sb_max1) {
	push(@r_sb1_key, $key);
	push(@r_sb1_val, $h);
    } elsif (hex($key) >= $to_sb_min2 && hex($key) <= $to_sb_max2) {
	push(@r_sb2_key, $key);
	push(@r_sb2_val, $h);
    } else {
	push(@r_sb3_key, $key);
	push(@r_sb3_val, $h);
    }
}

push(@r_sb1_key, 0x00);
push(@r_sb1_val, 0x00);
push(@r_sb2_key, 0x00);
push(@r_sb2_val, 0x00);
push(@r_sb3_key, 0x00);
push(@r_sb3_val, 0x00);


print OUT "int mb_tbl_code2uni_sb1_min = $sb_min1;\n"; 
print OUT "int mb_tbl_code2uni_sb1_max = $sb_max1;\n"; 
print OUT "int mb_tbl_code2uni_sb2_min = $sb_min2;\n"; 
print OUT "int mb_tbl_code2uni_sb2_max = $sb_max2;\n"; 
print OUT "int mb_tbl_code2uni_sb3_min = $sb_min3;\n"; 
print OUT "int mb_tbl_code2uni_sb3_max = $sb_max3;\n\n"; 

#print "SoftBank 1\n";

print OUT "int mb_tbl_code2uni_sb1[] = {\n";
print OUT &show_code(@sb_v1);
print OUT "};\n\n";

#print "SoftBank 2\n";

print OUT "int mb_tbl_code2uni_sb2[] = {\n";
print OUT &show_code(@sb_v2);
print OUT "};\n\n";

#print "SoftBank 3\n";

print OUT "int mb_tbl_code2uni_sb3[] = {\n";
print OUT &show_code(@sb_v3);
print OUT "};\n\n";

print OUT "int mb_tbl_uni_sb2code_min1 = $to_sb_min1;\n"; 
print OUT "int mb_tbl_uni_sb2code_max1 = $to_sb_max1;\n"; 
print OUT "int mb_tbl_uni_sb2code_min2 = $to_sb_min2;\n"; 
print OUT "int mb_tbl_uni_sb2code_max2 = $to_sb_max2;\n"; 
print OUT "int mb_tbl_uni_sb2code_min3 = $to_sb_min3;\n"; 
print OUT "int mb_tbl_uni_sb2code_max3 = $to_sb_max3;\n\n"; 

#print "SB reverse 1\n";

print OUT "int mb_tbl_uni_sb2code_key1[] = {\n";
print OUT &show_code(@r_sb1_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_sb2code_val1[] = {\n";
print OUT &show_code(@r_sb1_val),"\n";
print OUT "};\n\n";

#print "SB reverse 2\n";

print OUT "int mb_tbl_uni_sb2code_key2[] = {\n";
print OUT &show_code(@r_sb2_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_sb2code_val2[] = {\n";
print OUT &show_code(@r_sb2_val),"\n";
print OUT "};\n\n";

#print "SB reverse 3\n";

print OUT "int mb_tbl_uni_sb2code_key3[] = {\n";
print OUT &show_code(@r_sb3_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_sb2code_val3[] = {\n";
print OUT &show_code(@r_sb3_val),"\n";
print OUT "};\n\n";


close(OUT);