# Latin_InterIndic.txt   [plain text]


#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------

# Latin-InterIndic
    #:: NFD;
    # \ue000 is reserved.
    # Signs.
    $chandrabindu = \ue001;
    $anusvara     = \ue002;
    $visarga      = \ue003;
    # \ue004 is reserved.
    # Stand-alone (independent) vowels: a leading "w" marks the stand-alone form.
    $wa   = \ue005;
    $waa  = \ue006;
    $wi   = \ue007;
    $wii  = \ue008;
    $wu   = \ue009;
    $wuu  = \ue00a;
    $wr   = \ue00b;
    $wl   = \ue00c;
    $wce  = \ue00d;  # LETTER CANDRA E
    $wse  = \ue00e;  # LETTER SHORT E
    $we   = \ue00f;  # \u090f LETTER E
    $wai  = \ue010;
    $wco  = \ue011;  # LETTER CANDRA O
    $wso  = \ue012;  # LETTER SHORT O
    $wo   = \ue013;  # \u0913 LETTER O
    $wau  = \ue014;
    # Consonants. $ka through $ha occupy \ue015-\ue039; $vva is the one
    # exception and sits at \ue081.
    $ka    = \ue015;
    $kha   = \ue016;
    $ga    = \ue017;
    $gha   = \ue018;
    $nga   = \ue019;
    $ca    = \ue01a;
    $cha   = \ue01b;
    $ja    = \ue01c;
    $jha   = \ue01d;
    $nya   = \ue01e;
    $tta   = \ue01f;
    $ttha  = \ue020;
    $dda   = \ue021;
    $ddha  = \ue022;
    $nna   = \ue023;
    $ta    = \ue024;
    $tha   = \ue025;
    $da    = \ue026;
    $dha   = \ue027;
    $na    = \ue028;
    $ena   = \ue029;  # compatibility
    $pa    = \ue02a;
    $pha   = \ue02b;
    $ba    = \ue02c;
    $bha   = \ue02d;
    $ma    = \ue02e;
    $ya    = \ue02f;
    $ra    = \ue030;
    $rra   = \ue031;
    $la    = \ue032;
    $lla   = \ue033;
    $ela   = \ue034;  # compatibility
    $va    = \ue035;
    $vva   = \ue081;
    $sha   = \ue036;
    $ssa   = \ue037;
    $sa    = \ue038;
    $ha    = \ue039;
    # \u093a Reserved
    # \u093b Reserved
    $nukta    = \ue03c;
    $avagraha = \ue03d;  # SIGN AVAGRAHA
    # Dependent vowel signs: a bare <vowel> name (no "w" prefix) is the dependent form.
    $aa  = \ue03e;
    $i   = \ue03f;
    $ii  = \ue040;
    $u   = \ue041;
    $uu  = \ue042;
    $rh  = \ue043;
    $lh  = \ue044;
    $ce  = \ue045;  # VOWEL SIGN CANDRA E
    $se  = \ue046;  # VOWEL SIGN SHORT E
    $e   = \ue047;
    $ai  = \ue048;
    $co  = \ue049;  # VOWEL SIGN CANDRA O
    $so  = \ue04a;  # VOWEL SIGN SHORT O
    $o   = \ue04b;  # \u094b
    $au  = \ue04c;
    $virama = \ue04d;
    # \u094e Reserved
    # \u094f Reserved
    $om = \ue050;   # OM
    # \u0951>;        # UNMAPPED STRESS SIGN UDATTA
    # \u0952>;        # UNMAPPED STRESS SIGN ANUDATTA
    # \u0953>;        # UNMAPPED GRAVE ACCENT
    # \u0954>;        # UNMAPPED ACUTE ACCENT
    $lm   = \ue055;  # Telugu Length Mark
    $ailm = \ue056;  # AI Length Mark
    $aulm = \ue057;  # AU Length Mark
     # Urdu compatibility forms
    $uka   = \ue058;
    $ukha  = \ue059;
    $ugha  = \ue05a;
    $ujha  = \ue05b;
    $uddha = \ue05c;
    $udha  = \ue05d;
    $ufa   = \ue05e;
    $uya   = \ue05f;
    # Long vocalic R / L: stand-alone forms ($w...) first, then dependent forms.
    $wrr = \ue060;
    $wll = \ue061;
    $rrh = \ue062;
    $llh = \ue063;
    # Punctuation.
    $danda       = \ue064;
    $doubleDanda = \ue065;
    # Digits.
    $zero  = \ue066;  # DIGIT ZERO
    $one   = \ue067;  # DIGIT ONE
    $two   = \ue068;  # DIGIT TWO
    $three = \ue069;  # DIGIT THREE
    $four  = \ue06a;  # DIGIT FOUR
    $five  = \ue06b;  # DIGIT FIVE
    $six   = \ue06c;  # DIGIT SIX
    $seven = \ue06d;  # DIGIT SEVEN
    $eight = \ue06e;  # DIGIT EIGHT
    $nine  = \ue06f;  # DIGIT NINE
    # For all other scripts: sixteen consecutive code points, \ue070-\ue07f,
    # named by their final hex digit.
    $ecp0 = \ue070;
    $ecp1 = \ue071;
    $ecp2 = \ue072;
    $ecp3 = \ue073;
    $ecp4 = \ue074;
    $ecp5 = \ue075;
    $ecp6 = \ue076;
    $ecp7 = \ue077;
    $ecp8 = \ue078;
    $ecp9 = \ue079;
    $ecpA = \ue07a;
    $ecpB = \ue07b;
    $ecpC = \ue07c;
    $ecpD = \ue07d;
    $ecpE = \ue07e;
    $ecpF = \ue07f;
    # \u0970>; # UNMAPPED ABBREVIATION SIGN
    # Character classes used as context by the rules below.
    # Dependent vowel signs \ue03e-\ue040 ($aa $i $ii) and \ue045-\ue04c ($ce through $au).
    $depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
    # Dependent vowel signs \ue041-\ue044 ($u $uu $rh $lh).
    $depVowelBelow=[\ue041-\ue044];
    # Danda and double danda.
    $endThing=[$danda$doubleDanda];
    # $x was originally called '&'; $z was '%'
    # NOTE(review): $x is not referenced by any rule visible in this file; it lists
    # $rrh but not $llh -- confirm whether it is still needed.
    $x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
    # Lowercase Latin consonant letters.
    $z=[bcdfghjklmnpqrstvwxyz];
    # Anything treated as "a consonant precedes": the InterIndic range $ka-$ha
    # (\ue015-\ue039), the Latin consonant letters in $z, and the xx15-xx39 range of
    # the Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada and
    # Malayalam blocks. $vva (\ue081) and the Urdu forms (\ue058-\ue05f) fall outside
    # the $ka-$ha range and so are not members.
    $consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];
    # Signs written with combining marks, plus the letter x.
    # U+0315 (combining comma above right) -> avagraha.
    \u0315 > $avagraha;
    # A bare U+0303 (combining tilde) -> chandrabindu followed by anusvara.
    \u0303>$chandrabindu$anusvara;
    # m + U+0310 (combining candrabindu) -> chandrabindu.
    m\u0310>$chandrabindu;
    # h + U+0323 (combining dot below) -> visarga.
    h\u0323>$visarga;
    # x expands to the cluster ka + virama + sa.
    x>$ka$virama$sa;
# Dependent forms for roundtrip:
# U+0314 (combining reversed comma above) followed by a Latin vowel spelling
# produces the DEPENDENT vowel sign; both the marker and the vowel are consumed.
# Longer spellings are listed before their prefixes (e.g. "a\u0304", "ai", "au"
# before the bare "a") because the first matching rule wins.
    \u0314a\u0304>$aa;
    \u0314ai>$ai;
    \u0314au>$au;
    \u0314ii>$ii;
    \u0314i\u0304>$ii;
    \u0314i>$i;
    \u0314u\u0304>$uu;
    \u0314u>$u;
    \u0314r\u0325\u0304>$rrh;
    \u0314r\u0325>$rh;
    \u0314l\u0325\u0304>$llh;
    \u0314lh>$lh;
    \u0314l\u0325>$lh;
    \u0314e\u0304>$e;
    \u0314o\u0304>$o;
    # U+0314 + plain "a" produces nothing: both characters are deleted.
    \u0314a>;
    \u0314e\u0306>$ce;
    \u0314o\u0306>$co;
    \u0314e>$se;
    \u0314o>$so;

# Preceded by consonants:
# when the character before the cursor is in $consonants (the part before "{" is
# context only and is not replaced), a Latin vowel spelling becomes the DEPENDENT
# vowel sign. Longer spellings come before their prefixes so they match first.
# There is no rule for a plain "a" in this group.
    $consonants{ a\u0304>$aa;
    $consonants{ ai>$ai;
    $consonants{ au>$au;
    $consonants{ ii>$ii;
    $consonants{ i\u0304>$ii;
    $consonants{ i>$i;
    $consonants{ u\u0304>$uu;
    $consonants{ u>$u;
    $consonants{ r\u0325\u0304>$rrh;
    # r + ring below followed by "a": the "a" is consumed along with the vocalic r.
    $consonants{ r\u0325a>$rh;
    $consonants{ r\u0325>$rh;
    $consonants{ l\u0325\u0304>$llh;
    $consonants{ lh>$lh;
    $consonants{ l\u0325>$lh;
    $consonants{ e\u0304>$e;
    $consonants{ o\u0304>$o;
    $consonants{ e\u0306>$ce;
    $consonants{ o\u0306>$co;
    $consonants{ e>$se;
    $consonants{ o>$so;

# Vowel spellings not caught by the context-specific rules above become the
# STAND-ALONE (independent, $w...) vowel letters.
# e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
# Longer spellings precede their prefixes so they match first.
    a\u0304>$waa;
    ai>$wai;
    au>$wau;
    i\u0304>$wii;
    i>$wi;
    u\u0304>$wuu;
    u>$wu;
    r\u0325\u0304>$wrr;
    r\u0325>$wr;
    l\u0325\u0304>$wll;
    lh>$wl;
    l\u0325>$wl;
    e\u0304>$we;
    o\u0304>$wo;
    a>$wa;
    e\u0306>$wce;
    o\u0306>$wco;
    e>$wse;
    # '' is an escaped literal apostrophe: the three characters 'om become the OM sign.
    ''om>$om;
    o>$wso;
    
    # rules for anusvara
    # The part after "}" is following context: it must match but is not consumed.
    # In a replacement, "|" sets the cursor position, so "$na|$virama" leaves the
    # cursor in front of the virama for later rules to inspect.
    #
    # Exceptions first: n before vocalic r / vocalic l / "na" stays a real
    # consonant (na + virama). They must precede the n}[tdn] and n}[ylvshr] rules
    # below, which would otherwise match the same input.
    n}r\u0325           > $na|$virama;
    n}l\u0325           > $na|$virama;
    n}na                > $na|$virama;
    # n + dot above, before k/g or before another n + dot above.
    n\u0307}[kg]        > $anusvara;
    n\u0307}n\u0307     > $anusvara;
    # n + macron (U+0304), before c/j or before n + tilde (U+0303).
    # NOTE(review): the key uses a macron while the palatal nasal consonant rule
    # in this file is spelled n + U+0303 (tilde) -- confirm the macron is intended.
    n\u0304}[cj]        > $anusvara;
    n\u0304}n\u0303     > $anusvara;
    # n + dot below, before t/d/n that also carries a dot below.
    n\u0323}[tdn]\u0323 > $anusvara;
    # Plain n before t/d/n; plain m before p/b/m; plain n before y/l/v/s/h/r.
    n}[tdn]             > $anusvara;
    m}[pbm]             > $anusvara;
    n}[ylvshr]          > $anusvara;
    # m + dot above is always anusvara, regardless of what follows.
    m\u0307             > $anusvara;
    
    # Urdu compatibility consonants.
    # Each rule emits the consonant followed by a virama and, via "|", leaves the
    # cursor in front of that virama so the virama-prefixed vowel rules later in
    # this file can see it.
    q>$uka|$virama;
    # k and h, each followed by U+0331 (combining macron below).
    k\u0331h\u0331>$ukha |$virama;
    # g + U+0307 (combining dot above).
    g\u0307> $ugha | $virama;
    z > $ujha |$virama;
    f > $ufa|$virama;
    
    # dev
    # Latin consonant spellings -> InterIndic consonant + virama, with the cursor
    # ("|") left in front of the virama. Spellings carrying a diacritic or a
    # trailing "h" are listed before the shorter spelling they start with
    # (t\u0323h before t\u0323, kh before k, ...) because the first matching rule wins.
    # Diacritics used: U+0307 dot above, U+0331 macron below, U+0303 tilde,
    # U+0323 dot below, U+0301 acute.
    y\u0307>$uya|$virama;
    l\u0331>$ela|$virama;
    n\u0331>$ena|$virama;
    n\u0307>$nga|$virama;
    n\u0303>$nya|$virama;
    n\u0323>$nna|$virama;
    t\u0323h>$ttha|$virama;
    t\u0323>$tta|$virama;
    r\u0323h>$udha|$virama;
    r\u0323>$uddha|$virama;
    d\u0323h>$ddha|$virama;
    d\u0323>$dda|$virama;
    kh>$kha|$virama;
    k>$ka|$virama;
    gh>$gha|$virama;
    g>$ga|$virama;
    ch>$cha|$virama;
    c>$ca|$virama;
    jh>$jha|$virama;
    j>$ja|$virama;
    # ASCII alternatives: "ny", "tth", "ddh" give the same results as
    # n\u0303, t\u0323h, d\u0323h above.
    ny>$nya|$virama;
    tth>$ttha|$virama;
    ddh>$ddha|$virama;
    th>$tha|$virama;
    t>$ta|$virama;
    dh>$dha|$virama;
    d>$da|$virama;
    n>$na|$virama;
    ph>$pha|$virama;
    p>$pa|$virama;
    bh>$bha|$virama;
    b>$ba|$virama;
    m>$ma|$virama;
    y>$ya|$virama;
    r\u0331>$rra|$virama;
    r>$ra|$virama;
    l\u0323>$lla|$virama;
    l>$la|$virama;
    # Both "v" and plain "w" map to $va; only w + dot above maps to $vva.
    v>$va|$virama;
    w\u0307>$vva|$virama;
    w>$va|$virama;
    # "sh" and s + acute both give $sha; "ss" and s + dot below both give $ssa.
    sh>$sha|$virama;
    ss>$ssa|$virama;
    s\u0323>$ssa|$virama;
    s\u0301>$sha|$virama;
    s>$sa|$virama;
    h>$ha|$virama;
    # A full stop becomes a danda.
    '.'>$danda;
    # A danda followed by a full stop becomes a double danda.
    # NOTE(review): the key of this rule begins with $danda, but the rule above has
    # no "|" and so leaves the cursor AFTER the danda it emits; this rule therefore
    # looks reachable only when the input already contains \ue064 -- confirm that
    # ".." really yields a double danda.
    $danda'.'>$doubleDanda;
    # An ASCII "~" is an anusvara after a vowel sign in $depVowelAbove and a
    # chandrabindu after one in $depVowelBelow (the vowel is context only).
    $depVowelAbove{'~'>$anusvara;
    $depVowelBelow{'~'>$chandrabindu;
# convert to dependent forms after consonant with no vowel:
# e.g. kai -> {ka}{virama}ai -> {ka}{ai}
# The consonant rules leave the cursor in front of the virama they emit, so these
# rules see "virama + Latin vowel" and replace both with the dependent vowel
# sign. Longer spellings precede their prefixes. Lines starting with "#$virama"
# are disabled ASCII-digraph alternatives.
    #$virama aa>$aa;
    $virama a\u0304>$aa;
    $virama ai>$ai;
    $virama au>$au;
    $virama ii>$ii;
    $virama i\u0304>$ii;
    $virama i>$i;
    #$virama uu>$uu;
    $virama u\u0304>$uu;
    $virama u>$u;
    #$virama rrh>$rrh;
    $virama r\u0325\u0304>$rrh;
    #$virama rh>$rh;
    # A trailing "a" after vocalic r is consumed together with it.
    $virama r\u0325a>$rh;
    $virama r\u0325>$rh;
    $virama l\u0325\u0304>$llh;
    $virama lh>$lh;
    $virama l\u0325>$lh;
    $virama e\u0304>$e;
    $virama o\u0304>$o;
    # Plain "a" has no vowel sign: virama and "a" are both removed, leaving the
    # bare consonant.
    $virama a>;
    $virama e\u0306>$ce;
    $virama o\u0306>$co;
    $virama e>$se;
    $virama o>$so;


# otherwise convert to independent forms when separated by ':
# k'ai -> {ka}{virama}'ai -> {ka}{wai}
# '' is an escaped literal apostrophe. The virama, the apostrophe and the vowel
# spelling are all part of the matched key, so all three are replaced by the
# stand-alone vowel letter and the virama is dropped from the output.
# Lines starting with "#$virama" are disabled ASCII-digraph alternatives.
    #$virama''aa>$waa;
    $virama''a\u0304>$waa;
    $virama''ai>$wai;
    $virama''au>$wau;
    #$virama''ii>$wii;
    $virama''i\u0304>$wii;
    $virama''i>$wi;
    #$virama''uu>$wuu;
    $virama''u\u0304>$wuu;
    $virama''u>$wu;
    #$virama''rrh>$wrr;
    $virama''r\u0325\u0304>$wrr;
    #$virama''rh>$wr;
    $virama''r\u0325>$wr;
    $virama''l\u0325\u0304>$wll;
    #$virama''lh>$wl;
    $virama''l\u0325>$wl;
    $virama''e\u0304>$we;
    $virama''o\u0304>$wo;
    $virama''a>$wa;
    $virama''e\u0306>$wce;
    $virama''o\u0306>$wco;
    $virama''e>$wse;
    $virama''o>$wso;
# no virama
# Same as the group above but without a virama in front: a literal apostrophe
# ('' is the escaped form) followed by a vowel spelling becomes the stand-alone
# vowel letter, and the apostrophe is consumed.
    ''a\u0304>$waa;
    ''ai>$wai;
    ''au>$wau;
    ''i\u0304>$wii;
    ''i>$wi;
    ''u\u0304>$wuu;
    ''u>$wu;
    ''r\u0325\u0304>$wrr;
    ''r\u0325>$wr;
    ''l\u0325\u0304>$wll;
    ''l\u0325>$wl;
    ''e\u0304>$we;
    ''o\u0304>$wo;
    ''a>$wa;
    ''e\u0306>$wce;
    ''o\u0306>$wco;
    ''e>$wse;
    ''o>$wso;

    # Virama clean-up, reached when none of the virama + vowel rules matched.
    # Before a Latin consonant letter or a space the virama is kept; rewriting it
    # to itself moves the cursor past it. The following character is context only.
    $virama } [$z] > $virama;
    $virama } ' ' > $virama ;
    # Before a danda or double danda the virama is deleted.
    $virama}$endThing>;
    # ASCII digits -> InterIndic digits.
    0>$zero;
    1>$one;
    2>$two;
    3>$three;
    4>$four;
    5>$five;
    6>$six;
    7>$seven;
    8>$eight;
    9>$nine;
    # Any literal apostrophe still left in the text is deleted.
    ''>;
    #:: NFC (NFD) ;