t_Latn_InterIndic.txt   [plain text]


 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2002, International Business Machines
// Corporation and others.  All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpicurules.bat
// Source: ../../../impl/data/Transliterator_Latin_InterIndic.txt
// Date: Sat Jul 27 10:31:07 2002
//--------------------------------------------------------------------

// Latin_InterIndic

t_Latn_InterIndic {
  Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------

// Latin-InterIndic
// Variable table: each string of the form $name=<code point>; binds a rule
// variable to one private-use code point (U+E001 and up). The conversion
// rules further down emit these variables on the right-hand side of '>'.
// NOTE(review): trailing comments that cite a U+09xx value appear to name the
// Devanagari counterpart of the code point on the same line - confirm.
    // The ':: NFD;' line below is commented out and therefore inactive.
    //:: NFD;
    //\ue000 reserved
    //signs (the consonant variables start at $ka)
    "$chandrabindu=\ue001;"
    "$anusvara=\ue002;"
    "$visarga=\ue003;"
    //\ue004 reserved
    // w<vowel> represents the stand-alone form
    "$wa=\ue005;"
    "$waa=\ue006;"
    "$wi=\ue007;"
    "$wii=\ue008;"
    "$wu=\ue009;"
    "$wuu=\ue00a;"
    "$wr=\ue00b;"
    "$wl=\ue00c;"
    "$wce=\ue00d;" // LETTER CANDRA E
    "$wse=\ue00e;" // LETTER SHORT E
    "$we=\ue00f;"  // \u090f LETTER E
    "$wai=\ue010;"
    "$wco=\ue011;" // LETTER CANDRA O
    "$wso=\ue012;" // LETTER SHORT O
    "$wo=\ue013;"  // \u0913 LETTER O
    "$wau=\ue014;"
    //consonants: $ka..$ha occupy the contiguous run U+E015-U+E039, which the
    //$consonants set defined later in this file references as [$ka-$ha]
    "$ka=\ue015;"
    "$kha=\ue016;"
    "$ga=\ue017;"
    "$gha=\ue018;"
    "$nga=\ue019;"
    "$ca=\ue01a;"
    "$cha=\ue01b;"
    "$ja=\ue01c;"
    "$jha=\ue01d;"
    "$nya=\ue01e;"
    "$tta=\ue01f;"
    "$ttha=\ue020;"
    "$dda=\ue021;"
    "$ddha=\ue022;"
    "$nna=\ue023;"
    "$ta=\ue024;"
    "$tha=\ue025;"
    "$da=\ue026;"
    "$dha=\ue027;"
    "$na=\ue028;"
    "$ena=\ue029;" //compatibility
    "$pa=\ue02a;"
    "$pha=\ue02b;"
    "$ba=\ue02c;"
    "$bha=\ue02d;"
    "$ma=\ue02e;"
    "$ya=\ue02f;"
    "$ra=\ue030;"
    "$rra=\ue031;"
    "$la=\ue032;"
    "$lla=\ue033;" 
    "$ela=\ue034;" //compatibility
    "$va=\ue035;"
    "$sha=\ue036;"
    "$ssa=\ue037;"
    "$sa=\ue038;"
    "$ha=\ue039;"
//\u093a Reserved (U+E03A is not assigned to any variable)
//\u093b Reserved (U+E03B is not assigned to any variable)
    "$nukta=\ue03c;"
    "$avagraha=\ue03d;" // SIGN AVAGRAHA
    // <vowel> represents the dependent form
    // (there is no dependent-form variable for plain a: the rule
    // $virama a>; further down deletes both characters instead)
    "$aa=\ue03e;"
    "$i=\ue03f;"
    "$ii=\ue040;"
    "$u=\ue041;"
    "$uu=\ue042;"
    "$rh=\ue043;"
    "$lh=\ue044;"
    "$ce=\ue045;" //VOWEL SIGN CANDRA E
    "$se=\ue046;" //VOWEL SIGN SHORT E
    "$e=\ue047;"
    "$ai=\ue048;"
    "$co=\ue049;" // VOWEL SIGN CANDRA O
    "$so=\ue04a;" // VOWEL SIGN SHORT O
    "$o=\ue04b;"  // \u094b
    "$au=\ue04c;"    
    "$virama=\ue04d;"
// \u094e Reserved
// \u094f Reserved
   "$om = \ue050;" // OM
// U+E051-U+E054 are not assigned to any variable; the four lines below are
// commented-out placeholders carried over from the source of this file.
// \u0951>;        # UNMAPPED STRESS SIGN UDATTA
// \u0952>;        # UNMAPPED STRESS SIGN ANUDATTA
// \u0953>;        # UNMAPPED GRAVE ACCENT
// \u0954>;        # UNMAPPED ACUTE ACCENT
    "$lm = \ue055;"//  Telugu Length Mark
    "$ailm=\ue056;"//  AI Length Mark
    "$aulm=\ue057;"//  AU Length Mark
     //urdu compatibility forms
    "$uka=\ue058;"
    "$ukha=\ue059;"
    "$ugha=\ue05a;"
    "$ujha=\ue05b;"
    "$uddha=\ue05c;"
    "$udha=\ue05d;"
    "$ufa=\ue05e;"
    "$uya=\ue05f;"
    // $wrr/$wll are the stand-alone and $rrh/$llh the dependent targets of
    // the rules that match r or l followed by U+0325 and U+0304
    "$wrr=\ue060;"
    "$wll=\ue061;"
    "$rrh=\ue062;"
    "$llh=\ue063;"
    // punctuation; both are members of the $endThing set defined later
    "$danda=\ue064;"
    "$doubleDanda=\ue065;"
    // digits: targets of the 0-9 rules near the end of the rule list
    "$zero=\ue066;"     // DIGIT ZERO
    "$one=\ue067;"      // DIGIT ONE
    "$two=\ue068;"      // DIGIT TWO
    "$three=\ue069;"    // DIGIT THREE
    "$four=\ue06a;"     // DIGIT FOUR
    "$five=\ue06b;"     // DIGIT FIVE
    "$six=\ue06c;"      // DIGIT SIX
    "$seven=\ue06d;"    // DIGIT SEVEN
    "$eight=\ue06e;"    // DIGIT EIGHT
    "$nine=\ue06f;"     // DIGIT NINE
    // For all other scripts
    // Sixteen variables bound to U+E070-U+E07F. No rule visible in this file
    // refers to any of $ecp0..$ecpF; they are only defined here.
    "$ecp0=\ue070;"
    "$ecp1=\ue071;"
    "$ecp2=\ue072;"
    "$ecp3=\ue073;"
    "$ecp4=\ue074;"
    "$ecp5=\ue075;"
    "$ecp6=\ue076;"
    "$ecp7=\ue077;"
    "$ecp8=\ue078;"
    "$ecp9=\ue079;"
    "$ecpA=\ue07a;"
    "$ecpB=\ue07b;"
    "$ecpC=\ue07c;"
    "$ecpD=\ue07d;"
    "$ecpE=\ue07e;"
    "$ecpF=\ue07f;"
// \u0970>; # UNMAPPED ABBREVIATION SIGN
    // Character sets used as context by the rules below.
    // $depVowelAbove covers $aa,$i,$ii and $ce..$au; $depVowelBelow covers
    // $u,$uu,$rh,$lh (see the code points in the variable table).
    "$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
    "$depVowelBelow=[\ue041-\ue044];"
    "$endThing=[$danda$doubleDanda];"
    // $x was originally called '&'; $z was '%'
    // $x is defined but not referenced by any rule visible in this file.
    "$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
    // $z: the lowercase Latin letters other than a, e, i, o, u.
    "$z=[bcdfghjklmnpqrstvwxyz];"
    // $consonants: the InterIndic run $ka..$ha, the Latin letters in $z, and
    // the code points xx15-xx39 of nine blocks (presumably the consonant
    // ranges of Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil,
    // Telugu, Kannada and Malayalam - confirm against the Unicode charts).
    "$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];"
    // First conversion rules: combining marks and letter+mark pairs that map
    // straight to sign variables, plus x, which expands to $ka $virama $sa.
    // U+0315 = COMBINING COMMA ABOVE RIGHT, U+0303 = COMBINING TILDE,
    // U+0310 = COMBINING CANDRABINDU, U+0323 = COMBINING DOT BELOW.
    "\u0315 > $avagraha;"
    "\u0303>$chandrabindu$anusvara;"
    "m\u0310>$chandrabindu;"
    "h\u0323>$visarga;"
    "x>$ka$virama$sa;"
// convert to independent forms at start of word or syllable:
// dependent forms for roundtrip
// Each rule in this group matches U+0314 (COMBINING REVERSED COMMA ABOVE)
// followed by a Latin vowel spelling and outputs a dependent-form variable;
// the plain-a rule outputs nothing, so both characters are removed.
// NOTE(review): the first heading line mentions independent forms, but every
// rule in this group outputs a dependent-form variable - the heading may
// belong to a different group.
// Longer spellings (a+U+0304, ai, au) are listed before the plain-a rule;
// rule order matters because earlier rules are tried first.
    "\u0314a\u0304>$aa;"
    "\u0314ai>$ai;"
    "\u0314au>$au;"
    "\u0314ii>$ii;"
    "\u0314i\u0304>$ii;"
    "\u0314i>$i;"
    "\u0314u\u0304>$uu;"
    "\u0314u>$u;"
    "\u0314r\u0325\u0304>$rrh;"
    "\u0314r\u0325>$rh;"
    "\u0314l\u0325\u0304>$llh;"
    "\u0314lh>$lh;"
    "\u0314l\u0325>$lh;"
    "\u0314e\u0304>$e;"
    "\u0314o\u0304>$o;"
    "\u0314a>;"
    "\u0314e\u0306>$ce;"
    "\u0314o\u0306>$co;"
    "\u0314e>$se;"
    "\u0314o>$so;"

// preceded by consonants
// In each rule the text before '{' is before-context: a member of the
// $consonants set must precede the match but is not itself replaced. The
// vowel spelling after '{' is replaced by the dependent-form variable.
// This group has no rule for plain a.
    "$consonants{ a\u0304>$aa;"
    "$consonants{ ai>$ai;"
    "$consonants{ au>$au;"
    "$consonants{ ii>$ii;"
    "$consonants{ i\u0304>$ii;"
    "$consonants{ i>$i;"
    "$consonants{ u\u0304>$uu;"
    "$consonants{ u>$u;"
    "$consonants{ r\u0325\u0304>$rrh;"
    // r + U+0325 followed by a: the a is consumed together with the vowel
    "$consonants{ r\u0325a>$rh;"
    "$consonants{ r\u0325>$rh;"
    "$consonants{ l\u0325\u0304>$llh;"
    "$consonants{ lh>$lh;"
    "$consonants{ l\u0325>$lh;"
    "$consonants{ e\u0304>$e;"
    "$consonants{ o\u0304>$o;"
    "$consonants{ e\u0306>$ce;"
    "$consonants{ o\u0306>$co;"
    "$consonants{ e>$se;"
    "$consonants{ o>$so;"

// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
// Context-free vowel rules: a Latin vowel spelling that reaches this point is
// replaced by the stand-alone (w-prefixed) variable. Spellings with a
// combining mark or a second letter come before the bare letter they start
// with (for example a+U+0304, ai, au before a), since earlier rules are tried
// first.
    "a\u0304>$waa;"
    "ai>$wai;"
    "au>$wau;"
    "i\u0304>$wii;"
    "i>$wi;"
    "u\u0304>$wuu;"
    "u>$wu;"
    "r\u0325\u0304>$wrr;"
    "r\u0325>$wr;"
    "l\u0325\u0304>$wll;"
    "lh>$wl;"
    "l\u0325>$wl;"
    "e\u0304>$we;"
    "o\u0304>$wo;"
    "a>$wa;"
    "e\u0306>$wce;"
    "o\u0306>$wco;"
    "e>$wse;"
    // a literal apostrophe (written '' in rule syntax) followed by om -> $om;
    // placed before the bare o rule
    "''om>$om;"
    "o>$wso;"
    
    // rules for anusvara
    // The text after '}' is after-context: it must follow the match but is
    // not consumed. The first three rules keep n as $na + $virama (cursor
    // left before $virama by '|') when it is followed by r or l with U+0325,
    // or by na. The remaining rules turn n or m, optionally carrying a
    // combining mark, into $anusvara when the listed letters follow; the last
    // rule maps m + U+0307 to $anusvara unconditionally.
    "n}r\u0325           > $na|$virama;"
    "n}l\u0325           > $na|$virama;"
    "n}na                > $na|$virama;"
    "n\u0307}[kg]        > $anusvara;"
    "n\u0307}n\u0307     > $anusvara;"
    "n\u0304}[cj]        > $anusvara;"
    "n\u0304}n\u0303     > $anusvara;"
    "n\u0323}[tdn]\u0323 > $anusvara;"
    "n}[tdn]             > $anusvara;"
    "m}[pbm]             > $anusvara;"
    "n}[ylvshr]          > $anusvara;"
    "m\u0307             > $anusvara;"
    
    //urdu compatibility
    // Every consonant rule in this group outputs a consonant variable followed
    // by $virama and uses '|' to leave the cursor before that $virama, so the
    // later rules whose match starts with $virama can still act on it.
    "q>$uka|$virama;"
    "k\u0331h\u0331>$ukha |$virama;"
    "g\u0307> $ugha | $virama;"
    "z > $ujha |$virama;"
    "f > $ufa|$virama;"
    
    // dev
    // (presumably short for Devanagari - confirm)
    // Spellings with a combining mark or a trailing h are listed before the
    // bare letter they start with (for example kh before k).
    "y\u0307>$uya|$virama;"
    "l\u0331>$ela|$virama;"
    "n\u0331>$ena|$virama;"
    "n\u0307>$nga|$virama;"
    "n\u0303>$nya|$virama;"
    "n\u0323>$nna|$virama;"
    "t\u0323h>$ttha|$virama;"
    "t\u0323>$tta|$virama;"
    "r\u0323h>$udha|$virama;"
    "r\u0323>$uddha|$virama;"
    "d\u0323h>$ddha|$virama;"
    "d\u0323>$dda|$virama;"
    "kh>$kha|$virama;"
    "k>$ka|$virama;"
    "gh>$gha|$virama;"
    "g>$ga|$virama;"
    "ch>$cha|$virama;"
    "c>$ca|$virama;"
    "jh>$jha|$virama;"
    "j>$ja|$virama;"
    "ny>$nya|$virama;"
    "tth>$ttha|$virama;"
    "ddh>$ddha|$virama;"
    "th>$tha|$virama;"
    "t>$ta|$virama;"
    "dh>$dha|$virama;"
    "d>$da|$virama;"
    "n>$na|$virama;"
    "ph>$pha|$virama;"
    "p>$pa|$virama;"
    "bh>$bha|$virama;"
    "b>$ba|$virama;"
    "m>$ma|$virama;"
    "y>$ya|$virama;"
    "r\u0331>$rra|$virama;"
    "r>$ra|$virama;"
    "l\u0323>$lla|$virama;"
    "l>$la|$virama;"
    // v and w both map to $va
    "v>$va|$virama;"
    "w>$va|$virama;"
    // sh and s + U+0301 both map to $sha; ss and s + U+0323 both map to $ssa
    "sh>$sha|$virama;"
    "ss>$ssa|$virama;"
    "s\u0323>$ssa|$virama;"
    "s\u0301>$sha|$virama;"
    "s>$sa|$virama;"
    "h>$ha|$virama;"
    // a quoted full stop becomes $danda; $danda followed by a full stop
    // becomes $doubleDanda
    // NOTE(review): the first rule has no '|' cursor marker, so confirm the
    // second rule is actually reachable for the input '..'.
    "'.'>$danda;"
    "$danda'.'>$doubleDanda;"
    // '~' becomes $anusvara after a member of $depVowelAbove and
    // $chandrabindu after a member of $depVowelBelow (before-context only)
    "$depVowelAbove{'~'>$anusvara;"
    "$depVowelBelow{'~'>$chandrabindu;"
// convert to dependent forms after consonant with no vowel:
// e.g. kai -> {ka}{virama}ai -> {ka}{ai}
// Each rule matches $virama together with the following Latin vowel spelling
// and replaces both with the dependent-form variable. The plain-a rule has an
// empty right-hand side, so $virama and a are both removed. Lines that start
// with //$virama are commented-out rules and are inactive.
    //$virama aa>$aa;
    "$virama a\u0304>$aa;"
    "$virama ai>$ai;"
    "$virama au>$au;"
    "$virama ii>$ii;"
    "$virama i\u0304>$ii;"
    "$virama i>$i;"
    //$virama uu>$uu;
    "$virama u\u0304>$uu;"
    "$virama u>$u;"
    //$virama rrh>$rrh;
    "$virama r\u0325\u0304>$rrh;"
    //$virama rh>$rh;
    "$virama r\u0325a>$rh;"
    "$virama r\u0325>$rh;"
    "$virama l\u0325\u0304>$llh;"
    "$virama lh>$lh;"
    "$virama l\u0325>$lh;"
    "$virama e\u0304>$e;"
    "$virama o\u0304>$o;"
    "$virama a>;"
    "$virama e\u0306>$ce;"
    "$virama o\u0306>$co;"
    "$virama e>$se;"
    "$virama o>$so;"


// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
// Each rule matches $virama, a literal apostrophe (written '' in rule syntax)
// and a Latin vowel spelling, and replaces all three with the stand-alone
// (w-prefixed) variable.
// NOTE(review): the example in the heading keeps {virama} in the result, but
// $virama is part of the matched text and does not appear on the right-hand
// side of these rules - confirm which of the two is intended.
// Lines that start with //$virama are commented-out rules and are inactive.
    //$virama''aa>$waa;
    "$virama''a\u0304>$waa;"
    "$virama''ai>$wai;"
    "$virama''au>$wau;"
    //$virama''ii>$wii;
    "$virama''i\u0304>$wii;"
    "$virama''i>$wi;"
    //$virama''uu>$wuu;
    "$virama''u\u0304>$wuu;"
    "$virama''u>$wu;"
    //$virama''rrh>$wrr;
    "$virama''r\u0325\u0304>$wrr;"
    //$virama''rh>$wr;
    "$virama''r\u0325>$wr;"
    "$virama''l\u0325\u0304>$wll;"
    //$virama''lh>$wl;
    "$virama''l\u0325>$wl;"
    "$virama''e\u0304>$we;"
    "$virama''o\u0304>$wo;"
    "$virama''a>$wa;"
    "$virama''e\u0306>$wce;"
    "$virama''o\u0306>$wco;"
    "$virama''e>$wse;"
    "$virama''o>$wso;"
// no virama
// Same mapping as the group above but without a leading $virama: a literal
// apostrophe plus a Latin vowel spelling becomes the stand-alone variable.
    "''a\u0304>$waa;"
    "''ai>$wai;"
    "''au>$wau;"
    "''i\u0304>$wii;"
    "''i>$wi;"
    "''u\u0304>$wuu;"
    "''u>$wu;"
    "''r\u0325\u0304>$wrr;"
    "''r\u0325>$wr;"
    "''l\u0325\u0304>$wll;"
    "''l\u0325>$wl;"
    "''e\u0304>$we;"
    "''o\u0304>$wo;"
    "''a>$wa;"
    "''e\u0306>$wce;"
    "''o\u0306>$wco;"
    "''e>$wse;"
    "''o>$wso;"

    // Remaining $virama handling (after-context follows '}'):
    // - before a Latin letter in $z or before a space, $virama is rewritten
    //   to itself, i.e. kept;
    // - before a member of $endThing ($danda or $doubleDanda) it is removed.
    "$virama } [$z] > $virama;"
    "$virama } ' ' > $virama ;"
    "$virama}$endThing>;"
    // ASCII digits map to the digit variables
    "0>$zero;"
    "1>$one;"
    "2>$two;"
    "3>$three;"
    "4>$four;"
    "5>$five;"
    "6>$six;"
    "7>$seven;"
    "8>$eight;"
    "9>$nine;"
    // any apostrophe not consumed by an earlier rule is removed
    "''>;"
    // The ':: NFC (NFD) ;' line below is commented out and therefore inactive.
    //:: NFC (NFD) ;
  }
}