#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Latin
#
# This first section only names code points: each "$name=\uXXXX;"
# statement binds a variable that the conversion rules further down
# refer to.  A statement of the form "\uXXXX>;" is a rule with an empty
# right-hand side, i.e. the code point is removed from the output.

#\ue000 reserved

# signs
$chandrabindu=\ue001;
$anusvara=\ue002;
$visarga=\ue003;
#\ue004 reserved (no variable is assigned to it here)

# A leading "w" in a name marks the stand-alone (independent) vowel form.
$wa=\ue005;
$waa=\ue006;
$wi=\ue007;
$wii=\ue008;
$wu=\ue009;
$wuu=\ue00a;
$wr=\ue00b;
$wl=\ue00c;
$wce=\ue00d; # LETTER CANDRA E
$wse=\ue00e; # LETTER SHORT E
$we=\ue00f; # \u090f LETTER E
$wai=\ue010;
$wco=\ue011; # LETTER CANDRA O
$wso=\ue012; # LETTER SHORT O
$wo=\ue013; # \u0913 LETTER O
$wau=\ue014;

# consonants
$ka=\ue015;
$kha=\ue016;
$ga=\ue017;
$gha=\ue018;
$nga=\ue019;
$ca=\ue01a;
$cha=\ue01b;
$ja=\ue01c;
$jha=\ue01d;
$nya=\ue01e;
$tta=\ue01f;
$ttha=\ue020;
$dda=\ue021;
$ddha=\ue022;
$nna=\ue023;
$ta=\ue024;
$tha=\ue025;
$da=\ue026;
$dha=\ue027;
$na=\ue028;
$ena=\ue029; #compatibility
$pa=\ue02a;
$pha=\ue02b;
$ba=\ue02c;
$bha=\ue02d;
$ma=\ue02e;
$ya=\ue02f;
$ra=\ue030;
$vva=\ue081; # note: \ue081 is out of sequence with its neighbours
$rra=\ue031;
$la=\ue032;
$lla=\ue033;
$ela=\ue034; #compatibility
$va=\ue035;
$sha=\ue036;
$ssa=\ue037;
$sa=\ue038;
$ha=\ue039;
#\u093a Reserved
#\u093b Reserved
$nukta=\ue03c;
$avagraha=\ue03d; # SIGN AVAGRAHA

# Names without the "w" prefix are the dependent forms (vowel signs).
$aa=\ue03e;
$i=\ue03f;
$ii=\ue040;
$u=\ue041;
$uu=\ue042;
$rh=\ue043;
$lh=\ue044;
$ce=\ue045; #VOWEL SIGN CANDRA E
$se=\ue046; #VOWEL SIGN SHORT E
$e=\ue047;
$ai=\ue048;
$co=\ue049; # VOWEL SIGN CANDRA O
$so=\ue04a; # VOWEL SIGN SHORT O
$o=\ue04b; # \u094b
$au=\ue04c;
$virama=\ue04d;
# \u094e Reserved
# \u094f Reserved
$om=\ue050; # OM

# Code points with no Latin counterpart are dropped.
\ue051>; # UNMAPPED STRESS SIGN UDATTA
\ue052>; # UNMAPPED STRESS SIGN ANUDATTA
\ue053>; # UNMAPPED GRAVE ACCENT
\ue054>; # UNMAPPED ACUTE ACCENT

$lm = \ue055;# Telugu Length Mark
$ailm=\ue056;# AI Length Mark
$aulm=\ue057;# AU Length Mark

# Urdu compatibility forms
$uka=\ue058;
$ukha=\ue059;
$ugha=\ue05a;
$ujha=\ue05b;
# Remaining Urdu compatibility forms.
$uddha=\ue05c;
$udha=\ue05d;
$ufa=\ue05e;
$uya=\ue05f;

# Long vocalic R/L: independent letters (w-prefixed) and dependent signs.
$wrr=\ue060;
$wll=\ue061;
$rrh=\ue062;
$llh=\ue063;

$danda=\ue064;
$doubleDanda=\ue065;

$zero=\ue066; # DIGIT ZERO
$one=\ue067; # DIGIT ONE
$two=\ue068; # DIGIT TWO
$three=\ue069; # DIGIT THREE
$four=\ue06a; # DIGIT FOUR
$five=\ue06b; # DIGIT FIVE
$six=\ue06c; # DIGIT SIX
$seven=\ue06d; # DIGIT SEVEN
$eight=\ue06e; # DIGIT EIGHT
$nine=\ue06f; # DIGIT NINE
# \u0970>; # UNMAPPED ABBREVIATION SIGN

# Ranges of dependent vowel code points.  Neither set is referenced by
# the rules visible in this file.
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
$depVowelBelow=[\ue041-\ue044];

# $x was originally called '&'; $z was '%'
# $x is the set of all dependent vowel signs; the rules below use it as
# a trailing context ("}$x") to detect that a vowel sign follows.
$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
# $z is the set of Latin consonant letters (unused by the rules visible here).
$z=[bcdfghjklmnpqrstvwxyz];
# Latin vowel letters plus r and three combining marks; used as a leading
# context ("$vowels{") in front of independent vowels.
$vowels=[aeiour\u0304\u0325\u0306];
# Any character that is neither a letter nor a combining mark in
# \u0300-\u034c; used as a leading context for dependent vowels that do
# not follow a letter.
$forceIndependentMatra = [^[[:L:][\u0300-\u034c]]];

######################################################################
# convert from Native letters to Latin letters
######################################################################

# Rule syntax reminder: text after "}" is trailing context and text
# before "{" is leading context; context is matched but not consumed.
# Rules are tried in order, so the more specific rule must come first.

#transliterations for anusvara
# The anusvara becomes the nasal that matches the group of the consonant
# that follows it; the final rule is the fallback.
$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;
$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;
$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;
$anusvara} [$ta$tha$da$dha$na] > n ;
$anusvara} [$pa$pha$ba$bha$ma] > m ;
$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;
$anusvara> m\u0307;

# Urdu compatibility
# Each consonant generally has three rules: followed by a vowel sign
# (bare consonant), followed by virama (bare consonant, virama consumed),
# and otherwise (consonant plus inherent "a").
$ya$nukta}$x > y\u0307 ;
$ya$nukta$virama > y\u0307 ;
$ya$nukta > y\u0307a ;
$la$nukta }$x > l\u0331 ;
$la$nukta$virama > l\u0331 ;
$la$nukta > l\u0331a ;
$na$nukta }$x > n\u0331 ;
$na$nukta$virama > n\u0331 ;
$na$nukta > n\u0331a ;
$ena }$x > n\u0331 ;
$ena$virama > n\u0331 ;
$ena > n\u0331a ;
$uka > qa ;
$ka$nukta }$x > q ;
$ka$nukta$virama > q ;
$ka$nukta > qa ;
$kha$nukta }$x > k\u0331h\u0331 ;
$kha$nukta$virama > k\u0331h\u0331 ;
$kha$nukta > k\u0331h\u0331a ;
$ukha$virama > k\u0331h\u0331;
$ukha > k\u0331h\u0331a;
$ugha > g\u0307a ;
$ga$nukta }$x > g\u0307 ;
$ga$nukta$virama > g\u0307 ;
$ga$nukta > g\u0307a ;
$ujha > za ;
$ja$nukta }$x > z ;
$ja$nukta$virama > z ;
$ja$nukta > za ;
$ddha$nukta}$x > r\u0323h ;
# Urdu compatibility forms (continued).
# Each consonant generally has three rules: followed by a vowel sign
# ("}$x" is trailing context, matched but not consumed) it becomes the
# bare consonant; followed by virama it becomes the bare consonant and
# the virama is consumed; otherwise the inherent "a" is appended.
$ddha$nukta$virama > r\u0323h ;
$ddha$nukta > r\u0323ha;
$uddha}$x > r\u0323 ;
$uddha$virama > r\u0323 ;
$uddha > r\u0323a;
# NOTE(review): $udha produces the same output as $uddha and has no
# vowel-sign/virama variants -- confirm this is intended.
$udha > r\u0323a ;
$dda$nukta}$x > r\u0323 ;
$dda$nukta$virama > r\u0323 ;
$dda$nukta > r\u0323a ;
$pha$nukta }$x > f ;
$pha$nukta$virama > f ;
$pha$nukta > fa ;
$ufa }$x > f ;
$ufa$virama > f ;
$ufa > fa ;
$ra$nukta}$x > r\u0331;
$ra$nukta$virama > r\u0331;
$ra$nukta > r\u0331a;
$lla$nukta}$x > l\u0331;
$lla$nukta$virama > l\u0331;
$lla$nukta > l\u0331a;
$ela}$x > l\u0331;
$ela$virama > l\u0331;
$ela > l\u0331a;
$uya}$x > y\u0307;
$uya$virama > y\u0307;
$uya > y\u0307a;

# normal consonants
# Rules are tried in order, so the "$C$virama}$ha" rule of each group
# must stay ahead of the plain "$C$virama" rule.  It emits the consonant
# followed by a literal apostrophe ('' is an escaped quote) when the
# next letter would otherwise merge with it in Latin, e.g. k + h versus
# the aspirate kh.
$ka$virama}$ha>k'';
$ka}$x>k;
$ka$virama>k;
$ka>ka;
$kha}$x>kh;
$kha$virama>kh;
$kha>kha;
$ga$virama}$ha>g'';
$ga}$x>g;
$ga$virama>g;
$ga>ga;
$gha}$x>gh;
$gha$virama>gh;
$gha>gha;
$nga}$x>n\u0307;
$nga$virama>n\u0307;
$nga>n\u0307a ;
$ca$virama}$ha>c'';
$ca}$x>c;
$ca$virama>c;
$ca>ca;
$cha}$x>ch;
$cha$virama>ch;
$cha>cha;
$ja$virama}$ha>j'';
$ja}$x>j;
$ja$virama>j;
$ja>ja;
$jha}$x>jh;
$jha$virama>jh;
$jha>jha;
$nya }$x>n\u0303 ;
$nya$virama>n\u0303;
$nya > n\u0303a ;
$tta$virama}$ha>t\u0323'';
$tta}$x>t\u0323;
$tta$virama>t\u0323;
$tta>t\u0323a;
$ttha}$x>t\u0323h;
$ttha$virama>t\u0323h;
$ttha>t\u0323ha;
# Fixed: this rule used to read "$dda}$x$ha", which matched
# dda + vowel sign + ha and put an apostrophe in front of the vowel,
# while dda + virama + ha got no separator and became indistinguishable
# from $ddha.  It now follows the same pattern as every sibling group.
$dda$virama}$ha>d\u0323'';
$dda}$x>d\u0323;
$dda$virama>d\u0323;
$dda>d\u0323a;
$ddha}$x>d\u0323h;
$ddha$virama>d\u0323h;
$ddha>d\u0323ha;
$nna}$x>n\u0323 ;
$nna$virama>n\u0323;
$nna>n\u0323a ;
$ta$virama}$ha>t'';
$ta$virama}$ttha>t'';
$ta$virama}$tta>t'';
$ta$virama}$tha>t'';
$ta}$x>t;
$ta$virama>t;
$ta>ta;
$tha}$x>th;
$tha$virama>th;
$tha>tha;
$da$virama}$ha>d'';
$da$virama}$ddha>d'';
$da$virama}$dda>d'';
$da$virama}$dha>d'';
$da}$x>d;
$da$virama>d;
$da>da;
$dha}$x>dh;
$dha$virama>dh;
$dha>dha;
$na$virama}$ga>n'';
$na$virama}$ya>n'';
$na}$x>n;
$na$virama>n;
$na>na;
$pa$virama}$ha>p'';
$pa}$x>p;
$pa$virama>p;
$pa>pa;
$pha}$x>ph;
$pha$virama>ph;
$pha>pha;
$ba$virama}$ha>b'';
$ba}$x>b;
$ba$virama>b;
$ba>ba;
$bha}$x>bh;
$bha$virama>bh;
$bha>bha;
$ma$virama}$ma>m'';
# normal consonants (continued)
# Per consonant: followed by a vowel sign ("}$x" is trailing context,
# matched but not consumed) -> bare consonant; followed by virama ->
# bare consonant, virama consumed; otherwise -> consonant plus the
# inherent "a".  A "$C$virama}$ha" rule emits a literal apostrophe
# ('' is an escaped quote) after the consonant.  Rules are tried in
# order, so the relative order within each group matters.
$ma}$x>m;
$ma$virama>m;
$ma>ma;
$ya}$x>y;
$ya$virama>y;
$ya>ya;
$ra$virama}$ha>r'';
$ra}$x>r;
$ra$virama>r;
$ra>ra;
$vva$virama}$ha>w\u0307'';
$vva}$x>w\u0307;
$vva$virama>w\u0307;
$vva>w\u0307a;
$rra$virama}$ha>r\u0331'';
$rra}$x>r\u0331;
$rra$virama>r\u0331;
$rra>r\u0331a;
$la$virama}$ha>l'';
$la}$x>l;
$la$virama>l;
$la>la;
$lla$virama}$ha>l\u0323'';
$lla}$x>l\u0323;
$lla$virama>l\u0323;
$lla>l\u0323a;
$va}$x>v;
$va$virama>v;
$va>va;
$sa$virama}$ha>s'';
$sa$virama}$sha>s'';
$sa$virama}$ssa>s'';
$sa$virama}$sa>s'';
$sa}$x>s;
$sa$virama>s;
#for gurmukhi
$sa$nukta}$x>s\u0301;
$sa$nukta$virama>s\u0301;
$sa$nukta>s\u0301a;
$sa>sa;
$sha}$x>s\u0301;
$sha$virama>s\u0301;
$sha>s\u0301a;
$ssa}$x>s\u0323;
$ssa$virama>s\u0323;
$ssa>s\u0323a;
$ha}$x>h;
$ha$virama>h;
$ha>ha;

# dependent vowels (should never occur except following consonants)
# When a vowel sign follows something that is not a letter or combining
# mark ("$forceIndependentMatra{" is leading context), the output is
# prefixed with \u0314 so that it stays distinguishable from the
# independent vowel.
$forceIndependentMatra{$aa > \u0314a\u0304 ;
$forceIndependentMatra{$ai > \u0314ai ;
$forceIndependentMatra{$au > \u0314au ;
$forceIndependentMatra{$ii > \u0314i\u0304 ;
$forceIndependentMatra{$i > \u0314i ;
$forceIndependentMatra{$uu > \u0314u\u0304 ;
$forceIndependentMatra{$u > \u0314u ;
$forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ;
$forceIndependentMatra{$rh > \u0314r\u0325 ;
$forceIndependentMatra{$llh > \u0314l\u0325\u0304 ;
$forceIndependentMatra{$lh > \u0314l\u0325 ;
$forceIndependentMatra{$e > \u0314e\u0304 ;
$forceIndependentMatra{$o > \u0314o\u0304 ;
#extra vowels
$forceIndependentMatra{$ce > \u0314e\u0306 ;
$forceIndependentMatra{$co > \u0314o\u0306 ;
$forceIndependentMatra{$se > \u0314e ;
$forceIndependentMatra{$so > \u0314o ;
$forceIndependentMatra{$nukta >; # Nukta cannot appear independently or as first character
$forceIndependentMatra{$virama >; # Virama cannot appear independently or as first character

# dependent vowels in their normal position
$aa > a\u0304 ;
$ai > ai ;
$au > au ;
$ii > i\u0304 ;
$i > i ;
$uu > u\u0304 ;
$u > u ;
$rrh > r\u0325\u0304 ;
$rh > r\u0325 ;
$llh > l\u0325\u0304 ;
$lh > l\u0325 ;
$e > e\u0304 ;
$o > o\u0304 ;
#extra vowels
$ce > e\u0306 ;
$co > o\u0306 ;
$se > e ;
$so > o ;

# Dependent vowels following independent vowels.  Generally illegal;
# handled only for round-tripping.
$waa} $x > a\u0304\u0314 ;
$wai} $x > ai\u0314 ;
$wau} $x > au\u0314 ;
$wii} $x > i\u0304\u0314 ;
$wi } $x > i\u0314 ;
$wuu} $x > u\u0304\u0314 ;
$wu } $x > u\u0314 ;
$wrr} $x > r\u0325\u0304\u0314 ;
$wr } $x > r\u0325\u0314 ;
$wll} $x > l\u0325\u0304\u0314 ;
$wl } $x > l\u0325\u0314 ;
$we } $x > e\u0304\u0314 ;
$wo } $x > o\u0304\u0314 ;
$wa } $x > a\u0314 ;
#extra vowels
$wce} $x > e\u0306\u0314 ;
$wco} $x > o\u0306\u0314 ;
$wse} $x > e\u0314 ;
$wso} $x > o\u0314 ;
$om} $x > ''om\u0314 ;

# independent vowels when preceded by vowels
# A literal apostrophe ('' is an escaped quote) is emitted in front of
# the vowel to separate it from the preceding one.
$vowels{$waa > ''a\u0304 ;
$vowels{$wai > ''ai ;
$vowels{$wau > ''au ;
$vowels{$wii > ''i\u0304 ;
$vowels{$wi > ''i ;
$vowels{$wuu > ''u\u0304 ;
$vowels{$wu > ''u ;
$vowels{$wrr > ''r\u0325\u0304 ;
$vowels{$wr > ''r\u0325 ;
$vowels{$wll > ''l\u0325\u0304 ;
$vowels{$wl > ''l\u0325 ;
$vowels{$we > ''e\u0304 ;
$vowels{$wo > ''o\u0304 ;
$vowels{$wa > ''a ;
#extra vowels
$vowels{$wce > ''e\u0306 ;
$vowels{$wco > ''o\u0306 ;
$vowels{$wse > ''e ;
$vowels{$wso > ''o ;

# independent vowels (otherwise)
$waa > a\u0304 ;
$wai > ai ;
$wau > au ;
$wii > i\u0304 ;
$wi > i ;
$wuu > u\u0304 ;
$wu > u ;
$wrr > r\u0325\u0304 ;
$wr > r\u0325 ;
$wll > l\u0325\u0304 ;
$wl > l\u0325 ;
$we > e\u0304 ;
$wo > o\u0304 ;
$wa > a ;
#extra vowels
$wce > e\u0306 ;
$wco > o\u0306 ;
$wse > e ;
$wso > o ;
$om > ''om ;

#stress marks
$avagraha > \u0315;
$chandrabindu$anusvara>\u0303;
$chandrabindu > m\u0310;
$visarga>h\u0323;

#numbers
$zero > 0;
$one > 1;
$two > 2;
$three > 3;
$four > 4;
$five > 5;
$six > 6;
$seven > 7;
$eight > 8;
$nine > 9;

# Length marks have no Latin output and are dropped.
$lm >;
$ailm >;
$aulm >;

# Both danda forms become a full stop.
$danda>'.';
$doubleDanda>'.';
\ue070>; # ABBREVIATION SIGN

# Fixed in the two groups below: the vowel-sign rule used to emit "ra",
# which put the inherent "a" in front of the following vowel.  Like
# every other "C}$x" rule in this file it now emits the bare consonant.
# LETTER RA WITH MIDDLE DIAGONAL
\ue071}$x>r;
\ue071$virama>r;
\ue071>ra;
# LETTER RA WITH LOWER DIAGONAL
\ue072}$x>r;
\ue072$virama>r;
\ue072>ra;

# Remaining code points have no Latin counterpart and are dropped.
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE004>; # DEVANAGARI VOWEL SIGN SHORT A