# Hebrew_Latin.txt   [plain text]


#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------

# Transliteration table for Hebrew
# Based on the UNGEGN table at:
#   http://www.eki.ee/wgrs/rom1_he.pdf
#
# Exceptions:
# - Accents are added to disambiguate letters
# - Combinations of dagesh, shin/sin dot that produce different
#   letters are not yet encoded.
#
# To test, open:
#   http://oss.software.ibm.com/cgi-bin/icu/tr
# Click Edit, paste in this file, Save As hebrew-latin/XXX
# (where XXX is a username)
# Now go back to the main window, and try it out.
# Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2
# Paste in hebrew text in Input, and hit Transliterate.
#
# For more information, see:
#   http://oss.software.ibm.com/icu/userguide/Transliteration.html

# Global filter for the forward (Hebrew -> Latin) direction: only characters
# in this set are touched by the rules below. The set is the union of
#   [:Hebrew:]        characters whose script is Hebrew
#   [:^ccc=0:]        characters with a non-zero canonical combining class
#   U+05B0..U+05B9    Hebrew points SHEVA through HOLAM
#   U+05BB..U+05BC    QUBUTS and DAGESH
#   U+05C1..U+05C2    SHIN DOT and SIN DOT
#   U+2135..U+2138    the letterlike ALEF, BET, GIMEL and DALET symbols
#   U+05BF            RAFE (written right after one literal combining mark)
# with U+05BD (METEG) removed from the result.
:: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138̄\u05BF] - [\u05BD]] ;
# Forward direction: apply NFKD before the rules run. The parenthesized
# name (nfc) is the counterpart used by the inverse (Latin -> Hebrew)
# direction.
:: nfkd (nfc) ;
# Look-ahead pattern: zero or more marks followed by a letter. It is used
# below as the "} $letterAfter" context that separates non-final from
# final letter forms.
$letterAfter = [:M:]* [:L:] ;

# Letters whose Latin form carries a diacritic (h and z with a line below,
# s and t with a cedilla). Keep these ahead of the plain-letter table that
# follows: longer items are moved here to avoid masking.
#
# "} $letterAfter" on the Latin side is a look-ahead condition: going
# Latin -> Hebrew, the non-final letter is chosen only when marks* + letter
# follow; otherwise the next rule supplies the final form. Going
# Hebrew -> Latin, both forms produce the same Latin letter.
# NOTE(review): assumes a context written on the right of "<>" constrains
# only the Latin -> Hebrew direction — confirm against the ICU rule docs.

ח <> ẖ ;
צ <> ẕ } $letterAfter;
ץ <> ẕ ;
ש <> ş ;
ת <> ţ ;

# Basic consonant table, alef through resh. Every "<>" rule is used in both
# directions.
#
# Kaf, mem, nun and pe each appear twice: first the non-final letter,
# guarded by "} $letterAfter", then the final letter with no guard. In the
# Latin -> Hebrew direction the first rule of a pair applies only when
# marks* + letter follow, so the second rule (the final form) is what
# remains at the end of a word. The order of the two rules in each pair
# therefore matters.
א <> ʼ ;
ב <> b ;
ג <> g ;
ד <> d ;
ה <> h ;
ו <> w ;
ז <> z ;
ט <> t ;
י <> y ;
כ <> k } $letterAfter;
ך <> k ;
ל <> l ;
מ <> m } $letterAfter;
ם <> m ; 
נ <> n  } $letterAfter;
ן <> n ;
ס <> s ;
ע <> ʻ ;
פ <> p } $letterAfter;
ף <> p ;
ק <> q ;
ר <> r ;

 # One-way (Hebrew -> Latin only) rules that expand each Yiddish ligature
 # into its two component letters. The "|" puts the cursor in front of the
 # replacement text, so the component letters are then transliterated by
 # the ordinary letter rules.
 װ > |  וו; # HEBREW LIGATURE YIDDISH DOUBLE VAV
 ױ > | וי; # HEBREW LIGATURE YIDDISH VAV YOD
 ײ > | יי ; # HEBREW LIGATURE YIDDISH DOUBLE YOD


# Dots attached to a Hebrew letter map, in both directions, to a Latin
# combining mark. They are not merged with the base letter into a distinct
# Latin letter.
ּ <> ̇ ; # dagesh <> combining dot above (no separate letter forms for now)
ׁ <> ̌ ; # shin dot <> combining caron (marks the "sh" reading)
ׂ <> ̂ ; # sin dot <> combining circumflex (marks the "s" reading)

# points
#
# $above matches a possibly empty run of characters whose canonical
# combining class is neither 0 nor 230. Despite the name, it does NOT match
# class-230 marks; it matches the other marks that may sit between a vowel
# letter and its accent.
$above = [^[:ccc=0:][:ccc=230:]]*;

# Each pair below handles one Hebrew point written as a Latin vowel plus an
# accent:
#   - the ">" rule (Hebrew -> Latin) emits the accented vowel;
#   - the "<" rule (Latin -> Hebrew) matches the vowel, captures any $above
#     run as $1, then matches the accent, and emits the point followed by
#     the captured marks.
# These pairs come before the plain-vowel rules further down so that a
# vowel followed by an accent is matched here rather than as a bare vowel.
# NOTE(review): the Hebrew points in this group appear to be wrapped in
# invisible left-to-right marks — keep them intact when editing; confirm.

# hataf patah <-> a + grave
‎ֲ‎ > à ;
‎ֲ‎ $1< a ($above)  ̀;

# qamats <-> a + acute
‎ָ‎ > á ;
‎ָ‎  $1 < a ($above)  ́;

# hataf segol <-> e + grave
‎ֱ‎ > è ;
‎ֱ‎  $1 < e ($above)  ̀;

# tsere <-> e + acute
‎ֵ‎ > é ;
‎ֵ‎  $1 < e ($above)   ́;

# sheva <-> e + breve
‎ְ‎ > e ̆ ;
‎ְ‎  $1 < e ($above)   ̆;

# holam <-> o + grave
‎ֹ‎ > ò ;
‎ֹ‎  $1 < o ($above)   ̀;

# Points that map to a bare vowel letter, in both directions:
# hiriq, qubuts, patah, segol, hataf qamats.
ִ <> i ;
ֻ <> u ;
ַ <> a ;
ֶ <> e ;
ֳ <> o ;

# rafe (U+05BF) <> combining macron
\u05BF <>   ̄ ;

# fallbacks
#
# One-way (Latin -> Hebrew only) rules for Latin letters that none of the
# Hebrew -> Latin rules in this file produce: c, f, j, v and x. "f" follows
# the same pattern as "p": the non-final letter when marks* + letter
# follow, otherwise the final form. "x" expands to two Hebrew letters.
ק < c ;
פ < f } $letterAfter;
ף < f ;
ז < j ;
ו < v ;
כס < x ;

# Closing "::" rules. A name or set written in parentheses belongs to the
# inverse (Latin -> Hebrew) direction only.
# NOTE(review): in the inverse direction the "::" rules are presumably run
# bottom-to-top (filter, then nfd, then lower, then the rules) — confirm
# against the ICU rule docs.

# Inverse only: lowercase the Latin text, since the rules above list only
# lowercase Latin letters.
:: (lower);
# Forward: apply NFC to the Latin output. Inverse: apply NFD to the input.
:: nfc (nfd) ;
# Inverse global filter: only these characters are touched when going
# Latin -> Hebrew. The set is the union of
#   [:Latin:]         characters whose script is Latin
#   [:^ccc=0:]        characters with a non-zero canonical combining class
#   U+02BB..U+02BC    the two modifier letters used for ayin and alef
#   U+0300..U+0302    combining grave, acute, circumflex
#   U+0307, U+030C    combining dot above, caron
#   U+0327, U+0331    combining cedilla, macron below
#   U+0340..U+0341    combining grave and acute tone marks
#   plus one literal combining mark written directly in the set.
:: ([[:Latin:] [:^ccc=0:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341    ̄ ]]);