#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# For modern Greek, based on UNGEGN rules.
# Rules are predicated on running NFD first, and NFC afterwards
#
# Layout note: a "#" comment runs to the end of its line, so every rule
# must sit on its own line (or before any trailing comment). Rule order
# is significant: earlier rules take priority over later ones.

# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
# WARNING: need to add accents to both filters ###
# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ;
::NFD (NFC) ;

# Useful variables
$lower = [[:latin:][:greek:] & [:Ll:]] ;
$upper = [[:latin:][:greek:] & [:Lu:]] ;
$accent = [[:Mn:][:Me:]] ;
$macron = ̄ ;
$ddot = ̈ ;
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
$lcgvowelC = [$lcgvowel $accent] ;
$evowel = [aeiouyAEIOUY];
$vowel = [ $evowel $gvowel] ;
$beforeLower = $accent * $lower ;
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
$smooth = ̓ ;
$rough = ̔ ;
$iotasub = ͅ ;
$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
# $under is the combining mark appended to Latin output to tell apart
# Greek letters that would otherwise share a Latin form (see the eta and
# iota rules below).
$under = ̱;
$caron = ̌;
$afterLetter = [:L:] [\'$accent]* ;
$beforeLetter = [\'$accent]* [:L:] ;

# Fix punctuation
# preserve original
\: <> \: $under ;
\? <> \? $under ;
\; <> \? ;
· <> \: ;

# Fix any ancient characters that creep in
͂ > ́ ;
̂ > ́ ;
̀ > ́ ;
$smooth > ;
$rough > ;
$iotasub > ;
ͺ > ;

# need to have these up here so the rules don't mask
η <> i $under ;
Η <> I $under ;
Ψ } $beforeLower <> Ps ;
Ψ <> PS ;
ψ <> ps ;
ω <> o $under ;
Ω <> O $under;

# at beginning or end of word, convert mp to b
[^[:L:]$accent] { μπ > b ;
μπ } [^[:L:]$accent] > b ;
[^[:L:]$accent] { [Μμ][Ππ] > B ;
[Μμ][Ππ] } [^[:L:]$accent] > B ;
μπ < b ;
Μπ < B } $beforeLower ;
ΜΠ < B ;

# handle diphthongs ending with upsilon
ου <> ou ;
ΟΥ <> OU ;
Ου <> Ou ;
οΥ <> oU ;
$fmaker = [aeiAEI] $under ? ;
$shiftForwardVowels = [[:Mn:]-[\u0308]];
# note: a diaeresis keeps the items separate
$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;
υ $1 < ( $shiftForwardVowels )* v $under ;
$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;
υ $1 < ( $shiftForwardVowels )* f $under ;
$fmaker { Υ } $softener <> V $under ;
$fmaker { Υ <> U $under ;
υ <> y ;
Υ <> Y ;

# NORMAL
α <> a ;
Α <> A ;
β <> v ;
Β <> V ;
γ } $gammaLike <> n } $egammaLike ;
γ <> g ;
Γ } $gammaLike <> N } $egammaLike ;
Γ <> G ;
δ <> d ;
Δ <> D ;
ε <> e ;
Ε <> E ;
ζ <> z ;
Ζ <> Z ;
θ <> th ;
Θ } $beforeLower <> Th ;
Θ <> TH ;
ι <> i ;
Ι <> I ;
κ <> k ;
Κ <> K ;
λ <> l ;
Λ <> L ;
μ <> m ;
Μ <> M ;
ν } $gammaLike > n\' ;
ν <> n ;
Ν } $gammaLike <> N\' ;
Ν <> N ;
ξ <> x ;
Ξ <> X ;
ο <> o ;
Ο <> O ;
π <> p ;
Π <> P ;
ρ <> r ;
Ρ <> R ;

# insert separator before things that turn into s
[Pp] { } [ςσΣϷϸϺϻ] > \' ;

# special S variants
Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L

# Caron means exception
# before a letter, initial
ς } $beforeLetter <> s $under } $beforeLetter;
σ } $beforeLetter <> s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter { σ <> $afterLetter { s $under;
$afterLetter { ς <> $afterLetter { s ;
# otherwise (isolated) = initial
ς <> s $under;
σ <> s ;
# [Pp] { Σ <> \'S ;
Σ <> S ;
τ <> t ;
Τ <> T ;
φ <> f ;
Φ <> F ;
χ <> ch ;
Χ } $beforeLower <> Ch ;
Χ <> CH ;

# Completeness for ASCII
# (reverse-only rules: Latin letters with no direct rule above are
# rewritten to a Latin string that the rules above do handle)
# $ignore = [[:Mark:]''] * ;
| ch < h ;
| k < c ;
| i < j ;
| k < q ;
| b < u } $vowel ;
| b < w } $vowel ;
| y < u ;
| y < w ;
| Ch < H ;
| K < C ;
| I < J ;
| K < Q ;
| B < W } $vowel ;
| B < U } $vowel ;
| Y < W ;
| Y < U ;

# Completeness for Greek
# (forward-only rules: symbol variants are rewritten to the base Greek
# letter, which is then processed again by the rules above)
ϐ > | β ;
ϑ > | θ ;
ϒ > | Υ ;
ϕ > | φ ;
ϖ > | π ;
ϰ > | κ ;
ϱ > | ρ ;
ϲ > | σ ;
Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
ϳ > j ;
ϴ > | Θ ;
ϵ > | ε ;
µ > | μ ;

# delete any trailing ' marks used for roundtripping
< [Ππ] { \' } [Ss] ;
< [Νν] { \' } $egammaLike ;
::NFC (NFD) ;

# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;