#
#   Copyright (C) 2002-2006, International Business Machines Corporation and others.
#       All Rights Reserved.
#
#   file:  char.txt
#
#   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
#      See Unicode Standard Annex #29.
#      These rules are based on TR29 Version 5.0.0
#      Includes post-5.0 change to treat Japanese half width voicing marks
#      as Grapheme Extend.
#
#   Layout of this file: character class definitions first, then one rule
#   section for each of !!forward, !!reverse, !!safe_reverse and !!safe_forward.
#
#   NOTE: a '#' starts a comment that runs to the end of the physical line.
#   Every definition, directive and rule must therefore sit on its own line;
#   anything that ends up on the same line after a '#' is treated as comment
#   text and is not part of the rule set.
#

#
#  Character Class Definitions.
#
#  Each class is taken directly from the Grapheme_Cluster_Break property value
#  of the same name.
#
$CR       = [\p{Grapheme_Cluster_Break = CR}];
$LF       = [\p{Grapheme_Cluster_Break = LF}];
$Control  = [\p{Grapheme_Cluster_Break = Control}];

# add Japanese Half Width voicing marks to $Extend
#   (U+FF9E and U+FF9F; this is the post-5.0 change mentioned in the header)
$VoiceMarks = [\uff9e\uff9f];
$Extend   = [\p{Grapheme_Cluster_Break = Extend} $VoiceMarks];

#
# Korean Syllable Definitions
#
$L       = [\p{Grapheme_Cluster_Break = L}];
$V       = [\p{Grapheme_Cluster_Break = V}];
$T       = [\p{Grapheme_Cluster_Break = T}];

$LV      = [\p{Grapheme_Cluster_Break = LV}];
$LVT     = [\p{Grapheme_Cluster_Break = LVT}];

# A Hangul syllable, as written here, is one of:
#    - one or more L;
#    - zero or more L, then a core (an optional LV followed by one or more V,
#      or a single LV, or a single LVT), then zero or more T;
#    - one or more T.
$HangulSyllable = $L+ | ($L* ($LV? $V+ | $LV | $LVT) $T*) | $T+;

## -------------------------------------------------

!!forward;

# CR followed by LF is matched as a single unit.
$CR $LF;

# One character that is not Control, CR or LF -- or one Hangul syllable --
# together with any number of following Extend characters.
([^$Control $CR $LF] | $HangulSyllable) $Extend*;

## -------------------------------------------------

!!reverse;

# The forward rules written back to front: T..L order inside a Hangul
# syllable, LF before CR, and the Extend characters ahead of their base.
$BackHangulSyllable = $L+ | ($T* ($V+$LV? | $LV | $LVT) $L*) | $T+;
$BackOneCluster = ($LF $CR) | ($Extend* ([^$Control $CR $LF] | $BackHangulSyllable));
$BackOneCluster;

## -------------------------------------------------

!!safe_reverse;

# rule 6, 7, 8
#   NOTE(review): presumably the Hangul rules GB6, GB7 and GB8 of UAX #29 -- confirm.
$V+ $L;

## -------------------------------------------------

!!safe_forward;

# rule 6, 7, 8
#   NOTE(review): presumably the Hangul rules GB6, GB7 and GB8 of UAX #29 -- confirm.
$V+ $T;