# grapheme.txt   [plain text]


#
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.

# file: grapheme.txt
#
# Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
#
#
# Note: Rule syntax and the monkey test itself are still a work in progress.
#       They are expected to change with review and the addition of support for rule tailoring.

# Global settings for this rule file.
#   type   - which kind of boundary these rules describe (grapheme here).
#   locale - set to "en" in this file.
# NOTE(review): how the test harness consumes these two settings is not
#               visible in this file. Confirm against RBBIMonkeyTest.
type = grapheme;      # one of grapheme | word | line | sentence
locale = en;

#
# Character class definitions
#
# Each name below is bound to the set of characters whose Unicode
# Grapheme_Cluster_Break property has the value of the same name.
# The GB rules later in this file refer to these names.
#
CR                 = [\p{Grapheme_Cluster_Break = CR}];
LF                 = [\p{Grapheme_Cluster_Break = LF}];

# NOTE(review): Control and Extend are wrapped in a second pair of brackets,
#               unlike the other definitions. The outer pair looks redundant
#               (a set containing exactly one set) and is left unchanged.
Control            = [[\p{Grapheme_Cluster_Break = Control}]];
Extend             = [[\p{Grapheme_Cluster_Break = Extend}]];
ZWJ                = [\p{Grapheme_Cluster_Break = ZWJ}];
Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
Prepend            = [\p{Grapheme_Cluster_Break = Prepend}];
SpacingMark        = [\p{Grapheme_Cluster_Break = SpacingMark}];

#
# Korean Syllable Definitions
#
# Sets for the Hangul-related Grapheme_Cluster_Break values. Per UAX #29:
#   L   - leading consonant jamo
#   V   - vowel jamo
#   T   - trailing consonant jamo
#   LV  - precomposed syllable of the form leading consonant + vowel
#   LVT - precomposed syllable of the form leading consonant + vowel + trailing consonant
# These names are used by rules GB6, GB7 and GB8.
#
L                  = [\p{Grapheme_Cluster_Break = L}];
V                  = [\p{Grapheme_Cluster_Break = V}];
T                  = [\p{Grapheme_Cluster_Break = T}];
LV                 = [\p{Grapheme_Cluster_Break = LV}];
LVT                = [\p{Grapheme_Cluster_Break = LVT}];

# Emoji definitions
#
# Extended_Pict is the set used on both sides of the ZWJ in rule GB11.
# NOTE(review): ExtPict is presumably the short alias of the
#               Extended_Pictographic emoji property. Confirm against the
#               Unicode property alias list.

Extended_Pict      = [:ExtPict:];

#
# Line terminators and controls.
#
# Rule format as used in this file: a rule name, a colon, then a sequence of
# set names. The division sign marks a position where a boundary is placed,
# and "." appears to stand for any character.
# NOTE(review): a rule with no division sign is assumed to mean "no boundary
#               inside the matched text", the counterpart of the "x" notation
#               in UAX #29. Confirm against the RBBIMonkeyTest rule parser.
#
#   GB3 - CR followed by LF, no boundary marked between them.
#   GB4 - boundary after any Control, CR or LF.
#   GB5 - boundary before any Control, CR or LF.
#
GB3:     CR LF;
GB4:     (Control | CR | LF) ÷;
GB5:     . ÷ (Control | CR | LF);

#
# Hangul syllable sequences. None of these rules marks a boundary, so each
# matched pair is presumably kept together in one grapheme cluster.
#
#   GB6 - L followed by L, V, LV or LVT.
#   GB7 - LV or V followed by V or T.
#   GB8 - LVT or T followed by T.
#
GB6:     L (L | V | LV | LVT);
GB7:     (LV | V) (V | T);
GB8:     (LVT | T) T;

#
# Emoji ZWJ sequences, combining marks and prepended characters.
#
#   GB11 - Extended_Pict, any number of Extend, a ZWJ, then another
#          Extended_Pict. No boundary is marked anywhere in the sequence.
#   GB9  - any character followed by Extend or ZWJ.
#
# NOTE(review): GB11 is listed ahead of GB9 even though its number is higher.
#               Presumably rule order is significant and the full emoji ZWJ
#               sequence must be tried before the shorter GB9 match. Do not
#               reorder without checking how RBBIMonkeyTest applies rules.
#
GB11:    Extended_Pict Extend* ZWJ Extended_Pict;
GB9:     . (Extend | ZWJ);

#   GB9a - any character followed by SpacingMark.
#   GB9b - Prepend followed by any character.
GB9a:    . SpacingMark;
GB9b:    Prepend .;

# Regional Indicators, split into pairs.
#      GB12 matches two RIs that are followed by a third RI and marks a
#      boundary after the pair. GB13 matches a pair of RIs with no boundary
#      marked between them.
#      Note that a pair of RIs that is not followed by a third RI will fall into
#      the normal rules for Extend, etc.
#
GB12:  Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
GB13:  Regional_Indicator Regional_Indicator;

# Catch-all rule, listed last: a boundary after any character.
# NOTE(review): presumably reached only when no earlier rule matched. Confirm
#               against how RBBIMonkeyTest applies rules.
GB999:     . ÷;