// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // /* ********************************************************************** * Legacy version of RBBISymbolTable and RBBIRuleBuilder from ICU 57, * only for use by Apple RuleBasedTokenizer ********************************************************************** */ #ifndef RBBIRB57_H #define RBBIRB57_H #include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/uniset.h" #include "unicode/parseerr.h" #include "uhash.h" #include "uvector.h" #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that // looks up references to $variables within a set. #include "rbbidata57.h" #include "rbbisetb57.h" #include "rbbirb.h" U_NAMESPACE_BEGIN class RBBIRuleScanner57; struct RBBIRuleTableEl; class RBBISetBuilder57; class RBBINode; class RBBITableBuilder57; //-------------------------------------------------------------------------------- // // RBBISymbolTable57. Implements SymbolTable interface that is used by the // UnicodeSet parser to resolve references to $variables. // //-------------------------------------------------------------------------------- // class RBBISymbolTableEntry - from standard rbbirb.h class RBBISymbolTable57 : public UMemory, public SymbolTable { private: const UnicodeString &fRules; UHashtable *fHashTable; RBBIRuleScanner57 *fRuleScanner; // These next two fields are part of the mechanism for passing references to // already-constructed UnicodeSets back to the UnicodeSet constructor // when the pattern includes $variable references. const UnicodeString ffffString; // = "/uffff" UnicodeSet *fCachedSetLookup; public: // API inherited from class SymbolTable virtual const UnicodeString* lookup(const UnicodeString& s) const; virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const; virtual UnicodeString parseReference(const UnicodeString& text, ParsePosition& pos, int32_t limit) const; // Additional Functions RBBISymbolTable57(RBBIRuleScanner57 *, const UnicodeString &fRules, UErrorCode &status); virtual ~RBBISymbolTable57(); virtual RBBINode *lookupNode(const UnicodeString &key) const; virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err); #ifdef RBBI_DEBUG virtual void rbbiSymtablePrint() const; #else // A do-nothing inline function for non-debug builds. Member funcs can't be empty // or the call sites won't compile. int32_t fFakeField; #define rbbiSymtablePrint() fFakeField=0; #endif private: RBBISymbolTable57(const RBBISymbolTable57 &other); // forbid copying of this class RBBISymbolTable57 &operator=(const RBBISymbolTable57 &other); // forbid copying of this class }; //-------------------------------------------------------------------------------- // // class RBBIRuleBuilder57 The top-level class handling RBBI rule compiling. // //-------------------------------------------------------------------------------- class RBBIRuleBuilder57 : public UMemory { public: // Create a rule based break iterator from a set of rules. // This function is the main entry point into the rule builder. The // public ICU API for creating RBBIs uses this function to do the actual work. // static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules, UParseError *parseError, UErrorCode &status); public: // The "public" functions and data members that appear below are accessed // (and shared) by the various parts that make up the rule builder. They // are NOT intended to be accessed by anything outside of the // rule builder implementation. RBBIRuleBuilder57(const UnicodeString &rules, UParseError *parseErr, UErrorCode &status ); virtual ~RBBIRuleBuilder57(); char *fDebugEnv; // controls debug trace output UErrorCode *fStatus; // Error reporting. Keeping status UParseError *fParseError; // here avoids passing it everywhere. const UnicodeString &fRules; // The rule string that we are compiling RBBIRuleScanner57 *fScanner; // The scanner. RBBINode *fForwardTree; // The parse trees, generated by the scanner, RBBINode *fReverseTree; // then manipulated by subsequent steps. RBBINode *fSafeFwdTree; RBBINode *fSafeRevTree; RBBINode **fDefaultTree; // For rules not qualified with a ! // the tree to which they belong to. UBool fChainRules; // True for chained Unicode TR style rules. // False for traditional regexp rules. UBool fLBCMNoChain; // True: suppress chaining of rules on // chars with LineBreak property == CM. UBool fLookAheadHardBreak; // True: Look ahead matches cause an // immediate break, no continuing for the // longest match. UBool fRINoChain; // True: suppress chaining of rules on chars // with (grapheme/word/line)break property == RI. RBBISetBuilder57 *fSetBuilder; // Set and Character Category builder. UVector *fUSetNodes; // Vector of all uset nodes. RBBITableBuilder57 *fForwardTables; // State transition tables RBBITableBuilder57 *fReverseTables; RBBITableBuilder57 *fSafeFwdTables; RBBITableBuilder57 *fSafeRevTables; UVector *fRuleStatusVals; // The values that can be returned // from getRuleStatus(). RBBIDataHeader57 *flattenData(); // Create the flattened (runtime format) // data tables.. private: RBBIRuleBuilder57(const RBBIRuleBuilder57 &other); // forbid copying of this class RBBIRuleBuilder57 &operator=(const RBBIRuleBuilder57 &other); // forbid copying of this class }; // struct RBBISetTableEl - from standard rbbirb.h // RBBIDebugPrintf - from standard rbbirb.h U_NAMESPACE_END #endif