// rbbirb57.h


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
/*
**********************************************************************
*   Legacy version of RBBISymbolTable and RBBIRuleBuilder from ICU 57,
*   only for use by Apple RuleBasedTokenizer
**********************************************************************
*/

#ifndef RBBIRB57_H
#define RBBIRB57_H

#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/uniset.h"
#include "unicode/parseerr.h"
#include "uhash.h"
#include "uvector.h"
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
                          //    looks up references to $variables within a set.
#include "rbbidata57.h"
#include "rbbisetb57.h"
#include "rbbirb.h"



U_NAMESPACE_BEGIN

// Forward declarations.  Incomplete types are sufficient for the pointer
// members and pointer parameters declared in this header.
class  RBBINode;
class  RBBIRuleScanner57;
struct RBBIRuleTableEl;
class  RBBISetBuilder57;
class  RBBITableBuilder57;



//--------------------------------------------------------------------------------
//
//   RBBISymbolTable57.    Implements SymbolTable interface that is used by the
//                       UnicodeSet parser to resolve references to $variables.
//
//--------------------------------------------------------------------------------
// class RBBISymbolTableEntry - from standard rbbirb.h


// RBBISymbolTable57 - symbol table for the legacy (ICU 57) rule builder.
//
//   Derives from UMemory and from SymbolTable.  The three SymbolTable virtuals
//   (lookup, lookupMatcher, parseReference) are re-declared here so that a
//   UnicodeSet pattern parser handed this object can resolve $variable references.
//   lookupNode() and addEntry() are the additional interface used to query and
//   populate the table with RBBINode values keyed by name.
//
//   Instances are not copyable: the copy constructor and copy assignment
//   operator are declared private (no definition is visible in this header).
class RBBISymbolTable57 : public UMemory, public SymbolTable {
private:
    // Held by reference, not copied: the referenced rule string must outlive
    //   this symbol table.
    const UnicodeString      &fRules;
    // NOTE(review): presumably maps variable names to their table entries;
    //   confirm against the implementation file.
    UHashtable               *fHashTable;
    // NOTE(review): presumably the scanner pointer passed to the constructor;
    //   ownership is not visible from this header.
    RBBIRuleScanner57        *fRuleScanner;

    // These next two fields are part of the mechanism for passing references to
    //   already-constructed UnicodeSets back to the UnicodeSet constructor
    //   when the pattern includes $variable references.
    // NOTE(review): the original note on ffffString read = "/uffff"; this presumably
    //   means the single code unit U+FFFF ("\uffff") - confirm in the constructor.
    const UnicodeString      ffffString;
    UnicodeSet              *fCachedSetLookup;

public:
    //  API inherited from class SymbolTable
    virtual const UnicodeString*  lookup(const UnicodeString& s) const;
    virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
    virtual UnicodeString parseReference(const UnicodeString& text,
                                         ParsePosition& pos, int32_t limit) const;

    //  Additional Functions

    //  Constructor.  Takes the rule scanner, the rule string (bound to the
    //   fRules reference member's type, so it is not copied by the signature),
    //   and an ICU error code through which failure can be reported.
    RBBISymbolTable57(RBBIRuleScanner57 *, const UnicodeString &fRules, UErrorCode &status);
    virtual ~RBBISymbolTable57();

    //  lookupNode:  returns the RBBINode associated with key.
    //   NOTE(review): result for an unknown key is presumably NULL - confirm.
    //  addEntry:    associates val with key; problems are reported through err.
    //   NOTE(review): whether the table takes ownership of val is not visible here.
    virtual RBBINode *lookupNode(const UnicodeString &key) const;
    virtual void      addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err);

#ifdef RBBI_DEBUG
    // Debug builds only: a real member function.
    virtual void      rbbiSymtablePrint() const;
#else
    // A do-nothing inline function for non-debug builds.  Member funcs can't be empty
    //  or the call sites won't compile.
    // In practice this is a preprocessor macro, not a function: it is not scoped
    //  to this class, stays defined for the rest of the translation unit, and its
    //  expansion already ends with ';'.  fFakeField exists only so that the
    //  expansion at a call site (obj->rbbiSymtablePrint()) is a valid statement.
    int32_t fFakeField;
    #define rbbiSymtablePrint() fFakeField=0; 
#endif

private:
    // Declared but intentionally inaccessible, to prevent copying.
    RBBISymbolTable57(const RBBISymbolTable57 &other); // forbid copying of this class
    RBBISymbolTable57 &operator=(const RBBISymbolTable57 &other); // forbid copying of this class
};


//--------------------------------------------------------------------------------
//
//  class RBBIRuleBuilder57       The top-level class handling RBBI rule compiling.
//
//--------------------------------------------------------------------------------
// RBBIRuleBuilder57 - legacy (ICU 57) top-level rule compiler.
//
//   The static createRuleBasedBreakIterator() is the entry point.  Everything
//   else in the public section is shared working state for the cooperating
//   builder components (scanner, set builder, table builders) and is public
//   only so that those components can reach it.
//
//   Instances are not copyable: the copy constructor and copy assignment
//   operator are declared private (no definition is visible in this header).
class RBBIRuleBuilder57 : public UMemory {
public:

    //  Create a rule based break iterator from a set of rules.
    //  This function is the main entry point into the rule builder.  The
    //   public ICU API for creating RBBIs uses this function to do the actual work.
    //
    //   rules       The source text of the break rules.
    //   parseError  Receives parse error details.
    //               NOTE(review): whether NULL is accepted is not visible here.
    //   status      ICU error code, in/out.
    //   Returns a BreakIterator pointer.
    //               NOTE(review): presumably caller-owned, and NULL on failure - confirm.
    //
    static BreakIterator * createRuleBasedBreakIterator( const UnicodeString    &rules,
                                    UParseError      *parseError,
                                    UErrorCode       &status);

public:
    // The "public" functions and data members that appear below are accessed
    //  (and shared) by the various parts that make up the rule builder.  They
    //  are NOT intended to be accessed by anything outside of the
    //  rule builder implementation.
    RBBIRuleBuilder57(const UnicodeString  &rules,
                    UParseError          *parseErr,
                    UErrorCode           &status
        );

    virtual    ~RBBIRuleBuilder57();
    char                          *fDebugEnv;        // controls debug trace output
    UErrorCode                    *fStatus;          // Error reporting.  Keeping status
    UParseError                   *fParseError;      //   here avoids passing it everywhere.
    const UnicodeString           &fRules;           // The rule string that we are compiling.
                                                     //   Held by reference: it must outlive
                                                     //   this builder.

    RBBIRuleScanner57             *fScanner;         // The scanner.
    RBBINode                      *fForwardTree;     // The parse trees, generated by the scanner,
    RBBINode                      *fReverseTree;     //   then manipulated by subsequent steps.
    RBBINode                      *fSafeFwdTree;
    RBBINode                      *fSafeRevTree;

    RBBINode                      **fDefaultTree;    // For rules not qualified with a !
                                                     //   the tree to which they belong.
                                                     //   (Pointer to one of the tree pointers.)

    UBool                         fChainRules;       // True for chained Unicode TR style rules.
                                                     // False for traditional regexp rules.

    UBool                         fLBCMNoChain;      // True:  suppress chaining of rules on
                                                     //   chars with LineBreak property == CM.

    UBool                         fLookAheadHardBreak;  // True:  Look ahead matches cause an
                                                     // immediate break, no continuing for the
                                                     // longest match.

    UBool                         fRINoChain;        // True:  suppress chaining of rules on chars
                                                     //   with (grapheme/word/line)break property == RI.

    RBBISetBuilder57              *fSetBuilder;      // Set and Character Category builder.
    UVector                       *fUSetNodes;       // Vector of all uset nodes.

    RBBITableBuilder57            *fForwardTables;   // State transition tables
    RBBITableBuilder57            *fReverseTables;   //   (one builder for each of the four
    RBBITableBuilder57            *fSafeFwdTables;   //   forward / reverse / safe-forward /
    RBBITableBuilder57            *fSafeRevTables;   //   safe-reverse members).

    UVector                       *fRuleStatusVals;  // The values that can be returned
                                                     //   from getRuleStatus().

    RBBIDataHeader57              *flattenData();    // Create the flattened (runtime format)
                                                     // data tables.
                                                     // NOTE(review): ownership of the returned
                                                     //   header is not visible here - confirm.
private:
    // Declared but intentionally inaccessible, to prevent copying.
    RBBIRuleBuilder57(const RBBIRuleBuilder57 &other); // forbid copying of this class
    RBBIRuleBuilder57 &operator=(const RBBIRuleBuilder57 &other); // forbid copying of this class
};




// struct RBBISetTableEl - from standard rbbirb.h

// RBBIDebugPrintf - from standard rbbirb.h

U_NAMESPACE_END
#endif