// affixpatternparser.h   [plain text]


/*
*******************************************************************************
* Copyright (C) 2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* affixpatternparser.h
*
* created on: 2015jan06
* created by: Travis Keep
*/

#ifndef __AFFIX_PATTERN_PARSER_H__
#define __AFFIX_PATTERN_PARSER_H__

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "pluralaffix.h"

U_NAMESPACE_BEGIN

// Forward declarations: this header only refers to these types through
// pointers or references, so their full definitions are not needed here.
class PluralRules;
class FixedPrecision;
class DecimalFormatSymbols;

/**
 * Holds the textual forms of one particular currency for some locale and
 * usage context.
 *
 * The forms are the currency symbol, the ISO code form, and the long
 * form(s) of the currency name, one for each plural variation.
 */
class U_I18N_API CurrencyAffixInfo : public UMemory {
public:
    /**
     * Creates a default instance: the symbol is \u00a4, the ISO form is
     * \u00a4\u00a4, and the long form is \u00a4\u00a4\u00a4.
     */
    CurrencyAffixInfo();

    /** Returns the symbol form of the currency. */
    const UnicodeString &getSymbol() const {
        return fSymbol;
    }

    /** Returns the ISO form of the currency. */
    const UnicodeString &getISO() const {
        return fISO;
    }

    /** Returns the long forms of the currency keyed by plural variation. */
    const PluralAffix &getLong() const {
        return fLong;
    }

    /**
     * Replaces the symbol form. After this call the instance no longer
     * reports itself as the default.
     */
    void setSymbol(const UnicodeString &symbol) {
        fIsDefault = FALSE;
        fSymbol = symbol;
    }

    /**
     * Replaces the ISO form. After this call the instance no longer
     * reports itself as the default.
     */
    void setISO(const UnicodeString &iso) {
        fIsDefault = FALSE;
        fISO = iso;
    }

    /**
     * Returns TRUE when the symbol, ISO form, long forms and the
     * "is default" flag of this instance all match those of other.
     */
    UBool equals(const CurrencyAffixInfo &other) const {
        if (!(fSymbol == other.fSymbol)) {
            return FALSE;
        }
        if (!(fISO == other.fISO)) {
            return FALSE;
        }
        if (!(fLong.equals(other.fLong))) {
            return FALSE;
        }
        return fIsDefault == other.fIsDefault;
    }

    /**
     * Initializes this instance.
     *
     * @param locale the locale for the currency forms.
     * @param rules the plural rules for the locale.
     * @param currency the null terminated, 3 character ISO code of the
     *   currency. If NULL, this instance is reset as if it had just been
     *   created; in that case locale and rules may be NULL as well.
     * @param status any error is returned here.
     */
    void set(
            const char *locale,
            const PluralRules *rules,
            const UChar *currency,
            UErrorCode &status);

    /**
     * Returns TRUE if this instance is the default, that is, it has no real
     * currency: for instance it was never initialized with set(), or it was
     * reset with set(NULL, NULL, NULL, status).
     */
    UBool isDefault() const {
        return fIsDefault;
    }

    /**
     * Adjusts the precision used for a particular currency.
     *
     * @param currency the null terminated, 3 character ISO code of the
     *   currency.
     * @param usage the usage of the currency.
     * @param precision the min/max fraction digits and rounding increment
     *   are adjusted here.
     * @param status any error is reported here.
     */
    static void adjustPrecision(
            const UChar *currency,
            const UCurrencyUsage usage,
            FixedPrecision &precision,
            UErrorCode &status);

private:
    /** Symbol form of the currency. */
    UnicodeString fSymbol;

    /** ISO form of the currency, usually a three letter abbreviation. */
    UnicodeString fISO;

    /** Long forms of the currency keyed by plural variation. */
    PluralAffix fLong;

    /** Value reported by isDefault(); cleared by setSymbol() and setISO(). */
    UBool fIsDefault;

};

// Forward declaration: AffixPattern::iterator() takes an
// AffixPatternIterator by reference before that class is defined below.
class AffixPatternIterator;

/**
 * An affix pattern represented independently of any locale.
 */
class U_I18N_API AffixPattern : public UMemory {
public:

    /**
     * The kinds of token an affix pattern may contain.
     */
    enum ETokenType {
        kLiteral,
        kPercent,
        kPerMill,
        kCurrency,
        kNegative,
        kPositive
    };

    /**
     * Creates an empty affix pattern.
     */
    AffixPattern()
            : tokens(),
              literals(),
              hasCurrencyToken(FALSE),
              hasPercentToken(FALSE),
              hasPermillToken(FALSE),
              char32Count(0) {
    }

    /**
     * Appends a string literal to this affix pattern.
     */
    void addLiteral(const UChar *, int32_t start, int32_t len);

    /**
     * Appends a token to this affix pattern. t must not be kLiteral;
     * literals are added with addLiteral() instead.
     * @param t the type of token to add
     */
    void add(ETokenType t);

    /**
     * Appends a currency token carrying a specific count.
     * @param count the token count, which distinguishes between one, two,
     *  or three currency symbols. Note that one currency token with
     *  count=2 (use ISO code) is not the same as two currency tokens
     *  each with count=1 (two currency symbols).
     */
    void addCurrency(uint8_t count);

    /**
     * Resets this instance to an empty affix pattern.
     */
    void remove();

    /**
     * Sets up an iterator over the tokens of this instance.
     * @param result initialized to point just before the first token of
     *   this instance. The caller must call nextToken() on the iterator
     *   once to make it point at the first token. That first call to
     *   nextToken() returns FALSE if the AffixPattern being iterated
     *   over is empty.
     * @return result
     */
    AffixPatternIterator &iterator(AffixPatternIterator &result) const;

    /**
     * Returns TRUE if this instance contains currency tokens.
     */
    UBool usesCurrency() const { return hasCurrencyToken; }

    /**
     * Returns TRUE if this instance is flagged as having a percent token.
     */
    UBool usesPercent() const { return hasPercentToken; }

    /**
     * Returns TRUE if this instance is flagged as having a permill token.
     */
    UBool usesPermill() const { return hasPermillToken; }

    /**
     * Returns the number of code points a string form of this instance
     * would have if none of the special tokens were escaped.
     * Used to compute the padding size.
     */
    int32_t countChar32() const { return char32Count; }

    /**
     * Appends other to the end of this instance, modifying this instance
     * in place.
     * @param other the pattern to append.
     * @return a reference to this instance so that calls can be chained.
     */
    AffixPattern &append(const AffixPattern &other);

    /**
     * Converts this AffixPattern back into a user string.
     * This is the inverse of parseUserAffixString.
     */
    UnicodeString &toUserString(UnicodeString &appendTo) const;

    /**
     * Converts this AffixPattern back into a string.
     * This is the inverse of parseAffixString.
     */
    UnicodeString &toString(UnicodeString &appendTo) const;

    /**
     * Parses an affix pattern string, appending the result to an
     * AffixPattern.
     *
     * The input is expected to be an affix pattern string as produced by
     * DecimalFormatPatternParser when it parses a format pattern. Affix
     * patterns include the positive prefix and suffix and the negative
     * prefix and suffix. In that format, special characters in the affix
     * that correspond to a field type must be prefixed with an
     * apostrophe ('). These special character sequences include
     * minus (-), percent (%), permille (U+2030), plus (+),
     * short currency (U+00a4), medium currency (U+00a4 * 2),
     * long currency (U+00a4 * 3), and apostrophe (').
     * The apostrophe does not correspond to a field type but has to be
     * escaped because it is itself the escape character.
     *
     * Because the expansion of these special character sequences is
     * locale dependent, they are not expanded in an AffixPattern instance.
     *
     * If one of these special characters is not prefixed with an
     * apostrophe in the affix pattern string, it is treated verbatim just
     * like any other character. If an apostrophe prefixes a non special
     * character in the affix pattern, the apostrophe is simply ignored.
     *
     * @param affixStr the string from DecimalFormatPatternParser
     * @param appendTo the parsed result is appended here.
     * @param status any parsing error is returned here.
     */
    static AffixPattern &parseAffixString(
            const UnicodeString &affixStr,
            AffixPattern &appendTo,
            UErrorCode &status);

    /**
     * Parses an affix pattern string, appending the result to an
     * AffixPattern.
     *
     * The input is an affix pattern string as a user would supply it.
     * Here quoting turns special characters into normal characters,
     * whereas in parseAffixString quoting is what makes special
     * characters special.
     *
     * @param affixStr the string from the user
     * @param appendTo the parsed result is appended here.
     * @param status any parsing error is returned here.
     */
    static AffixPattern &parseUserAffixString(
            const UnicodeString &affixStr,
            AffixPattern &appendTo,
            UErrorCode &status);

    /**
     * Returns TRUE when the tokens, the literal text, the three
     * "has token" flags and the code point count of this instance all
     * match those of other.
     */
    UBool equals(const AffixPattern &other) const {
        // Token stream and literal text first, then the flags and the
        // code point count, in the same order as the members are declared.
        if (!(tokens == other.tokens) || !(literals == other.literals)) {
            return FALSE;
        }
        if (hasCurrencyToken != other.hasCurrencyToken
                || hasPercentToken != other.hasPercentToken
                || hasPermillToken != other.hasPermillToken) {
            return FALSE;
        }
        return char32Count == other.char32Count;
    }

private:
    /*
     * The tokens are stored here. Each UChar generally stands for one
     * token and has the form 'etttttttllllllll':
     *   - llllllll is the length of the token and ranges from 0-255.
     *   - ttttttt is the token type and ranges from 0-127.
     *   - e, when set, marks an extendo token (described next).
     * To accommodate token lengths above 255, each normal token (e=0) can
     * be followed by 0 or more extendo tokens (e=1) of the same type.
     * Right now only kLiteral tokens have extendo tokens. Each extendo
     * token supplies the next 8 higher bits of the length. For example, if
     * a kLiteral token is followed by 2 extendo tokens, the llllllll of the
     * first extendo token holds bits 8-15 of the length and that of the
     * last extendo token holds bits 16-23 of the length.
     */
    UnicodeString tokens;

    /*
     * The characters of all kLiteral tokens, concatenated. The first
     * characters belong to the first kLiteral token, the following
     * characters to the next kLiteral token, and so on.
     */
    UnicodeString literals;

    /* Value returned by usesCurrency(). */
    UBool hasCurrencyToken;

    /* Value returned by usesPercent(). */
    UBool hasPercentToken;

    /* Value returned by usesPermill(). */
    UBool hasPermillToken;

    /* Value returned by countChar32(). */
    int32_t char32Count;

    /* Private overload of add() that also takes a count. */
    void add(ETokenType t, uint8_t count);

};

/**
 * Iterates over the tokens of an AffixPattern instance.
 */
class U_I18N_API AffixPatternIterator : public UMemory {
public:

    /**
     * Creates an iterator that is not yet attached to any AffixPattern.
     * Using an iterator without first initializing it by calling
     * iterator() on an AffixPattern instance results in undefined behavior.
     */
    AffixPatternIterator()
            : nextLiteralIndex(0),
              lastLiteralLength(0),
              nextTokenIndex(0),
              tokens(NULL),
              literals(NULL) {
    }

    /**
     * Moves this iterator to the next token. Returns FALSE when there are
     * no more tokens. Calling the other methods after nextToken() has
     * returned FALSE results in undefined behavior.
     */
    UBool nextToken();

    /**
     * Returns the type of the token.
     */
    AffixPattern::ETokenType getTokenType() const;

    /**
     * Returns the literal string of a literal token. Calling this for any
     * other token type results in undefined behavior.
     * @param result replaced with a read-only alias to the literal string.
     * @return result
     */
    UnicodeString &getLiteral(UnicodeString &result) const;

    /**
     * Returns the length of the token. This is usually 1, but for currency
     * tokens it may be 2 for the ISO code and 3 for the long form.
     */
    int32_t getTokenLength() const;

private:
    int32_t nextLiteralIndex;
    int32_t lastLiteralLength;
    int32_t nextTokenIndex;

    // Start out NULL; AffixPattern is a friend so that it can set up the
    // iterator's private state.
    const UnicodeString *tokens;
    const UnicodeString *literals;

    friend class AffixPattern;

    // Copy construction and assignment are private and have no definition
    // in this header -- presumably to prohibit copying.
    AffixPatternIterator(const AffixPatternIterator &);
    AffixPatternIterator &operator=(const AffixPatternIterator &);
};

/**
 * A locale aware class that turns locale independent AffixPattern
 * instances into locale dependent PluralAffix instances.
 */
class U_I18N_API AffixPatternParser : public UMemory {
public:
    /** Default constructor. */
    AffixPatternParser();

    /**
     * Creates a parser from the given symbols.
     * @param symbols the decimal format symbols to use.
     */
    AffixPatternParser(const DecimalFormatSymbols &symbols);

    /**
     * Changes the decimal format symbols used by this parser.
     * @param symbols the decimal format symbols to use.
     */
    void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);

    /**
     * Parses affixPattern and appends the result to appendTo.
     * @param affixPattern the affix pattern.
     * @param currencyAffixInfo contains the currency forms.
     * @param appendTo the result of parsing affixPattern is appended here.
     * @param status any error is returned here.
     * @return appendTo.
     */
    PluralAffix &parse(
            const AffixPattern &affixPattern,
            const CurrencyAffixInfo &currencyAffixInfo,
            PluralAffix &appendTo,
            UErrorCode &status) const;

    /**
     * Returns TRUE when the percent, permill, negative and positive
     * strings of this parser all match those of other.
     */
    UBool equals(const AffixPatternParser &other) const {
        if (!(fPercent == other.fPercent)) {
            return FALSE;
        }
        if (!(fPermill == other.fPermill)) {
            return FALSE;
        }
        if (!(fNegative == other.fNegative)) {
            return FALSE;
        }
        return (fPositive == other.fPositive);
    }

private:
    // NOTE(review): judging by their names, these hold the locale specific
    // strings for the percent, permill, negative and positive tokens --
    // confirm against the implementation file.
    UnicodeString fPercent;
    UnicodeString fPermill;
    UnicodeString fNegative;
    UnicodeString fPositive;
};


U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif  // __AFFIX_PATTERN_PARSER_H__