/* unorm_it.h   [plain text] */


/*
*******************************************************************************
*
*   Copyright (C) 2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  unorm_it.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003jan21
*   created by: Markus W. Scherer
*/

#ifndef __UNORM_IT_H__
#define __UNORM_IT_H__

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION

#include "unicode/uiter.h"
#include "unicode/unorm.h"

/**
 * Normalizing UCharIterator wrapper.
 * This internal API basically duplicates the functionality of the C++ Normalizer,
 * but
 * - it actually implements a character iterator (UCharIterator)
 *   with few restrictions (see unorm_setIter())
 * - it supports UCharIterator getState()/setState()
 * - it uses lower-level APIs and buffers more text and states,
 *   hopefully resulting in higher performance
 *
 * UNormIterator is an opaque type: this header only forward-declares it, and
 * callers handle it exclusively through the pointer returned by unorm_openIter().
 *
 * Usage example:
 * \code
 * void function(UCharIterator *srcIter) {
 *     UNormIterator *uni;
 *     UCharIterator *iter;
 *     UErrorCode errorCode;
 *
 *     errorCode=U_ZERO_ERROR;
 *     // no preallocated buffer: stackMem may be NULL and stackMemSize may be 0
 *     uni=unorm_openIter(NULL, 0, &errorCode);
 *     if(U_FAILURE(errorCode)) {
 *         // report error
 *         return;
 *     }
 *
 *     iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode);
 *     if(U_FAILURE(errorCode)) {
 *         // report error
 *     } else {
 *         // use iter to iterate over the canonically ordered
 *         // version of srcIter's text
 *         uint32_t state;
 *
 *         ...
 *
 *         state=uiter_getState(iter);
 *         if(state!=UITER_NO_STATE) {
 *             // use valid state, store it, use iter some more
 *             ...
 *
 *             // later restore iter to the saved state:
 *             uiter_setState(iter, state, &errorCode);
 *
 *             ...
 *         }
 *
 *         ...
 *     }
 *     // close on both the success and the error path of unorm_setIter()
 *     unorm_closeIter(uni);
 * }
 * \endcode
 *
 * See also the ICU test suites.
 *
 * @internal
 */
struct UNormIterator;
typedef struct UNormIterator UNormIterator;

/**
 * Size, in bytes, of a stack buffer to hold a UNormIterator; see the stackMem
 * and stackMemSize parameters of unorm_openIter().
 * A non-NULL stackMem passed to unorm_openIter() should provide at least
 * this many bytes.
 *
 * @internal
 */
#define UNORM_ITER_SIZE 1024

/**
 * Open a normalizing iterator. It must be closed later with unorm_closeIter().
 * After opening, call unorm_setIter() to supply the source iterator and the
 * normalization mode before iterating.
 *
 * NOTE(review): this header does not state what happens when stackMem is NULL
 * or stackMemSize is too small (presumably the iterator is allocated elsewhere),
 * nor any alignment requirement for stackMem -- confirm against the implementation.
 *
 * @param stackMem Pointer to a preallocated (stack-allocated) buffer to hold
 *                 the UNormIterator if possible; can be NULL.
 * @param stackMemSize Number of bytes at stackMem; can be 0,
 *                     or should be >= UNORM_ITER_SIZE for a non-NULL stackMem.
 * @param pErrorCode ICU error code; check it with U_FAILURE() after the call.
 * @return an allocated and pre-initialized UNormIterator
 * @internal
 */
U_CAPI UNormIterator * U_EXPORT2
unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode);

/**
 * Close a normalizing iterator that was opened with unorm_openIter().
 *
 * The UCharIterator interface pointer returned by unorm_setIter() is valid only
 * while the normalizing iterator is open, so it must not be used after this call.
 *
 * @param uni UNormIterator from unorm_openIter()
 * @internal
 */
U_CAPI void U_EXPORT2
unorm_closeIter(UNormIterator *uni);

/**
 * Set a UCharIterator and a normalization mode for the normalizing iterator
 * to wrap. The normalizing iterator will read from the character iterator,
 * normalize the text, and in turn deliver it through its own wrapper
 * UCharIterator interface, which this function returns.
 *
 * The source iterator remains at its current position through the unorm_setIter()
 * call but will be used and moved as soon as the returned normalizing
 * iterator is.
 *
 * The returned interface pointer is valid for as long as the normalizing iterator
 * is open and until another unorm_setIter() call is made on it.
 *
 * The normalizing iterator's UCharIterator interface has the following properties:
 * - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX
 * - getState() will return UITER_NO_STATE for unknown states at positions
 *              that are not at normalization boundaries
 *
 * @param uni UNormIterator from unorm_openIter()
 * @param iter The source text UCharIterator to be wrapped. It is aliased into
 *             the normalizing iterator, which reads from it and moves it.
 *             Must support getState() and setState().
 * @param mode The normalization mode.
 * @param pErrorCode ICU error code; check it with U_FAILURE() after the call.
 * @return an alias to the normalizing iterator's UCharIterator interface
 * @internal
 */
U_CAPI UCharIterator * U_EXPORT2
unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode);

#endif /* uconfig.h switches */

#endif