#include "unicode/utypes.h"
#if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h"
#include "unicode/localpointer.h"
#include "unicode/uchar.h"
#include "unicode/unifilt.h"
#include "unicode/uniset.h"
#include "brktrans.h"
#include "cmemory.h"
#include "mutex.h"
#include "uprops.h"
#include "uinvchar.h"
#include "util.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
// Expands the RTTI boilerplate for BreakTransliterator (the macro itself is
// defined outside this file).
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)
// Code unit 32 (U+0020 SPACE); the constructor uses it as the initial
// insertion string.
static const UChar SPACE = 32;
/**
 * Constructs a break transliterator registered under the ID
 * "Any-BreakInternal".
 *
 * @param adoptedFilter  filter handed straight to the Transliterator base
 *                       class constructor.
 *
 * Both cache slots start out empty and the insertion string starts out as a
 * single SPACE.
 */
BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
      cachedBI(NULL),
      cachedBoundaries(NULL),
      fInsertion(SPACE)
{
}
/**
 * Destructor. The body is intentionally empty; the members are presumably
 * released by their own destructors (confirm against the class declaration).
 */
BreakTransliterator::~BreakTransliterator() {
}
/**
 * Copy constructor.
 *
 * Copies the Transliterator base state and the insertion string from `o`.
 * The cached break iterator and cached boundary vector are NOT copied: the
 * new object starts with both cache slots empty.
 */
BreakTransliterator::BreakTransliterator(const BreakTransliterator& o)
    : Transliterator(o),
      cachedBI(NULL),
      cachedBoundaries(NULL),
      fInsertion(o.fInsertion)
{
}
/**
 * Returns a heap-allocated copy of this object, made with the copy
 * constructor. The result of `new` is returned as-is.
 */
Transliterator* BreakTransliterator::clone(void) const {
    BreakTransliterator *duplicate = new BreakTransliterator(*this);
    return duplicate;
}
/**
 * Inserts fInsertion into `text` at every word boundary that lies between two
 * letter-or-mark characters, then updates `offsets` for the added length.
 *
 * @param text           the text to modify in place.
 * @param offsets        boundaries are searched from the one preceding
 *                       offsets.start up to (not including) offsets.limit.
 *                       On success contextLimit and limit grow by the total
 *                       inserted length and start is advanced (see below).
 * @param isIncremental  when true, start becomes the last inserted boundary
 *                       plus the inserted length (0 + delta == 0 when nothing
 *                       was inserted); when false, start becomes the new limit.
 *
 * If the break iterator or the boundary vector cannot be obtained, or status
 * reports a failure at that point, the function returns without touching
 * `text` or `offsets`.
 */
void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                              UBool isIncremental ) const {
    UErrorCode status = U_ZERO_ERROR;
    LocalPointer<BreakIterator> bi;
    LocalPointer<UVector32> boundaries;

    // The method is const but the cache slots are updated, hence the cast.
    BreakTransliterator *self = const_cast<BreakTransliterator *>(this);

    // Borrow whatever is cached; the Mutex guard covers only this scope.
    {
        Mutex m;
        boundaries.moveFrom(self->cachedBoundaries);
        bi.moveFrom(self->cachedBI);
    }

    // Anything that was not in the cache is created fresh.
    if (bi.isNull()) {
        bi.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status));
    }
    if (boundaries.isNull()) {
        boundaries.adoptInstead(new UVector32(status));
    }

    if (U_FAILURE(status) || bi.isNull() || boundaries.isNull()) {
        return;
    }

    boundaries->removeAllElements();
    UnicodeString sText = replaceableAsString(text);
    bi->setText(sText);
    bi->preceding(offsets.start);

    // Collect qualifying boundaries first; the insertions happen afterwards.
    const uint32_t letterOrMark = U_GC_L_MASK | U_GC_M_MASK;
    int32_t pos = bi->next();
    while (pos != UBRK_DONE && pos < offsets.limit) {
        if (pos != 0) {
            // The character just before the boundary must be a letter or mark...
            const int typeBefore = u_charType(sText.char32At(pos - 1));
            if ((U_MASK(typeBefore) & letterOrMark) != 0) {
                // ...and so must the character at the boundary.
                const int typeAfter = u_charType(sText.char32At(pos));
                if ((U_MASK(typeAfter) & letterOrMark) != 0) {
                    boundaries->addElement(pos, status);
                }
            }
        }
        pos = bi->next();
    }

    int delta = 0;
    int lastBoundary = 0;

    if (boundaries->size() != 0) {
        delta = boundaries->size() * fInsertion.length();
        lastBoundary = boundaries->lastElementi();

        // Pop from the end so earlier boundary indices stay valid while
        // the text grows.
        while (boundaries->size() > 0) {
            const int32_t insertAt = boundaries->popi();
            text.handleReplaceBetween(insertAt, insertAt, fInsertion);
        }
    }

    // Account for the inserted text in the caller's position record.
    offsets.contextLimit += delta;
    offsets.limit += delta;
    if (isIncremental) {
        offsets.start = lastBoundary + delta;
    } else {
        offsets.start = offsets.limit;
    }

    // Hand the iterator and the vector back to the cache, but only into slots
    // that are still empty.
    {
        Mutex m;
        if (self->cachedBI.isNull()) {
            self->cachedBI.moveFrom(bi);
        }
        if (self->cachedBoundaries.isNull()) {
            self->cachedBoundaries.moveFrom(boundaries);
        }
    }
}
/**
 * Returns a reference to the string that handleTransliterate() inserts at
 * each selected boundary.
 */
const UnicodeString &BreakTransliterator::getInsertion() const {
    return this->fInsertion;
}
void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
this->fInsertion = insertion;
}
/**
 * Returns the full contents of `r` as a UnicodeString.
 *
 * If `r` is itself a UnicodeString (checked with dynamic_cast) it is copied
 * by assignment; otherwise the range [0, r.length()) is extracted through
 * Replaceable::extractBetween().
 */
UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
    UnicodeString contents;
    UnicodeString *concrete = dynamic_cast<UnicodeString *>(&r);
    if (concrete == NULL) {
        // Not a UnicodeString: go through the generic Replaceable interface.
        r.extractBetween(0, r.length(), contents);
    } else {
        contents = *concrete;
    }
    return contents;
}
U_NAMESPACE_END
#endif