// SurrogatePairAwareTextIterator.cpp
#include "config.h"
#include "SurrogatePairAwareTextIterator.h"
#include <unicode/unorm2.h>
namespace WebCore {
// Builds an iterator over a UTF-16 buffer.
//
// The constructor only records its arguments: the character pointer and the
// three indices are copied verbatim into the corresponding members. No
// validation of the index ordering is performed here.
SurrogatePairAwareTextIterator::SurrogatePairAwareTextIterator(
    const UChar* characters,
    unsigned currentIndex,
    unsigned lastIndex,
    unsigned endIndex)
    : m_characters(characters)
    , m_currentIndex(currentIndex)
    , m_lastIndex(lastIndex)
    , m_endIndex(endIndex)
{
}
// Slow path for turning the current code unit into a full code point.
//
// On entry `character` holds the current UTF-16 code unit. On a successful
// merge (kana + voicing mark, or lead + trail surrogate) `character` is
// overwritten with the combined code point and `clusterLength` is set to 2;
// otherwise both are left untouched.
//
// Returns false only for an unpaired surrogate: a trail surrogate on its own,
// a lead surrogate with no following code unit before m_endIndex, or a lead
// surrogate whose following code unit is not a trail surrogate.
//
// NOTE(review): m_characters[1] is read as "the code unit after the current
// one", which presumes m_characters points at the code unit for
// m_currentIndex -- confirm against the code that advances the iterator.
bool SurrogatePairAwareTextIterator::consumeSlowCase(UChar32& character, unsigned& clusterLength)
{
    // Code units at or below this value are checked for a following voicing
    // mark. Keeping the check range-limited avoids the normalization lookup
    // for everything above it.
    constexpr UChar32 voicingMarkCheckLimit = 0x30FE;

    if (character <= voicingMarkCheckLimit) {
        const UChar32 composed = normalizeVoicingMarks();
        if (composed) {
            character = composed;
            clusterLength = 2;
        }
        // Code units in this range are always accepted, composed or not.
        return true;
    }

    // Anything that is not a surrogate is already a complete code point.
    if (!U16_IS_SURROGATE(character))
        return true;

    // A valid pair needs a lead surrogate here and a trail surrogate in the
    // next slot, which must still lie inside the run.
    if (U16_IS_SURROGATE_LEAD(character) && m_currentIndex + 1 < m_endIndex) {
        const UChar trail = m_characters[1];
        if (U16_IS_TRAIL(trail)) {
            character = U16_GET_SUPPLEMENTARY(character, trail);
            clusterLength = 2;
            return true;
        }
    }

    // Unpaired surrogate.
    return false;
}
// Tries to compose m_characters[0] with a following kana voicing mark.
//
// Returns the composed code point when the next code unit has the kana
// voicing-mark canonical combining class and NFC defines a composition for
// the pair; returns 0 in every other case (no next code unit before
// m_endIndex, different combining class, or no composition available).
UChar32 SurrogatePairAwareTextIterator::normalizeVoicingMarks()
{
    // Canonical combining class value compared against the next code unit.
    static constexpr uint8_t kanaVoicingMarkCombiningClass = 8;

    // Nothing to combine with if the current code unit is the last one.
    if (m_currentIndex + 1 >= m_endIndex)
        return 0;

    if (u_getCombiningClass(m_characters[1]) != kanaVoicingMarkCombiningClass)
        return 0;

    UErrorCode status = U_ZERO_ERROR;
    const UNormalizer2* nfc = unorm2_getNFCInstance(&status);
    ASSERT(U_SUCCESS(status));

    // Only a positive result is passed on; anything else is reported as 0.
    const auto composed = unorm2_composePair(nfc, m_characters[0], m_characters[1]);
    return composed > 0 ? composed : 0;
}
}