// TextCodecUTF16.cpp
#include "config.h"
#include "TextCodecUTF16.h"
#include <wtf/text/CString.h>
#include <wtf/text/StringBuilder.h>
#include <wtf/text/WTFString.h>
#include <wtf/unicode/CharacterNames.h>
namespace WebCore {
inline TextCodecUTF16::TextCodecUTF16(bool littleEndian)
: m_littleEndian(littleEndian)
{
}
// Registers every encoding label handled by this codec with `registrar`,
// pairing each label with the canonical name it resolves to.
void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar)
{
    struct LabelMapping {
        const char* label;
        const char* canonicalName;
    };

    // The two canonical names come first, followed by their aliases; entries
    // are registered in exactly this order.
    static constexpr LabelMapping mappings[] = {
        { "UTF-16LE", "UTF-16LE" },
        { "UTF-16BE", "UTF-16BE" },
        { "ISO-10646-UCS-2", "UTF-16LE" },
        { "UCS-2", "UTF-16LE" },
        { "UTF-16", "UTF-16LE" },
        { "Unicode", "UTF-16LE" },
        { "csUnicode", "UTF-16LE" },
        { "unicodeFEFF", "UTF-16LE" },
        { "unicodeFFFE", "UTF-16BE" },
    };

    for (const auto& mapping : mappings)
        registrar(mapping.label, mapping.canonicalName);
}
// Registers one codec factory per canonical name. Each factory creates a new
// TextCodecUTF16 configured for the matching byte order.
void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar)
{
    registrar("UTF-16LE", [] {
        constexpr bool littleEndian = true;
        return makeUnique<TextCodecUTF16>(littleEndian);
    });

    registrar("UTF-16BE", [] {
        constexpr bool littleEndian = false;
        return makeUnique<TextCodecUTF16>(littleEndian);
    });
}
// Decodes `length` bytes starting at `bytes` as UTF-16, in the byte order
// selected by m_littleEndian, and returns the decoded text.
//
// Decoding is incremental; three members carry state from one call to the next:
//   - m_leadByte: the first byte of a code unit whose second byte has not
//     arrived yet (set when a chunk ends in the middle of a code unit).
//   - m_leadSurrogate: a lead surrogate still waiting for its trail surrogate.
//   - m_shouldStripByteOrderMark: cleared by the first code unit processed;
//     that code unit is dropped if it equals byteOrderMark.
//
// `flush`: when true, the input is treated as complete. A pending lead byte
// and/or lead surrogate is discarded, reported through `sawError`, and
// replaced by a single replacementCharacter; byte order mark stripping is
// also switched off.
// The fourth (unnamed) parameter is not used by this codec.
// `sawError`: set to true whenever malformed input is encountered; it is never
// reset to false here.
String TextCodecUTF16::decode(const char* bytes, size_t length, bool flush, bool, bool& sawError)
{
    const auto* p = reinterpret_cast<const uint8_t*>(bytes);
    const auto* const end = p + length;

    StringBuilder result;
    result.reserveCapacity(length / 2);

    auto processCodeUnit = [&] (UChar codeUnit) {
        // Only the very first code unit can be swallowed as a byte order mark.
        if (std::exchange(m_shouldStripByteOrderMark, false) && codeUnit == byteOrderMark)
            return;

        if (m_leadSurrogate) {
            auto leadSurrogate = *std::exchange(m_leadSurrogate, WTF::nullopt);
            if (U16_IS_TRAIL(codeUnit)) {
                result.appendCharacter(U16_GET_SUPPLEMENTARY(leadSurrogate, codeUnit));
                return;
            }
            // Unpaired lead surrogate: emit a replacement for it, then fall
            // through so the current code unit is still processed on its own.
            sawError = true;
            result.append(replacementCharacter);
        }

        if (U16_IS_LEAD(codeUnit)) {
            // Hold on to it until the following code unit is seen.
            m_leadSurrogate = codeUnit;
            return;
        }

        if (U16_IS_TRAIL(codeUnit)) {
            // Trail surrogate with no preceding lead surrogate.
            sawError = true;
            result.append(replacementCharacter);
            return;
        }

        result.append(codeUnit);
    };
    auto processBytesLE = [&] (uint8_t first, uint8_t second) {
        processCodeUnit(first | (second << 8));
    };
    auto processBytesBE = [&] (uint8_t first, uint8_t second) {
        processCodeUnit((first << 8) | second);
    };

    // Complete the code unit that was split across the previous chunk boundary.
    if (m_leadByte && p < end) {
        auto leadByte = *std::exchange(m_leadByte, WTF::nullopt);
        if (m_littleEndian)
            processBytesLE(leadByte, p[0]);
        else
            processBytesBE(leadByte, p[0]);
        p++;
    }

    // Consume whole code units. The loop bound is expressed as the number of
    // bytes remaining (end - p) instead of a precomputed "end - 1" pointer:
    // for an empty buffer "end - 1" would point before the start of the
    // buffer, which is undefined behavior, whereas the difference is well
    // defined for empty and null input alike.
    if (m_littleEndian) {
        for (; end - p >= 2; p += 2)
            processBytesLE(p[0], p[1]);
    } else {
        for (; end - p >= 2; p += 2)
            processBytesBE(p[0], p[1]);
    }

    if (end - p == 1) {
        // A single trailing byte: remember it for the next call (or for flush).
        ASSERT(!m_leadByte);
        m_leadByte = p[0];
    } else
        ASSERT(p == end);

    if (flush) {
        m_shouldStripByteOrderMark = false;
        if (m_leadByte || m_leadSurrogate) {
            // Truncated input: one error and one replacement character cover
            // whatever partial state is pending.
            m_leadByte = WTF::nullopt;
            m_leadSurrogate = WTF::nullopt;
            sawError = true;
            result.append(replacementCharacter);
        }
    }

    return result.toString();
}
// Encodes `string` as UTF-16 in the byte order selected by m_littleEndian.
// Every code unit of the input becomes exactly two output bytes; the
// UnencodableHandling argument is not used. No byte order mark is written.
Vector<uint8_t> TextCodecUTF16::encode(StringView string, UnencodableHandling) const
{
    Vector<uint8_t> result(WTF::checkedProduct<size_t>(string.length(), 2).unsafeGet());

    // Position of each half of a code unit inside its two-byte slot.
    const unsigned lowByteOffset = m_littleEndian ? 0 : 1;
    const unsigned highByteOffset = m_littleEndian ? 1 : 0;

    auto* slot = result.data();
    for (auto codeUnit : string.codeUnits()) {
        slot[lowByteOffset] = codeUnit;
        slot[highByteOffset] = codeUnit >> 8;
        slot += 2;
    }

    return result;
}
}