#include "config.h"
#include "TextDecoder.h"
#include "TextEncodingRegistry.h"
namespace WebCore {
// Constructs a decoder whose starting encoding is |encoding|.
// The BOM check is marked as not yet performed and no bytes are buffered.
// m_codec is not touched here; checkForBOM() is what installs a codec.
TextDecoder::TextDecoder(const TextEncoding& encoding)
: m_encoding(encoding)
, m_checkedForBOM(false)
, m_numBufferedBytes(0)
{
}
// Returns the decoder to its just-constructed state, but targeting |encoding|:
// no bytes buffered, BOM check not yet performed, and m_codec cleared.
void TextDecoder::reset(const TextEncoding& encoding)
{
    // Forget any bytes held back by an unfinished BOM check.
    m_numBufferedBytes = 0;
    m_checkedForBOM = false;

    // Copy the new encoding before clearing the codec, so the copy is taken
    // while everything |encoding| might refer to is still alive.
    m_encoding = encoding;
    m_codec.clear();
}
// Inspects the first bytes of the stream for a Unicode byte-order mark (BOM),
// and once a decision can be made, creates the codec and decodes the input.
//
// data, length: the next chunk of input. Bytes held back by earlier calls are
//               in m_bufferedBytes (m_numBufferedBytes of them) and logically
//               precede |data|.
// flush:        when true, a decision is made now instead of buffering; the
//               flag is also forwarded to the codec for the |data| chunk.
//
// Returns:
//  - "" (converted to String) if the bytes were buffered and the decision was
//    deferred to a later call;
//  - a default-constructed String if no codec could be created;
//  - otherwise the decoded text for the buffered bytes followed by |data|.
String TextDecoder::checkForBOM(const char* data, size_t length, bool flush)
{
    // Peek at up to four leading bytes of (buffered bytes + data) without
    // consuming them. Each of c1..c3 comes from the buffered bytes while any
    // remain, then from |data|, and is 0 if neither has a byte left.
    size_t numBufferedBytes = m_numBufferedBytes;
    size_t buf1Len = numBufferedBytes;
    size_t buf2Len = length;
    const unsigned char* buf1 = m_bufferedBytes;
    const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data);
    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    // The fourth byte is only ever taken from |data|.
    // NOTE(review): presumably m_bufferedBytes never holds more than three
    // bytes - confirm against its declaration.
    unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0;

    // Default to the encoding we were given unless a BOM says otherwise.
    const TextEncoding* encodingConsideringBOM = &m_encoding;
    bool foundBOM = true;
    if (c1 == 0xFF && c2 == 0xFE) {
        // FF FE starts both the UTF-16LE BOM and the UTF-32LE BOM (FF FE 00 00).
        if (c3 != 0 || c4 != 0)
            encodingConsideringBOM = &UTF16LittleEndianEncoding();
        else if (numBufferedBytes + length > sizeof(m_bufferedBytes))
            // More bytes seen than the buffer can hold, and bytes 3 and 4 are zero.
            encodingConsideringBOM = &UTF32LittleEndianEncoding();
        else
            // Too few bytes so far to tell the two apart.
            foundBOM = false;
    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF)
        encodingConsideringBOM = &UTF8Encoding();
    else if (c1 == 0xFE && c2 == 0xFF)
        encodingConsideringBOM = &UTF16BigEndianEncoding();
    else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF)
        encodingConsideringBOM = &UTF32BigEndianEncoding();
    else
        foundBOM = false;

    if (!foundBOM && numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) {
        // No BOM yet, everything still fits in the buffer, and we are not
        // flushing: hold the bytes back and keep looking on the next call.
        // Skip memcpy for an empty chunk: |data| may be null then, and passing
        // a null pointer to memcpy is undefined even for a zero size.
        if (length)
            memcpy(&m_bufferedBytes[numBufferedBytes], data, length);
        m_numBufferedBytes += length;
        return "";
    }

    // Done checking for a BOM: create the codec for the chosen encoding.
    m_codec.set(newTextCodec(*encodingConsideringBOM).release());
    if (!m_codec)
        return String();
    m_checkedForBOM = true;

    if (numBufferedBytes) {
        // Move the held-back bytes to a local copy and mark the member buffer
        // empty before decoding.
        char bufferedBytes[sizeof(m_bufferedBytes)];
        memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
        m_numBufferedBytes = 0;

        // Decode the held-back bytes in their own statement. Both decode()
        // calls go to the same codec object, and the operands of a single '+'
        // may be evaluated in either order, which could hand |data| to the
        // codec before the bytes that precede it in the stream.
        String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false);
        return bufferedResult + m_codec->decode(data, length, flush);
    }

    return m_codec->decode(data, length, flush);
}
}