#include "config.h"
#if ENABLE(VIDEO_TRACK)
#include "WebVTTParser.h"
#include "HTMLElement.h"
#include "ProcessingInstruction.h"
#include "SegmentedString.h"
#include "Text.h"
#include <wtf/text/WTFString.h>
namespace WebCore {
// Conversion factors used when turning a parsed timestamp into seconds.
const double secondsPerHour = 3600;
const double secondsPerMinute = 60;
const double secondsPerMillisecond = 0.001;
// Sentinel returned by collectTimeStamp() when the input is not a valid timestamp.
const double malformedTime = -1;
// Number of bytes in the optional leading byte order mark (EF BB BF) checked by hasRequiredFileIdentifier().
const unsigned bomLength = 3;
// Number of characters in the "WEBVTT" file identifier.
const unsigned fileIdentifierLength = 6;
// Collects the run of ASCII digits that starts at *position in |input|.
// On return *position indexes the first character that is not a digit (or
// equals input.length()). Returns the collected digits, which may be empty.
String WebVTTParser::collectDigits(const String& input, unsigned* position)
{
    StringBuilder collected;
    for (; *position < input.length(); ++*position) {
        UChar current = input[*position];
        if (!isASCIIDigit(current))
            break;
        collected.append(current);
    }
    return collected.toString();
}
// Collects characters starting at *position in |input| up to, but not
// including, the first character for which isASpace() is true. On return
// *position indexes that character (or equals input.length()).
String WebVTTParser::collectWord(const String& input, unsigned* position)
{
    StringBuilder word;
    for (; *position < input.length(); ++*position) {
        UChar current = input[*position];
        if (isASpace(current))
            break;
        word.append(current);
    }
    return word.toString();
}
// Creates a parser in the Initial state with zeroed cue timings and a fresh
// tokenizer. |client| is stored and, when non-null, is notified from
// parseBytes() and createNewCue(); |context| is stored and later passed to
// TextTrackCue::create() and used as the Document for cue text fragments.
WebVTTParser::WebVTTParser(WebVTTParserClient* client, ScriptExecutionContext* context)
: m_scriptExecutionContext(context)
, m_state(Initial)
, m_currentStartTime(0)
, m_currentEndTime(0)
, m_tokenizer(WebVTTTokenizer::create())
, m_client(client)
{
}
// Transfers the cues accumulated so far to the caller. Whatever |outputCues|
// held before the call is replaced, and the parser's own list is emptied.
void WebVTTParser::getNewCues(Vector<RefPtr<TextTrackCue> >& outputCues)
{
    // Copy first, then drop the parser's references so that each cue is
    // handed out only once.
    outputCues = m_cuelist;
    m_cuelist.clear();
}
// Feeds one chunk of raw bytes to the parser. The chunk is split into lines
// (see collectNextLine()) and each line is dispatched according to the current
// parse state, which persists in m_state between calls.
void WebVTTParser::parseBytes(const char* data, unsigned length)
{
unsigned position = 0;
while (position < length) {
String line = collectNextLine(data, length, &position);
switch (m_state) {
case Initial:
// Buffer the whole chunk (not just the current line) until enough bytes
// for an optional BOM plus the "WEBVTT" identifier are available.
// NOTE(review): |line| has already been consumed from |data| above but is
// not otherwise used in this state.
m_identifierData.append(data, length);
if (m_identifierData.size() < bomLength + fileIdentifierLength)
return;
// A missing or malformed identifier aborts this call and, when a client is
// set, reports the failure to it. m_state stays Initial.
if (!hasRequiredFileIdentifier()) {
if (m_client)
m_client->fileFailedToParse();
return;
}
m_state = Header;
m_identifierData.clear();
break;
case Header:
// Header lines are ignored; the first empty line ends the header.
if (line.isEmpty())
m_state = Id;
break;
case Id:
// Skip empty lines between cues.
if (line.isEmpty())
break;
// Start a new cue: clear the previous cue's values, then treat this line as
// either an identifier or a timing line (decided by collectCueId()).
resetCueValues();
m_state = collectCueId(line);
break;
case TimingsAndSettings:
m_state = collectTimingsAndSettings(line);
break;
case CueText:
// |length| and |position| let collectCueText() detect the end of this chunk.
m_state = collectCueText(line, length, position);
break;
case BadCue:
// Lines of a malformed cue are dropped until ignoreBadCue() sees an empty line.
m_state = ignoreBadCue(line);
break;
}
}
}
bool WebVTTParser::hasRequiredFileIdentifier()
{
unsigned position = 0;
if (m_identifierData.size() >= bomLength && m_identifierData[0] == '\xEF' && m_identifierData[1] == '\xBB' && m_identifierData[2] == '\xBF')
position += bomLength;
String line = collectNextLine(m_identifierData.data(), m_identifierData.size(), &position);
if (line.length() < fileIdentifierLength)
return false;
if (line.substring(0, fileIdentifierLength) != "WEBVTT")
return false;
if (line.length() > fileIdentifierLength && line[fileIdentifierLength] != ' ' && line[fileIdentifierLength] != '\t')
return false;
return true;
}
// Handles the first non-empty line of a cue block. A line without the "-->"
// arrow is stored as the cue identifier and the timing line is expected next.
// A line containing the arrow is parsed as the timing line straight away,
// leaving the cue without an identifier.
WebVTTParser::ParseState WebVTTParser::collectCueId(const String& line)
{
    if (!line.contains("-->")) {
        m_currentId = line;
        return TimingsAndSettings;
    }
    return collectTimingsAndSettings(line);
}
// Parses a cue timing line of the form
//     <start timestamp> <ws> "-->" <ws> <end timestamp> [<ws> settings]
// On success stores the start time, end time and the raw settings text in
// m_currentStartTime, m_currentEndTime and m_currentSettings and returns
// CueText. Any deviation from the format returns BadCue.
WebVTTParser::ParseState WebVTTParser::collectTimingsAndSettings(const String& line)
{
    unsigned position = 0;
    skipWhiteSpace(line, &position);

    // Start timestamp.
    m_currentStartTime = collectTimeStamp(line, &position);
    if (m_currentStartTime == malformedTime)
        return BadCue;

    // At least one space or tab must separate the timestamp from the arrow.
    // Use UChar so that a non-ASCII code unit is not truncated to a byte that
    // happens to look like a space or a tab.
    if (position >= line.length())
        return BadCue;
    UChar nextChar = line[position++];
    if (nextChar != ' ' && nextChar != '\t')
        return BadCue;
    skipWhiteSpace(line, &position);

    // The arrow has to start exactly at the current position; finding it
    // somewhere later in the line would silently skip over unrelated text.
    if (line.find("-->", position) != position)
        return BadCue;
    position += 3;

    // At least one space or tab must separate the arrow from the end timestamp.
    if (position >= line.length())
        return BadCue;
    nextChar = line[position++];
    if (nextChar != ' ' && nextChar != '\t')
        return BadCue;
    skipWhiteSpace(line, &position);

    // End timestamp.
    m_currentEndTime = collectTimeStamp(line, &position);
    if (m_currentEndTime == malformedTime)
        return BadCue;
    skipWhiteSpace(line, &position);

    // Everything that remains on the line is kept as the unparsed settings.
    m_currentSettings = line.substring(position);
    return CueText;
}
// Handles one line while in the CueText state. An empty line terminates the
// cue: the cue is created and the parser goes back to looking for the next
// identifier. Any other line is appended to the cue content, with "\n"
// between successive lines. |length| and |position| describe the chunk
// currently being parsed: when the chunk has been fully consumed the cue is
// created right away, and the state nevertheless stays CueText.
WebVTTParser::ParseState WebVTTParser::collectCueText(const String& line, unsigned length, unsigned position)
{
    if (!line.isEmpty()) {
        if (!m_currentContent.isEmpty())
            m_currentContent.append("\n");
        m_currentContent.append(line);

        if (position >= length)
            createNewCue();
        return CueText;
    }

    createNewCue();
    return Id;
}
// Discards the lines of a malformed cue. The parser stays in BadCue until an
// empty line is seen, after which it looks for the next cue identifier.
WebVTTParser::ParseState WebVTTParser::ignoreBadCue(const String& line)
{
    return line.isEmpty() ? Id : BadCue;
}
// Builds a DocumentFragment from cue text by running the tokenizer over
// |text| and passing every produced token to constructTreeFromToken().
// Returns 0 when |text| is empty.
PassRefPtr<DocumentFragment> WebVTTParser::createDocumentFragmentFromCueText(const String& text)
{
    if (text.isEmpty())
        return 0;

    ASSERT(m_scriptExecutionContext->isDocument());
    Document* document = static_cast<Document*>(m_scriptExecutionContext);

    // The fragment is the root of the tree; nodes are attached relative to
    // m_currentNode as tokens are processed.
    RefPtr<DocumentFragment> root = DocumentFragment::create(document);
    m_currentNode = root;

    // Start from a clean tokenizer and token before consuming the input.
    m_tokenizer->reset();
    m_token.clear();

    SegmentedString input(text);
    while (m_tokenizer->nextToken(input, m_token))
        constructTreeFromToken(document);

    return root.release();
}
void WebVTTParser::createNewCue()
{
if (!m_currentContent.length())
return;
RefPtr<TextTrackCue> cue = TextTrackCue::create(m_scriptExecutionContext, m_currentId, m_currentStartTime, m_currentEndTime, m_currentContent.toString(), m_currentSettings, false);
m_cuelist.append(cue);
if (m_client)
m_client->newCuesParsed();
}
// Clears every per-cue value so that the next cue starts from a clean slate.
void WebVTTParser::resetCueValues()
{
    // Timings.
    m_currentStartTime = 0;
    m_currentEndTime = 0;

    // Textual parts: identifier, settings and accumulated content.
    m_currentId = emptyString();
    m_currentSettings = emptyString();
    m_currentContent.clear();
}
// Parses a timestamp starting at *position in |line|. Two layouts are
// accepted: "mm:ss.ttt" and "h...:mm:ss.ttt" (hours may have any number of
// digits). Returns the time in seconds, or malformedTime when the text does
// not match. *position is advanced past the characters that were consumed,
// including on some failure paths.
double WebVTTParser::collectTimeStamp(const String& line, unsigned* position)
{
enum Mode { minutes, hours };
Mode mode = minutes;
if (*position >= line.length() || !isASCIIDigit(line[*position]))
return malformedTime;
// First component: minutes by default; treated as hours when it is not
// exactly two digits or is larger than 59.
String digits1 = collectDigits(line, position);
int value1 = digits1.toInt();
if (digits1.length() != 2 || value1 > 59)
mode = hours;
if (*position >= line.length() || line[(*position)++] != ':')
return malformedTime;
if (*position >= line.length() || !isASCIIDigit(line[(*position)]))
return malformedTime;
// Second component must be exactly two digits.
String digits2 = collectDigits(line, position);
int value2 = digits2.toInt();
if (digits2.length() != 2)
return malformedTime;
int value3;
// A third, two-digit component is required in hours mode, and is also
// accepted whenever another ':' follows the second component.
if (mode == hours || (*position < line.length() && line[*position] == ':')) {
if (*position >= line.length() || line[(*position)++] != ':')
return malformedTime;
if (*position >= line.length() || !isASCIIDigit(line[*position]))
return malformedTime;
String digits3 = collectDigits(line, position);
if (digits3.length() != 2)
return malformedTime;
value3 = digits3.toInt();
} else {
// Only two components were given: shift them so that value1/value2/value3
// always mean hours/minutes/seconds below.
value3 = value2;
value2 = value1;
value1 = 0;
}
// Fractional part: a '.' followed by exactly three digits (milliseconds).
if (*position >= line.length() || line[(*position)++] != '.')
return malformedTime;
if (*position >= line.length() || !isASCIIDigit(line[*position]))
return malformedTime;
String digits4 = collectDigits(line, position);
if (digits4.length() != 3)
return malformedTime;
int value4 = digits4.toInt();
// Minutes and seconds must both be in the range 0-59.
if (value2 > 59 || value3 > 59)
return malformedTime;
return (value1 * secondsPerHour) + (value2 * secondsPerMinute) + value3 + (value4 * secondsPerMillisecond);
}
// Applies the token currently held in m_token to the tree being built under
// m_currentNode, then clears the token:
//  - Character tokens become Text children of the current node.
//  - Start tags create an element (recognized tag names as-is, "c" as a span,
//    "v" as a q element whose title attribute is the annotation) that becomes
//    the new current node. Other start tags are ignored.
//  - End tags for recognized names, "c" or "v" move the current node up to its
//    parent, when it has one.
//  - Timestamp tags that parse as a valid timestamp are added as a
//    "timestamp" processing instruction.
void WebVTTParser::constructTreeFromToken(Document* document)
{
    AtomicString tokenTagName(m_token.name().data(), m_token.name().size());
    QualifiedName tagName(nullAtom, tokenTagName, xhtmlNamespaceURI);

    switch (m_token.type()) {
    case WebVTTTokenTypes::Character: {
        String content(m_token.characters().data(), m_token.characters().size());
        RefPtr<Text> child = Text::create(document, content);
        m_currentNode->parserAddChild(child);
        break;
    }
    case WebVTTTokenTypes::StartTag: {
        RefPtr<HTMLElement> child;
        if (isRecognizedTag(tokenTagName))
            child = HTMLElement::create(tagName, document);
        else if (m_token.name().size() == 1 && m_token.name()[0] == 'c')
            child = HTMLElement::create(spanTag, document);
        else if (m_token.name().size() == 1 && m_token.name()[0] == 'v')
            child = HTMLElement::create(qTag, document);

        if (child) {
            if (m_token.classes().size() > 0)
                child->setAttribute(classAttr, AtomicString(m_token.classes().data(), m_token.classes().size()));
            if (child->hasTagName(qTag))
                child->setAttribute(titleAttr, AtomicString(m_token.annotation().data(), m_token.annotation().size()));
            m_currentNode->parserAddChild(child);
            m_currentNode = child;
        }
        break;
    }
    case WebVTTTokenTypes::EndTag:
        if (isRecognizedTag(tokenTagName)
            || (m_token.name().size() == 1 && m_token.name()[0] == 'c')
            || (m_token.name().size() == 1 && m_token.name()[0] == 'v')) {
            if (m_currentNode->parentNode())
                m_currentNode = m_currentNode->parentNode();
        }
        break;
    case WebVTTTokenTypes::TimestampTag: {
        // Build the string with an explicit length. The token's character
        // buffer is a sized buffer, so handing its raw pointer to a String
        // parameter would rely on a terminator that is not guaranteed to be
        // there and could read past the end of the token data.
        String timestamp(m_token.characters().data(), m_token.characters().size());
        unsigned position = 0;
        double time = collectTimeStamp(timestamp, &position);
        if (time != malformedTime)
            m_currentNode->parserAddChild(ProcessingInstruction::create(document, "timestamp", timestamp));
        break;
    }
    default:
        break;
    }
    m_token.clear();
}
// Advances *position past every consecutive character of |line| for which
// isASpace() is true, stopping at the end of the line at the latest.
void WebVTTParser::skipWhiteSpace(const String& line, unsigned* position)
{
    unsigned& cursor = *position;
    while (cursor < line.length() && isASpace(line[cursor]))
        ++cursor;
}
// Advances *position past a single line terminator in |data|: "\r", "\n" or
// the pair "\r\n". Never moves beyond |length|.
void WebVTTParser::skipLineTerminator(const char* data, unsigned length, unsigned* position)
{
    // Optional carriage return...
    if (*position < length && data[*position] == '\r')
        ++*position;
    // ...optionally followed by a line feed.
    if (*position < length && data[*position] == '\n')
        ++*position;
}
// Extracts the next line from |data|, starting at *position and ending just
// before the first "\r" or "\n" (or at |length|). The bytes are decoded as
// UTF-8. On return *position points past the line and its terminator.
String WebVTTParser::collectNextLine(const char* data, unsigned length, unsigned* position)
{
    const unsigned lineStart = *position;
    unsigned lineEnd = lineStart;
    while (lineEnd < length && data[lineEnd] != '\r' && data[lineEnd] != '\n')
        ++lineEnd;
    *position = lineEnd;

    String line = String::fromUTF8(data + lineStart, lineEnd - lineStart);
    skipLineTerminator(data, length, position);
    return line;
}
}
#endif