summary | refs | log | tree | commit | diff
path: root/Source/WebCore/html/track/WebVTTTokenizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/WebCore/html/track/WebVTTTokenizer.cpp')
-rw-r--r-- Source/WebCore/html/track/WebVTTTokenizer.cpp 322
1 files changed, 159 insertions, 163 deletions
diff --git a/Source/WebCore/html/track/WebVTTTokenizer.cpp b/Source/WebCore/html/track/WebVTTTokenizer.cpp
index 9bde924c8..024c04a09 100644
--- a/Source/WebCore/html/track/WebVTTTokenizer.cpp
+++ b/Source/WebCore/html/track/WebVTTTokenizer.cpp
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2011 Google Inc. All rights reserved.
+ * Copyright (C) 2011, 2013 Google Inc. All rights reserved.
+ * Copyright (C) 2014-2015 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -29,206 +30,201 @@
*/
#include "config.h"
+#include "WebVTTTokenizer.h"
#if ENABLE(VIDEO_TRACK)
-#include "WebVTTTokenizer.h"
-
#include "MarkupTokenizerInlines.h"
+#include <wtf/text/StringBuilder.h>
#include <wtf/unicode/CharacterNames.h>
namespace WebCore {
-#define WEBVTT_BEGIN_STATE(stateName) BEGIN_STATE(WebVTTTokenizerState, stateName)
-#define WEBVTT_ADVANCE_TO(stateName) ADVANCE_TO(WebVTTTokenizerState, stateName)
+#define WEBVTT_ADVANCE_TO(stateName) \
+ do { \
+ ASSERT(!m_input.isEmpty()); \
+ m_preprocessor.advance(m_input); \
+ character = m_preprocessor.nextInputCharacter(); \
+ goto stateName; \
+ } while (false)
-WebVTTTokenizer::WebVTTTokenizer()
- : m_inputStreamPreprocessor(this)
+template<unsigned charactersCount> ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)[charactersCount])
{
- reset();
+ return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersCount - 1);
}
-template <typename CharacterType>
-inline bool vectorEqualsString(const Vector<CharacterType, 32>& vector, const String& string)
+static void addNewClass(StringBuilder& classes, const StringBuilder& newClass)
{
- if (vector.size() != string.length())
- return false;
-
- if (!string.length())
- return true;
+ if (!classes.isEmpty())
+ classes.append(' ');
+ classes.append(newClass);
+}
- return equal(string.impl(), vector.data(), vector.size());
+inline bool emitToken(WebVTTToken& resultToken, const WebVTTToken& token)
+{
+ resultToken = token;
+ return true;
}
-void WebVTTTokenizer::reset()
+inline bool advanceAndEmitToken(SegmentedString& source, WebVTTToken& resultToken, const WebVTTToken& token)
{
- m_state = WebVTTTokenizerState::DataState;
- m_token = 0;
- m_buffer.clear();
+ source.advance();
+ return emitToken(resultToken, token);
}
-
-bool WebVTTTokenizer::nextToken(SegmentedString& source, WebVTTToken& token)
+
+WebVTTTokenizer::WebVTTTokenizer(const String& input)
+ : m_input(input)
+ , m_preprocessor(*this)
{
- // If we have a token in progress, then we're supposed to be called back
- // with the same token so we can finish it.
- ASSERT(!m_token || m_token == &token || token.type() == WebVTTTokenTypes::Uninitialized);
- m_token = &token;
+ // Append an EOF marker and close the input "stream".
+ ASSERT(!m_input.isClosed());
+ m_input.append(String { &kEndOfFileMarker, 1 });
+ m_input.close();
+}
- if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
- return haveBufferedCharacterToken();
+bool WebVTTTokenizer::nextToken(WebVTTToken& token)
+{
+ if (m_input.isEmpty() || !m_preprocessor.peek(m_input))
+ return false;
- UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
+ UChar character = m_preprocessor.nextInputCharacter();
+ if (character == kEndOfFileMarker) {
+ m_preprocessor.advance(m_input);
+ return false;
+ }
- // 4.8.10.13.4 WebVTT cue text tokenizer
- switch (m_state) {
- WEBVTT_BEGIN_STATE(DataState) {
- if (cc == '&') {
- m_buffer.append(static_cast<LChar>(cc));
- WEBVTT_ADVANCE_TO(EscapeState);
- } else if (cc == '<') {
- if (m_token->type() == WebVTTTokenTypes::Uninitialized
- || vectorEqualsString<UChar>(m_token->characters(), emptyString()))
- WEBVTT_ADVANCE_TO(TagState);
- else
- return emitAndResumeIn(source, WebVTTTokenizerState::TagState);
- } else if (cc == kEndOfFileMarker)
- return emitEndOfFile(source);
+ StringBuilder buffer;
+ StringBuilder result;
+ StringBuilder classes;
+
+// 4.8.10.13.4 WebVTT cue text tokenizer
+DataState:
+ if (character == '&') {
+ buffer.append('&');
+ WEBVTT_ADVANCE_TO(EscapeState);
+ } else if (character == '<') {
+ if (result.isEmpty())
+ WEBVTT_ADVANCE_TO(TagState);
else {
- bufferCharacter(cc);
- WEBVTT_ADVANCE_TO(DataState);
- }
- }
- END_STATE()
-
- WEBVTT_BEGIN_STATE(EscapeState) {
- if (cc == ';') {
- if (vectorEqualsString(m_buffer, "&amp"))
- bufferCharacter('&');
- else if (vectorEqualsString(m_buffer, "&lt"))
- bufferCharacter('<');
- else if (vectorEqualsString(m_buffer, "&gt"))
- bufferCharacter('>');
- else if (vectorEqualsString(m_buffer, "&lrm"))
- bufferCharacter(leftToRightMark);
- else if (vectorEqualsString(m_buffer, "&rlm"))
- bufferCharacter(rightToLeftMark);
- else if (vectorEqualsString(m_buffer, "&nbsp"))
- bufferCharacter(noBreakSpace);
- else {
- m_buffer.append(static_cast<LChar>(cc));
- m_token->appendToCharacter(m_buffer);
- }
- m_buffer.clear();
- WEBVTT_ADVANCE_TO(DataState);
- } else if (isASCIIAlphanumeric(cc)) {
- m_buffer.append(static_cast<LChar>(cc));
- WEBVTT_ADVANCE_TO(EscapeState);
- } else if (cc == kEndOfFileMarker) {
- m_token->appendToCharacter(m_buffer);
- return emitEndOfFile(source);
- } else {
- if (!vectorEqualsString(m_buffer, "&"))
- m_token->appendToCharacter(m_buffer);
- m_buffer.clear();
- WEBVTT_ADVANCE_TO(DataState);
+ // We don't want to advance input or perform a state transition - just return a (new) token.
+ // (On the next call to nextToken we will see '<' again, but take the other branch in this if instead.)
+ return emitToken(token, WebVTTToken::StringToken(result.toString()));
}
+ } else if (character == kEndOfFileMarker)
+ return advanceAndEmitToken(m_input, token, WebVTTToken::StringToken(result.toString()));
+ else {
+ result.append(character);
+ WEBVTT_ADVANCE_TO(DataState);
}
- END_STATE()
-
- WEBVTT_BEGIN_STATE(TagState) {
- if (isTokenizerWhitespace(cc)) {
- m_token->beginEmptyStartTag();
- WEBVTT_ADVANCE_TO(StartTagAnnotationState);
- } else if (cc == '.') {
- m_token->beginEmptyStartTag();
- WEBVTT_ADVANCE_TO(StartTagClassState);
- } else if (cc == '/') {
- WEBVTT_ADVANCE_TO(EndTagOpenState);
- } else if (WTF::isASCIIDigit(cc)) {
- m_token->beginTimestampTag(cc);
- WEBVTT_ADVANCE_TO(TimestampTagState);
- } else if (cc == '>' || cc == kEndOfFileMarker) {
- m_token->beginEmptyStartTag();
- return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
- } else {
- m_token->beginStartTag(cc);
- WEBVTT_ADVANCE_TO(StartTagState);
- }
- }
- END_STATE()
-
- WEBVTT_BEGIN_STATE(StartTagState) {
- if (isTokenizerWhitespace(cc))
- WEBVTT_ADVANCE_TO(StartTagAnnotationState);
- else if (cc == '.')
- WEBVTT_ADVANCE_TO(StartTagClassState);
- else if (cc == '>' || cc == kEndOfFileMarker)
- return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
+
+EscapeState:
+ if (character == ';') {
+ if (equalLiteral(buffer, "&amp"))
+ result.append('&');
+ else if (equalLiteral(buffer, "&lt"))
+ result.append('<');
+ else if (equalLiteral(buffer, "&gt"))
+ result.append('>');
+ else if (equalLiteral(buffer, "&lrm"))
+ result.append(leftToRightMark);
+ else if (equalLiteral(buffer, "&rlm"))
+ result.append(rightToLeftMark);
+ else if (equalLiteral(buffer, "&nbsp"))
+ result.append(noBreakSpace);
else {
- m_token->appendToName(cc);
- WEBVTT_ADVANCE_TO(StartTagState);
+ buffer.append(character);
+ result.append(buffer);
}
- }
- END_STATE()
-
- WEBVTT_BEGIN_STATE(StartTagClassState) {
- if (isTokenizerWhitespace(cc)) {
- m_token->addNewClass();
- WEBVTT_ADVANCE_TO(StartTagAnnotationState);
- } else if (cc == '.') {
- m_token->addNewClass();
- WEBVTT_ADVANCE_TO(StartTagClassState);
- } else if (cc == '>' || cc == kEndOfFileMarker) {
- m_token->addNewClass();
- return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
- } else {
- m_token->appendToClass(cc);
- WEBVTT_ADVANCE_TO(StartTagClassState);
+ buffer.clear();
+ WEBVTT_ADVANCE_TO(DataState);
+ } else if (isASCIIAlphanumeric(character)) {
+ buffer.append(character);
+ WEBVTT_ADVANCE_TO(EscapeState);
+ } else if (character == '<') {
+ result.append(buffer);
+ return emitToken(token, WebVTTToken::StringToken(result.toString()));
+ } else if (character == kEndOfFileMarker) {
+ result.append(buffer);
+ return advanceAndEmitToken(m_input, token, WebVTTToken::StringToken(result.toString()));
+ } else {
+ result.append(buffer);
+ buffer.clear();
+
+ if (character == '&') {
+ buffer.append('&');
+ WEBVTT_ADVANCE_TO(EscapeState);
}
-
+ result.append(character);
+ WEBVTT_ADVANCE_TO(DataState);
}
- END_STATE()
- WEBVTT_BEGIN_STATE(StartTagAnnotationState) {
- if (cc == '>' || cc == kEndOfFileMarker) {
- m_token->addNewAnnotation();
- return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
- }
- m_token->appendToAnnotation(cc);
+TagState:
+ if (isTokenizerWhitespace(character)) {
+ ASSERT(result.isEmpty());
WEBVTT_ADVANCE_TO(StartTagAnnotationState);
- }
- END_STATE()
-
- WEBVTT_BEGIN_STATE(EndTagOpenState) {
- if (cc == '>' || cc == kEndOfFileMarker) {
- m_token->beginEndTag('\0');
- return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
- }
- m_token->beginEndTag(cc);
- WEBVTT_ADVANCE_TO(EndTagState);
- }
- END_STATE()
-
- WEBVTT_BEGIN_STATE(EndTagState) {
- if (cc == '>' || cc == kEndOfFileMarker)
- return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
- m_token->appendToName(cc);
+ } else if (character == '.') {
+ ASSERT(result.isEmpty());
+ WEBVTT_ADVANCE_TO(StartTagClassState);
+ } else if (character == '/') {
WEBVTT_ADVANCE_TO(EndTagState);
+ } else if (WTF::isASCIIDigit(character)) {
+ result.append(character);
+ WEBVTT_ADVANCE_TO(TimestampTagState);
+ } else if (character == '>' || character == kEndOfFileMarker) {
+ ASSERT(result.isEmpty());
+ return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString()));
+ } else {
+ result.append(character);
+ WEBVTT_ADVANCE_TO(StartTagState);
}
- END_STATE()
- WEBVTT_BEGIN_STATE(TimestampTagState) {
- if (cc == '>' || cc == kEndOfFileMarker)
- return emitAndResumeIn(source, WebVTTTokenizerState::DataState);
- m_token->appendToTimestamp(cc);
- WEBVTT_ADVANCE_TO(TimestampTagState);
+StartTagState:
+ if (isTokenizerWhitespace(character))
+ WEBVTT_ADVANCE_TO(StartTagAnnotationState);
+ else if (character == '.')
+ WEBVTT_ADVANCE_TO(StartTagClassState);
+ else if (character == '>' || character == kEndOfFileMarker)
+ return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString()));
+ else {
+ result.append(character);
+ WEBVTT_ADVANCE_TO(StartTagState);
}
- END_STATE()
+StartTagClassState:
+ if (isTokenizerWhitespace(character)) {
+ addNewClass(classes, buffer);
+ buffer.clear();
+ WEBVTT_ADVANCE_TO(StartTagAnnotationState);
+ } else if (character == '.') {
+ addNewClass(classes, buffer);
+ buffer.clear();
+ WEBVTT_ADVANCE_TO(StartTagClassState);
+ } else if (character == '>' || character == kEndOfFileMarker) {
+ addNewClass(classes, buffer);
+ buffer.clear();
+ return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString(), classes.toAtomicString()));
+ } else {
+ buffer.append(character);
+ WEBVTT_ADVANCE_TO(StartTagClassState);
}
- ASSERT_NOT_REACHED();
- return false;
+StartTagAnnotationState:
+ if (character == '>' || character == kEndOfFileMarker)
+ return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString(), classes.toAtomicString(), buffer.toAtomicString()));
+ buffer.append(character);
+ WEBVTT_ADVANCE_TO(StartTagAnnotationState);
+
+EndTagState:
+ if (character == '>' || character == kEndOfFileMarker)
+ return advanceAndEmitToken(m_input, token, WebVTTToken::EndTag(result.toString()));
+ result.append(character);
+ WEBVTT_ADVANCE_TO(EndTagState);
+
+TimestampTagState:
+ if (character == '>' || character == kEndOfFileMarker)
+ return advanceAndEmitToken(m_input, token, WebVTTToken::TimestampTag(result.toString()));
+ result.append(character);
+ WEBVTT_ADVANCE_TO(TimestampTagState);
}
}