diff options
Diffstat (limited to 'Source/WebCore/html/track/WebVTTParser.cpp')
-rw-r--r-- | Source/WebCore/html/track/WebVTTParser.cpp | 598 |
1 files changed, 301 insertions, 297 deletions
diff --git a/Source/WebCore/html/track/WebVTTParser.cpp b/Source/WebCore/html/track/WebVTTParser.cpp index 48d729fb6..646110180 100644 --- a/Source/WebCore/html/track/WebVTTParser.cpp +++ b/Source/WebCore/html/track/WebVTTParser.cpp @@ -1,6 +1,7 @@ /* - * Copyright (C) 2011 Google Inc. All rights reserved. + * Copyright (C) 2011, 2013 Google Inc. All rights reserved. * Copyright (C) 2013 Cable Television Labs, Inc. + * Copyright (C) 2011-2014 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -35,8 +36,11 @@ #include "WebVTTParser.h" +#include "HTMLParserIdioms.h" +#include "ISOVTTCue.h" #include "ProcessingInstruction.h" #include "Text.h" +#include "VTTScanner.h" #include "WebVTTElement.h" namespace WebCore { @@ -44,83 +48,46 @@ namespace WebCore { const double secondsPerHour = 3600; const double secondsPerMinute = 60; const double secondsPerMillisecond = 0.001; -const double malformedTime = -1; -const UChar bom = 0xFEFF; const char* fileIdentifier = "WEBVTT"; const unsigned fileIdentifierLength = 6; -String WebVTTParser::collectDigits(const String& input, unsigned* position) -{ - StringBuilder digits; - while (*position < input.length() && isASCIIDigit(input[*position])) - digits.append(input[(*position)++]); - return digits.toString(); -} - -String WebVTTParser::collectWord(const String& input, unsigned* position) -{ - StringBuilder string; - while (*position < input.length() && !isASpace(input[*position])) - string.append(input[(*position)++]); - return string.toString(); -} - -#if ENABLE(WEBVTT_REGIONS) -float WebVTTParser::parseFloatPercentageValue(const String& value, bool& isValidSetting) +bool WebVTTParser::parseFloatPercentageValue(VTTScanner& valueScanner, float& percentage) { + float number; + if (!valueScanner.scanFloat(number)) + return false; // '%' must be present and at the end of the setting value. - if (value.find('%', 1) != value.length() - 1) { - isValidSetting = false; - return 0; - } - - unsigned position = 0; - - StringBuilder floatNumberAsString; - floatNumberAsString.append(WebVTTParser::collectDigits(value, &position)); - - if (value[position] == '.') { - floatNumberAsString.append("."); - position++; - - floatNumberAsString.append(WebVTTParser::collectDigits(value, &position)); - } - float number = floatNumberAsString.toString().toFloat(&isValidSetting); + if (!valueScanner.scan('%')) + return false; - if (isValidSetting && (number <= 0 || number >= 100)) - isValidSetting = false; + if (number < 0 || number > 100) + return false; - return number; + percentage = number; + return true; } -FloatPoint WebVTTParser::parseFloatPercentageValuePair(const String& value, char delimiter, bool& isValidSetting) +bool WebVTTParser::parseFloatPercentageValuePair(VTTScanner& valueScanner, char delimiter, FloatPoint& valuePair) { - // The delimiter can't be the first or second value because a pair of - // percentages (x%,y%) implies that at least the first two characters - // are the first percentage value. - size_t delimiterOffset = value.find(delimiter, 2); - if (delimiterOffset == notFound || delimiterOffset == value.length() - 1) { - isValidSetting = false; - return FloatPoint(0, 0); - } + float firstCoord; + if (!parseFloatPercentageValue(valueScanner, firstCoord)) + return false; - bool isFirstValueValid; - float firstCoord = parseFloatPercentageValue(value.substring(0, delimiterOffset), isFirstValueValid); + if (!valueScanner.scan(delimiter)) + return false; - bool isSecondValueValid; - float secondCoord = parseFloatPercentageValue(value.substring(delimiterOffset + 1, value.length() - 1), isSecondValueValid); + float secondCoord; + if (!parseFloatPercentageValue(valueScanner, secondCoord)) + return false; - isValidSetting = isFirstValueValid && isSecondValueValid; - return FloatPoint(firstCoord, secondCoord); + valuePair = FloatPoint(firstCoord, secondCoord); + return true; } -#endif WebVTTParser::WebVTTParser(WebVTTParserClient* client, ScriptExecutionContext* context) : m_scriptExecutionContext(context) , m_state(Initial) - , m_currentStartTime(0) - , m_currentEndTime(0) - , m_tokenizer(WebVTTTokenizer::create()) + , m_decoder(TextResourceDecoder::create("text/plain", UTF8Encoding())) , m_client(client) { } @@ -131,32 +98,64 @@ void WebVTTParser::getNewCues(Vector<RefPtr<WebVTTCueData>>& outputCues) m_cuelist.clear(); } -#if ENABLE(WEBVTT_REGIONS) -void WebVTTParser::getNewRegions(Vector<RefPtr<TextTrackRegion>>& outputRegions) +void WebVTTParser::getNewRegions(Vector<RefPtr<VTTRegion>>& outputRegions) { outputRegions = m_regionList; m_regionList.clear(); } -#endif + +void WebVTTParser::parseFileHeader(String&& data) +{ + m_state = Initial; + m_lineReader.reset(); + m_lineReader.append(WTFMove(data)); + parse(); +} void WebVTTParser::parseBytes(const char* data, unsigned length) { - // 4.8.10.13.3 WHATWG WebVTT Parser algorithm. - // 1-3 - Initial setup. - unsigned position = 0; - - while (position < length) { - String line = collectNextLine(data, length, &position); - if (line.isNull()) { - m_buffer.append(data + position, length - position); - return; - } + m_lineReader.append(m_decoder->decode(data, length)); + parse(); +} + +void WebVTTParser::parseCueData(const ISOWebVTTCue& data) +{ + auto cue = WebVTTCueData::create(); + + MediaTime startTime = data.presentationTime(); + cue->setStartTime(startTime); + cue->setEndTime(startTime + data.duration()); + + cue->setContent(data.cueText()); + cue->setId(data.id()); + cue->setSettings(data.settings()); + + MediaTime originalStartTime; + if (WebVTTParser::collectTimeStamp(data.originalStartTime(), originalStartTime)) + cue->setOriginalStartTime(originalStartTime); + + m_cuelist.append(WTFMove(cue)); + if (m_client) + m_client->newCuesParsed(); +} +void WebVTTParser::flush() +{ + m_lineReader.append(m_decoder->flush()); + m_lineReader.appendEndOfStream(); + parse(); + flushPendingCue(); +} + +void WebVTTParser::parse() +{ + // WebVTT parser algorithm. (5.1 WebVTT file parsing.) + // Steps 1 - 3 - Initial setup. + while (auto line = m_lineReader.nextLine()) { switch (m_state) { case Initial: - - // 4-12 - Collect the first line and check for "WEBVTT". - if (!hasRequiredFileIdentifier(line)) { + // Steps 4 - 9 - Check for a valid WebVTT signature. + if (!hasRequiredFileIdentifier(*line)) { if (m_client) m_client->fileFailedToParse(); return; @@ -166,48 +165,53 @@ void WebVTTParser::parseBytes(const char* data, unsigned length) break; case Header: - // 13-18 - Allow a header (comment area) under the WEBVTT line. -#if ENABLE(WEBVTT_REGIONS) - if (line.isEmpty()) { + collectMetadataHeader(*line); + + if (line->isEmpty()) { + // Steps 10-14 - Allow a header (comment area) under the WEBVTT line. if (m_client && m_regionList.size()) m_client->newRegionsParsed(); - m_state = Id; break; } - collectHeader(line); + // Step 15 - Break out of header loop if the line could be a timestamp line. + if (line->contains("-->")) + m_state = recoverCue(*line); - break; - - case Metadata: -#endif - if (line.isEmpty()) - m_state = Id; + // Step 16 - Line is not the empty string and does not contain "-->". break; case Id: - // 19-29 - Allow any number of line terminators, then initialize new cue values. - if (line.isEmpty()) + // Steps 17 - 20 - Allow any number of line terminators, then initialize new cue values. + if (line->isEmpty()) break; + + // Step 21 - Cue creation (start a new cue). resetCueValues(); - // 30-39 - Check if this line contains an optional identifier or timing data. - m_state = collectCueId(line); + // Steps 22 - 25 - Check if this line contains an optional identifier or timing data. + m_state = collectCueId(*line); break; case TimingsAndSettings: - // 40 - Collect cue timings and settings. - m_state = collectTimingsAndSettings(line); + // Steps 26 - 27 - Discard current cue if the line is empty. + if (line->isEmpty()) { + m_state = Id; + break; + } + + // Steps 28 - 29 - Collect cue timings and settings. + m_state = collectTimingsAndSettings(*line); break; case CueText: - // 41-53 - Collect the cue text, create a cue, and add it to the output. - m_state = collectCueText(line); + // Steps 31 - 41 - Collect the cue text, create a cue, and add it to the output. + m_state = collectCueText(*line); break; case BadCue: - // 54-62 - Collect and discard the remaining cue. - m_state = ignoreBadCue(line); + // Steps 42 - 48 - Discard lines until an empty line or a potential timing line is seen. + m_state = ignoreBadCue(*line); break; case Finished: @@ -224,55 +228,47 @@ void WebVTTParser::fileFinished() m_state = Finished; } +void WebVTTParser::flushPendingCue() +{ + ASSERT(m_lineReader.isAtEndOfStream()); + // If we're in the CueText state when we run out of data, we emit the pending cue. + if (m_state == CueText) + createNewCue(); +} + bool WebVTTParser::hasRequiredFileIdentifier(const String& line) { // A WebVTT file identifier consists of an optional BOM character, // the string "WEBVTT" followed by an optional space or tab character, // and any number of characters that are not line terminators ... - unsigned linePos = 0; - - if (line.isEmpty()) + if (!line.startsWith(fileIdentifier, fileIdentifierLength)) return false; - - if (line[0] == bom) - ++linePos; - - if (line.length() < fileIdentifierLength + linePos) - return false; - - for (unsigned i = 0; i < fileIdentifierLength; ++i, ++linePos) { - if (line[linePos] != fileIdentifier[i]) - return false; - } - - if (linePos < line.length() && line[linePos] != ' ' && line[linePos] != '\t') + if (line.length() > fileIdentifierLength && !isHTMLSpace(line[fileIdentifierLength])) return false; return true; } -#if ENABLE(WEBVTT_REGIONS) -void WebVTTParser::collectHeader(const String& line) +void WebVTTParser::collectMetadataHeader(const String& line) { - // 4.1 Extension of WebVTT header parsing (11 - 15) - DEFINE_STATIC_LOCAL(const AtomicString, regionHeaderName, ("Region", AtomicString::ConstructFromLiteral)); + // WebVTT header parsing (WebVTT parser algorithm step 12) + static NeverDestroyed<const AtomicString> regionHeaderName("Region", AtomicString::ConstructFromLiteral); - // 15.4 If line contains the character ":" (A U+003A COLON), then set metadata's + // Step 12.4 If line contains the character ":" (A U+003A COLON), then set metadata's // name to the substring of line before the first ":" character and // metadata's value to the substring after this character. - if (!line.contains(":")) + size_t colonPosition = line.find(':'); + if (colonPosition == notFound) return; - unsigned colonPosition = line.find(":"); - m_currentHeaderName = line.substring(0, colonPosition); + String headerName = line.substring(0, colonPosition); - // 15.5 If metadata's name equals "Region": - if (m_currentHeaderName == regionHeaderName) { - m_currentHeaderValue = line.substring(colonPosition + 1, line.length() - 1); - // 15.5.1 - 15.5.8 Region creation: Let region be a new text track region [...] - createNewRegion(); + // Step 12.5 If metadata's name equals "Region": + if (headerName == regionHeaderName) { + String headerValue = line.substring(colonPosition + 1, line.length() - 1); + // Steps 12.5.1 - 12.5.11 Region creation: Let region be a new text track region [...] + createNewRegion(headerValue); } } -#endif WebVTTParser::ParseState WebVTTParser::collectCueId(const String& line) { @@ -284,97 +280,126 @@ WebVTTParser::ParseState WebVTTParser::collectCueId(const String& line) WebVTTParser::ParseState WebVTTParser::collectTimingsAndSettings(const String& line) { - // 4.8.10.13.3 Collect WebVTT cue timings and settings. - // 1-3 - Let input be the string being parsed and position be a pointer into input - unsigned position = 0; - skipWhiteSpace(line, &position); - - // 4-5 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue start time be the collected time. - m_currentStartTime = collectTimeStamp(line, &position); - if (m_currentStartTime == malformedTime) - return BadCue; - if (position >= line.length()) - return BadCue; - char nextChar = line[position++]; - if (nextChar != ' ' && nextChar != '\t') + if (line.isEmpty()) return BadCue; - skipWhiteSpace(line, &position); - // 6-9 - If the next three characters are not "-->", abort and return failure. - if (line.find("-->", position) == notFound) - return BadCue; - position += 3; - if (position >= line.length()) + VTTScanner input(line); + + // Collect WebVTT cue timings and settings. (5.3 WebVTT cue timings and settings parsing.) + // Steps 1 - 3 - Let input be the string being parsed and position be a pointer into input + input.skipWhile<isHTMLSpace<UChar>>(); + + // Steps 4 - 5 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue start time be the collected time. + if (!collectTimeStamp(input, m_currentStartTime)) return BadCue; - nextChar = line[position++]; - if (nextChar != ' ' && nextChar != '\t') + + input.skipWhile<isHTMLSpace<UChar>>(); + + // Steps 6 - 9 - If the next three characters are not "-->", abort and return failure. + if (!input.scan("-->")) return BadCue; - skipWhiteSpace(line, &position); + + input.skipWhile<isHTMLSpace<UChar>>(); - // 10-11 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue end time be the collected time. - m_currentEndTime = collectTimeStamp(line, &position); - if (m_currentEndTime == malformedTime) + // Steps 10 - 11 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue end time be the collected time. + if (!collectTimeStamp(input, m_currentEndTime)) return BadCue; - skipWhiteSpace(line, &position); - // 12 - Parse the WebVTT settings for the cue (conducted in TextTrackCue). - m_currentSettings = line.substring(position, line.length()-1); + input.skipWhile<isHTMLSpace<UChar>>(); + + // Step 12 - Parse the WebVTT settings for the cue (conducted in TextTrackCue). + m_currentSettings = input.restOfInputAsString(); return CueText; } WebVTTParser::ParseState WebVTTParser::collectCueText(const String& line) { + // Step 34. if (line.isEmpty()) { createNewCue(); return Id; } + // Step 35. + if (line.contains("-->")) { + // Step 39-40. + createNewCue(); + + // Step 41 - New iteration of the cue loop. + return recoverCue(line); + } if (!m_currentContent.isEmpty()) - m_currentContent.append("\n"); + m_currentContent.append('\n'); m_currentContent.append(line); - + return CueText; } +WebVTTParser::ParseState WebVTTParser::recoverCue(const String& line) +{ + // Step 17 and 21. + resetCueValues(); + + // Step 22. + return collectTimingsAndSettings(line); +} + WebVTTParser::ParseState WebVTTParser::ignoreBadCue(const String& line) { - if (!line.isEmpty()) - return BadCue; - return Id; + if (line.isEmpty()) + return Id; + if (line.contains("-->")) + return recoverCue(line); + return BadCue; } -PassRefPtr<DocumentFragment> WebVTTParser::createDocumentFragmentFromCueText(const String& text) +// A helper class for the construction of a "cue fragment" from the cue text. +class WebVTTTreeBuilder { +public: + WebVTTTreeBuilder(Document& document) + : m_document(document) { } + + Ref<DocumentFragment> buildFromString(const String& cueText); + +private: + void constructTreeFromToken(Document&); + + WebVTTToken m_token; + RefPtr<ContainerNode> m_currentNode; + Vector<AtomicString> m_languageStack; + Document& m_document; +}; + +Ref<DocumentFragment> WebVTTTreeBuilder::buildFromString(const String& cueText) { // Cue text processing based on - // 4.8.10.13.4 WebVTT cue text parsing rules and - // 4.8.10.13.5 WebVTT cue text DOM construction rules. - - ASSERT(m_scriptExecutionContext->isDocument()); - Document* document = toDocument(m_scriptExecutionContext); - - RefPtr<DocumentFragment> fragment = DocumentFragment::create(*document); + // 5.4 WebVTT cue text parsing rules, and + // 5.5 WebVTT cue text DOM construction rules. + auto fragment = DocumentFragment::create(m_document); - if (text.isEmpty()) { - fragment->parserAppendChild(Text::create(*document, emptyString())); - return fragment.release(); + if (cueText.isEmpty()) { + fragment->parserAppendChild(Text::create(m_document, emptyString())); + return fragment; } - m_currentNode = fragment; - m_tokenizer->reset(); - m_token.clear(); - + m_currentNode = fragment.ptr(); + + WebVTTTokenizer tokenizer(cueText); m_languageStack.clear(); - SegmentedString content(text); - while (m_tokenizer->nextToken(content, m_token)) - constructTreeFromToken(document); + + while (tokenizer.nextToken(m_token)) + constructTreeFromToken(m_document); - return fragment.release(); + return fragment; } -void WebVTTParser::createNewCue() +Ref<DocumentFragment> WebVTTParser::createDocumentFragmentFromCueText(Document& document, const String& cueText) { - if (!m_currentContent.length()) - return; + WebVTTTreeBuilder treeBuilder(document); + return treeBuilder.buildFromString(cueText); +} +void WebVTTParser::createNewCue() +{ RefPtr<WebVTTCueData> cue = WebVTTCueData::create(); cue->setStartTime(m_currentStartTime); cue->setEndTime(m_currentEndTime); @@ -391,21 +416,21 @@ void WebVTTParser::resetCueValues() { m_currentId = emptyString(); m_currentSettings = emptyString(); - m_currentStartTime = 0; - m_currentEndTime = 0; + m_currentStartTime = MediaTime::zeroTime(); + m_currentEndTime = MediaTime::zeroTime(); m_currentContent.clear(); } -#if ENABLE(WEBVTT_REGIONS) -void WebVTTParser::createNewRegion() +void WebVTTParser::createNewRegion(const String& headerValue) { - if (!m_currentHeaderValue.length()) + if (headerValue.isEmpty()) return; - RefPtr<TextTrackRegion> region = TextTrackRegion::create(); - region->setRegionSettings(m_currentHeaderValue); + // Steps 12.5.1 - 12.5.9 - Construct and initialize a WebVTT Region object. + RefPtr<VTTRegion> region = VTTRegion::create(*m_scriptExecutionContext); + region->setRegionSettings(headerValue); - // 15.5.10 If the text track list of regions regions contains a region + // Step 12.5.10 If the text track list of regions regions contains a region // with the same region identifier value as region, remove that region. for (size_t i = 0; i < m_regionList.size(); ++i) if (m_regionList[i]->id() == region->id()) { @@ -413,73 +438,66 @@ void WebVTTParser::createNewRegion() break; } + // Step 12.5.11 m_regionList.append(region); } -#endif -double WebVTTParser::collectTimeStamp(const String& line, unsigned* position) +bool WebVTTParser::collectTimeStamp(const String& line, MediaTime& timeStamp) { - // 4.8.10.13.3 Collect a WebVTT timestamp. - // 1-4 - Initial checks, let most significant units be minutes. + if (line.isEmpty()) + return false; + + VTTScanner input(line); + return collectTimeStamp(input, timeStamp); +} + +bool WebVTTParser::collectTimeStamp(VTTScanner& input, MediaTime& timeStamp) +{ + // Collect a WebVTT timestamp (5.3 WebVTT cue timings and settings parsing.) + // Steps 1 - 4 - Initial checks, let most significant units be minutes. enum Mode { minutes, hours }; Mode mode = minutes; - if (*position >= line.length() || !isASCIIDigit(line[*position])) - return malformedTime; - // 5-6 - Collect a sequence of characters that are 0-9. - String digits1 = collectDigits(line, position); - int value1 = digits1.toInt(); - - // 7 - If not 2 characters or value is greater than 59, interpret as hours. - if (digits1.length() != 2 || value1 > 59) + // Steps 5 - 7 - Collect a sequence of characters that are 0-9. + // If not 2 characters or value is greater than 59, interpret as hours. + int value1; + unsigned value1Digits = input.scanDigits(value1); + if (!value1Digits) + return false; + if (value1Digits != 2 || value1 > 59) mode = hours; - // 8-12 - Collect the next sequence of 0-9 after ':' (must be 2 chars). - if (*position >= line.length() || line[(*position)++] != ':') - return malformedTime; - if (*position >= line.length() || !isASCIIDigit(line[(*position)])) - return malformedTime; - String digits2 = collectDigits(line, position); - int value2 = digits2.toInt(); - if (digits2.length() != 2) - return malformedTime; - - // 13 - Detect whether this timestamp includes hours. + // Steps 8 - 11 - Collect the next sequence of 0-9 after ':' (must be 2 chars). + int value2; + if (!input.scan(':') || input.scanDigits(value2) != 2) + return false; + + // Step 12 - Detect whether this timestamp includes hours. int value3; - if (mode == hours || (*position < line.length() && line[*position] == ':')) { - if (*position >= line.length() || line[(*position)++] != ':') - return malformedTime; - if (*position >= line.length() || !isASCIIDigit(line[*position])) - return malformedTime; - String digits3 = collectDigits(line, position); - if (digits3.length() != 2) - return malformedTime; - value3 = digits3.toInt(); + if (mode == hours || input.match(':')) { + if (!input.scan(':') || input.scanDigits(value3) != 2) + return false; } else { value3 = value2; value2 = value1; value1 = 0; } - // 14-19 - Collect next sequence of 0-9 after '.' (must be 3 chars). - if (*position >= line.length() || line[(*position)++] != '.') - return malformedTime; - if (*position >= line.length() || !isASCIIDigit(line[*position])) - return malformedTime; - String digits4 = collectDigits(line, position); - if (digits4.length() != 3) - return malformedTime; - int value4 = digits4.toInt(); + // Steps 13 - 17 - Collect next sequence of 0-9 after '.' (must be 3 chars). + int value4; + if (!input.scan('.') || input.scanDigits(value4) != 3) + return false; if (value2 > 59 || value3 > 59) - return malformedTime; + return false; - // 20-21 - Calculate result. - return (value1 * secondsPerHour) + (value2 * secondsPerMinute) + value3 + (value4 * secondsPerMillisecond); + // Steps 18 - 19 - Calculate result. + timeStamp = MediaTime::createWithDouble((value1 * secondsPerHour) + (value2 * secondsPerMinute) + value3 + (value4 * secondsPerMillisecond)); + return true; } static WebVTTNodeType tokenToNodeType(WebVTTToken& token) { - switch (token.name().size()) { + switch (token.name().length()) { case 1: if (token.name()[0] == 'c') return WebVTTNodeTypeClass; @@ -506,91 +524,77 @@ static WebVTTNodeType tokenToNodeType(WebVTTToken& token) return WebVTTNodeTypeNone; } -void WebVTTParser::constructTreeFromToken(Document* document) +void WebVTTTreeBuilder::constructTreeFromToken(Document& document) { - QualifiedName tagName(nullAtom, AtomicString(m_token.name()), xhtmlNamespaceURI); - // http://dev.w3.org/html5/webvtt/#webvtt-cue-text-dom-construction-rules switch (m_token.type()) { case WebVTTTokenTypes::Character: { - String content(m_token.characters()); // FIXME: This should be 8bit if possible. - RefPtr<Text> child = Text::create(*document, content); - m_currentNode->parserAppendChild(child); + m_currentNode->parserAppendChild(Text::create(document, m_token.characters())); break; } case WebVTTTokenTypes::StartTag: { - RefPtr<WebVTTElement> child; WebVTTNodeType nodeType = tokenToNodeType(m_token); - if (nodeType != WebVTTNodeTypeNone) - child = WebVTTElement::create(nodeType, *document); - if (child) { - if (m_token.classes().size() > 0) - child->setAttribute(classAttr, AtomicString(m_token.classes())); - - if (child->webVTTNodeType() == WebVTTNodeTypeVoice) - child->setAttribute(WebVTTElement::voiceAttributeName(), AtomicString(m_token.annotation())); - else if (child->webVTTNodeType() == WebVTTNodeTypeLanguage) { - m_languageStack.append(AtomicString(m_token.annotation())); - child->setAttribute(WebVTTElement::langAttributeName(), m_languageStack.last()); - } - if (!m_languageStack.isEmpty()) - child->setLanguage(m_languageStack.last()); - m_currentNode->parserAppendChild(child); - m_currentNode = child; + if (nodeType == WebVTTNodeTypeNone) + break; + + WebVTTNodeType currentType = is<WebVTTElement>(*m_currentNode) ? downcast<WebVTTElement>(*m_currentNode).webVTTNodeType() : WebVTTNodeTypeNone; + // <rt> is only allowed if the current node is <ruby>. + if (nodeType == WebVTTNodeTypeRubyText && currentType != WebVTTNodeTypeRuby) + break; + + auto child = WebVTTElement::create(nodeType, document); + if (!m_token.classes().isEmpty()) + child->setAttributeWithoutSynchronization(classAttr, m_token.classes()); + + if (nodeType == WebVTTNodeTypeVoice) + child->setAttributeWithoutSynchronization(WebVTTElement::voiceAttributeName(), m_token.annotation()); + else if (nodeType == WebVTTNodeTypeLanguage) { + m_languageStack.append(m_token.annotation()); + child->setAttributeWithoutSynchronization(WebVTTElement::langAttributeName(), m_languageStack.last()); } + if (!m_languageStack.isEmpty()) + child->setLanguage(m_languageStack.last()); + m_currentNode->parserAppendChild(child); + m_currentNode = WTFMove(child); break; } case WebVTTTokenTypes::EndTag: { WebVTTNodeType nodeType = tokenToNodeType(m_token); - if (nodeType != WebVTTNodeTypeNone) { - if (nodeType == WebVTTNodeTypeLanguage && m_currentNode->isWebVTTElement() && toWebVTTElement(m_currentNode.get())->webVTTNodeType() == WebVTTNodeTypeLanguage) - m_languageStack.removeLast(); - if (m_currentNode->parentNode()) - m_currentNode = m_currentNode->parentNode(); + if (nodeType == WebVTTNodeTypeNone) + break; + + // The only non-VTTElement would be the DocumentFragment root. (Text + // nodes and PIs will never appear as m_currentNode.) + if (!is<WebVTTElement>(*m_currentNode)) + break; + + WebVTTNodeType currentType = downcast<WebVTTElement>(*m_currentNode).webVTTNodeType(); + bool matchesCurrent = nodeType == currentType; + if (!matchesCurrent) { + // </ruby> auto-closes <rt> + if (currentType == WebVTTNodeTypeRubyText && nodeType == WebVTTNodeTypeRuby) { + if (m_currentNode->parentNode()) + m_currentNode = m_currentNode->parentNode(); + } else + break; } + if (nodeType == WebVTTNodeTypeLanguage) + m_languageStack.removeLast(); + if (m_currentNode->parentNode()) + m_currentNode = m_currentNode->parentNode(); break; } case WebVTTTokenTypes::TimestampTag: { - unsigned position = 0; - String charactersString(StringImpl::create8BitIfPossible(m_token.characters())); - double time = collectTimeStamp(charactersString, &position); - if (time != malformedTime) - m_currentNode->parserAppendChild(ProcessingInstruction::create(*document, "timestamp", charactersString)); + String charactersString = m_token.characters(); + MediaTime parsedTimeStamp; + if (WebVTTParser::collectTimeStamp(charactersString, parsedTimeStamp)) + m_currentNode->parserAppendChild(ProcessingInstruction::create(document, "timestamp", charactersString)); break; } default: break; } - m_token.clear(); -} - -void WebVTTParser::skipWhiteSpace(const String& line, unsigned* position) -{ - while (*position < line.length() && isASpace(line[*position])) - (*position)++; -} - -String WebVTTParser::collectNextLine(const char* data, unsigned length, unsigned* position) -{ - unsigned currentPosition = *position; - while (currentPosition < length && data[currentPosition] != '\r' && data[currentPosition] != '\n') - currentPosition++; - if (currentPosition >= length) - return String(); - String line = String::fromUTF8(data + *position , currentPosition - *position); - if (data[currentPosition] == '\r') - currentPosition++; - if (currentPosition < length && data[currentPosition] == '\n') - currentPosition++; - *position = currentPosition; - if (m_buffer.isEmpty()) - return line; - - String lineWithBuffer = String::fromUTF8(m_buffer.data(), m_buffer.size()); - lineWithBuffer.append(line); - m_buffer.clear(); - return lineWithBuffer; } } |