diff options
Diffstat (limited to 'Source/WebCore/dom/TextDecoder.cpp')
-rw-r--r-- | Source/WebCore/dom/TextDecoder.cpp | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/Source/WebCore/dom/TextDecoder.cpp b/Source/WebCore/dom/TextDecoder.cpp new file mode 100644 index 000000000..96cc739fc --- /dev/null +++ b/Source/WebCore/dom/TextDecoder.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "TextDecoder.h" + +#include "ExceptionCode.h" +#include "HTMLParserIdioms.h" + +namespace WebCore { + +ExceptionOr<Ref<TextDecoder>> TextDecoder::create(const String& label, Options options) +{ + String strippedLabel = stripLeadingAndTrailingHTMLSpaces(label); + const UChar nullCharacter = '\0'; + if (strippedLabel.contains(nullCharacter)) + return Exception { RangeError }; + auto decoder = adoptRef(*new TextDecoder(strippedLabel.utf8().data(), options)); + if (!decoder->m_textEncoding.isValid() || !strcmp(decoder->m_textEncoding.name(), "replacement")) + return Exception { RangeError }; + return WTFMove(decoder); +} + +TextDecoder::TextDecoder(const char* label, Options options) + : m_textEncoding(label) + , m_options(options) +{ +} + +void TextDecoder::ignoreBOMIfNecessary(const uint8_t*& data, size_t& length) +{ + const uint8_t utf8BOMBytes[3] = {0xEF, 0xBB, 0xBF}; + const uint8_t utf16BEBOMBytes[2] = {0xFE, 0xFF}; + const uint8_t utf16LEBOMBytes[2] = {0xFF, 0xFE}; + + if (m_textEncoding == UTF8Encoding() + && length >= sizeof(utf8BOMBytes) + && data[0] == utf8BOMBytes[0] + && data[1] == utf8BOMBytes[1] + && data[2] == utf8BOMBytes[2]) { + data += sizeof(utf8BOMBytes); + length -= sizeof(utf8BOMBytes); + } else if (m_textEncoding == UTF16BigEndianEncoding() + && length >= sizeof(utf16BEBOMBytes) + && data[0] == utf16BEBOMBytes[0] + && data[1] == utf16BEBOMBytes[1]) { + data += sizeof(utf16BEBOMBytes); + length -= sizeof(utf16BEBOMBytes); + } else if (m_textEncoding == UTF16LittleEndianEncoding() + && length >= sizeof(utf16LEBOMBytes) + && data[0] == utf16LEBOMBytes[0] + && data[1] == utf16LEBOMBytes[1]) { + data += sizeof(utf16LEBOMBytes); + length -= sizeof(utf16LEBOMBytes); + } +} + +String TextDecoder::prependBOMIfNecessary(const String& decoded) +{ + if (m_hasDecoded || !m_options.ignoreBOM) + return decoded; + const UChar utf16BEBOM[2] = {0xFEFF, '\0'}; + + // FIXME: Make TextCodec::decode take a flag for prepending BOM so we don't need to do this extra allocation and copy. + return makeString(utf16BEBOM, decoded); +} + +static size_t codeUnitByteSize(const TextEncoding& encoding) +{ + if (encoding.isByteBasedEncoding()) + return 1; + if (encoding == UTF32BigEndianEncoding() || encoding == UTF32LittleEndianEncoding()) + return 4; + return 2; +} + +ExceptionOr<String> TextDecoder::decode(std::optional<BufferSource::VariantType> input, DecodeOptions options) +{ + std::optional<BufferSource> inputBuffer; + const uint8_t* data = nullptr; + size_t length = 0; + if (input) { + inputBuffer = BufferSource(WTFMove(input.value())); + data = inputBuffer->data(); + length = inputBuffer->length(); + } + + ignoreBOMIfNecessary(data, length); + + if (m_buffer.size()) { + m_buffer.append(data, length); + data = m_buffer.data(); + length = m_buffer.size(); + } + + const bool stopOnError = true; + bool sawError = false; + if (length % codeUnitByteSize(m_textEncoding)) + sawError = true; + const char* charData = reinterpret_cast<const char*>(data); + String result; + if (!sawError) + result = prependBOMIfNecessary(m_textEncoding.decode(charData, length, stopOnError, sawError)); + + if (sawError) { + if (options.stream) { + result = String(); + if (!m_buffer.size()) + m_buffer.append(data, length); + } else { + if (m_options.fatal) + return Exception { TypeError }; + result = prependBOMIfNecessary(m_textEncoding.decode(charData, length)); + } + } else + m_buffer.clear(); + + m_hasDecoded = true; + return WTFMove(result); +} + +String TextDecoder::encoding() const +{ + return String(m_textEncoding.name()).convertToASCIILowercase(); +} + +} |