summaryrefslogtreecommitdiff
path: root/Source/WebCore/dom/TextDecoder.cpp
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
commit1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/dom/TextDecoder.cpp
parent32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
downloadWebKitGtk-tarball-master.tar.gz
Diffstat (limited to 'Source/WebCore/dom/TextDecoder.cpp')
-rw-r--r--Source/WebCore/dom/TextDecoder.cpp148
1 files changed, 148 insertions, 0 deletions
diff --git a/Source/WebCore/dom/TextDecoder.cpp b/Source/WebCore/dom/TextDecoder.cpp
new file mode 100644
index 000000000..96cc739fc
--- /dev/null
+++ b/Source/WebCore/dom/TextDecoder.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2016 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextDecoder.h"
+
+#include "ExceptionCode.h"
+#include "HTMLParserIdioms.h"
+
+namespace WebCore {
+
+// Builds a TextDecoder for the encoding named by |label|.
+// Leading and trailing HTML spaces around the label are dropped first. A
+// RangeError exception is returned when the trimmed label contains a null
+// character, when it does not resolve to a valid encoding, or when it resolves
+// to the encoding named "replacement".
+ExceptionOr<Ref<TextDecoder>> TextDecoder::create(const String& label, Options options)
+{
+    const UChar nullCharacter = '\0';
+    String trimmedLabel = stripLeadingAndTrailingHTMLSpaces(label);
+    // The label is handed to the constructor as a C string below, so an
+    // embedded null character is rejected up front.
+    if (trimmedLabel.contains(nullCharacter))
+        return Exception { RangeError };
+
+    auto textDecoder = adoptRef(*new TextDecoder(trimmedLabel.utf8().data(), options));
+    auto& resolvedEncoding = textDecoder->m_textEncoding;
+    if (!resolvedEncoding.isValid())
+        return Exception { RangeError };
+    if (!strcmp(resolvedEncoding.name(), "replacement"))
+        return Exception { RangeError };
+    return WTFMove(textDecoder);
+}
+
+// Constructs a decoder from an encoding label and the decoder options.
+// |label| initializes m_textEncoding and |options| is stored in m_options;
+// no validation happens here (create() checks the resulting encoding).
+TextDecoder::TextDecoder(const char* label, Options options)
+ : m_textEncoding(label)
+ , m_options(options)
+{
+}
+
+// Skips a leading byte order mark in |data| when it matches this decoder's
+// encoding (UTF-8, UTF-16BE or UTF-16LE). On a match |data| is advanced past
+// the mark and |length| is reduced by the mark's size; otherwise both are left
+// unchanged. Other encodings are never touched.
+void TextDecoder::ignoreBOMIfNecessary(const uint8_t*& data, size_t& length)
+{
+    const uint8_t utf8Mark[3] = {0xEF, 0xBB, 0xBF};
+    const uint8_t utf16BEMark[2] = {0xFE, 0xFF};
+    const uint8_t utf16LEMark[2] = {0xFF, 0xFE};
+
+    // Pick the signature that belongs to this decoder's encoding, if any.
+    const uint8_t* mark = nullptr;
+    size_t markLength = 0;
+    if (m_textEncoding == UTF8Encoding()) {
+        mark = utf8Mark;
+        markLength = sizeof(utf8Mark);
+    } else if (m_textEncoding == UTF16BigEndianEncoding()) {
+        mark = utf16BEMark;
+        markLength = sizeof(utf16BEMark);
+    } else if (m_textEncoding == UTF16LittleEndianEncoding()) {
+        mark = utf16LEMark;
+        markLength = sizeof(utf16LEMark);
+    } else
+        return;
+
+    // The length check comes first so that |data| is never read past its end.
+    if (length < markLength)
+        return;
+    for (size_t i = 0; i < markLength; ++i) {
+        if (data[i] != mark[i])
+            return;
+    }
+
+    data += markLength;
+    length -= markLength;
+}
+
+// Returns |decoded| with U+FEFF put in front of it when this decoder has the
+// ignoreBOM option set and has not completed a decode() call yet; in every
+// other case |decoded| is returned unchanged.
+String TextDecoder::prependBOMIfNecessary(const String& decoded)
+{
+    const bool shouldPrepend = !m_hasDecoded && m_options.ignoreBOM;
+    if (!shouldPrepend)
+        return decoded;
+
+    // FIXME: Make TextCodec::decode take a flag for prepending BOM so we don't need to do this extra allocation and copy.
+    const UChar byteOrderMark[2] = {0xFEFF, '\0'};
+    return makeString(byteOrderMark, decoded);
+}
+
+// Returns the size in bytes of one code unit of |encoding|: 1 for byte-based
+// encodings, 4 for UTF-32 (either byte order), and 2 for everything else.
+static size_t codeUnitByteSize(const TextEncoding& encoding)
+{
+    if (encoding.isByteBasedEncoding())
+        return 1;
+
+    const bool isUTF32 = encoding == UTF32BigEndianEncoding() || encoding == UTF32LittleEndianEncoding();
+    return isUTF32 ? 4 : 2;
+}
+
+// Decodes one chunk of bytes using this decoder's encoding.
+//
+// input   - the bytes to decode; when absent the call behaves as if an empty
+//           chunk had been passed.
+// options - when options.stream is set, input that cannot be decoded yet is
+//           kept in m_buffer and an empty String is returned, so that a later
+//           call can retry with more bytes appended.
+//
+// Returns the decoded text. When options.stream is not set and the input is
+// malformed, a TypeError exception is returned if the decoder was created with
+// the fatal option; otherwise the input is decoded again without stopping on
+// errors. Any call with options.stream unset leaves m_buffer empty, so bytes
+// from an abandoned stream never leak into a later, unrelated call.
+ExceptionOr<String> TextDecoder::decode(std::optional<BufferSource::VariantType> input, DecodeOptions options)
+{
+    std::optional<BufferSource> inputBuffer;
+    const uint8_t* data = nullptr;
+    size_t length = 0;
+    if (input) {
+        inputBuffer = BufferSource(WTFMove(input.value()));
+        data = inputBuffer->data();
+        length = inputBuffer->length();
+    }
+
+    // Remember whether a byte order mark was really removed from this chunk.
+    // With the ignoreBOM option the mark is put back in front of the decoded
+    // text, which must only happen when the input actually carried one.
+    // FIXME: The mark is looked for on every call, including chunks that
+    // continue a stream; fixing that needs state beyond this function.
+    const size_t lengthBeforeStripping = length;
+    ignoreBOMIfNecessary(data, length);
+    const bool strippedBOM = length != lengthBeforeStripping;
+
+    // Bytes left over from earlier streaming calls go in front of this chunk.
+    if (m_buffer.size()) {
+        m_buffer.append(data, length);
+        data = m_buffer.data();
+        length = m_buffer.size();
+    }
+
+    const bool stopOnError = true;
+    // A byte count that is not a whole number of code units cannot be decoded.
+    bool sawError = !!(length % codeUnitByteSize(m_textEncoding));
+    const char* charData = reinterpret_cast<const char*>(data);
+    String result;
+    if (!sawError)
+        result = m_textEncoding.decode(charData, length, stopOnError, sawError);
+
+    if (sawError) {
+        if (options.stream) {
+            // Hold on to the bytes; they were already appended above when
+            // m_buffer was not empty.
+            result = String();
+            if (!m_buffer.size())
+                m_buffer.append(data, length);
+        } else {
+            if (m_options.fatal) {
+                m_buffer.clear();
+                return Exception { TypeError };
+            }
+            result = m_textEncoding.decode(charData, length);
+            if (strippedBOM)
+                result = prependBOMIfNecessary(result);
+            // |charData| may point into m_buffer, so clear only after decoding.
+            m_buffer.clear();
+        }
+    } else {
+        if (strippedBOM)
+            result = prependBOMIfNecessary(result);
+        m_buffer.clear();
+    }
+
+    m_hasDecoded = true;
+    return WTFMove(result);
+}
+
+// Returns the name of this decoder's encoding converted to ASCII lowercase.
+String TextDecoder::encoding() const
+{
+    String encodingName = String(m_textEncoding.name());
+    return encodingName.convertToASCIILowercase();
+}
+
+}