diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/loader/archive/mhtml | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5HEADwebkitgtk-2.16.5master
Diffstat (limited to 'Source/WebCore/loader/archive/mhtml')
-rw-r--r-- | Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp | 94 | ||||
-rw-r--r-- | Source/WebCore/loader/archive/mhtml/MHTMLArchive.h | 28 | ||||
-rw-r--r-- | Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp | 78 | ||||
-rw-r--r-- | Source/WebCore/loader/archive/mhtml/MHTMLParser.h | 16 |
4 files changed, 93 insertions, 123 deletions
diff --git a/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp b/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp index 4708bf11e..2ca67e27b 100644 --- a/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp +++ b/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp @@ -59,7 +59,6 @@ namespace WebCore { const char* const quotedPrintable = "quoted-printable"; const char* const base64 = "base64"; -const char* const binary = "binary"; static String generateRandomBoundary() { @@ -102,50 +101,40 @@ MHTMLArchive::~MHTMLArchive() clearAllSubframeArchives(); } -PassRefPtr<MHTMLArchive> MHTMLArchive::create() +Ref<MHTMLArchive> MHTMLArchive::create() { - return adoptRef(new MHTMLArchive); + return adoptRef(*new MHTMLArchive); } -PassRefPtr<MHTMLArchive> MHTMLArchive::create(const URL& url, SharedBuffer* data) +RefPtr<MHTMLArchive> MHTMLArchive::create(const URL& url, SharedBuffer& data) { // For security reasons we only load MHTML pages from local URLs. - if (!SchemeRegistry::shouldTreatURLSchemeAsLocal(url.protocol())) - return 0; + if (!SchemeRegistry::shouldTreatURLSchemeAsLocal(url.protocol().toString())) + return nullptr; - MHTMLParser parser(data); + MHTMLParser parser(&data); RefPtr<MHTMLArchive> mainArchive = parser.parseArchive(); if (!mainArchive) - return 0; // Invalid MHTML file. + return nullptr; // Invalid MHTML file. // Since MHTML is a flat format, we need to make all frames aware of all resources. for (size_t i = 0; i < parser.frameCount(); ++i) { RefPtr<MHTMLArchive> archive = parser.frameAt(i); for (size_t j = 1; j < parser.frameCount(); ++j) { if (i != j) - archive->addSubframeArchive(parser.frameAt(j)); + archive->addSubframeArchive(*parser.frameAt(j)); } for (size_t j = 0; j < parser.subResourceCount(); ++j) - archive->addSubresource(parser.subResourceAt(j)); + archive->addSubresource(*parser.subResourceAt(j)); } - return mainArchive.release(); + return mainArchive; } -PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page) -{ - return generateMHTMLData(page, false); -} - -PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLDataUsingBinaryEncoding(Page* page) -{ - return generateMHTMLData(page, true); -} - -PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page, bool useBinaryEncoding) +RefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page) { Vector<PageSerializer::Resource> resources; - PageSerializer pageSerializer(&resources); - pageSerializer.serialize(page); + PageSerializer pageSerializer(resources); + pageSerializer.serialize(*page); String boundary = generateRandomBoundary(); String endOfResourceBoundary = makeString("--", boundary, "\r\n"); @@ -176,18 +165,14 @@ PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page, bool useBin RefPtr<SharedBuffer> mhtmlData = SharedBuffer::create(); mhtmlData->append(asciiString.data(), asciiString.length()); - for (size_t i = 0; i < resources.size(); ++i) { - const PageSerializer::Resource& resource = resources[i]; - + for (auto& resource : resources) { stringBuilder.clear(); stringBuilder.append(endOfResourceBoundary); stringBuilder.append("Content-Type: "); stringBuilder.append(resource.mimeType); - const char* contentEncoding = 0; - if (useBinaryEncoding) - contentEncoding = binary; - else if (MIMETypeRegistry::isSupportedJavaScriptMIMEType(resource.mimeType) || MIMETypeRegistry::isSupportedNonImageMIMEType(resource.mimeType)) + const char* contentEncoding = nullptr; + if (MIMETypeRegistry::isSupportedJavaScriptMIMEType(resource.mimeType) || MIMETypeRegistry::isSupportedNonImageMIMEType(resource.mimeType)) contentEncoding = quotedPrintable; else contentEncoding = base64; @@ -201,36 +186,27 @@ PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page, bool useBin asciiString = stringBuilder.toString().utf8(); mhtmlData->append(asciiString.data(), asciiString.length()); - if (!strcmp(contentEncoding, binary)) { - const char* data; - size_t position = 0; - while (size_t length = resource.data->getSomeData(data, position)) { - mhtmlData->append(data, length); - position += length; - } + // FIXME: ideally we would encode the content as a stream without having to fetch it all. + const char* data = resource.data->data(); + size_t dataLength = resource.data->size(); + Vector<char> encodedData; + if (!strcmp(contentEncoding, quotedPrintable)) { + quotedPrintableEncode(data, dataLength, encodedData); + mhtmlData->append(encodedData.data(), encodedData.size()); + mhtmlData->append("\r\n", 2); } else { - // FIXME: ideally we would encode the content as a stream without having to fetch it all. - const char* data = resource.data->data(); - size_t dataLength = resource.data->size(); - Vector<char> encodedData; - if (!strcmp(contentEncoding, quotedPrintable)) { - quotedPrintableEncode(data, dataLength, encodedData); - mhtmlData->append(encodedData.data(), encodedData.size()); + ASSERT(!strcmp(contentEncoding, base64)); + // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs. + base64Encode(data, dataLength, encodedData); + const size_t maximumLineLength = 76; + size_t index = 0; + size_t encodedDataLength = encodedData.size(); + do { + size_t lineLength = std::min(encodedDataLength - index, maximumLineLength); + mhtmlData->append(encodedData.data() + index, lineLength); mhtmlData->append("\r\n", 2); - } else { - ASSERT(!strcmp(contentEncoding, base64)); - // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs. - base64Encode(data, dataLength, encodedData); - const size_t maximumLineLength = 76; - size_t index = 0; - size_t encodedDataLength = encodedData.size(); - do { - size_t lineLength = std::min(encodedDataLength - index, maximumLineLength); - mhtmlData->append(encodedData.data() + index, lineLength); - mhtmlData->append("\r\n", 2); - index += maximumLineLength; - } while (index < encodedDataLength); - } + index += maximumLineLength; + } while (index < encodedDataLength); } } diff --git a/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h b/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h index 14cd87572..8524659aa 100644 --- a/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h +++ b/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h @@ -28,8 +28,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef MHTMLArchive_h -#define MHTMLArchive_h +#pragma once #if ENABLE(MHTML) @@ -41,27 +40,26 @@ class MHTMLParser; class Page; class SharedBuffer; -class MHTMLArchive : public Archive { +class MHTMLArchive final : public Archive { public: - virtual Type type() const { return MHTML; } + static Ref<MHTMLArchive> create(); + static RefPtr<MHTMLArchive> create(const URL&, SharedBuffer&); - static PassRefPtr<MHTMLArchive> create(); - static PassRefPtr<MHTMLArchive> create(const URL&, SharedBuffer*); - - static PassRefPtr<SharedBuffer> generateMHTMLData(Page*); - // Binary encoding results in smaller MHTML files but they might not work in other browsers. - static PassRefPtr<SharedBuffer> generateMHTMLDataUsingBinaryEncoding(Page*); + static RefPtr<SharedBuffer> generateMHTMLData(Page*); virtual ~MHTMLArchive(); private: - static PassRefPtr<SharedBuffer> generateMHTMLData(Page*, bool useBinaryEncoding); - friend class MHTMLParser; + MHTMLArchive(); + + bool shouldLoadFromArchiveOnly() const final { return true; } + bool shouldOverrideBaseURL() const final { return true; } + bool shouldUseMainResourceEncoding() const final { return false; } + bool shouldUseMainResourceURL() const final { return false; } }; -} +} // namespace WebCore -#endif -#endif +#endif // ENABLE(MHTML) diff --git a/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp b/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp index d8760f9ed..b0414ebc9 100644 --- a/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp +++ b/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp @@ -37,7 +37,6 @@ #include "MIMEHeader.h" #include "MIMETypeRegistry.h" #include "QuotedPrintable.h" -#include <wtf/HashMap.h> #include <wtf/text/Base64.h> namespace WebCore { @@ -57,17 +56,16 @@ MHTMLParser::MHTMLParser(SharedBuffer* data) { } -PassRefPtr<MHTMLArchive> MHTMLParser::parseArchive() +RefPtr<MHTMLArchive> MHTMLParser::parseArchive() { - RefPtr<MIMEHeader> header = MIMEHeader::parseHeader(&m_lineReader); - return parseArchiveWithHeader(header.get()); + return parseArchiveWithHeader(MIMEHeader::parseHeader(m_lineReader).get()); } -PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header) +RefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header) { if (!header) { LOG_ERROR("Failed to parse MHTML part: no header."); - return 0; + return nullptr; } RefPtr<MHTMLArchive> archive = MHTMLArchive::create(); @@ -76,8 +74,8 @@ PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header) bool endOfArchiveReached = false; RefPtr<ArchiveResource> resource = parseNextPart(*header, String(), String(), endOfArchiveReached); if (!resource) - return 0; - archive->setMainResource(resource); + return nullptr; + archive->setMainResource(resource.releaseNonNull()); return archive; } @@ -86,31 +84,31 @@ PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header) bool endOfArchive = false; while (!endOfArchive) { - RefPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(&m_lineReader); + RefPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(m_lineReader); if (!resourceHeader) { LOG_ERROR("Failed to parse MHTML, invalid MIME header."); - return 0; + return nullptr; } if (resourceHeader->contentType() == "multipart/alternative") { // Ignore IE nesting which makes little sense (IE seems to nest only some of the frames). RefPtr<MHTMLArchive> subframeArchive = parseArchiveWithHeader(resourceHeader.get()); if (!subframeArchive) { LOG_ERROR("Failed to parse MHTML subframe."); - return 0; + return nullptr; } bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary()); ASSERT_UNUSED(endOfPartReached, endOfPartReached); // The top-frame is the first frame found, regardless of the nesting level. if (subframeArchive->mainResource()) addResourceToArchive(subframeArchive->mainResource(), archive.get()); - archive->addSubframeArchive(subframeArchive); + archive->addSubframeArchive(subframeArchive.releaseNonNull()); continue; } RefPtr<ArchiveResource> resource = parseNextPart(*resourceHeader, header->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive); if (!resource) { LOG_ERROR("Failed to parse MHTML part."); - return 0; + return nullptr; } addResourceToArchive(resource.get(), archive.get()); } @@ -128,17 +126,17 @@ void MHTMLParser::addResourceToArchive(ArchiveResource* resource, MHTMLArchive* // The first document suitable resource is the main frame. if (!archive->mainResource()) { - archive->setMainResource(resource); + archive->setMainResource(*resource); m_frames.append(archive); return; } RefPtr<MHTMLArchive> subframe = MHTMLArchive::create(); - subframe->setMainResource(resource); + subframe->setMainResource(*resource); m_frames.append(subframe); } -PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached) +RefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached) { ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty()); @@ -148,31 +146,31 @@ PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea if (mimeHeader.contentTransferEncoding() == MIMEHeader::Binary) { if (!checkBoundary) { LOG_ERROR("Binary contents requires end of part"); - return 0; + return nullptr; } m_lineReader.setSeparator(endOfPartBoundary.utf8().data()); Vector<char> part; if (!m_lineReader.nextChunk(part)) { LOG_ERROR("Binary contents requires end of part"); - return 0; - } - content->append(part); - m_lineReader.setSeparator("\r\n"); - Vector<char> nextChars; - if (m_lineReader.peek(nextChars, 2) != 2) { - LOG_ERROR("Invalid seperator."); - return 0; - } - endOfPartReached = true; - ASSERT(nextChars.size() == 2); - endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-'); - if (!endOfArchiveReached) { - String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback(); - if (!line.isEmpty()) { - LOG_ERROR("No CRLF at end of binary section."); - return 0; - } - } + return nullptr; + } + content->append(part); + m_lineReader.setSeparator("\r\n"); + Vector<char> nextChars; + if (m_lineReader.peek(nextChars, 2) != 2) { + LOG_ERROR("Invalid seperator."); + return nullptr; + } + endOfPartReached = true; + ASSERT(nextChars.size() == 2); + endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-'); + if (!endOfArchiveReached) { + String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback(); + if (!line.isEmpty()) { + LOG_ERROR("No CRLF at end of binary section."); + return nullptr; + } + } } else { String line; while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) { @@ -191,7 +189,7 @@ PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea } if (!endOfPartReached && checkBoundary) { LOG_ERROR("No bounday found for MHTML part."); - return 0; + return nullptr; } Vector<char> data; @@ -199,7 +197,7 @@ PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea case MIMEHeader::Base64: if (!base64Decode(content->data(), content->size(), data)) { LOG_ERROR("Invalid base64 content for MHTML part."); - return 0; + return nullptr; } break; case MIMEHeader::QuotedPrintable: @@ -211,14 +209,14 @@ PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea break; default: LOG_ERROR("Invalid encoding for MHTML part."); - return 0; + return nullptr; } RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data); // FIXME: the URL in the MIME header could be relative, we should resolve it if it is. // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rfc2557#section-5 // IE and Firefox (UNMht) seem to generate only absolute URLs. URL location = URL(URL(), mimeHeader.contentLocation()); - return ArchiveResource::create(contentBuffer, location, mimeHeader.contentType(), mimeHeader.charset(), String()); + return ArchiveResource::create(WTFMove(contentBuffer), location, mimeHeader.contentType(), mimeHeader.charset(), String()); } size_t MHTMLParser::frameCount() const diff --git a/Source/WebCore/loader/archive/mhtml/MHTMLParser.h b/Source/WebCore/loader/archive/mhtml/MHTMLParser.h index 4f1b126bc..1e9bd4d77 100644 --- a/Source/WebCore/loader/archive/mhtml/MHTMLParser.h +++ b/Source/WebCore/loader/archive/mhtml/MHTMLParser.h @@ -28,10 +28,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef MHTMLParser_h -#define MHTMLParser_h +#pragma once #if ENABLE(MHTML) + #include "SharedBufferChunkReader.h" #include <wtf/RefPtr.h> #include <wtf/text/WTFString.h> @@ -48,7 +48,7 @@ class MHTMLParser { public: explicit MHTMLParser(SharedBuffer*); - PassRefPtr<MHTMLArchive> parseArchive(); + RefPtr<MHTMLArchive> parseArchive(); size_t frameCount() const; MHTMLArchive* frameAt(size_t) const; @@ -57,8 +57,8 @@ public: ArchiveResource* subResourceAt(size_t) const; private: - PassRefPtr<MHTMLArchive> parseArchiveWithHeader(MIMEHeader*); - PassRefPtr<ArchiveResource> parseNextPart(const MIMEHeader&, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached); + RefPtr<MHTMLArchive> parseArchiveWithHeader(MIMEHeader*); + RefPtr<ArchiveResource> parseNextPart(const MIMEHeader&, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached); void addResourceToArchive(ArchiveResource*, MHTMLArchive*); @@ -67,8 +67,6 @@ private: Vector<RefPtr<MHTMLArchive>> m_frames; }; -} - -#endif -#endif +} // namespace WebCore +#endif // ENABLE(MHTML) |