summaryrefslogtreecommitdiff
path: root/Source/WebCore/loader/archive/mhtml
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
commit1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/loader/archive/mhtml
parent32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
downloadWebKitGtk-tarball-master.tar.gz
Diffstat (limited to 'Source/WebCore/loader/archive/mhtml')
-rw-r--r--Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp94
-rw-r--r--Source/WebCore/loader/archive/mhtml/MHTMLArchive.h28
-rw-r--r--Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp78
-rw-r--r--Source/WebCore/loader/archive/mhtml/MHTMLParser.h16
4 files changed, 93 insertions, 123 deletions
diff --git a/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp b/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp
index 4708bf11e..2ca67e27b 100644
--- a/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp
+++ b/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp
@@ -59,7 +59,6 @@ namespace WebCore {
const char* const quotedPrintable = "quoted-printable";
const char* const base64 = "base64";
-const char* const binary = "binary";
static String generateRandomBoundary()
{
@@ -102,50 +101,40 @@ MHTMLArchive::~MHTMLArchive()
clearAllSubframeArchives();
}
-PassRefPtr<MHTMLArchive> MHTMLArchive::create()
+Ref<MHTMLArchive> MHTMLArchive::create()
{
- return adoptRef(new MHTMLArchive);
+ return adoptRef(*new MHTMLArchive);
}
-PassRefPtr<MHTMLArchive> MHTMLArchive::create(const URL& url, SharedBuffer* data)
+RefPtr<MHTMLArchive> MHTMLArchive::create(const URL& url, SharedBuffer& data)
{
// For security reasons we only load MHTML pages from local URLs.
- if (!SchemeRegistry::shouldTreatURLSchemeAsLocal(url.protocol()))
- return 0;
+ if (!SchemeRegistry::shouldTreatURLSchemeAsLocal(url.protocol().toString()))
+ return nullptr;
- MHTMLParser parser(data);
+ MHTMLParser parser(&data);
RefPtr<MHTMLArchive> mainArchive = parser.parseArchive();
if (!mainArchive)
- return 0; // Invalid MHTML file.
+ return nullptr; // Invalid MHTML file.
// Since MHTML is a flat format, we need to make all frames aware of all resources.
for (size_t i = 0; i < parser.frameCount(); ++i) {
RefPtr<MHTMLArchive> archive = parser.frameAt(i);
for (size_t j = 1; j < parser.frameCount(); ++j) {
if (i != j)
- archive->addSubframeArchive(parser.frameAt(j));
+ archive->addSubframeArchive(*parser.frameAt(j));
}
for (size_t j = 0; j < parser.subResourceCount(); ++j)
- archive->addSubresource(parser.subResourceAt(j));
+ archive->addSubresource(*parser.subResourceAt(j));
}
- return mainArchive.release();
+ return mainArchive;
}
-PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page)
-{
- return generateMHTMLData(page, false);
-}
-
-PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLDataUsingBinaryEncoding(Page* page)
-{
- return generateMHTMLData(page, true);
-}
-
-PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page, bool useBinaryEncoding)
+RefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page)
{
Vector<PageSerializer::Resource> resources;
- PageSerializer pageSerializer(&resources);
- pageSerializer.serialize(page);
+ PageSerializer pageSerializer(resources);
+ pageSerializer.serialize(*page);
String boundary = generateRandomBoundary();
String endOfResourceBoundary = makeString("--", boundary, "\r\n");
@@ -176,18 +165,14 @@ PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page, bool useBin
RefPtr<SharedBuffer> mhtmlData = SharedBuffer::create();
mhtmlData->append(asciiString.data(), asciiString.length());
- for (size_t i = 0; i < resources.size(); ++i) {
- const PageSerializer::Resource& resource = resources[i];
-
+ for (auto& resource : resources) {
stringBuilder.clear();
stringBuilder.append(endOfResourceBoundary);
stringBuilder.append("Content-Type: ");
stringBuilder.append(resource.mimeType);
- const char* contentEncoding = 0;
- if (useBinaryEncoding)
- contentEncoding = binary;
- else if (MIMETypeRegistry::isSupportedJavaScriptMIMEType(resource.mimeType) || MIMETypeRegistry::isSupportedNonImageMIMEType(resource.mimeType))
+ const char* contentEncoding = nullptr;
+ if (MIMETypeRegistry::isSupportedJavaScriptMIMEType(resource.mimeType) || MIMETypeRegistry::isSupportedNonImageMIMEType(resource.mimeType))
contentEncoding = quotedPrintable;
else
contentEncoding = base64;
@@ -201,36 +186,27 @@ PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page, bool useBin
asciiString = stringBuilder.toString().utf8();
mhtmlData->append(asciiString.data(), asciiString.length());
- if (!strcmp(contentEncoding, binary)) {
- const char* data;
- size_t position = 0;
- while (size_t length = resource.data->getSomeData(data, position)) {
- mhtmlData->append(data, length);
- position += length;
- }
+ // FIXME: ideally we would encode the content as a stream without having to fetch it all.
+ const char* data = resource.data->data();
+ size_t dataLength = resource.data->size();
+ Vector<char> encodedData;
+ if (!strcmp(contentEncoding, quotedPrintable)) {
+ quotedPrintableEncode(data, dataLength, encodedData);
+ mhtmlData->append(encodedData.data(), encodedData.size());
+ mhtmlData->append("\r\n", 2);
} else {
- // FIXME: ideally we would encode the content as a stream without having to fetch it all.
- const char* data = resource.data->data();
- size_t dataLength = resource.data->size();
- Vector<char> encodedData;
- if (!strcmp(contentEncoding, quotedPrintable)) {
- quotedPrintableEncode(data, dataLength, encodedData);
- mhtmlData->append(encodedData.data(), encodedData.size());
+ ASSERT(!strcmp(contentEncoding, base64));
+ // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs.
+ base64Encode(data, dataLength, encodedData);
+ const size_t maximumLineLength = 76;
+ size_t index = 0;
+ size_t encodedDataLength = encodedData.size();
+ do {
+ size_t lineLength = std::min(encodedDataLength - index, maximumLineLength);
+ mhtmlData->append(encodedData.data() + index, lineLength);
mhtmlData->append("\r\n", 2);
- } else {
- ASSERT(!strcmp(contentEncoding, base64));
- // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs.
- base64Encode(data, dataLength, encodedData);
- const size_t maximumLineLength = 76;
- size_t index = 0;
- size_t encodedDataLength = encodedData.size();
- do {
- size_t lineLength = std::min(encodedDataLength - index, maximumLineLength);
- mhtmlData->append(encodedData.data() + index, lineLength);
- mhtmlData->append("\r\n", 2);
- index += maximumLineLength;
- } while (index < encodedDataLength);
- }
+ index += maximumLineLength;
+ } while (index < encodedDataLength);
}
}
diff --git a/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h b/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h
index 14cd87572..8524659aa 100644
--- a/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h
+++ b/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h
@@ -28,8 +28,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef MHTMLArchive_h
-#define MHTMLArchive_h
+#pragma once
#if ENABLE(MHTML)
@@ -41,27 +40,26 @@ class MHTMLParser;
class Page;
class SharedBuffer;
-class MHTMLArchive : public Archive {
+class MHTMLArchive final : public Archive {
public:
- virtual Type type() const { return MHTML; }
+ static Ref<MHTMLArchive> create();
+ static RefPtr<MHTMLArchive> create(const URL&, SharedBuffer&);
- static PassRefPtr<MHTMLArchive> create();
- static PassRefPtr<MHTMLArchive> create(const URL&, SharedBuffer*);
-
- static PassRefPtr<SharedBuffer> generateMHTMLData(Page*);
- // Binary encoding results in smaller MHTML files but they might not work in other browsers.
- static PassRefPtr<SharedBuffer> generateMHTMLDataUsingBinaryEncoding(Page*);
+ static RefPtr<SharedBuffer> generateMHTMLData(Page*);
virtual ~MHTMLArchive();
private:
- static PassRefPtr<SharedBuffer> generateMHTMLData(Page*, bool useBinaryEncoding);
-
friend class MHTMLParser;
+
MHTMLArchive();
+
+ bool shouldLoadFromArchiveOnly() const final { return true; }
+ bool shouldOverrideBaseURL() const final { return true; }
+ bool shouldUseMainResourceEncoding() const final { return false; }
+ bool shouldUseMainResourceURL() const final { return false; }
};
-}
+} // namespace WebCore
-#endif
-#endif
+#endif // ENABLE(MHTML)
diff --git a/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp b/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp
index d8760f9ed..b0414ebc9 100644
--- a/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp
+++ b/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp
@@ -37,7 +37,6 @@
#include "MIMEHeader.h"
#include "MIMETypeRegistry.h"
#include "QuotedPrintable.h"
-#include <wtf/HashMap.h>
#include <wtf/text/Base64.h>
namespace WebCore {
@@ -57,17 +56,16 @@ MHTMLParser::MHTMLParser(SharedBuffer* data)
{
}
-PassRefPtr<MHTMLArchive> MHTMLParser::parseArchive()
+RefPtr<MHTMLArchive> MHTMLParser::parseArchive()
{
- RefPtr<MIMEHeader> header = MIMEHeader::parseHeader(&m_lineReader);
- return parseArchiveWithHeader(header.get());
+ return parseArchiveWithHeader(MIMEHeader::parseHeader(m_lineReader).get());
}
-PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header)
+RefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header)
{
if (!header) {
LOG_ERROR("Failed to parse MHTML part: no header.");
- return 0;
+ return nullptr;
}
RefPtr<MHTMLArchive> archive = MHTMLArchive::create();
@@ -76,8 +74,8 @@ PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header)
bool endOfArchiveReached = false;
RefPtr<ArchiveResource> resource = parseNextPart(*header, String(), String(), endOfArchiveReached);
if (!resource)
- return 0;
- archive->setMainResource(resource);
+ return nullptr;
+ archive->setMainResource(resource.releaseNonNull());
return archive;
}
@@ -86,31 +84,31 @@ PassRefPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header)
bool endOfArchive = false;
while (!endOfArchive) {
- RefPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(&m_lineReader);
+ RefPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(m_lineReader);
if (!resourceHeader) {
LOG_ERROR("Failed to parse MHTML, invalid MIME header.");
- return 0;
+ return nullptr;
}
if (resourceHeader->contentType() == "multipart/alternative") {
// Ignore IE nesting which makes little sense (IE seems to nest only some of the frames).
RefPtr<MHTMLArchive> subframeArchive = parseArchiveWithHeader(resourceHeader.get());
if (!subframeArchive) {
LOG_ERROR("Failed to parse MHTML subframe.");
- return 0;
+ return nullptr;
}
bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());
ASSERT_UNUSED(endOfPartReached, endOfPartReached);
// The top-frame is the first frame found, regardless of the nesting level.
if (subframeArchive->mainResource())
addResourceToArchive(subframeArchive->mainResource(), archive.get());
- archive->addSubframeArchive(subframeArchive);
+ archive->addSubframeArchive(subframeArchive.releaseNonNull());
continue;
}
RefPtr<ArchiveResource> resource = parseNextPart(*resourceHeader, header->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive);
if (!resource) {
LOG_ERROR("Failed to parse MHTML part.");
- return 0;
+ return nullptr;
}
addResourceToArchive(resource.get(), archive.get());
}
@@ -128,17 +126,17 @@ void MHTMLParser::addResourceToArchive(ArchiveResource* resource, MHTMLArchive*
// The first document suitable resource is the main frame.
if (!archive->mainResource()) {
- archive->setMainResource(resource);
+ archive->setMainResource(*resource);
m_frames.append(archive);
return;
}
RefPtr<MHTMLArchive> subframe = MHTMLArchive::create();
- subframe->setMainResource(resource);
+ subframe->setMainResource(*resource);
m_frames.append(subframe);
}
-PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached)
+RefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached)
{
ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty());
@@ -148,31 +146,31 @@ PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea
if (mimeHeader.contentTransferEncoding() == MIMEHeader::Binary) {
if (!checkBoundary) {
LOG_ERROR("Binary contents requires end of part");
- return 0;
+ return nullptr;
}
m_lineReader.setSeparator(endOfPartBoundary.utf8().data());
Vector<char> part;
if (!m_lineReader.nextChunk(part)) {
LOG_ERROR("Binary contents requires end of part");
- return 0;
- }
- content->append(part);
- m_lineReader.setSeparator("\r\n");
- Vector<char> nextChars;
- if (m_lineReader.peek(nextChars, 2) != 2) {
- LOG_ERROR("Invalid seperator.");
- return 0;
- }
- endOfPartReached = true;
- ASSERT(nextChars.size() == 2);
- endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');
- if (!endOfArchiveReached) {
- String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
- if (!line.isEmpty()) {
- LOG_ERROR("No CRLF at end of binary section.");
- return 0;
- }
- }
+ return nullptr;
+ }
+ content->append(part);
+ m_lineReader.setSeparator("\r\n");
+ Vector<char> nextChars;
+ if (m_lineReader.peek(nextChars, 2) != 2) {
+ LOG_ERROR("Invalid seperator.");
+ return nullptr;
+ }
+ endOfPartReached = true;
+ ASSERT(nextChars.size() == 2);
+ endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');
+ if (!endOfArchiveReached) {
+ String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
+ if (!line.isEmpty()) {
+ LOG_ERROR("No CRLF at end of binary section.");
+ return nullptr;
+ }
+ }
} else {
String line;
while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
@@ -191,7 +189,7 @@ PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea
}
if (!endOfPartReached && checkBoundary) {
LOG_ERROR("No bounday found for MHTML part.");
- return 0;
+ return nullptr;
}
Vector<char> data;
@@ -199,7 +197,7 @@ PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea
case MIMEHeader::Base64:
if (!base64Decode(content->data(), content->size(), data)) {
LOG_ERROR("Invalid base64 content for MHTML part.");
- return 0;
+ return nullptr;
}
break;
case MIMEHeader::QuotedPrintable:
@@ -211,14 +209,14 @@ PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea
break;
default:
LOG_ERROR("Invalid encoding for MHTML part.");
- return 0;
+ return nullptr;
}
RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data);
// FIXME: the URL in the MIME header could be relative, we should resolve it if it is.
// The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rfc2557#section-5
// IE and Firefox (UNMht) seem to generate only absolute URLs.
URL location = URL(URL(), mimeHeader.contentLocation());
- return ArchiveResource::create(contentBuffer, location, mimeHeader.contentType(), mimeHeader.charset(), String());
+ return ArchiveResource::create(WTFMove(contentBuffer), location, mimeHeader.contentType(), mimeHeader.charset(), String());
}
size_t MHTMLParser::frameCount() const
diff --git a/Source/WebCore/loader/archive/mhtml/MHTMLParser.h b/Source/WebCore/loader/archive/mhtml/MHTMLParser.h
index 4f1b126bc..1e9bd4d77 100644
--- a/Source/WebCore/loader/archive/mhtml/MHTMLParser.h
+++ b/Source/WebCore/loader/archive/mhtml/MHTMLParser.h
@@ -28,10 +28,10 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef MHTMLParser_h
-#define MHTMLParser_h
+#pragma once
#if ENABLE(MHTML)
+
#include "SharedBufferChunkReader.h"
#include <wtf/RefPtr.h>
#include <wtf/text/WTFString.h>
@@ -48,7 +48,7 @@ class MHTMLParser {
public:
explicit MHTMLParser(SharedBuffer*);
- PassRefPtr<MHTMLArchive> parseArchive();
+ RefPtr<MHTMLArchive> parseArchive();
size_t frameCount() const;
MHTMLArchive* frameAt(size_t) const;
@@ -57,8 +57,8 @@ public:
ArchiveResource* subResourceAt(size_t) const;
private:
- PassRefPtr<MHTMLArchive> parseArchiveWithHeader(MIMEHeader*);
- PassRefPtr<ArchiveResource> parseNextPart(const MIMEHeader&, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached);
+ RefPtr<MHTMLArchive> parseArchiveWithHeader(MIMEHeader*);
+ RefPtr<ArchiveResource> parseNextPart(const MIMEHeader&, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached);
void addResourceToArchive(ArchiveResource*, MHTMLArchive*);
@@ -67,8 +67,6 @@ private:
Vector<RefPtr<MHTMLArchive>> m_frames;
};
-}
-
-#endif
-#endif
+} // namespace WebCore
+#endif // ENABLE(MHTML)