summary refs log tree commit diff
path: root/Source/WebKit/chromium/src/WebPageSerializer.cpp
diff options
context:
space:
mode:
author Allan Sandfeld Jensen <allan.jensen@digia.com> 2013-09-13 12:51:20 +0200
committer The Qt Project <gerrit-noreply@qt-project.org> 2013-09-19 20:50:05 +0200
commit d441d6f39bb846989d95bcf5caf387b42414718d (patch)
tree e367e64a75991c554930278175d403c072de6bb8 /Source/WebKit/chromium/src/WebPageSerializer.cpp
parent 0060b2994c07842f4c59de64b5e3e430525c4b90 (diff)
download qtwebkit-d441d6f39bb846989d95bcf5caf387b42414718d.tar.gz
Import Qt5x2 branch of QtWebkit for Qt 5.2
Importing a new snapshot of webkit. Change-Id: I2d01ad12cdc8af8cb015387641120a9d7ea5f10c Reviewed-by: Allan Sandfeld Jensen <allan.jensen@digia.com>
Diffstat (limited to 'Source/WebKit/chromium/src/WebPageSerializer.cpp')
-rw-r--r-- Source/WebKit/chromium/src/WebPageSerializer.cpp 289
1 files changed, 0 insertions, 289 deletions
diff --git a/Source/WebKit/chromium/src/WebPageSerializer.cpp b/Source/WebKit/chromium/src/WebPageSerializer.cpp
deleted file mode 100644
index 6b4a4cbd3..000000000
--- a/Source/WebKit/chromium/src/WebPageSerializer.cpp
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Copyright (C) 2009 Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "WebPageSerializer.h"
-
-#include "DocumentLoader.h"
-#include "Element.h"
-#include "Frame.h"
-#include "HTMLAllCollection.h"
-#include "HTMLFrameOwnerElement.h"
-#include "HTMLInputElement.h"
-#include "HTMLNames.h"
-#include "KURL.h"
-#include "MHTMLArchive.h"
-#include "PageSerializer.h"
-#include "WebFrame.h"
-#include "WebFrameImpl.h"
-#include "WebPageSerializerClient.h"
-#include "WebPageSerializerImpl.h"
-#include "WebView.h"
-#include "WebViewImpl.h"
-#include <public/WebCString.h>
-#include <public/WebString.h>
-#include <public/WebURL.h>
-#include <public/WebVector.h>
-#include <wtf/Vector.h>
-#include <wtf/text/StringConcatenate.h>
-
-using namespace WebCore;
-
-namespace {
-// Returns the completed URL of |element|'s sub-resource attribute (src, background, cite, href or data, chosen by tag name), or a default-constructed KURL when there is none.
-KURL getSubResourceURLFromElement(Element* element)
-{
- ASSERT(element);
- const QualifiedName* attributeName = 0;
- if (element->hasTagName(HTMLNames::imgTag) || element->hasTagName(HTMLNames::scriptTag))
- attributeName = &HTMLNames::srcAttr;
- else if (element->hasTagName(HTMLNames::inputTag)) {
- HTMLInputElement* input = static_cast<HTMLInputElement*>(element);
- if (input->isImageButton())
- attributeName = &HTMLNames::srcAttr;
- } else if (element->hasTagName(HTMLNames::bodyTag)
- || element->hasTagName(HTMLNames::tableTag)
- || element->hasTagName(HTMLNames::trTag)
- || element->hasTagName(HTMLNames::tdTag))
- attributeName = &HTMLNames::backgroundAttr;
- else if (element->hasTagName(HTMLNames::blockquoteTag)
- || element->hasTagName(HTMLNames::qTag)
- || element->hasTagName(HTMLNames::delTag)
- || element->hasTagName(HTMLNames::insTag))
- attributeName = &HTMLNames::citeAttr;
- else if (element->hasTagName(HTMLNames::linkTag)) {
- // If the link element is not css, ignore it.
- if (equalIgnoringCase(element->getAttribute(HTMLNames::typeAttr), "text/css")) {
- // FIXME: Add support for extracting links of sub-resources which
- // are inside style-sheet such as @import, @font-face, url(), etc.
- attributeName = &HTMLNames::hrefAttr;
- }
- } else if (element->hasTagName(HTMLNames::objectTag))
- attributeName = &HTMLNames::dataAttr;
- else if (element->hasTagName(HTMLNames::embedTag))
- attributeName = &HTMLNames::srcAttr;
- // attributeName is still null when no tag matched, or when the input is not an image button or the link type is not "text/css".
- if (!attributeName)
- return KURL();
-
- String value = element->getAttribute(*attributeName);
- // Ignore empty values and javascript content (prefix checked after stripping whitespace). NOTE(review): the `false` argument is presumably the case-sensitivity flag - confirm.
- if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", false))
- return KURL();
-
- return element->document()->completeURL(value);
-}
-// Records the sub-resource URL of |element| in |resourceURLs|, or queues its content frame in |framesToVisit| when the element owns one. |frameURLs| is not used by this function.
-void retrieveResourcesForElement(Element* element,
- Vector<Frame*>* visitedFrames,
- Vector<Frame*>* framesToVisit,
- Vector<KURL>* frameURLs,
- Vector<KURL>* resourceURLs)
-{
- // If the node is a frame, we'll process it later in retrieveResourcesForFrame.
- if ((element->hasTagName(HTMLNames::iframeTag) || element->hasTagName(HTMLNames::frameTag)
- || element->hasTagName(HTMLNames::objectTag) || element->hasTagName(HTMLNames::embedTag))
- && element->isFrameOwnerElement()) {
- Frame* frame = static_cast<HTMLFrameOwnerElement*>(element)->contentFrame();
- if (frame) {
- if (!visitedFrames->contains(frame))
- framesToVisit->append(frame);
- return;
- }
- }
- // A frame owner without a content frame falls through and is handled like any other element.
- KURL url = getSubResourceURLFromElement(element);
- if (url.isEmpty() || !url.isValid())
- return; // No subresource for this node.
-
- // Ignore URLs that have non-standard protocols. Since the FTP protocol
- // does not have a cache mechanism, we skip it as well.
- if (!url.protocolIsInHTTPFamily() && !url.isLocalFile())
- return;
- // Keep |resourceURLs| free of duplicates.
- if (!resourceURLs->contains(url))
- resourceURLs->append(url);
-}
-// Adds |frame|'s URL to |frameURLs| and the sub-resources of its document's elements to |resourceURLs|; frames with an invalid URL, an unsupported scheme, or already in |visitedFrames| are skipped.
-void retrieveResourcesForFrame(Frame* frame,
- const WebKit::WebVector<WebKit::WebCString>& supportedSchemes,
- Vector<Frame*>* visitedFrames,
- Vector<Frame*>* framesToVisit,
- Vector<KURL>* frameURLs,
- Vector<KURL>* resourceURLs)
-{
- KURL frameURL = frame->loader()->documentLoader()->request().url();
- // NOTE(review): the chain above dereferences loader() and documentLoader() without a null check - confirm neither can be null for frames reaching this point.
- // If the frame's URL is invalid, ignore it, it is not retrievable.
- if (!frameURL.isValid())
- return;
-
- // Ignore frames from unsupported schemes.
- bool isValidScheme = false;
- for (size_t i = 0; i < supportedSchemes.size(); ++i) {
- if (frameURL.protocolIs(static_cast<CString>(supportedSchemes[i]).data())) {
- isValidScheme = true;
- break;
- }
- }
- if (!isValidScheme)
- return;
-
- // If we have already seen that frame, ignore it.
- if (visitedFrames->contains(frame))
- return;
- visitedFrames->append(frame);
- if (!frameURLs->contains(frameURL))
- frameURLs->append(frameURL);
-
- // Now get the resources associated with each node of the document.
- RefPtr<HTMLCollection> allNodes = frame->document()->all();
- for (unsigned i = 0; i < allNodes->length(); ++i) {
- Node* node = allNodes->item(i);
- // We are only interested in HTML resources.
- if (!node->isElementNode())
- continue;
- retrieveResourcesForElement(static_cast<Element*>(node),
- visitedFrames, framesToVisit,
- frameURLs, resourceURLs);
- }
-}
-
-} // namespace
-
-namespace WebKit {
-// Runs PageSerializer on the page of |view| (cast to WebViewImpl) and copies each resulting resource's url, mimeType and data into |resourcesParam|.
-void WebPageSerializer::serialize(WebView* view, WebVector<WebPageSerializer::Resource>* resourcesParam)
-{
- Vector<PageSerializer::Resource> resources;
- PageSerializer serializer(&resources);
- serializer.serialize(static_cast<WebViewImpl*>(view)->page());
-
- Vector<Resource> result;
- for (Vector<PageSerializer::Resource>::const_iterator iter = resources.begin(); iter != resources.end(); ++iter) {
- Resource resource;
- resource.url = iter->url;
- resource.mimeType = iter->mimeType.ascii();
- // FIXME: we are copying all the resource data here. Ideally we would have a WebSharedData().
- resource.data = WebCString(iter->data->data(), iter->data->size());
- result.append(resource);
- }
-
- *resourcesParam = result;
-}
-// Returns a WebCString copy of the buffer produced by MHTMLArchive::generateMHTMLData() for the page of |view| (cast to WebViewImpl).
-WebCString WebPageSerializer::serializeToMHTML(WebView* view)
-{
- RefPtr<SharedBuffer> mhtml = MHTMLArchive::generateMHTMLData(static_cast<WebViewImpl*>(view)->page());
- // FIXME: we are copying all the data here. Ideally we would have a WebSharedData().
- return WebCString(mhtml->data(), mhtml->size());
-}
-// Returns a WebCString copy of the buffer produced by MHTMLArchive::generateMHTMLDataUsingBinaryEncoding() for the page of |view| (cast to WebViewImpl).
-WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view)
-{
- RefPtr<SharedBuffer> mhtml = MHTMLArchive::generateMHTMLDataUsingBinaryEncoding(static_cast<WebViewImpl*>(view)->page());
- // FIXME: we are copying all the data here. Ideally we would have a WebSharedData().
- return WebCString(mhtml->data(), mhtml->size());
-}
-// Constructs a WebPageSerializerImpl with all arguments forwarded unchanged and returns the result of its serialize().
-bool WebPageSerializer::serialize(WebFrame* frame,
- bool recursive,
- WebPageSerializerClient* client,
- const WebVector<WebURL>& links,
- const WebVector<WebString>& localPaths,
- const WebString& localDirectoryName)
-{
- WebPageSerializerImpl serializerImpl(
- frame, recursive, client, links, localPaths, localDirectoryName);
- return serializerImpl.serialize();
-}
-// Fills |resourceURLs| and |frameURLs| by visiting the main frame of |view| and every frame queued while visiting; returns false only when the view has no main frame.
-bool WebPageSerializer::retrieveAllResources(WebView* view,
- const WebVector<WebCString>& supportedSchemes,
- WebVector<WebURL>* resourceURLs,
- WebVector<WebURL>* frameURLs) {
- WebFrameImpl* mainFrame = static_cast<WebFrameImpl*>(view->mainFrame());
- if (!mainFrame)
- return false;
-
- Vector<Frame*> framesToVisit;
- Vector<Frame*> visitedFrames;
- Vector<KURL> frameKURLs;
- Vector<KURL> resourceKURLs;
-
- // Let's retrieve the resources from every frame in this page. |framesToVisit| is consumed front-first and can grow while a frame is being processed.
- framesToVisit.append(mainFrame->frame());
- while (!framesToVisit.isEmpty()) {
- Frame* frame = framesToVisit[0];
- framesToVisit.remove(0);
- retrieveResourcesForFrame(frame, supportedSchemes,
- &visitedFrames, &framesToVisit,
- &frameKURLs, &resourceKURLs);
- }
-
- // Converts the results to WebURLs.
- WebVector<WebURL> resultResourceURLs(resourceKURLs.size());
- for (size_t i = 0; i < resourceKURLs.size(); ++i) {
- resultResourceURLs[i] = resourceKURLs[i];
- // A frame's src can point to the same URL as another resource, keep the
- // resource URL only in such cases.
- size_t index = frameKURLs.find(resourceKURLs[i]);
- if (index != notFound)
- frameKURLs.remove(index);
- }
- *resourceURLs = resultResourceURLs;
- WebVector<WebURL> resultFrameURLs(frameKURLs.size());
- for (size_t i = 0; i < frameKURLs.size(); ++i)
- resultFrameURLs[i] = frameKURLs[i];
- *frameURLs = resultFrameURLs;
-
- return true;
-}
-// Builds a <meta http-equiv="Content-Type"> tag whose content declares text/html with |charset|; the charset string is concatenated as given.
-WebString WebPageSerializer::generateMetaCharsetDeclaration(const WebString& charset)
-{
- return makeString("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=", static_cast<const String&>(charset), "\">");
-}
-// Builds the "saved from url" HTML comment: the length of |url|'s spec (zero-padded to at least four digits) in parentheses, followed by the spec itself.
-WebString WebPageSerializer::generateMarkOfTheWebDeclaration(const WebURL& url)
-{
- return String::format("\n<!-- saved from url=(%04d)%s -->\n",
- static_cast<int>(url.spec().length()),
- url.spec().data());
-}
-// Builds a <base href="."> tag, adding a target attribute set to |baseTarget| when it is not empty.
-WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget)
-{
- if (baseTarget.isEmpty())
- return makeString("<base href=\".\">");
- return makeString("<base href=\".\" target=\"", static_cast<const String&>(baseTarget), "\">");
-}
-
-} // namespace WebKit