diff options
author | Allan Sandfeld Jensen <allan.jensen@digia.com> | 2013-09-13 12:51:20 +0200 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2013-09-19 20:50:05 +0200 |
commit | d441d6f39bb846989d95bcf5caf387b42414718d (patch) | |
tree | e367e64a75991c554930278175d403c072de6bb8 /Source/WebKit/chromium/src/WebPageSerializer.cpp | |
parent | 0060b2994c07842f4c59de64b5e3e430525c4b90 (diff) | |
download | qtwebkit-d441d6f39bb846989d95bcf5caf387b42414718d.tar.gz |
Import Qt5x2 branch of QtWebkit for Qt 5.2
Importing a new snapshot of webkit.
Change-Id: I2d01ad12cdc8af8cb015387641120a9d7ea5f10c
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@digia.com>
Diffstat (limited to 'Source/WebKit/chromium/src/WebPageSerializer.cpp')
-rw-r--r-- | Source/WebKit/chromium/src/WebPageSerializer.cpp | 289 |
1 files changed, 0 insertions, 289 deletions
diff --git a/Source/WebKit/chromium/src/WebPageSerializer.cpp b/Source/WebKit/chromium/src/WebPageSerializer.cpp deleted file mode 100644 index 6b4a4cbd3..000000000 --- a/Source/WebKit/chromium/src/WebPageSerializer.cpp +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (C) 2009 Google Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "WebPageSerializer.h" - -#include "DocumentLoader.h" -#include "Element.h" -#include "Frame.h" -#include "HTMLAllCollection.h" -#include "HTMLFrameOwnerElement.h" -#include "HTMLInputElement.h" -#include "HTMLNames.h" -#include "KURL.h" -#include "MHTMLArchive.h" -#include "PageSerializer.h" -#include "WebFrame.h" -#include "WebFrameImpl.h" -#include "WebPageSerializerClient.h" -#include "WebPageSerializerImpl.h" -#include "WebView.h" -#include "WebViewImpl.h" -#include <public/WebCString.h> -#include <public/WebString.h> -#include <public/WebURL.h> -#include <public/WebVector.h> -#include <wtf/Vector.h> -#include <wtf/text/StringConcatenate.h> - -using namespace WebCore; - -namespace { - -KURL getSubResourceURLFromElement(Element* element) -{ - ASSERT(element); - const QualifiedName* attributeName = 0; - if (element->hasTagName(HTMLNames::imgTag) || element->hasTagName(HTMLNames::scriptTag)) - attributeName = &HTMLNames::srcAttr; - else if (element->hasTagName(HTMLNames::inputTag)) { - HTMLInputElement* input = static_cast<HTMLInputElement*>(element); - if (input->isImageButton()) - attributeName = &HTMLNames::srcAttr; - } else if (element->hasTagName(HTMLNames::bodyTag) - || element->hasTagName(HTMLNames::tableTag) - || element->hasTagName(HTMLNames::trTag) - || element->hasTagName(HTMLNames::tdTag)) - attributeName = &HTMLNames::backgroundAttr; - else if (element->hasTagName(HTMLNames::blockquoteTag) - || element->hasTagName(HTMLNames::qTag) - || element->hasTagName(HTMLNames::delTag) - || element->hasTagName(HTMLNames::insTag)) - attributeName = &HTMLNames::citeAttr; - else if (element->hasTagName(HTMLNames::linkTag)) { - // If the link element is not css, ignore it. - if (equalIgnoringCase(element->getAttribute(HTMLNames::typeAttr), "text/css")) { - // FIXME: Add support for extracting links of sub-resources which - // are inside style-sheet such as @import, @font-face, url(), etc. - attributeName = &HTMLNames::hrefAttr; - } - } else if (element->hasTagName(HTMLNames::objectTag)) - attributeName = &HTMLNames::dataAttr; - else if (element->hasTagName(HTMLNames::embedTag)) - attributeName = &HTMLNames::srcAttr; - - if (!attributeName) - return KURL(); - - String value = element->getAttribute(*attributeName); - // Ignore javascript content. - if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", false)) - return KURL(); - - return element->document()->completeURL(value); -} - -void retrieveResourcesForElement(Element* element, - Vector<Frame*>* visitedFrames, - Vector<Frame*>* framesToVisit, - Vector<KURL>* frameURLs, - Vector<KURL>* resourceURLs) -{ - // If the node is a frame, we'll process it later in retrieveResourcesForFrame. - if ((element->hasTagName(HTMLNames::iframeTag) || element->hasTagName(HTMLNames::frameTag) - || element->hasTagName(HTMLNames::objectTag) || element->hasTagName(HTMLNames::embedTag)) - && element->isFrameOwnerElement()) { - Frame* frame = static_cast<HTMLFrameOwnerElement*>(element)->contentFrame(); - if (frame) { - if (!visitedFrames->contains(frame)) - framesToVisit->append(frame); - return; - } - } - - KURL url = getSubResourceURLFromElement(element); - if (url.isEmpty() || !url.isValid()) - return; // No subresource for this node. - - // Ignore URLs that have a non-standard protocols. Since the FTP protocol - // does no have a cache mechanism, we skip it as well. - if (!url.protocolIsInHTTPFamily() && !url.isLocalFile()) - return; - - if (!resourceURLs->contains(url)) - resourceURLs->append(url); -} - -void retrieveResourcesForFrame(Frame* frame, - const WebKit::WebVector<WebKit::WebCString>& supportedSchemes, - Vector<Frame*>* visitedFrames, - Vector<Frame*>* framesToVisit, - Vector<KURL>* frameURLs, - Vector<KURL>* resourceURLs) -{ - KURL frameURL = frame->loader()->documentLoader()->request().url(); - - // If the frame's URL is invalid, ignore it, it is not retrievable. - if (!frameURL.isValid()) - return; - - // Ignore frames from unsupported schemes. - bool isValidScheme = false; - for (size_t i = 0; i < supportedSchemes.size(); ++i) { - if (frameURL.protocolIs(static_cast<CString>(supportedSchemes[i]).data())) { - isValidScheme = true; - break; - } - } - if (!isValidScheme) - return; - - // If we have already seen that frame, ignore it. - if (visitedFrames->contains(frame)) - return; - visitedFrames->append(frame); - if (!frameURLs->contains(frameURL)) - frameURLs->append(frameURL); - - // Now get the resources associated with each node of the document. - RefPtr<HTMLCollection> allNodes = frame->document()->all(); - for (unsigned i = 0; i < allNodes->length(); ++i) { - Node* node = allNodes->item(i); - // We are only interested in HTML resources. - if (!node->isElementNode()) - continue; - retrieveResourcesForElement(static_cast<Element*>(node), - visitedFrames, framesToVisit, - frameURLs, resourceURLs); - } -} - -} // namespace - -namespace WebKit { - -void WebPageSerializer::serialize(WebView* view, WebVector<WebPageSerializer::Resource>* resourcesParam) -{ - Vector<PageSerializer::Resource> resources; - PageSerializer serializer(&resources); - serializer.serialize(static_cast<WebViewImpl*>(view)->page()); - - Vector<Resource> result; - for (Vector<PageSerializer::Resource>::const_iterator iter = resources.begin(); iter != resources.end(); ++iter) { - Resource resource; - resource.url = iter->url; - resource.mimeType = iter->mimeType.ascii(); - // FIXME: we are copying all the resource data here. Idealy we would have a WebSharedData(). - resource.data = WebCString(iter->data->data(), iter->data->size()); - result.append(resource); - } - - *resourcesParam = result; -} - -WebCString WebPageSerializer::serializeToMHTML(WebView* view) -{ - RefPtr<SharedBuffer> mhtml = MHTMLArchive::generateMHTMLData(static_cast<WebViewImpl*>(view)->page()); - // FIXME: we are copying all the data here. Idealy we would have a WebSharedData(). - return WebCString(mhtml->data(), mhtml->size()); -} - -WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view) -{ - RefPtr<SharedBuffer> mhtml = MHTMLArchive::generateMHTMLDataUsingBinaryEncoding(static_cast<WebViewImpl*>(view)->page()); - // FIXME: we are copying all the data here. Idealy we would have a WebSharedData(). - return WebCString(mhtml->data(), mhtml->size()); -} - -bool WebPageSerializer::serialize(WebFrame* frame, - bool recursive, - WebPageSerializerClient* client, - const WebVector<WebURL>& links, - const WebVector<WebString>& localPaths, - const WebString& localDirectoryName) -{ - WebPageSerializerImpl serializerImpl( - frame, recursive, client, links, localPaths, localDirectoryName); - return serializerImpl.serialize(); -} - -bool WebPageSerializer::retrieveAllResources(WebView* view, - const WebVector<WebCString>& supportedSchemes, - WebVector<WebURL>* resourceURLs, - WebVector<WebURL>* frameURLs) { - WebFrameImpl* mainFrame = static_cast<WebFrameImpl*>(view->mainFrame()); - if (!mainFrame) - return false; - - Vector<Frame*> framesToVisit; - Vector<Frame*> visitedFrames; - Vector<KURL> frameKURLs; - Vector<KURL> resourceKURLs; - - // Let's retrieve the resources from every frame in this page. - framesToVisit.append(mainFrame->frame()); - while (!framesToVisit.isEmpty()) { - Frame* frame = framesToVisit[0]; - framesToVisit.remove(0); - retrieveResourcesForFrame(frame, supportedSchemes, - &visitedFrames, &framesToVisit, - &frameKURLs, &resourceKURLs); - } - - // Converts the results to WebURLs. - WebVector<WebURL> resultResourceURLs(resourceKURLs.size()); - for (size_t i = 0; i < resourceKURLs.size(); ++i) { - resultResourceURLs[i] = resourceKURLs[i]; - // A frame's src can point to the same URL as another resource, keep the - // resource URL only in such cases. - size_t index = frameKURLs.find(resourceKURLs[i]); - if (index != notFound) - frameKURLs.remove(index); - } - *resourceURLs = resultResourceURLs; - WebVector<WebURL> resultFrameURLs(frameKURLs.size()); - for (size_t i = 0; i < frameKURLs.size(); ++i) - resultFrameURLs[i] = frameKURLs[i]; - *frameURLs = resultFrameURLs; - - return true; -} - -WebString WebPageSerializer::generateMetaCharsetDeclaration(const WebString& charset) -{ - return makeString("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=", static_cast<const String&>(charset), "\">"); -} - -WebString WebPageSerializer::generateMarkOfTheWebDeclaration(const WebURL& url) -{ - return String::format("\n<!-- saved from url=(%04d)%s -->\n", - static_cast<int>(url.spec().length()), - url.spec().data()); -} - -WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget) -{ - if (baseTarget.isEmpty()) - return makeString("<base href=\".\">"); - return makeString("<base href=\".\" target=\"", static_cast<const String&>(baseTarget), "\">"); -} - -} // namespace WebKit |