// PageSerializer.cpp [plain text]
#include "config.h"
#include "PageSerializer.h"
#include "CSSImageValue.h"
#include "CSSImportRule.h"
#include "CSSStyleRule.h"
#include "CachedImage.h"
#include "Document.h"
#include "Element.h"
#include "Frame.h"
#include "HTMLFrameOwnerElement.h"
#include "HTMLHeadElement.h"
#include "HTMLImageElement.h"
#include "HTMLLinkElement.h"
#include "HTMLMetaCharsetParser.h"
#include "HTMLNames.h"
#include "HTMLStyleElement.h"
#include "HTTPParsers.h"
#include "Image.h"
#include "MIMETypeRegistry.h"
#include "MarkupAccumulator.h"
#include "Page.h"
#include "StyleCachedImage.h"
#include "StyleImage.h"
#include "Text.h"
#include "TextEncoding.h"
#include <wtf/text/StringBuilder.h>
#include <wtf/text/WTFString.h>
namespace WebCore {
// Returns true when |node| is an HTML <meta> element whose attributes yield a
// valid TextEncoding according to
// HTMLMetaCharsetParser::encodingFromMetaAttributes().
static bool isCharsetSpecifyingNode(Node* node)
{
    if (!node->isHTMLElement())
        return false;

    HTMLElement* element = toHTMLElement(node);
    if (!element->hasTagName(HTMLNames::metaTag))
        return false;

    // The attribute map is requested in read-only mode.
    // NOTE(review): in that mode the map is believed to be null for an element
    // that has no attributes at all -- confirm against Element::attributes().
    // A <meta> without attributes cannot declare a charset, so bail out
    // instead of dereferencing a null map.
    const NamedNodeMap* attributesMap = element->attributes(true);
    if (!attributesMap)
        return false;

    // Copy every (name, value) pair into the list format the charset parser expects.
    HTMLMetaCharsetParser::AttributeList attributes;
    const unsigned attributeCount = attributesMap->length();
    for (unsigned i = 0; i < attributeCount; ++i) {
        Attribute* item = attributesMap->attributeItem(i);
        attributes.append(make_pair(item->name().toString(), item->value().string()));
    }

    TextEncoding textEncoding = HTMLMetaCharsetParser::encodingFromMetaAttributes(attributes);
    return textEncoding.isValid();
}
// Tells whether |element| must be left out of the serialized markup:
// <script>, <noscript>, and <meta> elements that specify a charset.
static bool shouldIgnoreElement(Element* element)
{
    if (element->hasTagName(HTMLNames::scriptTag))
        return true;
    if (element->hasTagName(HTMLNames::noscriptTag))
        return true;
    return isCharsetSpecifyingNode(element);
}
// Picks the attribute that carries the content URL of a frame owner:
// "data" for <object>, "src" for every other frame owner element.
static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerElement& frameOwner)
{
    if (frameOwner.hasTagName(HTMLNames::objectTag))
        return HTMLNames::dataAttr;
    return HTMLNames::srcAttr;
}
class SerializerMarkupAccumulator : public WebCore::MarkupAccumulator {
public:
SerializerMarkupAccumulator(PageSerializer*, Document*, Vector<Node*>*);
virtual ~SerializerMarkupAccumulator();
protected:
virtual void appendText(Vector<UChar>& out, Text*);
virtual void appendElement(Vector<UChar>& out, Element*, Namespaces*);
virtual void appendCustomAttributes(Vector<UChar>& out, Element*, Namespaces*);
virtual void appendEndTag(Node*);
private:
PageSerializer* m_serializer;
Document* m_document;
};
// Sets up the accumulator in AbsoluteURLs mode and, for XHTML, XML-standalone
// or SVG documents, starts the output with an XML declaration carrying the
// document's XML version and charset.
SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer* serializer, Document* document, Vector<Node*>* nodes)
    : MarkupAccumulator(nodes, AbsoluteURLs)
    , m_serializer(serializer)
    , m_document(document)
{
    const bool needsXMLDeclaration = m_document->isXHTMLDocument()
        || m_document->xmlStandalone()
        || m_document->isSVGDocument();
    if (!needsXMLDeclaration)
        return;

    String xmlDeclaration = "<?xml version=\"" + m_document->xmlVersion() + "\" encoding=\"" + m_document->charset() + "\"?>";
    appendString(xmlDeclaration);
}
// Out-of-line destructor; the body is empty and does nothing with the raw
// m_serializer and m_document pointers.
SerializerMarkupAccumulator::~SerializerMarkupAccumulator()
{
}
// Emits a text node only when it has a parent element and that parent is not
// one of the elements excluded from serialization.
void SerializerMarkupAccumulator::appendText(Vector<UChar>& out, Text* text)
{
    Element* parentElement = text->parentElement();
    if (!parentElement)
        return;
    if (shouldIgnoreElement(parentElement))
        return;

    MarkupAccumulator::appendText(out, text);
}
// Emits the start tag of |element| unless the element is excluded from
// serialization. For <head>, a <meta charset> naming the document's charset is
// written immediately afterwards.
void SerializerMarkupAccumulator::appendElement(Vector<UChar>& out, Element* element, Namespaces* namespaces)
{
    const bool skipElement = shouldIgnoreElement(element);
    if (!skipElement)
        MarkupAccumulator::appendElement(out, element, namespaces);

    if (!element->hasTagName(HTMLNames::headTag))
        return;

    String charsetMeta = "<meta charset=\"" + m_document->charset() + "\">";
    out.append(charsetMeta.characters(), charsetMeta.length());
}
// For frame owner elements whose content document has an invalid or "about:"
// URL, appends a URL attribute (see frameOwnerURLAttributeName()) holding the
// URL the serializer assigned to that blank frame. Does nothing for any other
// element, or when the owner has no content frame.
void SerializerMarkupAccumulator::appendCustomAttributes(Vector<UChar>& out, Element* element, Namespaces* namespaces)
{
    if (!element->isFrameOwnerElement())
        return;

    HTMLFrameOwnerElement* owner = static_cast<HTMLFrameOwnerElement*>(element);
    Frame* contentFrame = owner->contentFrame();
    if (!contentFrame)
        return;

    KURL documentURL = contentFrame->document()->url();
    const bool needsSyntheticURL = !documentURL.isValid() || documentURL.protocolIs("about");
    if (!needsSyntheticURL)
        return;

    // The frame has no usable URL of its own: reference it through the
    // serializer-assigned one.
    KURL syntheticURL = m_serializer->urlForBlankFrame(contentFrame);
    RefPtr<Attribute> urlAttribute = Attribute::create(frameOwnerURLAttributeName(*owner), syntheticURL.string());
    appendAttribute(out, element, *urlAttribute, namespaces);
}
// Emits the end tag for |node| only when it is an element that is not excluded
// from serialization.
void SerializerMarkupAccumulator::appendEndTag(Node* node)
{
    if (!node->isElementNode())
        return;
    if (shouldIgnoreElement(toElement(node)))
        return;

    MarkupAccumulator::appendEndTag(node);
}
// Default constructor: every member is default-constructed.
PageSerializer::Resource::Resource()
{
}
// Builds a resource record from its URL, its MIME type and the buffer holding
// its contents.
PageSerializer::Resource::Resource(const KURL& url, const String& mimeType, PassRefPtr<SharedBuffer> data)
: url(url)
, mimeType(mimeType)
, data(data)
{
}
// Stores the caller-provided vector that serialized resources are appended to
// and starts the blank-frame counter at zero.
PageSerializer::PageSerializer(Vector<PageSerializer::Resource>* resources)
: m_resources(resources)
, m_blankFrameCounter(0)
{
}
// Serializes |page| starting from its main frame; serializeFrame() recurses
// into child frames.
void PageSerializer::serialize(Page* page)
{
    Frame* mainFrame = page->mainFrame();
    serializeFrame(mainFrame);
}
// Serializes the document of |frame| into a resource, then collects the
// resources referenced by the serialized nodes (inline-style images, <img>
// images, <link> and <style> sheets) and finally recurses into child frames.
// A frame whose URL has already been recorded is skipped entirely.
void PageSerializer::serializeFrame(Frame* frame)
{
    Document* document = frame->document();

    // Frames with an invalid or "about:" URL are stored under a generated URL.
    KURL frameURL = document->url();
    if (!frameURL.isValid() || frameURL.protocolIs("about"))
        frameURL = urlForBlankFrame(frame);

    if (m_resourceURLs.contains(frameURL))
        return;

    // Serialize the markup and encode it with the document's own charset.
    Vector<Node*> serializedNodes;
    SerializerMarkupAccumulator accumulator(this, document, &serializedNodes);
    TextEncoding textEncoding(TextEncoding(document->charset()));
    ASSERT(textEncoding.isValid());
    String markup = accumulator.serializeNodes(document->documentElement(), 0, IncludeNode);
    CString encodedMarkup = textEncoding.encode(markup.characters(), markup.length(), EntitiesForUnencodables);
    m_resources->append(Resource(frameURL, document->suggestedMIMEType(), SharedBuffer::create(encodedMarkup.data(), encodedMarkup.length())));
    m_resourceURLs.add(frameURL);

    // Walk the nodes that were serialized and gather the resources they use.
    const size_t nodeCount = serializedNodes.size();
    for (size_t index = 0; index < nodeCount; ++index) {
        Node* node = serializedNodes[index];
        if (!node->isElementNode())
            continue;

        Element* element = toElement(node);

        // Images referenced from the element's style declaration.
        retrieveResourcesForCSSDeclaration(element->style());

        if (element->hasTagName(HTMLNames::imgTag)) {
            HTMLImageElement* imageElement = static_cast<HTMLImageElement*>(element);
            KURL imageURL = document->completeURL(imageElement->getAttribute(HTMLNames::srcAttr));
            CachedImage* cachedImage = imageElement->cachedImage();
            addImageToResources(cachedImage, imageURL);
            continue;
        }

        if (element->hasTagName(HTMLNames::linkTag)) {
            HTMLLinkElement* linkElement = static_cast<HTMLLinkElement*>(element);
            StyleSheet* linkedSheet = linkElement->sheet();
            if (linkedSheet && linkedSheet->isCSSStyleSheet()) {
                KURL sheetURL = document->completeURL(linkElement->getAttribute(HTMLNames::hrefAttr));
                serializeCSSStyleSheet(static_cast<CSSStyleSheet*>(linkedSheet), sheetURL);
                ASSERT(m_resourceURLs.contains(sheetURL));
            }
            continue;
        }

        if (element->hasTagName(HTMLNames::styleTag)) {
            HTMLStyleElement* styleElement = static_cast<HTMLStyleElement*>(element);
            StyleSheet* inlineSheet = styleElement->sheet();
            // An empty URL means the sheet text itself is not stored as a
            // resource; only the resources it references are collected.
            if (inlineSheet && inlineSheet->isCSSStyleSheet())
                serializeCSSStyleSheet(static_cast<CSSStyleSheet*>(inlineSheet), KURL());
        }
    }

    // Depth-first descent into the child frames.
    Frame* childFrame = frame->tree()->firstChild();
    while (childFrame) {
        serializeFrame(childFrame);
        childFrame = childFrame->tree()->nextSibling();
    }
}
// Serializes |styleSheet| and the resources it references.
//
// The text of every rule is concatenated (rules separated by a blank line).
// Sheets pulled in through @import rules are serialized recursively under
// their own URL, and images referenced by style rules are added to the
// resource list. The sheet text itself is recorded as a "text/css" resource
// only when |url| is valid and has not been recorded yet; callers pass an
// empty KURL for sheets that have no URL of their own.
//
// A null |styleSheet| is tolerated and ignored.
void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet* styleSheet, const KURL& url)
{
    // The recursive call below forwards importRule->styleSheet() unchecked.
    // NOTE(review): that pointer is believed to be null while an @import has
    // not been loaded (or failed to load) -- confirm against CSSImportRule.
    if (!styleSheet)
        return;

    StringBuilder cssText;
    const unsigned ruleCount = styleSheet->length();
    for (unsigned i = 0; i < ruleCount; ++i) {
        StyleBase* item = styleSheet->item(i);
        String itemText = item->cssText();
        if (!itemText.isEmpty()) {
            cssText.append(itemText);
            if (i < ruleCount - 1)
                cssText.append("\n\n");
        }

        // Some rules have resources associated with them that must be retrieved.
        if (item->isImportRule()) {
            CSSImportRule* importRule = static_cast<CSSImportRule*>(item);
            KURL importURL = styleSheet->document()->completeURL(importRule->href());
            if (m_resourceURLs.contains(importURL))
                continue;
            serializeCSSStyleSheet(importRule->styleSheet(), importURL);
        } else if (item->isFontFaceRule()) {
            // Font-face rules are recognized, but no resources are collected
            // for them here.
        } else if (item->isStyleRule())
            retrieveResourcesForCSSRule(static_cast<CSSStyleRule*>(item));
    }

    if (url.isValid() && !m_resourceURLs.contains(url)) {
        // Encode the sheet text with the sheet's own charset.
        TextEncoding textEncoding = TextEncoding(styleSheet->charset());
        ASSERT(textEncoding.isValid());
        String textString = cssText.toString();
        CString text = textEncoding.encode(textString.characters(), textString.length(), EntitiesForUnencodables);
        m_resources->append(Resource(url, String("text/css"), SharedBuffer::create(text.data(), text.length())));
        m_resourceURLs.add(url);
    }
}
// Records |image| as a resource under |url|, using the MIME type of the
// image's response.
//
// Nothing is recorded when |url| is invalid or already recorded, when |image|
// is null or holds the null image, or when the image has no data buffer.
void PageSerializer::addImageToResources(CachedImage* image, const KURL& url)
{
    if (!url.isValid() || m_resourceURLs.contains(url))
        return;

    if (!image || image->image() == Image::nullImage())
        return;

    // Do not record a resource without contents: a Resource holding a null
    // buffer is of no use to whoever consumes the resource list.
    // NOTE(review): data() is assumed to be able to return null -- confirm
    // against Image::data().
    RefPtr<SharedBuffer> data = image->image()->data();
    if (!data)
        return;

    String mimeType = image->response().mimeType();
    m_resources->append(Resource(url, mimeType, data.release()));
    m_resourceURLs.add(url);
}
// Collects the resources referenced by the style declaration of |rule|.
void PageSerializer::retrieveResourcesForCSSRule(CSSStyleRule* rule)
{
    CSSStyleDeclaration* ruleDeclaration = rule->style();
    retrieveResourcesForCSSDeclaration(ruleDeclaration);
}
// Adds to the resource list every cached image referenced by an image-valued
// property of |styleDeclaration|. Image URLs are completed against the
// document of the declaration's style sheet.
//
// Does nothing when |styleDeclaration| is null, or when it is not attached to
// a CSS style sheet.
void PageSerializer::retrieveResourcesForCSSDeclaration(CSSStyleDeclaration* styleDeclaration)
{
    if (!styleDeclaration)
        return;

    // NOTE(review): stylesheet() is assumed to be able to return null for a
    // declaration that is not attached to any sheet -- confirm. Guarding is
    // harmless either way and avoids a null dereference.
    if (!styleDeclaration->stylesheet() || !styleDeclaration->stylesheet()->isCSSStyleSheet())
        return;

    CSSStyleSheet* cssStyleSheet = static_cast<CSSStyleSheet*>(styleDeclaration->stylesheet());

    const unsigned propertyCount = styleDeclaration->length();
    for (unsigned i = 0; i < propertyCount; ++i) {
        RefPtr<CSSValue> cssValue = styleDeclaration->getPropertyCSSValue(styleDeclaration->item(i));
        // Only image values can reference a resource; tolerate a missing value.
        if (!cssValue || !cssValue->isImageValue())
            continue;

        CSSImageValue* imageValue = static_cast<CSSImageValue*>(cssValue.get());
        StyleImage* styleImage = imageValue->cachedOrPendingImage();
        // Skip images that are absent or not backed by a cached image.
        if (!styleImage || !styleImage->isCachedImage())
            continue;

        CachedImage* image = static_cast<StyleCachedImage*>(styleImage)->cachedImage();
        if (!image)
            continue;

        KURL url = cssStyleSheet->document()->completeURL(image->url());
        addImageToResources(image, url);
    }
}
// Returns the generated URL associated with |frame|, creating and remembering
// a new "wyciwyg://frame/<n>" URL the first time a given frame is seen.
KURL PageSerializer::urlForBlankFrame(Frame* frame)
{
    HashMap<Frame*, KURL>::iterator existing = m_blankFrameURLs.find(frame);
    if (existing == m_blankFrameURLs.end()) {
        // First request for this frame: mint a URL from the running counter.
        String generated = "wyciwyg://frame/" + String::number(m_blankFrameCounter++);
        KURL blankURL(ParsedURLString, generated);
        m_blankFrameURLs.add(frame, blankURL);
        return blankURL;
    }
    return existing->second;
}
}