/*
 * HTMLtree.c : implementation of access function for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
#define IN_LIBXML
|
|
Packit |
423ecb |
#include "libxml.h"
|
|
Packit |
423ecb |
#ifdef LIBXML_HTML_ENABLED
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
#include <string.h> /* for memset() only ! */
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
#ifdef HAVE_CTYPE_H
|
|
Packit |
423ecb |
#include <ctype.h>
|
|
Packit |
423ecb |
#endif
|
|
Packit |
423ecb |
#ifdef HAVE_STDLIB_H
|
|
Packit |
423ecb |
#include <stdlib.h>
|
|
Packit |
423ecb |
#endif
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
#include <libxml/xmlmemory.h>
|
|
Packit |
423ecb |
#include <libxml/HTMLparser.h>
|
|
Packit |
423ecb |
#include <libxml/HTMLtree.h>
|
|
Packit |
423ecb |
#include <libxml/entities.h>
|
|
Packit |
423ecb |
#include <libxml/valid.h>
|
|
Packit |
423ecb |
#include <libxml/xmlerror.h>
|
|
Packit |
423ecb |
#include <libxml/parserInternals.h>
|
|
Packit |
423ecb |
#include <libxml/globals.h>
|
|
Packit |
423ecb |
#include <libxml/uri.h>
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
#include "buf.h"
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/************************************************************************
 *                                                                      *
 *              Getting/Setting encoding meta tags                      *
 *                                                                      *
 ************************************************************************/
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlGetMetaEncoding:
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Encoding definition lookup in the Meta tags
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Returns the current encoding as flagged in the HTML source
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
const xmlChar *
|
|
Packit |
423ecb |
htmlGetMetaEncoding(htmlDocPtr doc) {
|
|
Packit |
423ecb |
htmlNodePtr cur;
|
|
Packit |
423ecb |
const xmlChar *content;
|
|
Packit |
423ecb |
const xmlChar *encoding;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (doc == NULL)
|
|
Packit |
423ecb |
return(NULL);
|
|
Packit |
423ecb |
cur = doc->children;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Search the html
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
while (cur != NULL) {
|
|
Packit |
423ecb |
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
|
Packit |
423ecb |
if (xmlStrEqual(cur->name, BAD_CAST"html"))
|
|
Packit |
423ecb |
break;
|
|
Packit |
423ecb |
if (xmlStrEqual(cur->name, BAD_CAST"head"))
|
|
Packit |
423ecb |
goto found_head;
|
|
Packit |
423ecb |
if (xmlStrEqual(cur->name, BAD_CAST"meta"))
|
|
Packit |
423ecb |
goto found_meta;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
cur = cur->next;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur == NULL)
|
|
Packit |
423ecb |
return(NULL);
|
|
Packit |
423ecb |
cur = cur->children;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Search the head
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
while (cur != NULL) {
|
|
Packit |
423ecb |
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
|
Packit |
423ecb |
if (xmlStrEqual(cur->name, BAD_CAST"head"))
|
|
Packit |
423ecb |
break;
|
|
Packit |
423ecb |
if (xmlStrEqual(cur->name, BAD_CAST"meta"))
|
|
Packit |
423ecb |
goto found_meta;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
cur = cur->next;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur == NULL)
|
|
Packit |
423ecb |
return(NULL);
|
|
Packit |
423ecb |
found_head:
|
|
Packit |
423ecb |
cur = cur->children;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Search the meta elements
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
found_meta:
|
|
Packit |
423ecb |
while (cur != NULL) {
|
|
Packit |
423ecb |
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
|
Packit |
423ecb |
if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
|
|
Packit |
423ecb |
xmlAttrPtr attr = cur->properties;
|
|
Packit |
423ecb |
int http;
|
|
Packit |
423ecb |
const xmlChar *value;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
content = NULL;
|
|
Packit |
423ecb |
http = 0;
|
|
Packit |
423ecb |
while (attr != NULL) {
|
|
Packit |
423ecb |
if ((attr->children != NULL) &&
|
|
Packit |
423ecb |
(attr->children->type == XML_TEXT_NODE) &&
|
|
Packit |
423ecb |
(attr->children->next == NULL)) {
|
|
Packit |
423ecb |
value = attr->children->content;
|
|
Packit |
423ecb |
if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
|
|
Packit |
423ecb |
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
|
|
Packit |
423ecb |
http = 1;
|
|
Packit |
423ecb |
else if ((value != NULL)
|
|
Packit |
423ecb |
&& (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
|
|
Packit |
423ecb |
content = value;
|
|
Packit |
423ecb |
if ((http != 0) && (content != NULL))
|
|
Packit |
423ecb |
goto found_content;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
attr = attr->next;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
cur = cur->next;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
return(NULL);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
found_content:
|
|
Packit |
423ecb |
encoding = xmlStrstr(content, BAD_CAST"charset=");
|
|
Packit |
423ecb |
if (encoding == NULL)
|
|
Packit |
423ecb |
encoding = xmlStrstr(content, BAD_CAST"Charset=");
|
|
Packit |
423ecb |
if (encoding == NULL)
|
|
Packit |
423ecb |
encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
|
|
Packit |
423ecb |
if (encoding != NULL) {
|
|
Packit |
423ecb |
encoding += 8;
|
|
Packit |
423ecb |
} else {
|
|
Packit |
423ecb |
encoding = xmlStrstr(content, BAD_CAST"charset =");
|
|
Packit |
423ecb |
if (encoding == NULL)
|
|
Packit |
423ecb |
encoding = xmlStrstr(content, BAD_CAST"Charset =");
|
|
Packit |
423ecb |
if (encoding == NULL)
|
|
Packit |
423ecb |
encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
|
|
Packit |
423ecb |
if (encoding != NULL)
|
|
Packit |
423ecb |
encoding += 9;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (encoding != NULL) {
|
|
Packit |
423ecb |
while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
return(encoding);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlSetMetaEncoding:
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
* @encoding: the encoding string
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Sets the current encoding in the Meta tags
|
|
Packit |
423ecb |
* NOTE: this will not change the document content encoding, just
|
|
Packit |
423ecb |
* the META flag associated.
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Returns 0 in case of success and -1 in case of error
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
int
|
|
Packit |
423ecb |
htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
|
|
Packit |
423ecb |
htmlNodePtr cur, meta = NULL, head = NULL;
|
|
Packit |
423ecb |
const xmlChar *content = NULL;
|
|
Packit |
423ecb |
char newcontent[100];
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
newcontent[0] = 0;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (doc == NULL)
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/* html isn't a real encoding it's just libxml2 way to get entities */
|
|
Packit |
423ecb |
if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (encoding != NULL) {
|
|
Packit |
423ecb |
snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
|
|
Packit |
423ecb |
(char *)encoding);
|
|
Packit |
423ecb |
newcontent[sizeof(newcontent) - 1] = 0;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
cur = doc->children;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Search the html
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
while (cur != NULL) {
|
|
Packit |
423ecb |
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
|
Packit |
423ecb |
if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
|
|
Packit |
423ecb |
break;
|
|
Packit |
423ecb |
if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
|
|
Packit |
423ecb |
goto found_head;
|
|
Packit |
423ecb |
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
|
|
Packit |
423ecb |
goto found_meta;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
cur = cur->next;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur == NULL)
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
cur = cur->children;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Search the head
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
while (cur != NULL) {
|
|
Packit |
423ecb |
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
|
Packit |
423ecb |
if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
|
|
Packit |
423ecb |
break;
|
|
Packit |
423ecb |
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
|
|
Packit |
423ecb |
head = cur->parent;
|
|
Packit |
423ecb |
goto found_meta;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
cur = cur->next;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur == NULL)
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
found_head:
|
|
Packit |
423ecb |
head = cur;
|
|
Packit |
423ecb |
if (cur->children == NULL)
|
|
Packit |
423ecb |
goto create;
|
|
Packit |
423ecb |
cur = cur->children;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
found_meta:
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Search and update all the remaining the meta elements carrying
|
|
Packit |
423ecb |
* encoding informations
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
while (cur != NULL) {
|
|
Packit |
423ecb |
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
|
Packit |
423ecb |
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
|
|
Packit |
423ecb |
xmlAttrPtr attr = cur->properties;
|
|
Packit |
423ecb |
int http;
|
|
Packit |
423ecb |
const xmlChar *value;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
content = NULL;
|
|
Packit |
423ecb |
http = 0;
|
|
Packit |
423ecb |
while (attr != NULL) {
|
|
Packit |
423ecb |
if ((attr->children != NULL) &&
|
|
Packit |
423ecb |
(attr->children->type == XML_TEXT_NODE) &&
|
|
Packit |
423ecb |
(attr->children->next == NULL)) {
|
|
Packit |
423ecb |
value = attr->children->content;
|
|
Packit |
423ecb |
if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
|
|
Packit |
423ecb |
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
|
|
Packit |
423ecb |
http = 1;
|
|
Packit |
423ecb |
else
|
|
Packit |
423ecb |
{
|
|
Packit |
423ecb |
if ((value != NULL) &&
|
|
Packit |
423ecb |
(!xmlStrcasecmp(attr->name, BAD_CAST"content")))
|
|
Packit |
423ecb |
content = value;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if ((http != 0) && (content != NULL))
|
|
Packit |
423ecb |
break;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
attr = attr->next;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if ((http != 0) && (content != NULL)) {
|
|
Packit |
423ecb |
meta = cur;
|
|
Packit |
423ecb |
break;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
cur = cur->next;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
create:
|
|
Packit |
423ecb |
if (meta == NULL) {
|
|
Packit |
423ecb |
if ((encoding != NULL) && (head != NULL)) {
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Create a new Meta element with the right attributes
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
|
|
Packit |
423ecb |
if (head->children == NULL)
|
|
Packit |
423ecb |
xmlAddChild(head, meta);
|
|
Packit |
423ecb |
else
|
|
Packit |
423ecb |
xmlAddPrevSibling(head->children, meta);
|
|
Packit |
423ecb |
xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
|
|
Packit |
423ecb |
xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
} else {
|
|
Packit |
423ecb |
/* remove the meta tag if NULL is passed */
|
|
Packit |
423ecb |
if (encoding == NULL) {
|
|
Packit |
423ecb |
xmlUnlinkNode(meta);
|
|
Packit |
423ecb |
xmlFreeNode(meta);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
/* change the document only if there is a real encoding change */
|
|
Packit |
423ecb |
else if (xmlStrcasestr(content, encoding) == NULL) {
|
|
Packit |
423ecb |
xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
return(0);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
 * booleanHTMLAttrs:
 *
 * NULL-terminated table of the HTML attributes which are output in
 * minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL    /* end-of-table marker */
};
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlIsBooleanAttr:
|
|
Packit |
423ecb |
* @name: the name of the attribute to check
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Determine if a given attribute is a boolean attribute.
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* returns: false if the attribute is not boolean, true otherwise.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
int
|
|
Packit |
423ecb |
htmlIsBooleanAttr(const xmlChar *name)
|
|
Packit |
423ecb |
{
|
|
Packit |
423ecb |
int i = 0;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
while (htmlBooleanAttrs[i] != NULL) {
|
|
Packit |
423ecb |
if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
|
|
Packit |
423ecb |
return 1;
|
|
Packit |
423ecb |
i++;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
return 0;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
#ifdef LIBXML_OUTPUT_ENABLED
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* private routine exported from xmlIO.c
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
xmlOutputBufferPtr
|
|
Packit |
423ecb |
xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
|
|
Packit |
423ecb |
/************************************************************************
 *                                                                      *
 *                      Output error handlers                           *
 *                                                                      *
 ************************************************************************/
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlSaveErrMemory:
|
|
Packit |
423ecb |
* @extra: extra informations
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Handle an out of memory condition
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
static void
|
|
Packit |
423ecb |
htmlSaveErrMemory(const char *extra)
|
|
Packit |
423ecb |
{
|
|
Packit |
423ecb |
__xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlSaveErr:
|
|
Packit |
423ecb |
* @code: the error number
|
|
Packit |
423ecb |
* @node: the location of the error.
|
|
Packit |
423ecb |
* @extra: extra informations
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Handle an out of memory condition
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
static void
|
|
Packit |
423ecb |
htmlSaveErr(int code, xmlNodePtr node, const char *extra)
|
|
Packit |
423ecb |
{
|
|
Packit |
423ecb |
const char *msg = NULL;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
switch(code) {
|
|
Packit |
423ecb |
case XML_SAVE_NOT_UTF8:
|
|
Packit |
423ecb |
msg = "string is not in UTF-8\n";
|
|
Packit |
423ecb |
break;
|
|
Packit |
423ecb |
case XML_SAVE_CHAR_INVALID:
|
|
Packit |
423ecb |
msg = "invalid character value\n";
|
|
Packit |
423ecb |
break;
|
|
Packit |
423ecb |
case XML_SAVE_UNKNOWN_ENCODING:
|
|
Packit |
423ecb |
msg = "unknown encoding %s\n";
|
|
Packit |
423ecb |
break;
|
|
Packit |
423ecb |
case XML_SAVE_NO_DOCTYPE:
|
|
Packit |
423ecb |
msg = "HTML has no DOCTYPE\n";
|
|
Packit |
423ecb |
break;
|
|
Packit |
423ecb |
default:
|
|
Packit |
423ecb |
msg = "unexpected error number\n";
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
__xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to a simple buffer            *
 *                                                                      *
 ************************************************************************/
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlBufNodeDumpFormat:
|
|
Packit |
423ecb |
* @buf: the xmlBufPtr output
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
* @cur: the current node
|
|
Packit |
423ecb |
* @format: should formatting spaces been added
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML node, recursive behaviour,children are printed too.
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Returns the number of byte written or -1 in case of error
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
static size_t
|
|
Packit |
423ecb |
htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
|
Packit |
423ecb |
int format) {
|
|
Packit |
423ecb |
size_t use;
|
|
Packit |
423ecb |
int ret;
|
|
Packit |
423ecb |
xmlOutputBufferPtr outbuf;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (cur == NULL) {
|
|
Packit |
423ecb |
return (-1);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (buf == NULL) {
|
|
Packit |
423ecb |
return (-1);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
|
|
Packit |
423ecb |
if (outbuf == NULL) {
|
|
Packit |
423ecb |
htmlSaveErrMemory("allocating HTML output buffer");
|
|
Packit |
423ecb |
return (-1);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
|
|
Packit |
423ecb |
outbuf->buffer = buf;
|
|
Packit |
423ecb |
outbuf->encoder = NULL;
|
|
Packit |
423ecb |
outbuf->writecallback = NULL;
|
|
Packit |
423ecb |
outbuf->closecallback = NULL;
|
|
Packit |
423ecb |
outbuf->context = NULL;
|
|
Packit |
423ecb |
outbuf->written = 0;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
use = xmlBufUse(buf);
|
|
Packit |
423ecb |
htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
|
|
Packit |
423ecb |
xmlFree(outbuf);
|
|
Packit |
423ecb |
ret = xmlBufUse(buf) - use;
|
|
Packit |
423ecb |
return (ret);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlNodeDump:
|
|
Packit |
423ecb |
* @buf: the HTML buffer output
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
* @cur: the current node
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML node, recursive behaviour,children are printed too,
|
|
Packit |
423ecb |
* and formatting returns are added.
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Returns the number of byte written or -1 in case of error
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
int
|
|
Packit |
423ecb |
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
|
|
Packit |
423ecb |
xmlBufPtr buffer;
|
|
Packit |
423ecb |
size_t ret;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if ((buf == NULL) || (cur == NULL))
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlInitParser();
|
|
Packit |
423ecb |
buffer = xmlBufFromBuffer(buf);
|
|
Packit |
423ecb |
if (buffer == NULL)
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlBufBackToBuffer(buffer);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (ret > INT_MAX)
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
return((int) ret);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlNodeDumpFileFormat:
|
|
Packit |
423ecb |
* @out: the FILE pointer
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
* @cur: the current node
|
|
Packit |
423ecb |
* @encoding: the document encoding
|
|
Packit |
423ecb |
* @format: should formatting spaces been added
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML node, recursive behaviour,children are printed too.
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* TODO: if encoding == NULL try to save in the doc encoding
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* returns: the number of byte written or -1 in case of failure.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
int
|
|
Packit |
423ecb |
htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
|
|
Packit |
423ecb |
xmlNodePtr cur, const char *encoding, int format) {
|
|
Packit |
423ecb |
xmlOutputBufferPtr buf;
|
|
Packit |
423ecb |
xmlCharEncodingHandlerPtr handler = NULL;
|
|
Packit |
423ecb |
int ret;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlInitParser();
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (encoding != NULL) {
|
|
Packit |
423ecb |
xmlCharEncoding enc;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
enc = xmlParseCharEncoding(encoding);
|
|
Packit |
423ecb |
if (enc != XML_CHAR_ENCODING_UTF8) {
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler(encoding);
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Fallback to HTML or ASCII when the encoding is unspecified
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler("HTML");
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler("ascii");
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* save the content to a temp buffer.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
buf = xmlOutputBufferCreateFile(out, handler);
|
|
Packit |
423ecb |
if (buf == NULL) return(0);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
ret = xmlOutputBufferClose(buf);
|
|
Packit |
423ecb |
return(ret);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlNodeDumpFile:
|
|
Packit |
423ecb |
* @out: the FILE pointer
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
* @cur: the current node
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML node, recursive behaviour,children are printed too,
|
|
Packit |
423ecb |
* and formatting returns are added.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
void
|
|
Packit |
423ecb |
htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
|
|
Packit |
423ecb |
htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlDocDumpMemoryFormat:
|
|
Packit |
423ecb |
* @cur: the document
|
|
Packit |
423ecb |
* @mem: OUT: the memory pointer
|
|
Packit |
423ecb |
* @size: OUT: the memory length
|
|
Packit |
423ecb |
* @format: should formatting spaces been added
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML document in memory and return the xmlChar * and it's size.
|
|
Packit |
423ecb |
* It's up to the caller to free the memory.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
void
|
|
Packit |
423ecb |
htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
|
|
Packit |
423ecb |
xmlOutputBufferPtr buf;
|
|
Packit |
423ecb |
xmlCharEncodingHandlerPtr handler = NULL;
|
|
Packit |
423ecb |
const char *encoding;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlInitParser();
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if ((mem == NULL) || (size == NULL))
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
if (cur == NULL) {
|
|
Packit |
423ecb |
*mem = NULL;
|
|
Packit |
423ecb |
*size = 0;
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
encoding = (const char *) htmlGetMetaEncoding(cur);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (encoding != NULL) {
|
|
Packit |
423ecb |
xmlCharEncoding enc;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
enc = xmlParseCharEncoding(encoding);
|
|
Packit |
423ecb |
if (enc != cur->charset) {
|
|
Packit |
423ecb |
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Not supported yet
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
*mem = NULL;
|
|
Packit |
423ecb |
*size = 0;
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler(encoding);
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
} else {
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler(encoding);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Fallback to HTML or ASCII when the encoding is unspecified
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler("HTML");
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler("ascii");
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
buf = xmlAllocOutputBufferInternal(handler);
|
|
Packit |
423ecb |
if (buf == NULL) {
|
|
Packit |
423ecb |
*mem = NULL;
|
|
Packit |
423ecb |
*size = 0;
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlOutputBufferFlush(buf);
|
|
Packit |
423ecb |
if (buf->conv != NULL) {
|
|
Packit |
423ecb |
*size = xmlBufUse(buf->conv);
|
|
Packit |
423ecb |
*mem = xmlStrndup(xmlBufContent(buf->conv), *size);
|
|
Packit |
423ecb |
} else {
|
|
Packit |
423ecb |
*size = xmlBufUse(buf->buffer);
|
|
Packit |
423ecb |
*mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
(void)xmlOutputBufferClose(buf);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlDocDumpMemory:
|
|
Packit |
423ecb |
* @cur: the document
|
|
Packit |
423ecb |
* @mem: OUT: the memory pointer
|
|
Packit |
423ecb |
* @size: OUT: the memory length
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML document in memory and return the xmlChar * and it's size.
|
|
Packit |
423ecb |
* It's up to the caller to free the memory.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
void
|
|
Packit |
423ecb |
htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
|
|
Packit |
423ecb |
htmlDocDumpMemoryFormat(cur, mem, size, 1);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to an I/O output buffer       *
 *                                                                      *
 ************************************************************************/
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/* Declaration only: the definition is not part of this file section. */
void
xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlDtdDumpOutput:
|
|
Packit |
423ecb |
* @buf: the HTML buffer output
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
* @encoding: the encoding string
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* TODO: check whether encoding is needed
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump the HTML document DTD, if any.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
static void
|
|
Packit |
423ecb |
htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|
Packit |
423ecb |
const char *encoding ATTRIBUTE_UNUSED) {
|
|
Packit |
423ecb |
xmlDtdPtr cur = doc->intSubset;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (cur == NULL) {
|
|
Packit |
423ecb |
htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
Packit |
423ecb |
if (cur->ExternalID != NULL) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, " PUBLIC ");
|
|
Packit |
423ecb |
xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
|
|
Packit |
423ecb |
if (cur->SystemID != NULL) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, " ");
|
|
Packit |
423ecb |
xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
} else if (cur->SystemID != NULL &&
|
|
Packit |
423ecb |
xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, " SYSTEM ");
|
|
Packit |
423ecb |
xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ">\n");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlAttrDumpOutput:
|
|
Packit |
423ecb |
* @buf: the HTML buffer output
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
* @cur: the attribute pointer
|
|
Packit |
423ecb |
* @encoding: the encoding string
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML attribute
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
static void
|
|
Packit |
423ecb |
htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
|
|
Packit |
423ecb |
const char *encoding ATTRIBUTE_UNUSED) {
|
|
Packit |
423ecb |
xmlChar *value;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* The html output method should not escape a & character
|
|
Packit |
423ecb |
* occurring in an attribute value immediately followed by
|
|
Packit |
423ecb |
* a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
|
|
Packit |
423ecb |
* This is implemented in xmlEncodeEntitiesReentrant
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (cur == NULL) {
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, " ");
|
|
Packit |
423ecb |
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ":");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
Packit |
423ecb |
if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
|
|
Packit |
423ecb |
value = xmlNodeListGetString(doc, cur->children, 0);
|
|
Packit |
423ecb |
if (value) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "=");
|
|
Packit |
423ecb |
if ((cur->ns == NULL) && (cur->parent != NULL) &&
|
|
Packit |
423ecb |
(cur->parent->ns == NULL) &&
|
|
Packit |
423ecb |
((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
|
|
Packit |
423ecb |
(!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
|
|
Packit |
423ecb |
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
|
|
Packit |
423ecb |
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
|
|
Packit |
423ecb |
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
|
|
Packit |
423ecb |
xmlChar *tmp = value;
|
|
Packit |
423ecb |
/* xmlURIEscapeStr() escapes '"' so it can be safely used. */
|
|
Packit |
423ecb |
xmlBufCCat(buf->buffer, "\"");
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
while (IS_BLANK_CH(*tmp)) tmp++;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/* URI Escape everything, except server side includes. */
|
|
Packit |
423ecb |
for ( ; ; ) {
|
|
Packit |
423ecb |
xmlChar *escaped;
|
|
Packit |
423ecb |
xmlChar endChar;
|
|
Packit |
423ecb |
xmlChar *end = NULL;
|
|
Packit |
423ecb |
xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "
|
|
Packit |
423ecb |
if (start != NULL) {
|
|
Packit |
423ecb |
end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
|
|
Packit |
423ecb |
if (end != NULL) {
|
|
Packit |
423ecb |
*start = '\0';
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/* Escape the whole string, or until start (set to '\0'). */
|
|
Packit |
423ecb |
escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
|
|
Packit |
423ecb |
if (escaped != NULL) {
|
|
Packit |
423ecb |
xmlBufCat(buf->buffer, escaped);
|
|
Packit |
423ecb |
xmlFree(escaped);
|
|
Packit |
423ecb |
} else {
|
|
Packit |
423ecb |
xmlBufCat(buf->buffer, tmp);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (end == NULL) { /* Everything has been written. */
|
|
Packit |
423ecb |
break;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/* Do not escape anything within server side includes. */
|
|
Packit |
423ecb |
*start = '<'; /* Restore the first character of "
|
|
Packit |
423ecb |
end += 3; /* strlen("-->") */
|
|
Packit |
423ecb |
endChar = *end;
|
|
Packit |
423ecb |
*end = '\0';
|
|
Packit |
423ecb |
xmlBufCat(buf->buffer, start);
|
|
Packit |
423ecb |
*end = endChar;
|
|
Packit |
423ecb |
tmp = end;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlBufCCat(buf->buffer, "\"");
|
|
Packit |
423ecb |
} else {
|
|
Packit |
423ecb |
xmlBufWriteQuotedString(buf->buffer, value);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlFree(value);
|
|
Packit |
423ecb |
} else {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "=\"\"");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlAttrListDumpOutput:
|
|
Packit |
423ecb |
* @buf: the HTML buffer output
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
* @cur: the first attribute pointer
|
|
Packit |
423ecb |
* @encoding: the encoding string
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump a list of HTML attributes
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
static void
|
|
Packit |
423ecb |
htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
|
|
Packit |
423ecb |
if (cur == NULL) {
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
while (cur != NULL) {
|
|
Packit |
423ecb |
htmlAttrDumpOutput(buf, doc, cur, encoding);
|
|
Packit |
423ecb |
cur = cur->next;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlNodeListDumpOutput:
|
|
Packit |
423ecb |
* @buf: the HTML buffer output
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
* @cur: the first node
|
|
Packit |
423ecb |
* @encoding: the encoding string
|
|
Packit |
423ecb |
* @format: should formatting spaces been added
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML node list, recursive behaviour,children are printed too.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
static void
|
|
Packit |
423ecb |
htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|
Packit |
423ecb |
xmlNodePtr cur, const char *encoding, int format) {
|
|
Packit |
423ecb |
if (cur == NULL) {
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
while (cur != NULL) {
|
|
Packit |
423ecb |
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
|
|
Packit |
423ecb |
cur = cur->next;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlNodeDumpFormatOutput:
|
|
Packit |
423ecb |
* @buf: the HTML buffer output
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
* @cur: the current node
|
|
Packit |
423ecb |
* @encoding: the encoding string
|
|
Packit |
423ecb |
* @format: should formatting spaces been added
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML node, recursive behaviour,children are printed too.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
void
|
|
Packit |
423ecb |
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|
Packit |
423ecb |
xmlNodePtr cur, const char *encoding, int format) {
|
|
Packit |
423ecb |
const htmlElemDesc * info;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlInitParser();
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if ((cur == NULL) || (buf == NULL)) {
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Special cases.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
if (cur->type == XML_DTD_NODE)
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
|
|
Packit |
423ecb |
(cur->type == XML_DOCUMENT_NODE)){
|
|
Packit |
423ecb |
htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur->type == XML_ATTRIBUTE_NODE) {
|
|
Packit |
423ecb |
htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur->type == HTML_TEXT_NODE) {
|
|
Packit |
423ecb |
if (cur->content != NULL) {
|
|
Packit |
423ecb |
if (((cur->name == (const xmlChar *)xmlStringText) ||
|
|
Packit |
423ecb |
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
|
|
Packit |
423ecb |
((cur->parent == NULL) ||
|
|
Packit |
423ecb |
((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
|
|
Packit |
423ecb |
(xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
|
|
Packit |
423ecb |
xmlChar *buffer;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
|
|
Packit |
423ecb |
if (buffer != NULL) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)buffer);
|
|
Packit |
423ecb |
xmlFree(buffer);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
} else {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur->type == HTML_COMMENT_NODE) {
|
|
Packit |
423ecb |
if (cur->content != NULL) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "-->");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur->type == HTML_PI_NODE) {
|
|
Packit |
423ecb |
if (cur->name == NULL)
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
Packit |
423ecb |
if (cur->content != NULL) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, " ");
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ">");
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur->type == HTML_ENTITY_REF_NODE) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "&";;
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ";");
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur->type == HTML_PRESERVE_NODE) {
|
|
Packit |
423ecb |
if (cur->content != NULL) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Get specific HTML info for that node.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
if (cur->ns == NULL)
|
|
Packit |
423ecb |
info = htmlTagLookup(cur->name);
|
|
Packit |
423ecb |
else
|
|
Packit |
423ecb |
info = NULL;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "<");
|
|
Packit |
423ecb |
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ":");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
Packit |
423ecb |
if (cur->nsDef)
|
|
Packit |
423ecb |
xmlNsListDumpOutput(buf, cur->nsDef);
|
|
Packit |
423ecb |
if (cur->properties != NULL)
|
|
Packit |
423ecb |
htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if ((info != NULL) && (info->empty)) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ">");
|
|
Packit |
423ecb |
if ((format) && (!info->isinline) && (cur->next != NULL)) {
|
|
Packit |
423ecb |
if ((cur->next->type != HTML_TEXT_NODE) &&
|
|
Packit |
423ecb |
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
|
Packit |
423ecb |
(cur->parent != NULL) &&
|
|
Packit |
423ecb |
(cur->parent->name != NULL) &&
|
|
Packit |
423ecb |
(cur->parent->name[0] != 'p')) /* p, pre, param */
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "\n");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
|
|
Packit |
423ecb |
(cur->children == NULL)) {
|
|
Packit |
423ecb |
if ((info != NULL) && (info->saveEndTag != 0) &&
|
|
Packit |
423ecb |
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
|
|
Packit |
423ecb |
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ">");
|
|
Packit |
423ecb |
} else {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "></");
|
|
Packit |
423ecb |
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ":");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ">");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if ((format) && (cur->next != NULL) &&
|
|
Packit |
423ecb |
(info != NULL) && (!info->isinline)) {
|
|
Packit |
423ecb |
if ((cur->next->type != HTML_TEXT_NODE) &&
|
|
Packit |
423ecb |
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
|
Packit |
423ecb |
(cur->parent != NULL) &&
|
|
Packit |
423ecb |
(cur->parent->name != NULL) &&
|
|
Packit |
423ecb |
(cur->parent->name[0] != 'p')) /* p, pre, param */
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "\n");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ">");
|
|
Packit |
423ecb |
if ((cur->type != XML_ELEMENT_NODE) &&
|
|
Packit |
423ecb |
(cur->content != NULL)) {
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Uses the OutputBuffer property to automatically convert
|
|
Packit |
423ecb |
* invalids to charrefs
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *) cur->content);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur->children != NULL) {
|
|
Packit |
423ecb |
if ((format) && (info != NULL) && (!info->isinline) &&
|
|
Packit |
423ecb |
(cur->children->type != HTML_TEXT_NODE) &&
|
|
Packit |
423ecb |
(cur->children->type != HTML_ENTITY_REF_NODE) &&
|
|
Packit |
423ecb |
(cur->children != cur->last) &&
|
|
Packit |
423ecb |
(cur->name != NULL) &&
|
|
Packit |
423ecb |
(cur->name[0] != 'p')) /* p, pre, param */
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "\n");
|
|
Packit |
423ecb |
htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
|
|
Packit |
423ecb |
if ((format) && (info != NULL) && (!info->isinline) &&
|
|
Packit |
423ecb |
(cur->last->type != HTML_TEXT_NODE) &&
|
|
Packit |
423ecb |
(cur->last->type != HTML_ENTITY_REF_NODE) &&
|
|
Packit |
423ecb |
(cur->children != cur->last) &&
|
|
Packit |
423ecb |
(cur->name != NULL) &&
|
|
Packit |
423ecb |
(cur->name[0] != 'p')) /* p, pre, param */
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "\n");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "</");
|
|
Packit |
423ecb |
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ":");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, ">");
|
|
Packit |
423ecb |
if ((format) && (info != NULL) && (!info->isinline) &&
|
|
Packit |
423ecb |
(cur->next != NULL)) {
|
|
Packit |
423ecb |
if ((cur->next->type != HTML_TEXT_NODE) &&
|
|
Packit |
423ecb |
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
|
Packit |
423ecb |
(cur->parent != NULL) &&
|
|
Packit |
423ecb |
(cur->parent->name != NULL) &&
|
|
Packit |
423ecb |
(cur->parent->name[0] != 'p')) /* p, pre, param */
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "\n");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlNodeDumpOutput:
|
|
Packit |
423ecb |
* @buf: the HTML buffer output
|
|
Packit |
423ecb |
* @doc: the document
|
|
Packit |
423ecb |
* @cur: the current node
|
|
Packit |
423ecb |
* @encoding: the encoding string
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML node, recursive behaviour,children are printed too,
|
|
Packit |
423ecb |
* and formatting returns/spaces are added.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
void
|
|
Packit |
423ecb |
htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|
Packit |
423ecb |
xmlNodePtr cur, const char *encoding) {
|
|
Packit |
423ecb |
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlDocContentDumpFormatOutput:
|
|
Packit |
423ecb |
* @buf: the HTML buffer output
|
|
Packit |
423ecb |
* @cur: the document
|
|
Packit |
423ecb |
* @encoding: the encoding string
|
|
Packit |
423ecb |
* @format: should formatting spaces been added
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML document.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
void
|
|
Packit |
423ecb |
htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
|
|
Packit |
423ecb |
const char *encoding, int format) {
|
|
Packit |
423ecb |
int type;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlInitParser();
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if ((buf == NULL) || (cur == NULL))
|
|
Packit |
423ecb |
return;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* force to output the stuff as HTML, especially for entities
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
type = cur->type;
|
|
Packit |
423ecb |
cur->type = XML_HTML_DOCUMENT_NODE;
|
|
Packit |
423ecb |
if (cur->intSubset != NULL) {
|
|
Packit |
423ecb |
htmlDtdDumpOutput(buf, cur, NULL);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
if (cur->children != NULL) {
|
|
Packit |
423ecb |
htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
xmlOutputBufferWriteString(buf, "\n");
|
|
Packit |
423ecb |
cur->type = (xmlElementType) type;
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlDocContentDumpOutput:
|
|
Packit |
423ecb |
* @buf: the HTML buffer output
|
|
Packit |
423ecb |
* @cur: the document
|
|
Packit |
423ecb |
* @encoding: the encoding string
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML document. Formating return/spaces are added.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
void
|
|
Packit |
423ecb |
htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
|
|
Packit |
423ecb |
const char *encoding) {
|
|
Packit |
423ecb |
htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/************************************************************************
 *									*
 *		Saving functions front-ends				*
 *									*
 ************************************************************************/
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlDocDump:
|
|
Packit |
423ecb |
* @f: the FILE*
|
|
Packit |
423ecb |
* @cur: the document
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML document to an open FILE.
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* returns: the number of byte written or -1 in case of failure.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
int
|
|
Packit |
423ecb |
htmlDocDump(FILE *f, xmlDocPtr cur) {
|
|
Packit |
423ecb |
xmlOutputBufferPtr buf;
|
|
Packit |
423ecb |
xmlCharEncodingHandlerPtr handler = NULL;
|
|
Packit |
423ecb |
const char *encoding;
|
|
Packit |
423ecb |
int ret;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlInitParser();
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if ((cur == NULL) || (f == NULL)) {
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
encoding = (const char *) htmlGetMetaEncoding(cur);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (encoding != NULL) {
|
|
Packit |
423ecb |
xmlCharEncoding enc;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
enc = xmlParseCharEncoding(encoding);
|
|
Packit |
423ecb |
if (enc != cur->charset) {
|
|
Packit |
423ecb |
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Not supported yet
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler(encoding);
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
|
Packit |
423ecb |
} else {
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler(encoding);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Fallback to HTML or ASCII when the encoding is unspecified
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler("HTML");
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler("ascii");
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
buf = xmlOutputBufferCreateFile(f, handler);
|
|
Packit |
423ecb |
if (buf == NULL) return(-1);
|
|
Packit |
423ecb |
htmlDocContentDumpOutput(buf, cur, NULL);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
ret = xmlOutputBufferClose(buf);
|
|
Packit |
423ecb |
return(ret);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlSaveFile:
|
|
Packit |
423ecb |
* @filename: the filename (or URL)
|
|
Packit |
423ecb |
* @cur: the document
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML document to a file. If @filename is "-" the stdout file is
|
|
Packit |
423ecb |
* used.
|
|
Packit |
423ecb |
* returns: the number of byte written or -1 in case of failure.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
int
|
|
Packit |
423ecb |
htmlSaveFile(const char *filename, xmlDocPtr cur) {
|
|
Packit |
423ecb |
xmlOutputBufferPtr buf;
|
|
Packit |
423ecb |
xmlCharEncodingHandlerPtr handler = NULL;
|
|
Packit |
423ecb |
const char *encoding;
|
|
Packit |
423ecb |
int ret;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if ((cur == NULL) || (filename == NULL))
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlInitParser();
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
encoding = (const char *) htmlGetMetaEncoding(cur);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (encoding != NULL) {
|
|
Packit |
423ecb |
xmlCharEncoding enc;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
enc = xmlParseCharEncoding(encoding);
|
|
Packit |
423ecb |
if (enc != cur->charset) {
|
|
Packit |
423ecb |
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Not supported yet
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler(encoding);
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Fallback to HTML or ASCII when the encoding is unspecified
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler("HTML");
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler("ascii");
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* save the content to a temp buffer.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
|
|
Packit |
423ecb |
if (buf == NULL) return(0);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
htmlDocContentDumpOutput(buf, cur, NULL);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
ret = xmlOutputBufferClose(buf);
|
|
Packit |
423ecb |
return(ret);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlSaveFileFormat:
|
|
Packit |
423ecb |
* @filename: the filename
|
|
Packit |
423ecb |
* @cur: the document
|
|
Packit |
423ecb |
* @format: should formatting spaces been added
|
|
Packit |
423ecb |
* @encoding: the document encoding
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML document to a file using a given encoding.
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* returns: the number of byte written or -1 in case of failure.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
int
|
|
Packit |
423ecb |
htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
|
|
Packit |
423ecb |
const char *encoding, int format) {
|
|
Packit |
423ecb |
xmlOutputBufferPtr buf;
|
|
Packit |
423ecb |
xmlCharEncodingHandlerPtr handler = NULL;
|
|
Packit |
423ecb |
int ret;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if ((cur == NULL) || (filename == NULL))
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlInitParser();
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
if (encoding != NULL) {
|
|
Packit |
423ecb |
xmlCharEncoding enc;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
enc = xmlParseCharEncoding(encoding);
|
|
Packit |
423ecb |
if (enc != cur->charset) {
|
|
Packit |
423ecb |
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Not supported yet
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
return(-1);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler(encoding);
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
|
|
Packit |
423ecb |
} else {
|
|
Packit |
423ecb |
htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* Fallback to HTML or ASCII when the encoding is unspecified
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler("HTML");
|
|
Packit |
423ecb |
if (handler == NULL)
|
|
Packit |
423ecb |
handler = xmlFindCharEncodingHandler("ascii");
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/*
|
|
Packit |
423ecb |
* save the content to a temp buffer.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
buf = xmlOutputBufferCreateFilename(filename, handler, 0);
|
|
Packit |
423ecb |
if (buf == NULL) return(0);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
ret = xmlOutputBufferClose(buf);
|
|
Packit |
423ecb |
return(ret);
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
/**
|
|
Packit |
423ecb |
* htmlSaveFileEnc:
|
|
Packit |
423ecb |
* @filename: the filename
|
|
Packit |
423ecb |
* @cur: the document
|
|
Packit |
423ecb |
* @encoding: the document encoding
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* Dump an HTML document to a file using a given encoding
|
|
Packit |
423ecb |
* and formatting returns/spaces are added.
|
|
Packit |
423ecb |
*
|
|
Packit |
423ecb |
* returns: the number of byte written or -1 in case of failure.
|
|
Packit |
423ecb |
*/
|
|
Packit |
423ecb |
int
|
|
Packit |
423ecb |
htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
|
|
Packit |
423ecb |
return(htmlSaveFileFormat(filename, cur, encoding, 1));
|
|
Packit |
423ecb |
}
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
#endif /* LIBXML_OUTPUT_ENABLED */
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
#define bottom_HTMLtree
|
|
Packit |
423ecb |
#include "elfgcchack.h"
|
|
Packit |
423ecb |
#endif /* LIBXML_HTML_ENABLED */
|