Blame doc/html/libxml-HTMLparser.html

Packit Service a31ea6
Packit Service a31ea6
Packit Service a31ea6
<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><link rel="SHORTCUT ICON" href="/favicon.ico" /><style type="text/css">
Packit Service a31ea6
TD {font-family: Verdana,Arial,Helvetica}
Packit Service a31ea6
BODY {font-family: Verdana,Arial,Helvetica; margin-top: 2em; margin-left: 0em; margin-right: 0em}
Packit Service a31ea6
H1 {font-family: Verdana,Arial,Helvetica}
Packit Service a31ea6
H2 {font-family: Verdana,Arial,Helvetica}
Packit Service a31ea6
H3 {font-family: Verdana,Arial,Helvetica}
Packit Service a31ea6
A:link, A:visited, A:active { text-decoration: underline }
Packit Service a31ea6
</style><style type="text/css">
Packit Service a31ea6
      div.deprecated pre.programlisting {border-style: double;border-color:red}
Packit Service a31ea6
      pre.programlisting {border-style: double;background: #EECFA1}
Packit Service a31ea6
    </style><title>Module HTMLparser from libxml2</title></head><body bgcolor="#8b7765" text="#000000" link="#a06060" vlink="#000000">
Action against software patentsGnome2 LogoW3C LogoRed Hat Logo
Made with Libxml2 Logo

Module HTMLparser from libxml2

<center>API Menu</center>
<form action="../search.php" enctype="application/x-www-form-urlencoded" method="get"><input name="query" type="text" size="20" value="" /><input name="submit" type="submit" value="Search ..." /></form>
<center>API Indexes</center>
<center>Related links</center>

This module implements an HTML 4.0 non-verifying parser with an API compatible with that of the XML parser. It should be able to parse "real world" HTML, even if severely broken from a specification point of view.

Table of Contents

#define htmlDefaultSubelement
#define htmlElementAllowedHereDesc
#define htmlRequiredAttrs
Typedef xmlDocPtr htmlDocPtr
Packit Service a31ea6
Structure htmlElemDesc
struct _htmlElemDesc
Packit Service a31ea6
Typedef htmlElemDesc * htmlElemDescPtr
Packit Service a31ea6
Structure htmlEntityDesc
struct _htmlEntityDesc
Packit Service a31ea6
Typedef htmlEntityDesc * htmlEntityDescPtr
Packit Service a31ea6
Typedef xmlNodePtr htmlNodePtr
Packit Service a31ea6
Typedef xmlParserCtxt htmlParserCtxt
Packit Service a31ea6
Typedef xmlParserCtxtPtr htmlParserCtxtPtr
Packit Service a31ea6
Typedef xmlParserInput htmlParserInput
Packit Service a31ea6
Typedef xmlParserInputPtr htmlParserInputPtr
Packit Service a31ea6
Typedef xmlParserNodeInfo htmlParserNodeInfo
Packit Service a31ea6
Enum htmlParserOption
Packit Service a31ea6
Typedef xmlSAXHandler htmlSAXHandler
Packit Service a31ea6
Typedef xmlSAXHandlerPtr htmlSAXHandlerPtr
Packit Service a31ea6
Enum htmlStatus
Packit Service a31ea6
int	UTF8ToHtml			(unsigned char * out, 
int * outlen,
const unsigned char * in,
int * inlen)
Packit Service a31ea6
htmlStatus	htmlAttrAllowed		(const htmlElemDesc * elt, 
const xmlChar * attr,
int legacy)
Packit Service a31ea6
int	htmlAutoCloseTag		(htmlDocPtr doc, 
const xmlChar * name,
htmlNodePtr elem)
Packit Service a31ea6
htmlParserCtxtPtr	htmlCreateMemoryParserCtxt	(const char * buffer, 
int size)
Packit Service a31ea6
htmlParserCtxtPtr	htmlCreatePushParserCtxt	(htmlSAXHandlerPtr sax, 
void * user_data,
const char * chunk,
int size,
const char * filename,
xmlCharEncoding enc)
Packit Service a31ea6
htmlDocPtr	htmlCtxtReadDoc		(htmlParserCtxtPtr ctxt, 
const xmlChar * cur,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6
htmlDocPtr	htmlCtxtReadFd		(htmlParserCtxtPtr ctxt, 
int fd,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6
htmlDocPtr	htmlCtxtReadFile	(htmlParserCtxtPtr ctxt, 
const char * filename,
const char * encoding,
int options)
Packit Service a31ea6
htmlDocPtr	htmlCtxtReadIO		(htmlParserCtxtPtr ctxt, 
xmlInputReadCallback ioread,
xmlInputCloseCallback ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6
htmlDocPtr	htmlCtxtReadMemory	(htmlParserCtxtPtr ctxt, 
const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6
void	htmlCtxtReset			(htmlParserCtxtPtr ctxt)
Packit Service a31ea6
int	htmlCtxtUseOptions		(htmlParserCtxtPtr ctxt, 
int options)
Packit Service a31ea6
int	htmlElementAllowedHere		(const htmlElemDesc * parent, 
const xmlChar * elt)
Packit Service a31ea6
htmlStatus	htmlElementStatusHere	(const htmlElemDesc * parent, 
const htmlElemDesc * elt)
Packit Service a31ea6
int	htmlEncodeEntities		(unsigned char * out, 
int * outlen,
const unsigned char * in,
int * inlen,
int quoteChar)
Packit Service a31ea6
const htmlEntityDesc *	htmlEntityLookup	(const xmlChar * name)
Packit Service a31ea6
const htmlEntityDesc *	htmlEntityValueLookup	(unsigned int value)
Packit Service a31ea6
void	htmlFreeParserCtxt		(htmlParserCtxtPtr ctxt)
Packit Service a31ea6
int	htmlHandleOmittedElem		(int val)
Packit Service a31ea6
int	htmlIsAutoClosed		(htmlDocPtr doc, 
htmlNodePtr elem)
Packit Service a31ea6
int	htmlIsScriptAttribute		(const xmlChar * name)
Packit Service a31ea6
htmlParserCtxtPtr	htmlNewParserCtxt	(void)
Packit Service a31ea6
htmlStatus	htmlNodeStatus		(const htmlNodePtr node, 
int legacy)
Packit Service a31ea6
int	htmlParseCharRef		(htmlParserCtxtPtr ctxt)
Packit Service a31ea6
int	htmlParseChunk			(htmlParserCtxtPtr ctxt, 
const char * chunk,
int size,
int terminate)
Packit Service a31ea6
htmlDocPtr	htmlParseDoc		(const xmlChar * cur, 
const char * encoding)
Packit Service a31ea6
int	htmlParseDocument		(htmlParserCtxtPtr ctxt)
Packit Service a31ea6
void	htmlParseElement		(htmlParserCtxtPtr ctxt)
Packit Service a31ea6
const htmlEntityDesc *	htmlParseEntityRef	(htmlParserCtxtPtr ctxt, 
const xmlChar ** str)
Packit Service a31ea6
htmlDocPtr	htmlParseFile		(const char * filename, 
const char * encoding)
Packit Service a31ea6
htmlDocPtr	htmlReadDoc		(const xmlChar * cur, 
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6
htmlDocPtr	htmlReadFd		(int fd, 
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6
htmlDocPtr	htmlReadFile		(const char * filename, 
const char * encoding,
int options)
Packit Service a31ea6
htmlDocPtr	htmlReadIO		(xmlInputReadCallback ioread, 
xmlInputCloseCallback ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6
htmlDocPtr	htmlReadMemory		(const char * buffer, 
int size,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6
htmlDocPtr	htmlSAXParseDoc		(const xmlChar * cur, 
const char * encoding,
htmlSAXHandlerPtr sax,
void * userData)
Packit Service a31ea6
htmlDocPtr	htmlSAXParseFile	(const char * filename, 
const char * encoding,
htmlSAXHandlerPtr sax,
void * userData)
Packit Service a31ea6
const htmlElemDesc *	htmlTagLookup	(const xmlChar * tag)
Packit Service a31ea6

Description

Packit Service a31ea6

Macro: htmlDefaultSubelement

#define htmlDefaultSubelement

Returns the default subelement for this element

Packit Service a31ea6

Macro: htmlElementAllowedHereDesc

#define htmlElementAllowedHereDesc

Checks whether an HTML element description may be a direct child of the specified element. Returns 1 if allowed; 0 otherwise.

Packit Service a31ea6

Macro: htmlRequiredAttrs

#define htmlRequiredAttrs

Returns the attributes required for the specified element.

Packit Service a31ea6

Structure htmlElemDesc

Structure htmlElemDesc
struct _htmlElemDesc {
Packit Service a31ea6
    const char *	name	: The tag name
Packit Service a31ea6
    char	startTag	: Whether the start tag can be implied
Packit Service a31ea6
    char	endTag	: Whether the end tag can be implied
Packit Service a31ea6
    char	saveEndTag	: Whether the end tag should be saved
Packit Service a31ea6
    char	empty	: Is this an empty element ?
Packit Service a31ea6
    char	depr	: Is this a deprecated element ?
Packit Service a31ea6
    char	dtd	: 1: only in Loose DTD, 2: only Frameset
Packit Service a31ea6
    char	isinline	: is this a block 0 or inline 1 element
Packit Service a31ea6
    const char *	desc	: the description. NRK Jan.2003: new fields follow
Packit Service a31ea6
    const char **	subelts	: allowed sub-elements of this element
Packit Service a31ea6
    const char *	defaultsubelt	: subelement for suggested auto-repair if necessary, or NULL
Packit Service a31ea6
    const char **	attrs_opt	: Optional Attributes
Packit Service a31ea6
    const char **	attrs_depr	: Additional deprecated attributes
Packit Service a31ea6
    const char **	attrs_req	: Required attributes
Packit Service a31ea6
}

Structure htmlEntityDesc

Structure htmlEntityDesc
struct _htmlEntityDesc {
Packit Service a31ea6
    unsigned int	value	: the UNICODE value for the character
Packit Service a31ea6
    const char *	name	: The entity name
Packit Service a31ea6
    const char *	desc	: the description
Packit Service a31ea6
}

Enum htmlParserOption

Enum htmlParserOption {
Packit Service a31ea6
    HTML_PARSE_RECOVER = 1 : Relaxed parsing
Packit Service a31ea6
    HTML_PARSE_NODEFDTD = 4 : do not default a doctype if not found
Packit Service a31ea6
    HTML_PARSE_NOERROR = 32 : suppress error reports
Packit Service a31ea6
    HTML_PARSE_NOWARNING = 64 : suppress warning reports
Packit Service a31ea6
    HTML_PARSE_PEDANTIC = 128 : pedantic error reporting
Packit Service a31ea6
    HTML_PARSE_NOBLANKS = 256 : remove blank nodes
Packit Service a31ea6
    HTML_PARSE_NONET = 2048 : Forbid network access
Packit Service a31ea6
    HTML_PARSE_NOIMPLIED = 8192 : Do not add implied html/body... elements
Packit Service a31ea6
    HTML_PARSE_COMPACT = 65536 : compact small text nodes
Packit Service a31ea6
    HTML_PARSE_IGNORE_ENC = 2097152 : ignore internal document encoding hint
Packit Service a31ea6
}
Packit Service a31ea6

Enum htmlStatus

Enum htmlStatus {
Packit Service a31ea6
    HTML_NA = 0 : something we don't check at all
Packit Service a31ea6
    HTML_INVALID = 1
Packit Service a31ea6
    HTML_DEPRECATED = 2
Packit Service a31ea6
    HTML_VALID = 4
Packit Service a31ea6
    HTML_REQUIRED = 12 : VALID bit set so ( & HTML_VALID ) is TRUE
Packit Service a31ea6
}
Packit Service a31ea6

Function: UTF8ToHtml

int	UTF8ToHtml			(unsigned char * out, 
int * outlen,
const unsigned char * in,
int * inlen)
Packit Service a31ea6

Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.

Packit Service a31ea6
<tt>out</tt>:a pointer to an array of bytes to store the result
<tt>outlen</tt>:the length of @out
<tt>in</tt>:a pointer to an array of UTF-8 chars
<tt>inlen</tt>:the length of @in
<tt>Returns</tt>:0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Function: htmlAttrAllowed

htmlStatus	htmlAttrAllowed		(const htmlElemDesc * elt, 
const xmlChar * attr,
int legacy)
Packit Service a31ea6

Checks whether an attribute is valid for an element. Has full knowledge of Required and Deprecated attributes.

Packit Service a31ea6
<tt>elt</tt>:HTML element
<tt>attr</tt>:HTML attribute
<tt>legacy</tt>:whether to allow deprecated attributes
<tt>Returns</tt>:one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID

Function: htmlAutoCloseTag

int	htmlAutoCloseTag		(htmlDocPtr doc, 
const xmlChar * name,
htmlNodePtr elem)
Packit Service a31ea6

The HTML DTD allows a tag to implicitly close other tags. The list is kept in the htmlStartClose array. This function checks if the element or one of its children would autoclose the given tag.

Packit Service a31ea6
<tt>doc</tt>:the HTML document
<tt>name</tt>:The tag name
<tt>elem</tt>:the HTML element
<tt>Returns</tt>:1 if autoclose, 0 otherwise

Function: htmlCreateMemoryParserCtxt

htmlParserCtxtPtr	htmlCreateMemoryParserCtxt	(const char * buffer, 
int size)
Packit Service a31ea6

Create a parser context for an HTML in-memory document.

Packit Service a31ea6
<tt>buffer</tt>:a pointer to a char array
<tt>size</tt>:the size of the array
<tt>Returns</tt>:the new parser context or NULL

Function: htmlCreatePushParserCtxt

htmlParserCtxtPtr	htmlCreatePushParserCtxt	(htmlSAXHandlerPtr sax, 
void * user_data,
const char * chunk,
int size,
const char * filename,
xmlCharEncoding enc)
Packit Service a31ea6

Create a parser context for using the HTML parser in push mode. The value of @filename is used for fetching external entities and for error/warning reports.

Packit Service a31ea6
<tt>sax</tt>:a SAX handler
<tt>user_data</tt>:The user data returned on SAX callbacks
<tt>chunk</tt>:a pointer to an array of chars
<tt>size</tt>:number of chars in the array
<tt>filename</tt>:an optional file name or URI
<tt>enc</tt>:an optional encoding
<tt>Returns</tt>:the new parser context or NULL

Function: htmlCtxtReadDoc

htmlDocPtr	htmlCtxtReadDoc		(htmlParserCtxtPtr ctxt, 
const xmlChar * cur,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6

Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context
<tt>cur</tt>:a pointer to a zero terminated string
<tt>URL</tt>:the base URL to use for the document
<tt>encoding</tt>:the document encoding, or NULL
<tt>options</tt>:a combination of htmlParserOption(s)
<tt>Returns</tt>:the resulting document tree

Function: htmlCtxtReadFd

htmlDocPtr	htmlCtxtReadFd		(htmlParserCtxtPtr ctxt, 
int fd,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6

Parse an HTML document from a file descriptor and build a tree. This reuses the existing @ctxt parser context.

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context
<tt>fd</tt>:an open file descriptor
<tt>URL</tt>:the base URL to use for the document
<tt>encoding</tt>:the document encoding, or NULL
<tt>options</tt>:a combination of htmlParserOption(s)
<tt>Returns</tt>:the resulting document tree

Function: htmlCtxtReadFile

htmlDocPtr	htmlCtxtReadFile	(htmlParserCtxtPtr ctxt, 
const char * filename,
const char * encoding,
int options)
Packit Service a31ea6

Parse an HTML file from the filesystem or the network. This reuses the existing @ctxt parser context.

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context
<tt>filename</tt>:a file or URL
<tt>encoding</tt>:the document encoding, or NULL
<tt>options</tt>:a combination of htmlParserOption(s)
<tt>Returns</tt>:the resulting document tree

Function: htmlCtxtReadIO

htmlDocPtr	htmlCtxtReadIO		(htmlParserCtxtPtr ctxt, 
xmlInputReadCallback ioread,
xmlInputCloseCallback ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6

parse an HTML document from I/O functions and source and build a tree. This reuses the existing @ctxt parser context

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context
<tt>ioread</tt>:an I/O read function
<tt>ioclose</tt>:an I/O close function
<tt>ioctx</tt>:an I/O handler
<tt>URL</tt>:the base URL to use for the document
<tt>encoding</tt>:the document encoding, or NULL
<tt>options</tt>:a combination of htmlParserOption(s)
<tt>Returns</tt>:the resulting document tree

Function: htmlCtxtReadMemory

htmlDocPtr	htmlCtxtReadMemory	(htmlParserCtxtPtr ctxt, 
const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6

Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context
<tt>buffer</tt>:a pointer to a char array
<tt>size</tt>:the size of the array
<tt>URL</tt>:the base URL to use for the document
<tt>encoding</tt>:the document encoding, or NULL
<tt>options</tt>:a combination of htmlParserOption(s)
<tt>Returns</tt>:the resulting document tree

Function: htmlCtxtReset

void	htmlCtxtReset			(htmlParserCtxtPtr ctxt)
Packit Service a31ea6

Reset a parser context

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context

Function: htmlCtxtUseOptions

int	htmlCtxtUseOptions		(htmlParserCtxtPtr ctxt, 
int options)
Packit Service a31ea6

Applies the options to the parser context

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context
<tt>options</tt>:a combination of htmlParserOption(s)
<tt>Returns</tt>:0 in case of success, the set of unknown or unimplemented options in case of error.

Function: htmlElementAllowedHere

int	htmlElementAllowedHere		(const htmlElemDesc * parent, 
const xmlChar * elt)
Packit Service a31ea6

Checks whether an HTML element may be a direct child of a parent element. Note - doesn't check for deprecated elements

Packit Service a31ea6
<tt>parent</tt>:HTML parent element
<tt>elt</tt>:HTML element
<tt>Returns</tt>:1 if allowed; 0 otherwise.

Function: htmlElementStatusHere

htmlStatus	htmlElementStatusHere	(const htmlElemDesc * parent, 
const htmlElemDesc * elt)
Packit Service a31ea6

Checks whether an HTML element may be a direct child of a parent element, and if so whether it is valid or deprecated.

Packit Service a31ea6
<tt>parent</tt>:HTML parent element
<tt>elt</tt>:HTML element
<tt>Returns</tt>:one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID

Function: htmlEncodeEntities

int	htmlEncodeEntities		(unsigned char * out, 
int * outlen,
const unsigned char * in,
int * inlen,
int quoteChar)
Packit Service a31ea6

Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.

Packit Service a31ea6
<tt>out</tt>:a pointer to an array of bytes to store the result
<tt>outlen</tt>:the length of @out
<tt>in</tt>:a pointer to an array of UTF-8 chars
<tt>inlen</tt>:the length of @in
<tt>quoteChar</tt>:the quote character to escape (' or ") or zero.
<tt>Returns</tt>:0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Function: htmlEntityLookup

const htmlEntityDesc *	htmlEntityLookup	(const xmlChar * name)
Packit Service a31ea6

Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly; a hash table is really needed.

Packit Service a31ea6
<tt>name</tt>:the entity name
<tt>Returns</tt>:the associated htmlEntityDescPtr if found, NULL otherwise.

Function: htmlEntityValueLookup

const htmlEntityDesc *	htmlEntityValueLookup	(unsigned int value)
Packit Service a31ea6

Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly; a hash table is really needed.

Packit Service a31ea6
<tt>value</tt>:the entity's unicode value
<tt>Returns</tt>:the associated htmlEntityDescPtr if found, NULL otherwise.

Function: htmlFreeParserCtxt

void	htmlFreeParserCtxt		(htmlParserCtxtPtr ctxt)
Packit Service a31ea6

Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context

Function: htmlHandleOmittedElem

int	htmlHandleOmittedElem		(int val)
Packit Service a31ea6

Set and return the previous value for handling HTML omitted tags.

Packit Service a31ea6
<tt>val</tt>:int 0 or 1
<tt>Returns</tt>:the previous value: 0 for no handling, 1 for auto insertion.

Function: htmlIsAutoClosed

int	htmlIsAutoClosed		(htmlDocPtr doc, 
htmlNodePtr elem)
Packit Service a31ea6

The HTML DTD allows a tag to implicitly close other tags. The list is kept in the htmlStartClose array. This function checks if a tag is autoclosed by one of its children.

Packit Service a31ea6
<tt>doc</tt>:the HTML document
<tt>elem</tt>:the HTML element
<tt>Returns</tt>:1 if autoclosed, 0 otherwise

Function: htmlIsScriptAttribute

int	htmlIsScriptAttribute		(const xmlChar * name)
Packit Service a31ea6

Check if an attribute is of content type Script

Packit Service a31ea6
<tt>name</tt>:an attribute name
<tt>Returns</tt>:1 if the attribute is a script, 0 otherwise

Function: htmlNewParserCtxt

htmlParserCtxtPtr	htmlNewParserCtxt	(void)
Packit Service a31ea6

Allocate and initialize a new parser context.

Packit Service a31ea6
<tt>Returns</tt>:the htmlParserCtxtPtr or NULL in case of allocation error

Function: htmlNodeStatus

htmlStatus	htmlNodeStatus		(const htmlNodePtr node, 
int legacy)
Packit Service a31ea6

Checks whether the tree node is valid. Experimental (the author only uses the HTML enhancements in a SAX parser)

Packit Service a31ea6
<tt>node</tt>:an htmlNodePtr in a tree
<tt>legacy</tt>:whether to allow deprecated elements (YES is faster here for Element nodes)
<tt>Returns</tt>:for Element nodes, a return from htmlElementAllowedHere (if legacy allowed) or htmlElementStatusHere (otherwise); for Attribute nodes, a return from htmlAttrAllowed; for other nodes, HTML_NA (no checks performed)

Function: htmlParseCharRef

int	htmlParseCharRef		(htmlParserCtxtPtr ctxt)
Packit Service a31ea6

parse Reference declarations [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context
<tt>Returns</tt>:the value parsed (as an int)

Function: htmlParseChunk

int	htmlParseChunk			(htmlParserCtxtPtr ctxt, 
const char * chunk,
int size,
int terminate)
Packit Service a31ea6

Parse a Chunk of memory

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context
<tt>chunk</tt>:a char array
<tt>size</tt>:the size in bytes of the chunk
<tt>terminate</tt>:last chunk indicator
<tt>Returns</tt>:zero if no error, the xmlParserErrors otherwise.

Function: htmlParseDoc

htmlDocPtr	htmlParseDoc		(const xmlChar * cur, 
const char * encoding)
Packit Service a31ea6

parse an HTML in-memory document and build a tree.

Packit Service a31ea6
<tt>cur</tt>:a pointer to an array of xmlChar
<tt>encoding</tt>:a free form C string describing the HTML document encoding, or NULL
<tt>Returns</tt>:the resulting document tree

Function: htmlParseDocument

int	htmlParseDocument		(htmlParserCtxtPtr ctxt)
Packit Service a31ea6

parse an HTML document (and build a tree if using the standard SAX interface).

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context
<tt>Returns</tt>:0 on success, -1 in case of error. The parser context is augmented as a result of the parsing.

Function: htmlParseElement

void	htmlParseElement		(htmlParserCtxtPtr ctxt)
Packit Service a31ea6

Parse an HTML element; this is highly recursive. This is kept for compatibility with previous code versions. [39] element ::= EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq AttValue

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context

Function: htmlParseEntityRef

const htmlEntityDesc *	htmlParseEntityRef	(htmlParserCtxtPtr ctxt, 
const xmlChar ** str)
Packit Service a31ea6

Parse an HTML entity reference. [68] EntityRef ::= '&' Name ';'

Packit Service a31ea6
<tt>ctxt</tt>:an HTML parser context
<tt>str</tt>:location to store the entity name
<tt>Returns</tt>:the associated htmlEntityDescPtr if found, or NULL otherwise. If non-NULL, *str will have to be freed by the caller.

Function: htmlParseFile

htmlDocPtr	htmlParseFile		(const char * filename, 
const char * encoding)
Packit Service a31ea6

parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.

Packit Service a31ea6
<tt>filename</tt>:the filename
<tt>encoding</tt>:a free form C string describing the HTML document encoding, or NULL
<tt>Returns</tt>:the resulting document tree

Function: htmlReadDoc

htmlDocPtr	htmlReadDoc		(const xmlChar * cur, 
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6

Parse an HTML in-memory document and build a tree.

Packit Service a31ea6
<tt>cur</tt>:a pointer to a zero terminated string
<tt>URL</tt>:the base URL to use for the document
<tt>encoding</tt>:the document encoding, or NULL
<tt>options</tt>:a combination of htmlParserOption(s)
<tt>Returns</tt>:the resulting document tree

Function: htmlReadFd

htmlDocPtr	htmlReadFd		(int fd, 
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6

Parse an HTML document from a file descriptor and build a tree.

Packit Service a31ea6
<tt>fd</tt>:an open file descriptor
<tt>URL</tt>:the base URL to use for the document
<tt>encoding</tt>:the document encoding, or NULL
<tt>options</tt>:a combination of htmlParserOption(s)
<tt>Returns</tt>:the resulting document tree

Function: htmlReadFile

htmlDocPtr	htmlReadFile		(const char * filename, 
const char * encoding,
int options)
Packit Service a31ea6

Parse an HTML file from the filesystem or the network.

Packit Service a31ea6
<tt>filename</tt>:a file or URL
<tt>encoding</tt>:the document encoding, or NULL
<tt>options</tt>:a combination of htmlParserOption(s)
<tt>Returns</tt>:the resulting document tree

Function: htmlReadIO

htmlDocPtr	htmlReadIO		(xmlInputReadCallback ioread, 
xmlInputCloseCallback ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6

parse an HTML document from I/O functions and source and build a tree.

Packit Service a31ea6
<tt>ioread</tt>:an I/O read function
<tt>ioclose</tt>:an I/O close function
<tt>ioctx</tt>:an I/O handler
<tt>URL</tt>:the base URL to use for the document
<tt>encoding</tt>:the document encoding, or NULL
<tt>options</tt>:a combination of htmlParserOption(s)
<tt>Returns</tt>:the resulting document tree

Function: htmlReadMemory

htmlDocPtr	htmlReadMemory		(const char * buffer, 
int size,
const char * URL,
const char * encoding,
int options)
Packit Service a31ea6

Parse an HTML in-memory document and build a tree.

Packit Service a31ea6
<tt>buffer</tt>:a pointer to a char array
<tt>size</tt>:the size of the array
<tt>URL</tt>:the base URL to use for the document
<tt>encoding</tt>:the document encoding, or NULL
<tt>options</tt>:a combination of htmlParserOption(s)
<tt>Returns</tt>:the resulting document tree

Function: htmlSAXParseDoc

htmlDocPtr	htmlSAXParseDoc		(const xmlChar * cur, 
const char * encoding,
htmlSAXHandlerPtr sax,
void * userData)
Packit Service a31ea6

Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fall back to the default DOM behavior and return a tree.

Packit Service a31ea6
<tt>cur</tt>:a pointer to an array of xmlChar
<tt>encoding</tt>:a free form C string describing the HTML document encoding, or NULL
<tt>sax</tt>:the SAX handler block
<tt>userData</tt>:if using SAX, this pointer will be provided on callbacks.
<tt>Returns</tt>:the resulting document tree unless sax is not NULL (the SAX callbacks then handle the parse events) or the document is not well formed.

Function: htmlSAXParseFile

htmlDocPtr	htmlSAXParseFile	(const char * filename, 
const char * encoding,
htmlSAXHandlerPtr sax,
void * userData)
Packit Service a31ea6

Parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed documents is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, fall back to the default DOM tree building routines.

Packit Service a31ea6
<tt>filename</tt>:the filename
<tt>encoding</tt>:a free form C string describing the HTML document encoding, or NULL
<tt>sax</tt>:the SAX handler block
<tt>userData</tt>:if using SAX, this pointer will be provided on callbacks.
<tt>Returns</tt>:the resulting document tree unless sax is not NULL (the given SAX function block then handles the parsing callbacks) or the document is not well formed.

Function: htmlTagLookup

const htmlElemDesc *	htmlTagLookup	(const xmlChar * tag)
Packit Service a31ea6

Lookup the HTML tag in the ElementTable

Packit Service a31ea6
<tt>tag</tt>:The tag name in lowercase
<tt>Returns</tt>:the related htmlElemDescPtr or NULL if not found.

Daniel Veillard

</body></html>