Blame testHTML.c

Packit 423ecb
/*
 * testHTML.c : a small tester program for HTML input.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
Packit 423ecb
Packit 423ecb
#include "libxml.h"
Packit 423ecb
Packit 423ecb
#ifdef LIBXML_HTML_ENABLED
Packit 423ecb
Packit 423ecb
#include <string.h>
Packit 423ecb
#include <stdarg.h>
Packit 423ecb
Packit 423ecb
Packit 423ecb
#ifdef HAVE_SYS_TYPES_H
Packit 423ecb
#include <sys/types.h>
Packit 423ecb
#endif
Packit 423ecb
#ifdef HAVE_SYS_STAT_H
Packit 423ecb
#include <sys/stat.h>
Packit 423ecb
#endif
Packit 423ecb
#ifdef HAVE_FCNTL_H
Packit 423ecb
#include <fcntl.h>
Packit 423ecb
#endif
Packit 423ecb
#ifdef HAVE_UNISTD_H
Packit 423ecb
#include <unistd.h>
Packit 423ecb
#endif
Packit 423ecb
#ifdef HAVE_STDLIB_H
Packit 423ecb
#include <stdlib.h>
Packit 423ecb
#endif
Packit 423ecb
Packit 423ecb
#include <libxml/xmlmemory.h>
Packit 423ecb
#include <libxml/HTMLparser.h>
Packit 423ecb
#include <libxml/HTMLtree.h>
Packit 423ecb
#include <libxml/debugXML.h>
Packit 423ecb
#include <libxml/xmlerror.h>
Packit 423ecb
#include <libxml/globals.h>
Packit 423ecb
Packit 423ecb
/*
 * Command-line state. Each flag is incremented by main() every time the
 * matching option is seen on the command line.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;		/* --debug: dump the tree with xmlDebugDumpDocument() */
#endif
static int copy = 0;		/* --copy: round-trip the tree through xmlCopyDoc() */
static int sax = 0;		/* --sax: run the SAX callback tests instead of building a tree */
static int repeat = 0;		/* --repeat: each file is processed 100 * repeat times */
static int noout = 0;		/* --noout: suppress the printed result */
#ifdef LIBXML_PUSH_ENABLED
static int push = 0;		/* --push: feed the file through the push parser */
#endif /* LIBXML_PUSH_ENABLED */
static char *encoding = NULL;	/* --encode argument, handed to htmlSaveFileEnc() */
static int options = 0;		/* parser options handed to htmlReadFile() */
Packit 423ecb
Packit 423ecb
static xmlSAXHandler emptySAXHandlerStruct = {
Packit 423ecb
    NULL, /* internalSubset */
Packit 423ecb
    NULL, /* isStandalone */
Packit 423ecb
    NULL, /* hasInternalSubset */
Packit 423ecb
    NULL, /* hasExternalSubset */
Packit 423ecb
    NULL, /* resolveEntity */
Packit 423ecb
    NULL, /* getEntity */
Packit 423ecb
    NULL, /* entityDecl */
Packit 423ecb
    NULL, /* notationDecl */
Packit 423ecb
    NULL, /* attributeDecl */
Packit 423ecb
    NULL, /* elementDecl */
Packit 423ecb
    NULL, /* unparsedEntityDecl */
Packit 423ecb
    NULL, /* setDocumentLocator */
Packit 423ecb
    NULL, /* startDocument */
Packit 423ecb
    NULL, /* endDocument */
Packit 423ecb
    NULL, /* startElement */
Packit 423ecb
    NULL, /* endElement */
Packit 423ecb
    NULL, /* reference */
Packit 423ecb
    NULL, /* characters */
Packit 423ecb
    NULL, /* ignorableWhitespace */
Packit 423ecb
    NULL, /* processingInstruction */
Packit 423ecb
    NULL, /* comment */
Packit 423ecb
    NULL, /* xmlParserWarning */
Packit 423ecb
    NULL, /* xmlParserError */
Packit 423ecb
    NULL, /* xmlParserError */
Packit 423ecb
    NULL, /* getParameterEntity */
Packit 423ecb
    NULL, /* cdataBlock */
Packit 423ecb
    NULL, /* externalSubset */
Packit 423ecb
    1,    /* initialized */
Packit 423ecb
    NULL, /* private */
Packit 423ecb
    NULL, /* startElementNsSAX2Func */
Packit 423ecb
    NULL, /* endElementNsSAX2Func */
Packit 423ecb
    NULL  /* xmlStructuredErrorFunc */
Packit 423ecb
};
Packit 423ecb
Packit 423ecb
static xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
Packit 423ecb
extern xmlSAXHandlerPtr debugSAXHandler;
Packit 423ecb
Packit 423ecb
/************************************************************************
 *									*
 *				Debug Handlers				*
 *									*
 ************************************************************************/
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * isStandaloneDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 *
Packit 423ecb
 * Is this document tagged standalone ?
Packit 423ecb
 *
Packit 423ecb
 * Returns 1 if true
Packit 423ecb
 */
Packit 423ecb
static int
Packit 423ecb
isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.isStandalone()\n");
Packit 423ecb
    return(0);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * hasInternalSubsetDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 *
Packit 423ecb
 * Does this document has an internal subset
Packit 423ecb
 *
Packit 423ecb
 * Returns 1 if true
Packit 423ecb
 */
Packit 423ecb
static int
Packit 423ecb
hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.hasInternalSubset()\n");
Packit 423ecb
    return(0);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * hasExternalSubsetDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 *
Packit 423ecb
 * Does this document has an external subset
Packit 423ecb
 *
Packit 423ecb
 * Returns 1 if true
Packit 423ecb
 */
Packit 423ecb
static int
Packit 423ecb
hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.hasExternalSubset()\n");
Packit 423ecb
    return(0);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * hasInternalSubsetDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 *
Packit 423ecb
 * Does this document has an internal subset
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Packit 423ecb
	       const xmlChar *ExternalID, const xmlChar *SystemID)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.internalSubset(%s,", name);
Packit 423ecb
    if (ExternalID == NULL)
Packit 423ecb
	fprintf(stdout, " ,");
Packit 423ecb
    else
Packit 423ecb
	fprintf(stdout, " %s,", ExternalID);
Packit 423ecb
    if (SystemID == NULL)
Packit 423ecb
	fprintf(stdout, " )\n");
Packit 423ecb
    else
Packit 423ecb
	fprintf(stdout, " %s)\n", SystemID);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * resolveEntityDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @publicId: The public ID of the entity
Packit 423ecb
 * @systemId: The system ID of the entity
Packit 423ecb
 *
Packit 423ecb
 * Special entity resolver, better left to the parser, it has
Packit 423ecb
 * more context than the application layer.
Packit 423ecb
 * The default behaviour is to NOT resolve the entities, in that case
Packit 423ecb
 * the ENTITY_REF nodes are built in the structure (and the parameter
Packit 423ecb
 * values).
Packit 423ecb
 *
Packit 423ecb
 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
Packit 423ecb
 */
Packit 423ecb
static xmlParserInputPtr
Packit 423ecb
resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Packit 423ecb
{
Packit 423ecb
    /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
Packit 423ecb
Packit 423ecb
Packit 423ecb
    fprintf(stdout, "SAX.resolveEntity(");
Packit 423ecb
    if (publicId != NULL)
Packit 423ecb
	fprintf(stdout, "%s", (char *)publicId);
Packit 423ecb
    else
Packit 423ecb
	fprintf(stdout, " ");
Packit 423ecb
    if (systemId != NULL)
Packit 423ecb
	fprintf(stdout, ", %s)\n", (char *)systemId);
Packit 423ecb
    else
Packit 423ecb
	fprintf(stdout, ", )\n");
Packit 423ecb
/*********
Packit 423ecb
    if (systemId != NULL) {
Packit 423ecb
        return(xmlNewInputFromFile(ctxt, (char *) systemId));
Packit 423ecb
    }
Packit 423ecb
 *********/
Packit 423ecb
    return(NULL);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * getEntityDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @name: The entity name
Packit 423ecb
 *
Packit 423ecb
 * Get an entity by name
Packit 423ecb
 *
Packit 423ecb
 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
Packit 423ecb
 */
Packit 423ecb
static xmlEntityPtr
Packit 423ecb
getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.getEntity(%s)\n", name);
Packit 423ecb
    return(NULL);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * getParameterEntityDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @name: The entity name
Packit 423ecb
 *
Packit 423ecb
 * Get a parameter entity by name
Packit 423ecb
 *
Packit 423ecb
 * Returns the xmlParserInputPtr
Packit 423ecb
 */
Packit 423ecb
static xmlEntityPtr
Packit 423ecb
getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
Packit 423ecb
    return(NULL);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * entityDeclDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @name:  the entity name
Packit 423ecb
 * @type:  the entity type
Packit 423ecb
 * @publicId: The public ID of the entity
Packit 423ecb
 * @systemId: The system ID of the entity
Packit 423ecb
 * @content: the entity value (without processing).
Packit 423ecb
 *
Packit 423ecb
 * An entity definition has been parsed
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Packit 423ecb
          const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
Packit 423ecb
            name, type, publicId, systemId, content);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * attributeDeclDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @name:  the attribute name
Packit 423ecb
 * @type:  the attribute type
Packit 423ecb
 *
Packit 423ecb
 * An attribute definition has been parsed
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
Packit 423ecb
              int type, int def, const xmlChar *defaultValue,
Packit 423ecb
	      xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
Packit 423ecb
            elem, name, type, def, defaultValue);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * elementDeclDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @name:  the element name
Packit 423ecb
 * @type:  the element type
Packit 423ecb
 * @content: the element value (without processing).
Packit 423ecb
 *
Packit 423ecb
 * An element definition has been parsed
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Packit 423ecb
	    xmlElementContentPtr content ATTRIBUTE_UNUSED)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
Packit 423ecb
            name, type);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * notationDeclDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @name: The name of the notation
Packit 423ecb
 * @publicId: The public ID of the entity
Packit 423ecb
 * @systemId: The system ID of the entity
Packit 423ecb
 *
Packit 423ecb
 * What to do when a notation declaration has been parsed.
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Packit 423ecb
	     const xmlChar *publicId, const xmlChar *systemId)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
Packit 423ecb
            (char *) name, (char *) publicId, (char *) systemId);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * unparsedEntityDeclDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @name: The name of the entity
Packit 423ecb
 * @publicId: The public ID of the entity
Packit 423ecb
 * @systemId: The system ID of the entity
Packit 423ecb
 * @notationName: the name of the notation
Packit 423ecb
 *
Packit 423ecb
 * What to do when an unparsed entity declaration is parsed
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Packit 423ecb
		   const xmlChar *publicId, const xmlChar *systemId,
Packit 423ecb
		   const xmlChar *notationName)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
Packit 423ecb
            (char *) name, (char *) publicId, (char *) systemId,
Packit 423ecb
	    (char *) notationName);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * setDocumentLocatorDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @loc: A SAX Locator
Packit 423ecb
 *
Packit 423ecb
 * Receive the document locator at startup, actually xmlDefaultSAXLocator
Packit 423ecb
 * Everything is available on the context, so this is useless in our case.
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.setDocumentLocator()\n");
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * startDocumentDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 *
Packit 423ecb
 * called when the document start being processed.
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.startDocument()\n");
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * endDocumentDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 *
Packit 423ecb
 * called when the document end has been detected.
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.endDocument()\n");
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * startElementDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @name:  The element name
Packit 423ecb
 *
Packit 423ecb
 * called when an opening tag has been processed.
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
Packit 423ecb
{
Packit 423ecb
    int i;
Packit 423ecb
Packit 423ecb
    fprintf(stdout, "SAX.startElement(%s", (char *) name);
Packit 423ecb
    if (atts != NULL) {
Packit 423ecb
        for (i = 0;(atts[i] != NULL);i++) {
Packit 423ecb
	    fprintf(stdout, ", %s", atts[i++]);
Packit 423ecb
	    if (atts[i] != NULL) {
Packit 423ecb
		unsigned char output[40];
Packit 423ecb
		const unsigned char *att = atts[i];
Packit 423ecb
		int outlen, attlen;
Packit 423ecb
	        fprintf(stdout, "='");
Packit 423ecb
		while ((attlen = strlen((char*)att)) > 0) {
Packit 423ecb
		    outlen = sizeof output - 1;
Packit 423ecb
		    htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
Packit 423ecb
		    output[outlen] = 0;
Packit 423ecb
		    fprintf(stdout, "%s", (char *) output);
Packit 423ecb
		    att += attlen;
Packit 423ecb
		}
Packit 423ecb
		fprintf(stdout, "'");
Packit 423ecb
	    }
Packit 423ecb
	}
Packit 423ecb
    }
Packit 423ecb
    fprintf(stdout, ")\n");
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * endElementDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @name:  The element name
Packit 423ecb
 *
Packit 423ecb
 * called when the end of an element has been detected.
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * charactersDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @ch:  a xmlChar string
Packit 423ecb
 * @len: the number of xmlChar
Packit 423ecb
 *
Packit 423ecb
 * receiving some chars from the parser.
Packit 423ecb
 * Question: how much at a time ???
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Packit 423ecb
{
Packit 423ecb
    unsigned char output[40];
Packit 423ecb
    int inlen = len, outlen = 30;
Packit 423ecb
Packit 423ecb
    htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Packit 423ecb
    output[outlen] = 0;
Packit 423ecb
Packit 423ecb
    fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * cdataDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @ch:  a xmlChar string
Packit 423ecb
 * @len: the number of xmlChar
Packit 423ecb
 *
Packit 423ecb
 * receiving some cdata chars from the parser.
Packit 423ecb
 * Question: how much at a time ???
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Packit 423ecb
{
Packit 423ecb
    unsigned char output[40];
Packit 423ecb
    int inlen = len, outlen = 30;
Packit 423ecb
Packit 423ecb
    htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Packit 423ecb
    output[outlen] = 0;
Packit 423ecb
Packit 423ecb
    fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * referenceDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @name:  The entity name
Packit 423ecb
 *
Packit 423ecb
 * called when an entity reference is detected.
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.reference(%s)\n", name);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * ignorableWhitespaceDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @ch:  a xmlChar string
Packit 423ecb
 * @start: the first char in the string
Packit 423ecb
 * @len: the number of xmlChar
Packit 423ecb
 *
Packit 423ecb
 * receiving some ignorable whitespaces from the parser.
Packit 423ecb
 * Question: how much at a time ???
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Packit 423ecb
{
Packit 423ecb
    char output[40];
Packit 423ecb
    int i;
Packit 423ecb
Packit 423ecb
    for (i = 0;(i
Packit 423ecb
	output[i] = ch[i];
Packit 423ecb
    output[i] = 0;
Packit 423ecb
Packit 423ecb
    fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * processingInstructionDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @target:  the target name
Packit 423ecb
 * @data: the PI data's
Packit 423ecb
 * @len: the number of xmlChar
Packit 423ecb
 *
Packit 423ecb
 * A processing instruction has been parsed.
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
Packit 423ecb
                      const xmlChar *data)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
Packit 423ecb
            (char *) target, (char *) data);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * commentDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @value:  the comment content
Packit 423ecb
 *
Packit 423ecb
 * A comment has been parsed.
Packit 423ecb
 */
Packit 423ecb
static void
Packit 423ecb
commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
Packit 423ecb
{
Packit 423ecb
    fprintf(stdout, "SAX.comment(%s)\n", value);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * warningDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @msg:  the message to display/transmit
Packit 423ecb
 * @...:  extra parameters for the message display
Packit 423ecb
 *
Packit 423ecb
 * Display and format a warning messages, gives file, line, position and
Packit 423ecb
 * extra parameters.
Packit 423ecb
 */
Packit 423ecb
static void XMLCDECL
Packit 423ecb
warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Packit 423ecb
{
Packit 423ecb
    va_list args;
Packit 423ecb
Packit 423ecb
    va_start(args, msg);
Packit 423ecb
    fprintf(stdout, "SAX.warning: ");
Packit 423ecb
    vfprintf(stdout, msg, args);
Packit 423ecb
    va_end(args);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * errorDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @msg:  the message to display/transmit
Packit 423ecb
 * @...:  extra parameters for the message display
Packit 423ecb
 *
Packit 423ecb
 * Display and format a error messages, gives file, line, position and
Packit 423ecb
 * extra parameters.
Packit 423ecb
 */
Packit 423ecb
static void XMLCDECL
Packit 423ecb
errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Packit 423ecb
{
Packit 423ecb
    va_list args;
Packit 423ecb
Packit 423ecb
    va_start(args, msg);
Packit 423ecb
    fprintf(stdout, "SAX.error: ");
Packit 423ecb
    vfprintf(stdout, msg, args);
Packit 423ecb
    va_end(args);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
/**
Packit 423ecb
 * fatalErrorDebug:
Packit 423ecb
 * @ctxt:  An XML parser context
Packit 423ecb
 * @msg:  the message to display/transmit
Packit 423ecb
 * @...:  extra parameters for the message display
Packit 423ecb
 *
Packit 423ecb
 * Display and format a fatalError messages, gives file, line, position and
Packit 423ecb
 * extra parameters.
Packit 423ecb
 */
Packit 423ecb
static void XMLCDECL
Packit 423ecb
fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Packit 423ecb
{
Packit 423ecb
    va_list args;
Packit 423ecb
Packit 423ecb
    va_start(args, msg);
Packit 423ecb
    fprintf(stdout, "SAX.fatalError: ");
Packit 423ecb
    vfprintf(stdout, msg, args);
Packit 423ecb
    va_end(args);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
static xmlSAXHandler debugSAXHandlerStruct = {
Packit 423ecb
    internalSubsetDebug,
Packit 423ecb
    isStandaloneDebug,
Packit 423ecb
    hasInternalSubsetDebug,
Packit 423ecb
    hasExternalSubsetDebug,
Packit 423ecb
    resolveEntityDebug,
Packit 423ecb
    getEntityDebug,
Packit 423ecb
    entityDeclDebug,
Packit 423ecb
    notationDeclDebug,
Packit 423ecb
    attributeDeclDebug,
Packit 423ecb
    elementDeclDebug,
Packit 423ecb
    unparsedEntityDeclDebug,
Packit 423ecb
    setDocumentLocatorDebug,
Packit 423ecb
    startDocumentDebug,
Packit 423ecb
    endDocumentDebug,
Packit 423ecb
    startElementDebug,
Packit 423ecb
    endElementDebug,
Packit 423ecb
    referenceDebug,
Packit 423ecb
    charactersDebug,
Packit 423ecb
    ignorableWhitespaceDebug,
Packit 423ecb
    processingInstructionDebug,
Packit 423ecb
    commentDebug,
Packit 423ecb
    warningDebug,
Packit 423ecb
    errorDebug,
Packit 423ecb
    fatalErrorDebug,
Packit 423ecb
    getParameterEntityDebug,
Packit 423ecb
    cdataDebug,
Packit 423ecb
    NULL,
Packit 423ecb
    1,
Packit 423ecb
    NULL,
Packit 423ecb
    NULL,
Packit 423ecb
    NULL,
Packit 423ecb
    NULL
Packit 423ecb
};
Packit 423ecb
Packit 423ecb
xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Packit 423ecb
/************************************************************************
 *									*
 *				Debug					*
 *									*
 ************************************************************************/
Packit 423ecb
Packit 423ecb
static void
Packit 423ecb
parseSAXFile(char *filename) {
Packit 423ecb
    htmlDocPtr doc = NULL;
Packit 423ecb
Packit 423ecb
    /*
Packit 423ecb
     * Empty callbacks for checking
Packit 423ecb
     */
Packit 423ecb
#ifdef LIBXML_PUSH_ENABLED
Packit 423ecb
    if (push) {
Packit 423ecb
	FILE *f;
Packit 423ecb
Packit 423ecb
#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
Packit 423ecb
	f = fopen(filename, "rb");
Packit 423ecb
#else
Packit 423ecb
	f = fopen(filename, "r");
Packit 423ecb
#endif
Packit 423ecb
	if (f != NULL) {
Packit 423ecb
	    int res, size = 3;
Packit 423ecb
	    char chars[4096];
Packit 423ecb
	    htmlParserCtxtPtr ctxt;
Packit 423ecb
Packit 423ecb
	    /* if (repeat) */
Packit 423ecb
		size = 4096;
Packit 423ecb
	    res = fread(chars, 1, 4, f);
Packit 423ecb
	    if (res > 0) {
Packit 423ecb
		ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
Packit 423ecb
			    chars, res, filename, XML_CHAR_ENCODING_NONE);
Packit 423ecb
		while ((res = fread(chars, 1, size, f)) > 0) {
Packit 423ecb
		    htmlParseChunk(ctxt, chars, res, 0);
Packit 423ecb
		}
Packit 423ecb
		htmlParseChunk(ctxt, chars, 0, 1);
Packit 423ecb
		doc = ctxt->myDoc;
Packit 423ecb
		htmlFreeParserCtxt(ctxt);
Packit 423ecb
	    }
Packit 423ecb
	    if (doc != NULL) {
Packit 423ecb
		fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
Packit 423ecb
		xmlFreeDoc(doc);
Packit 423ecb
	    }
Packit 423ecb
	    fclose(f);
Packit 423ecb
	}
Packit 423ecb
	if (!noout) {
Packit 423ecb
#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
Packit 423ecb
		f = fopen(filename, "rb");
Packit 423ecb
#else
Packit 423ecb
		f = fopen(filename, "r");
Packit 423ecb
#endif
Packit 423ecb
	    if (f != NULL) {
Packit 423ecb
		int res, size = 3;
Packit 423ecb
		char chars[4096];
Packit 423ecb
		htmlParserCtxtPtr ctxt;
Packit 423ecb
Packit 423ecb
		/* if (repeat) */
Packit 423ecb
		    size = 4096;
Packit 423ecb
		res = fread(chars, 1, 4, f);
Packit 423ecb
		if (res > 0) {
Packit 423ecb
		    ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
Packit 423ecb
				chars, res, filename, XML_CHAR_ENCODING_NONE);
Packit 423ecb
		    while ((res = fread(chars, 1, size, f)) > 0) {
Packit 423ecb
			htmlParseChunk(ctxt, chars, res, 0);
Packit 423ecb
		    }
Packit 423ecb
		    htmlParseChunk(ctxt, chars, 0, 1);
Packit 423ecb
		    doc = ctxt->myDoc;
Packit 423ecb
		    htmlFreeParserCtxt(ctxt);
Packit 423ecb
		}
Packit 423ecb
		if (doc != NULL) {
Packit 423ecb
		    fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
Packit 423ecb
		    xmlFreeDoc(doc);
Packit 423ecb
		}
Packit 423ecb
		fclose(f);
Packit 423ecb
	    }
Packit 423ecb
	}
Packit 423ecb
    } else {
Packit 423ecb
#endif /* LIBXML_PUSH_ENABLED */
Packit 423ecb
	doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Packit 423ecb
	if (doc != NULL) {
Packit 423ecb
	    fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
Packit 423ecb
	    xmlFreeDoc(doc);
Packit 423ecb
	}
Packit 423ecb
Packit 423ecb
	if (!noout) {
Packit 423ecb
	    /*
Packit 423ecb
	     * Debug callback
Packit 423ecb
	     */
Packit 423ecb
	    doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
Packit 423ecb
	    if (doc != NULL) {
Packit 423ecb
		fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
Packit 423ecb
		xmlFreeDoc(doc);
Packit 423ecb
	    }
Packit 423ecb
	}
Packit 423ecb
#ifdef LIBXML_PUSH_ENABLED
Packit 423ecb
    }
Packit 423ecb
#endif /* LIBXML_PUSH_ENABLED */
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
static void
Packit 423ecb
parseAndPrintFile(char *filename) {
Packit 423ecb
    htmlDocPtr doc = NULL;
Packit 423ecb
Packit 423ecb
    /*
Packit 423ecb
     * build an HTML tree from a string;
Packit 423ecb
     */
Packit 423ecb
#ifdef LIBXML_PUSH_ENABLED
Packit 423ecb
    if (push) {
Packit 423ecb
	FILE *f;
Packit 423ecb
Packit 423ecb
#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
Packit 423ecb
	f = fopen(filename, "rb");
Packit 423ecb
#else
Packit 423ecb
	f = fopen(filename, "r");
Packit 423ecb
#endif
Packit 423ecb
	if (f != NULL) {
Packit 423ecb
	    int res, size = 3;
Packit 423ecb
	    char chars[4096];
Packit 423ecb
	    htmlParserCtxtPtr ctxt;
Packit 423ecb
Packit 423ecb
	    /* if (repeat) */
Packit 423ecb
		size = 4096;
Packit 423ecb
	    res = fread(chars, 1, 4, f);
Packit 423ecb
	    if (res > 0) {
Packit 423ecb
		ctxt = htmlCreatePushParserCtxt(NULL, NULL,
Packit 423ecb
			    chars, res, filename, XML_CHAR_ENCODING_NONE);
Packit 423ecb
		while ((res = fread(chars, 1, size, f)) > 0) {
Packit 423ecb
		    htmlParseChunk(ctxt, chars, res, 0);
Packit 423ecb
		}
Packit 423ecb
		htmlParseChunk(ctxt, chars, 0, 1);
Packit 423ecb
		doc = ctxt->myDoc;
Packit 423ecb
		htmlFreeParserCtxt(ctxt);
Packit 423ecb
	    }
Packit 423ecb
	    fclose(f);
Packit 423ecb
	}
Packit 423ecb
    } else {
Packit 423ecb
	doc = htmlReadFile(filename, NULL, options);
Packit 423ecb
    }
Packit 423ecb
#else
Packit 423ecb
	doc = htmlReadFile(filename,NULL,options);
Packit 423ecb
#endif
Packit 423ecb
    if (doc == NULL) {
Packit 423ecb
        xmlGenericError(xmlGenericErrorContext,
Packit 423ecb
		"Could not parse %s\n", filename);
Packit 423ecb
    }
Packit 423ecb
Packit 423ecb
#ifdef LIBXML_TREE_ENABLED
Packit 423ecb
    /*
Packit 423ecb
     * test intermediate copy if needed.
Packit 423ecb
     */
Packit 423ecb
    if (copy) {
Packit 423ecb
        htmlDocPtr tmp;
Packit 423ecb
Packit 423ecb
        tmp = doc;
Packit 423ecb
	doc = xmlCopyDoc(doc, 1);
Packit 423ecb
	xmlFreeDoc(tmp);
Packit 423ecb
    }
Packit 423ecb
#endif
Packit 423ecb
Packit 423ecb
#ifdef LIBXML_OUTPUT_ENABLED
Packit 423ecb
    /*
Packit 423ecb
     * print it.
Packit 423ecb
     */
Packit 423ecb
    if (!noout) {
Packit 423ecb
#ifdef LIBXML_DEBUG_ENABLED
Packit 423ecb
	if (!debug) {
Packit 423ecb
	    if (encoding)
Packit 423ecb
		htmlSaveFileEnc("-", doc, encoding);
Packit 423ecb
	    else
Packit 423ecb
		htmlDocDump(stdout, doc);
Packit 423ecb
	} else
Packit 423ecb
	    xmlDebugDumpDocument(stdout, doc);
Packit 423ecb
#else
Packit 423ecb
	if (encoding)
Packit 423ecb
	    htmlSaveFileEnc("-", doc, encoding);
Packit 423ecb
	else
Packit 423ecb
	    htmlDocDump(stdout, doc);
Packit 423ecb
#endif
Packit 423ecb
    }
Packit 423ecb
#endif /* LIBXML_OUTPUT_ENABLED */
Packit 423ecb
Packit 423ecb
    /*
Packit 423ecb
     * free it.
Packit 423ecb
     */
Packit 423ecb
    xmlFreeDoc(doc);
Packit 423ecb
}
Packit 423ecb
Packit 423ecb
int main(int argc, char **argv) {
Packit 423ecb
    int i, count;
Packit 423ecb
    int files = 0;
Packit 423ecb
Packit 423ecb
    for (i = 1; i < argc ; i++) {
Packit 423ecb
#ifdef LIBXML_DEBUG_ENABLED
Packit 423ecb
	if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
Packit 423ecb
	    debug++;
Packit 423ecb
	else
Packit 423ecb
#endif
Packit 423ecb
	    if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Packit 423ecb
	    copy++;
Packit 423ecb
#ifdef LIBXML_PUSH_ENABLED
Packit 423ecb
	else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
Packit 423ecb
	    push++;
Packit 423ecb
#endif /* LIBXML_PUSH_ENABLED */
Packit 423ecb
	else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
Packit 423ecb
	    sax++;
Packit 423ecb
	else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
Packit 423ecb
	    noout++;
Packit 423ecb
	else if ((!strcmp(argv[i], "-repeat")) ||
Packit 423ecb
	         (!strcmp(argv[i], "--repeat")))
Packit 423ecb
	    repeat++;
Packit 423ecb
	else if ((!strcmp(argv[i], "-encode")) ||
Packit 423ecb
	         (!strcmp(argv[i], "--encode"))) {
Packit 423ecb
	    i++;
Packit 423ecb
	    encoding = argv[i];
Packit 423ecb
        }
Packit 423ecb
    }
Packit 423ecb
    for (i = 1; i < argc ; i++) {
Packit 423ecb
	if ((!strcmp(argv[i], "-encode")) ||
Packit 423ecb
	         (!strcmp(argv[i], "--encode"))) {
Packit 423ecb
	    i++;
Packit 423ecb
	    continue;
Packit 423ecb
        }
Packit 423ecb
	if (argv[i][0] != '-') {
Packit 423ecb
	    if (repeat) {
Packit 423ecb
		for (count = 0;count < 100 * repeat;count++) {
Packit 423ecb
		    if (sax)
Packit 423ecb
			parseSAXFile(argv[i]);
Packit 423ecb
		    else
Packit 423ecb
			parseAndPrintFile(argv[i]);
Packit 423ecb
		}
Packit 423ecb
	    } else {
Packit 423ecb
		if (sax)
Packit 423ecb
		    parseSAXFile(argv[i]);
Packit 423ecb
		else
Packit 423ecb
		    parseAndPrintFile(argv[i]);
Packit 423ecb
	    }
Packit 423ecb
	    files ++;
Packit 423ecb
	}
Packit 423ecb
    }
Packit 423ecb
    if (files == 0) {
Packit 423ecb
	printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Packit 423ecb
	       argv[0]);
Packit 423ecb
	printf("\tParse the HTML files and output the result of the parsing\n");
Packit 423ecb
#ifdef LIBXML_DEBUG_ENABLED
Packit 423ecb
	printf("\t--debug : dump a debug tree of the in-memory document\n");
Packit 423ecb
#endif
Packit 423ecb
	printf("\t--copy : used to test the internal copy implementation\n");
Packit 423ecb
	printf("\t--sax : debug the sequence of SAX callbacks\n");
Packit 423ecb
	printf("\t--repeat : parse the file 100 times, for timing\n");
Packit 423ecb
	printf("\t--noout : do not print the result\n");
Packit 423ecb
#ifdef LIBXML_PUSH_ENABLED
Packit 423ecb
	printf("\t--push : use the push mode parser\n");
Packit 423ecb
#endif /* LIBXML_PUSH_ENABLED */
Packit 423ecb
	printf("\t--encode encoding : output in the given encoding\n");
Packit 423ecb
    }
Packit 423ecb
    xmlCleanupParser();
Packit 423ecb
    xmlMemoryDump();
Packit 423ecb
Packit 423ecb
    return(0);
Packit 423ecb
}
Packit 423ecb
#else /* !LIBXML_HTML_ENABLED */
Packit 423ecb
#include <stdio.h>
Packit 423ecb
/*
 * Fallback entry point used when HTML support is compiled out: prints a
 * one-line notice naming the program and exits with status 0.
 */
int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
    const char *prog = argv[0];

    printf("%s : HTML support not compiled in\n", prog);
    return 0;
}
Packit 423ecb
#endif