Blame src/HTML.h

Packit f574b8
/*
Packit f574b8
 * $LynxId: HTML.h,v 1.33 2011/05/19 09:57:53 tom Exp $
Packit f574b8
 *
Packit f574b8
 *					HTML to rich text converter for libwww
Packit f574b8
 *
Packit f574b8
 *			THE HTML TO RTF OBJECT CONVERTER
Packit f574b8
 *
Packit f574b8
 *  This interprets the HTML semantics.
Packit f574b8
 */
Packit f574b8
#ifndef HTML_H
Packit f574b8
#define HTML_H
Packit f574b8
Packit f574b8
#ifndef HTUTILS_H
Packit f574b8
#include <HTUtils.h>
Packit f574b8
#endif /* HTUTILS_H */
Packit f574b8
Packit f574b8
#include <UCDefs.h>
Packit f574b8
#include <UCAux.h>
Packit f574b8
#include <HTAnchor.h>
Packit f574b8
#include <HTMLDTD.h>
Packit f574b8
Packit f574b8
#ifdef __cplusplus
Packit f574b8
extern "C" {
Packit f574b8
#endif
Packit f574b8
/* #define ATTR_CS_IN (me->T.output_utf8 ? me->UCLYhndl : 0) */
Packit f574b8
/*
 * Charset handle used as the input charset for attribute values.  This
 * expands to a member access on `me`, so it (and every macro below that uses
 * it) can only be expanded where a suitable `me` is in scope.
 */
#define ATTR_CS_IN me->tag_charset
Packit f574b8
/*
 * Convenience wrappers for LYUCTranslateHTMLString(); all of them pass
 * st_HTML as the final argument.  The un-numbered form translates from
 * ATTR_CS_IN to current_char_set; the "5" and "6" forms take both charsets
 * from the caller, and the "6" form additionally exposes the fourth argument
 * (spcls), which the other two fix to YES.
 */
#define TRANSLATE_AND_UNESCAPE_ENTITIES(s, p, h) \
Packit f574b8
	LYUCTranslateHTMLString(s, ATTR_CS_IN, current_char_set, YES, p, h, st_HTML)
Packit f574b8
#define TRANSLATE_AND_UNESCAPE_ENTITIES5(s,cs_from,cs_to,p,h) \
Packit f574b8
	LYUCTranslateHTMLString(s, cs_from, cs_to, YES, p, h, st_HTML)
Packit f574b8
#define TRANSLATE_AND_UNESCAPE_ENTITIES6(s,cs_from,cs_to,spcls,p,h) \
Packit f574b8
	LYUCTranslateHTMLString(s, cs_from, cs_to, spcls, p, h, st_HTML)
Packit f574b8
/*
 * Convenience wrappers for LYUCFullyTranslateString(); all of them pass
 * st_HTML as the final argument.  The un-numbered form translates from
 * me->UCLYhndl (so it needs `me` in scope) to current_char_set; the "5" and
 * "7" forms take both charsets from the caller, and the "7" form additionally
 * exposes the arguments (spcls, Back) that the other two fix to YES and NO.
 */
#define TRANSLATE_HTML(s,p,h) \
Packit f574b8
	LYUCFullyTranslateString(s, me->UCLYhndl, current_char_set, NO, YES, p, h, NO, st_HTML)
Packit f574b8
#define TRANSLATE_HTML5(s,cs_from,cs_to,p,h) \
Packit f574b8
	LYUCFullyTranslateString(s, cs_from, cs_to, NO, YES, p, h, NO, st_HTML)
Packit f574b8
#define TRANSLATE_HTML7(s,cs_from,cs_to,spcls,p,h,Back) \
Packit f574b8
	LYUCFullyTranslateString(s, cs_from, cs_to, NO, spcls, p, h, Back, st_HTML)
Packit f574b8
/*
Packit f574b8
 * Strings from attributes which should be converted to some kind of "standard"
Packit f574b8
 * representation (character encoding), was Latin-1, esp.  URLs (incl.
Packit f574b8
 * #fragments) and HTML NAME and ID stuff.
 *
 * Both macros pass ATTR_CS_IN as the source and as the target charset, so
 * they require `me` to be in scope where they are expanded.  They differ only
 * in the final argument (st_URL versus st_HTML).
Packit f574b8
 */
Packit f574b8
#define TRANSLATE_AND_UNESCAPE_TO_STD(s) \
Packit f574b8
	LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_URL)
Packit f574b8
#define UNESCAPE_FIELDNAME_TO_STD(s) \
Packit f574b8
	LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_HTML)
Packit f574b8
    /*
     * HTStructuredClass object for HTML presentation.  This is a declaration
     * only; the definition is not in this header.
     */
    extern const HTStructuredClass HTMLPresentation;
Packit f574b8
Packit f574b8
#ifdef Lynx_HTML_Handler
Packit f574b8
/*
Packit f574b8
 *	This section is semi-private to HTML.c and its helper modules. - FM
Packit f574b8
 *	--------------------------------------------------------------------
Packit f574b8
 */
Packit f574b8
Packit f574b8
    /*
     * One entry of the style stack held in struct _HTStructured (its `stack`
     * array, addressed through `sp`).
     */
    typedef struct _stack_element {
Packit f574b8
	HTStyle *style;		/* style recorded for this stack entry */
Packit f574b8
	int tag_number;		/* presumably the element number of the tag that pushed this entry -- confirm in HTML.c */
Packit f574b8
    } stack_element;
Packit f574b8
Packit f574b8
/*		HTML Object
Packit f574b8
 *		-----------
Packit f574b8
 */
Packit f574b8
/*
 * Capacity of the nesting arrays in struct _HTStructured
 * (DivisionAlignments[] and stack[]).
 */
#define MAX_NESTING 800		/* Should be checked by parser */
Packit f574b8
Packit f574b8
    /*
     * State of the HTML structured-stream object (HTStructured).  It holds the
     * output target, buffers for element content that is collected before use
     * (title, object, option, textarea, math, style and script chunks),
     * list/division nesting counters, the style stack, "currently inside
     * element X" flags, and the charset settings used for translation.
     *
     * The definition is only visible when Lynx_HTML_Handler is defined.
     */
    struct _HTStructured {
Packit f574b8
	const HTStructuredClass *isa;
Packit f574b8
	HTParentAnchor *node_anchor;
Packit f574b8
	HText *text;
Packit f574b8
Packit f574b8
	HTStream *target;	/* Output stream */
Packit f574b8
	HTStreamClass targetClass;	/* Output routines */
Packit f574b8
Packit f574b8
	HTChildAnchor *CurrentA;	/* current HTML_A anchor */
Packit f574b8
	int CurrentANum;	/* current HTML_A number */
Packit f574b8
	char *base_href;	/* current HTML_BASE href */
Packit f574b8
	char *map_address;	/* current HTML_MAP address */
Packit f574b8
Packit f574b8
	HTChunk title;		/* Grow by 128 */
Packit f574b8
	HTChunk object;		/* Grow by 128 */
Packit f574b8
	BOOL object_started;
Packit f574b8
	BOOL object_declare;
Packit f574b8
	BOOL object_shapes;
Packit f574b8
	BOOL object_ismap;
Packit f574b8
	char *object_usemap;
Packit f574b8
	char *object_id;
Packit f574b8
	char *object_title;
Packit f574b8
	char *object_data;
Packit f574b8
	char *object_type;
Packit f574b8
	char *object_classid;
Packit f574b8
	char *object_codebase;
Packit f574b8
	char *object_codetype;
Packit f574b8
	char *object_name;
Packit f574b8
	int objects_mixed_open, objects_figged_open;
Packit f574b8
	HTChunk option;		/* Grow by 128 */
Packit f574b8
	BOOL first_option;	/* First OPTION in SELECT? */
Packit f574b8
	char *LastOptionValue;
Packit f574b8
	BOOL LastOptionChecked;
Packit f574b8
	BOOL select_disabled;
Packit f574b8
	HTChunk textarea;	/* Grow by 128 */
Packit f574b8
	char *textarea_name;
Packit f574b8
	int textarea_name_cs;	/* presumably the charset handle that applies to textarea_name -- confirm */
Packit f574b8
	char *textarea_accept_cs;
Packit f574b8
	int textarea_cols;
Packit f574b8
	int textarea_rows;
Packit f574b8
	int textarea_disabled;
Packit f574b8
	int textarea_readonly;
Packit f574b8
	char *textarea_id;
Packit f574b8
	HTChunk math;		/* Grow by 128 */
Packit f574b8
	HTChunk style_block;	/* Grow by 128 */
Packit f574b8
	HTChunk script;		/* Grow by 128 */
Packit f574b8
Packit f574b8
	/*
Packit f574b8
	 *  Used for nested lists. - FM
	 *
	 *  NOTE(review): OL_Counter and OL_Type hold 12 entries, far fewer
	 *  than MAX_NESTING; presumably List_Nesting_Level is clamped before
	 *  it is used as an index -- confirm in the code that indexes them.
Packit f574b8
	 */
Packit f574b8
	int List_Nesting_Level;	/* counter for list nesting level */
Packit f574b8
	int OL_Counter[12];	/* counter for ordered lists */
Packit f574b8
	char OL_Type[12];	/* types for ordered lists */
Packit f574b8
	int Last_OL_Count;	/* last count in ordered lists */
Packit f574b8
	char Last_OL_Type;	/* last type in ordered lists */
Packit f574b8
Packit f574b8
	int Division_Level;
Packit f574b8
	short DivisionAlignments[MAX_NESTING];
Packit f574b8
	int Underline_Level;
Packit f574b8
	int Quote_Level;
Packit f574b8
Packit f574b8
	BOOL UsePlainSpace;
Packit f574b8
	BOOL HiddenValue;
Packit f574b8
	int lastraw;
Packit f574b8
Packit f574b8
	const char *comment_start;	/* for literate programming */
Packit f574b8
	const char *comment_end;
Packit f574b8
Packit f574b8
	HTTag *current_tag;
Packit f574b8
	BOOL style_change;	/* tested by UPDATE_STYLE before calling actually_set_style() */
Packit f574b8
	HTStyle *new_style;
Packit f574b8
	HTStyle *old_style;
Packit f574b8
	int current_default_alignment;
Packit f574b8
	BOOL in_word;		/* Have just had a non-white char */
Packit f574b8
	stack_element stack[MAX_NESTING];
Packit f574b8
	stack_element *sp;	/* Style stack pointer */
Packit f574b8
	BOOL stack_overrun;	/* Was MAX_NESTING exceeded? */
Packit f574b8
	int skip_stack;		/* flag to skip next style stack operation */
Packit f574b8
Packit f574b8
	/*
Packit f574b8
	 *  Track if we are in an anchor, paragraph, address, base, etc.
Packit f574b8
	 */
Packit f574b8
	BOOL inA;
Packit f574b8
	BOOL inAPPLET;
Packit f574b8
	BOOL inAPPLETwithP;
Packit f574b8
	BOOL inBadBASE;
Packit f574b8
	BOOL inBadHREF;
Packit f574b8
	BOOL inBadHTML;
Packit f574b8
	BOOL inBASE;
Packit f574b8
	BOOL inBoldA;
Packit f574b8
	BOOL inBoldH;
Packit f574b8
	BOOL inCAPTION;
Packit f574b8
	BOOL inCREDIT;
Packit f574b8
	BOOL inFIG;
Packit f574b8
	BOOL inFIGwithP;
Packit f574b8
	BOOL inFONT;
Packit f574b8
	BOOL inFORM;
Packit f574b8
	BOOL inLABEL;
Packit f574b8
	BOOL inP;
Packit f574b8
	BOOL inPRE;
Packit f574b8
	BOOL inSELECT;
Packit f574b8
	BOOL inTABLE;
Packit f574b8
	BOOL inTEXTAREA;
Packit f574b8
	BOOL inUnderline;
Packit f574b8
Packit f574b8
	BOOL needBoldH;
Packit f574b8
Packit f574b8
	char *xinclude;		/* if no include string address passed */
Packit f574b8
	/*
Packit f574b8
	 * UCI and UCLYhndl give the UCInfo and charset registered for the HTML
Packit f574b8
	 * parser in the node_anchor's UCStages structure.  It indicates what is
Packit f574b8
	 * fed to the HTML parser as the stream of character data (not necessarily
Packit f574b8
	 * tags and attributes).  It should currently always be set to be the same
Packit f574b8
	 * as UCI and UCLYhndl for the HTEXT stage in the node_anchor's UCStages
Packit f574b8
	 * structure, since the HTML parser sends its input character data to the
Packit f574b8
	 * output without further charset translation.
Packit f574b8
	 */
Packit f574b8
	LYUCcharset *UCI;
Packit f574b8
	int UCLYhndl;
Packit f574b8
	/*
Packit f574b8
	 * inUCI and inUCLYhndl indicate the UCInfo and charset which the HTML
Packit f574b8
	 * parser treats as the input charset.  It is normally set to the UCI and
Packit f574b8
	 * UCLYhndl for the SGML parser in the node_anchor's UCStages structure
Packit f574b8
	 * (which may be a dummy, based on the MIME parser's UCI and UCLYhndl in
Packit f574b8
	 * that structure, when we are handling a local file or non-http(s)
Packit f574b8
	 * gateway).  It could be changed temporarily by the HTML parser, for
Packit f574b8
	 * conversions of attribute strings, but should be reset once done.  - FM
Packit f574b8
	 */
Packit f574b8
	LYUCcharset *inUCI;
Packit f574b8
	int inUCLYhndl;
Packit f574b8
	/*
Packit f574b8
	 * outUCI and outUCLYhndl indicate the UCInfo and charset which the HTML
Packit f574b8
	 * parser treats as the output charset.  It is normally set to its own UCI
Packit f574b8
	 * and UCLYhndl.  It could be changed for conversions of attribute strings,
Packit f574b8
	 * but should be reset once done.  - FM
Packit f574b8
	 */
Packit f574b8
	LYUCcharset *outUCI;
Packit f574b8
	int outUCLYhndl;
Packit f574b8
	/*
Packit f574b8
	 * T holds the transformation rules for conversions of strings between the
Packit f574b8
	 * input and output charsets by the HTML parser.  - FM
Packit f574b8
	 */
Packit f574b8
	UCTransParams T;
Packit f574b8
Packit f574b8
	int tag_charset;	/* charset for attribute values etc. */
Packit f574b8
    };
Packit f574b8
Packit f574b8
    /* Maps a style number to an HTStyle pointer; the numbering is defined outside this header. */
    extern HTStyle *LYstyles(int style_number);
Packit f574b8
    /* NOTE(review): presumably reports whether bad-HTML handling applies for `me` -- confirm against the definition. */
    extern BOOL LYBadHTML(HTStructured * me);
Packit f574b8
    /* NOTE(review): presumably reports the bad-HTML message `s` to the user or trace log -- confirm. */
    extern void LYShowBadHTML(const char *s);
Packit f574b8
Packit f574b8
/*
Packit f574b8
 *	Semi-Private functions. - FM
 *
 *	Judging by their signatures, the put/write routines feed a single
 *	character, a string, or `l` bytes starting at `s` to the object `me`;
 *	HTML_put_entity takes an entity number and returns an int whose
 *	meaning is not visible in this header.
Packit f574b8
 */
Packit f574b8
    extern void HTML_put_character(HTStructured * me, int c);
Packit f574b8
    extern void HTML_put_string(HTStructured * me, const char *s);
Packit f574b8
    extern void HTML_write(HTStructured * me, const char *s, int l);
Packit f574b8
    extern int HTML_put_entity(HTStructured * me, int entity_number);
Packit f574b8
    /* Called by the UPDATE_STYLE macro when me->style_change is set. */
    extern void actually_set_style(HTStructured * me);
Packit f574b8
Packit f574b8
/*	Style buffering avoids dummy paragraph begin/ends.
Packit f574b8
*/
Packit f574b8
/*
 * Apply a pending style change: calls actually_set_style(me) only when
 * me->style_change is set.  Requires `me` to be in scope where it is expanded.
 *
 * The empty else arm pairs with the macro's own `if`, so an `else` written
 * after the macro by a caller can no longer attach to it silently
 * (dangling-else hazard).  `UPDATE_STYLE;` keeps working unchanged.
 */
#define UPDATE_STYLE if (me->style_change) { actually_set_style(me); } else { }
Packit f574b8
#endif				/* Lynx_HTML_Handler */
Packit f574b8
Packit f574b8
    /*
     * NOTE(review): presumably lower-cases the string `i` in place (the
     * argument is a non-const char * and nothing is returned) -- confirm
     * against the definition.
     * The name falls in the "str" + lowercase-letter space that the C
     * standard reserves for future <string.h> additions; renaming it would
     * require touching every caller, so it is only flagged here.
     */
    extern void strtolower(char *i);
Packit f574b8
Packit f574b8
/*				P U B L I C
Packit f574b8
*/
Packit f574b8
Packit f574b8
/*
Packit f574b8
 *  HTConverter to present HTML
 *
 *  The four converters below share one signature: a presentation, a parent
 *  anchor and a sink stream, returning an HTStream pointer.  How they differ
 *  is not visible in this header; see their definitions.
Packit f574b8
 */
Packit f574b8
    extern HTStream *HTMLToPlain(HTPresentation *pres,
Packit f574b8
				 HTParentAnchor *anchor,
Packit f574b8
				 HTStream *sink);
Packit f574b8
Packit f574b8
    extern HTStream *HTMLParsedPresent(HTPresentation *pres,
Packit f574b8
				       HTParentAnchor *anchor,
Packit f574b8
				       HTStream *sink);
Packit f574b8
Packit f574b8
    extern HTStream *HTMLToC(HTPresentation *pres,
Packit f574b8
			     HTParentAnchor *anchor,
Packit f574b8
			     HTStream *sink);
Packit f574b8
Packit f574b8
    extern HTStream *HTMLPresent(HTPresentation *pres,
Packit f574b8
				 HTParentAnchor *anchor,
Packit f574b8
				 HTStream *sink);
Packit f574b8
Packit f574b8
    /*
     * Returns an HTStructured for `anchor`, given an output format and a
     * target stream.  NOTE(review): presumably allocates a new object;
     * ownership and failure behaviour are not visible here -- confirm.
     */
    extern HTStructured *HTML_new(HTParentAnchor *anchor,
Packit f574b8
				  HTFormat format_out,
Packit f574b8
				  HTStream *target);
Packit f574b8
Packit f574b8
/*
Packit f574b8
 * Record error message as a hypertext object.
Packit f574b8
 *
Packit f574b8
 * The error message should be marked as an error so that it can be reloaded
Packit f574b8
 * later.  This implementation just throws up an error message and leaves the
Packit f574b8
 * document unloaded.
Packit f574b8
 *
Packit f574b8
 * On entry,
Packit f574b8
 *      sink    is a stream to the output device if any
Packit f574b8
 *      number  is the HTTP error number
Packit f574b8
 *      message is the human readable message.
Packit f574b8
 * On exit,
Packit f574b8
 *      a return code like HT_LOADED if object exists else < 0
Packit f574b8
 */
Packit f574b8
    extern int HTLoadError(HTStream *sink,
Packit f574b8
			   int number,
Packit f574b8
			   const char *message);
Packit f574b8
Packit f574b8
#ifdef __cplusplus
Packit f574b8
}
Packit f574b8
#endif
Packit f574b8
#endif				/* HTML_H */