|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* $LynxId: HTML.h,v 1.33 2011/05/19 09:57:53 tom Exp $
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* HTML to rich text converter for libwww
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* THE HTML TO RTF OBJECT CONVERTER
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This interprets the HTML semantics.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
#ifndef HTML_H
|
|
Packit |
f574b8 |
#define HTML_H
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifndef HTUTILS_H
|
|
Packit |
f574b8 |
#include <HTUtils.h>
|
|
Packit |
f574b8 |
#endif /* HTUTILS_H */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#include <UCDefs.h>
|
|
Packit |
f574b8 |
#include <UCAux.h>
|
|
Packit |
f574b8 |
#include <HTAnchor.h>
|
|
Packit |
f574b8 |
#include <HTMLDTD.h>
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifdef __cplusplus
|
|
Packit |
f574b8 |
extern "C" {
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
/* #define ATTR_CS_IN (me->T.output_utf8 ? me->UCLYhndl : 0) */
|
|
Packit |
f574b8 |
/*
 * Charset used for attribute values.  Like every macro in this group it
 * expands against a parser object named `me`, which must be in scope at the
 * point of use.
 */
#define ATTR_CS_IN me->tag_charset

/*
 * Wrappers around LYUCTranslateHTMLString() for HTML text (st_HTML).
 *
 * The numeric suffix is the number of macro parameters: the unsuffixed form
 * takes the source charset from ATTR_CS_IN and the target charset from
 * current_char_set, while the 5- and 6-parameter forms let the caller pass
 * the charsets (and, for the 6-parameter form, the fourth argument) itself.
 *
 * NOTE(review): `p` and `h` are presumably the "plain space" and "hidden"
 * flags (cf. UsePlainSpace / HiddenValue in struct _HTStructured) - confirm
 * against the prototype of LYUCTranslateHTMLString().
 */
#define TRANSLATE_AND_UNESCAPE_ENTITIES(s, p, h) \
    LYUCTranslateHTMLString(s, ATTR_CS_IN, current_char_set, YES, p, h, st_HTML)

#define TRANSLATE_AND_UNESCAPE_ENTITIES5(s,cs_from,cs_to,p,h) \
    LYUCTranslateHTMLString(s, cs_from, cs_to, YES, p, h, st_HTML)

#define TRANSLATE_AND_UNESCAPE_ENTITIES6(s,cs_from,cs_to,spcls,p,h) \
    LYUCTranslateHTMLString(s, cs_from, cs_to, spcls, p, h, st_HTML)

/*
 * Wrappers around LYUCFullyTranslateString() for HTML text (st_HTML).
 *
 * The unsuffixed form translates from me->UCLYhndl to current_char_set; the
 * 5- and 7-parameter forms take the charsets (and, for the 7-parameter form,
 * `spcls` and `Back`) from the caller.
 */
#define TRANSLATE_HTML(s,p,h) \
    LYUCFullyTranslateString(s, me->UCLYhndl, current_char_set, NO, YES, p, h, NO, st_HTML)

#define TRANSLATE_HTML5(s,cs_from,cs_to,p,h) \
    LYUCFullyTranslateString(s, cs_from, cs_to, NO, YES, p, h, NO, st_HTML)

#define TRANSLATE_HTML7(s,cs_from,cs_to,spcls,p,h,Back) \
    LYUCFullyTranslateString(s, cs_from, cs_to, NO, spcls, p, h, Back, st_HTML)

/*
 * Strings from attributes which should be converted to some kind of "standard"
 * representation (character encoding), was Latin-1, esp. URLs (incl.
 * #fragments) and HTML NAME and ID stuff.
 *
 * Both macros pass ATTR_CS_IN as source and target charset; they differ only
 * in the final string-type argument (st_URL vs. st_HTML).
 */
#define TRANSLATE_AND_UNESCAPE_TO_STD(s) \
    LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_URL)

#define UNESCAPE_FIELDNAME_TO_STD(s) \
    LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_HTML)
|
|
Packit |
f574b8 |
/*
 * Structured-stream class object exported by the HTML handler.
 * NOTE(review): presumably defined in HTML.c - confirm.
 */
extern const HTStructuredClass HTMLPresentation;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifdef Lynx_HTML_Handler
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* This section is semi-private to HTML.c and its helper modules. - FM
|
|
Packit |
f574b8 |
* --------------------------------------------------------------------
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
typedef struct _stack_element {
|
|
Packit |
f574b8 |
HTStyle *style;
|
|
Packit |
f574b8 |
int tag_number;
|
|
Packit |
f574b8 |
} stack_element;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* HTML Object
|
|
Packit |
f574b8 |
* -----------
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
#define MAX_NESTING 800 /* Should be checked by parser */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
struct _HTStructured {
|
|
Packit |
f574b8 |
const HTStructuredClass *isa;
|
|
Packit |
f574b8 |
HTParentAnchor *node_anchor;
|
|
Packit |
f574b8 |
HText *text;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
HTStream *target; /* Output stream */
|
|
Packit |
f574b8 |
HTStreamClass targetClass; /* Output routines */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
HTChildAnchor *CurrentA; /* current HTML_A anchor */
|
|
Packit |
f574b8 |
int CurrentANum; /* current HTML_A number */
|
|
Packit |
f574b8 |
char *base_href; /* current HTML_BASE href */
|
|
Packit |
f574b8 |
char *map_address; /* current HTML_MAP address */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
HTChunk title; /* Grow by 128 */
|
|
Packit |
f574b8 |
HTChunk object; /* Grow by 128 */
|
|
Packit |
f574b8 |
BOOL object_started;
|
|
Packit |
f574b8 |
BOOL object_declare;
|
|
Packit |
f574b8 |
BOOL object_shapes;
|
|
Packit |
f574b8 |
BOOL object_ismap;
|
|
Packit |
f574b8 |
char *object_usemap;
|
|
Packit |
f574b8 |
char *object_id;
|
|
Packit |
f574b8 |
char *object_title;
|
|
Packit |
f574b8 |
char *object_data;
|
|
Packit |
f574b8 |
char *object_type;
|
|
Packit |
f574b8 |
char *object_classid;
|
|
Packit |
f574b8 |
char *object_codebase;
|
|
Packit |
f574b8 |
char *object_codetype;
|
|
Packit |
f574b8 |
char *object_name;
|
|
Packit |
f574b8 |
int objects_mixed_open, objects_figged_open;
|
|
Packit |
f574b8 |
HTChunk option; /* Grow by 128 */
|
|
Packit |
f574b8 |
BOOL first_option; /* First OPTION in SELECT? */
|
|
Packit |
f574b8 |
char *LastOptionValue;
|
|
Packit |
f574b8 |
BOOL LastOptionChecked;
|
|
Packit |
f574b8 |
BOOL select_disabled;
|
|
Packit |
f574b8 |
HTChunk textarea; /* Grow by 128 */
|
|
Packit |
f574b8 |
char *textarea_name;
|
|
Packit |
f574b8 |
int textarea_name_cs;
|
|
Packit |
f574b8 |
char *textarea_accept_cs;
|
|
Packit |
f574b8 |
int textarea_cols;
|
|
Packit |
f574b8 |
int textarea_rows;
|
|
Packit |
f574b8 |
int textarea_disabled;
|
|
Packit |
f574b8 |
int textarea_readonly;
|
|
Packit |
f574b8 |
char *textarea_id;
|
|
Packit |
f574b8 |
HTChunk math; /* Grow by 128 */
|
|
Packit |
f574b8 |
HTChunk style_block; /* Grow by 128 */
|
|
Packit |
f574b8 |
HTChunk script; /* Grow by 128 */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Used for nested lists. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
int List_Nesting_Level; /* counter for list nesting level */
|
|
Packit |
f574b8 |
int OL_Counter[12]; /* counter for ordered lists */
|
|
Packit |
f574b8 |
char OL_Type[12]; /* types for ordered lists */
|
|
Packit |
f574b8 |
int Last_OL_Count; /* last count in ordered lists */
|
|
Packit |
f574b8 |
char Last_OL_Type; /* last type in ordered lists */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
int Division_Level;
|
|
Packit |
f574b8 |
short DivisionAlignments[MAX_NESTING];
|
|
Packit |
f574b8 |
int Underline_Level;
|
|
Packit |
f574b8 |
int Quote_Level;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
BOOL UsePlainSpace;
|
|
Packit |
f574b8 |
BOOL HiddenValue;
|
|
Packit |
f574b8 |
int lastraw;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
const char *comment_start; /* for literate programming */
|
|
Packit |
f574b8 |
const char *comment_end;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
HTTag *current_tag;
|
|
Packit |
f574b8 |
BOOL style_change;
|
|
Packit |
f574b8 |
HTStyle *new_style;
|
|
Packit |
f574b8 |
HTStyle *old_style;
|
|
Packit |
f574b8 |
int current_default_alignment;
|
|
Packit |
f574b8 |
BOOL in_word; /* Have just had a non-white char */
|
|
Packit |
f574b8 |
stack_element stack[MAX_NESTING];
|
|
Packit |
f574b8 |
stack_element *sp; /* Style stack pointer */
|
|
Packit |
f574b8 |
BOOL stack_overrun; /* Was MAX_NESTING exceeded? */
|
|
Packit |
f574b8 |
int skip_stack; /* flag to skip next style stack operation */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Track if we are in an anchor, paragraph, address, base, etc.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
BOOL inA;
|
|
Packit |
f574b8 |
BOOL inAPPLET;
|
|
Packit |
f574b8 |
BOOL inAPPLETwithP;
|
|
Packit |
f574b8 |
BOOL inBadBASE;
|
|
Packit |
f574b8 |
BOOL inBadHREF;
|
|
Packit |
f574b8 |
BOOL inBadHTML;
|
|
Packit |
f574b8 |
BOOL inBASE;
|
|
Packit |
f574b8 |
BOOL inBoldA;
|
|
Packit |
f574b8 |
BOOL inBoldH;
|
|
Packit |
f574b8 |
BOOL inCAPTION;
|
|
Packit |
f574b8 |
BOOL inCREDIT;
|
|
Packit |
f574b8 |
BOOL inFIG;
|
|
Packit |
f574b8 |
BOOL inFIGwithP;
|
|
Packit |
f574b8 |
BOOL inFONT;
|
|
Packit |
f574b8 |
BOOL inFORM;
|
|
Packit |
f574b8 |
BOOL inLABEL;
|
|
Packit |
f574b8 |
BOOL inP;
|
|
Packit |
f574b8 |
BOOL inPRE;
|
|
Packit |
f574b8 |
BOOL inSELECT;
|
|
Packit |
f574b8 |
BOOL inTABLE;
|
|
Packit |
f574b8 |
BOOL inTEXTAREA;
|
|
Packit |
f574b8 |
BOOL inUnderline;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
BOOL needBoldH;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
char *xinclude; /* if no include strin address passed */
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* UCI and UCLYhndl give the UCInfo and charset registered for the HTML
|
|
Packit |
f574b8 |
* parser in the node_anchor's UCStages structure. It indicates what is
|
|
Packit |
f574b8 |
* fed to the HTML parser as the stream of character data (not necessarily
|
|
Packit |
f574b8 |
* tags and attributes). It should currently always be set to be the same
|
|
Packit |
f574b8 |
* as UCI and UCLhndl for the HTEXT stage in the node_anchor's UCStages
|
|
Packit |
f574b8 |
* structure, since the HTML parser sends its input character data to the
|
|
Packit |
f574b8 |
* output without further charset translation.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
LYUCcharset *UCI;
|
|
Packit |
f574b8 |
int UCLYhndl;
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* inUCI and inUCLYhndl indicate the UCInfo and charset which the HTML
|
|
Packit |
f574b8 |
* parser treats at the input charset. It is normally set to the UCI and
|
|
Packit |
f574b8 |
* UCLhndl for the SGML parser in the node_anchor's UCStages structure
|
|
Packit |
f574b8 |
* (which may be a dummy, based on the MIME parser's UCI and UCLhndl in
|
|
Packit |
f574b8 |
* that structure, when we are handling a local file or non-http(s)
|
|
Packit |
f574b8 |
* gateway). It could be changed temporarily by the HTML parser, for
|
|
Packit |
f574b8 |
* conversions of attribute strings, but should be reset once done. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
LYUCcharset *inUCI;
|
|
Packit |
f574b8 |
int inUCLYhndl;
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* outUCI and outUCLYhndl indicate the UCInfo and charset which the HTML
|
|
Packit |
f574b8 |
* parser treats as the output charset. It is normally set to its own UCI
|
|
Packit |
f574b8 |
* and UCLhndl. It could be changed for conversions of attribute strings,
|
|
Packit |
f574b8 |
* but should be reset once done. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
LYUCcharset *outUCI;
|
|
Packit |
f574b8 |
int outUCLYhndl;
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* T holds the transformation rules for conversions of strings between the
|
|
Packit |
f574b8 |
* input and output charsets by the HTML parser. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
UCTransParams T;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
int tag_charset; /* charset for attribute values etc. */
|
|
Packit |
f574b8 |
};
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
extern HTStyle *LYstyles(int style_number);
|
|
Packit |
f574b8 |
extern BOOL LYBadHTML(HTStructured * me);
|
|
Packit |
f574b8 |
extern void LYShowBadHTML(const char *s);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Semi-Private functions. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern void HTML_put_character(HTStructured * me, int c);
|
|
Packit |
f574b8 |
extern void HTML_put_string(HTStructured * me, const char *s);
|
|
Packit |
f574b8 |
extern void HTML_write(HTStructured * me, const char *s, int l);
|
|
Packit |
f574b8 |
extern int HTML_put_entity(HTStructured * me, int entity_number);
|
|
Packit |
f574b8 |
extern void actually_set_style(HTStructured * me);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Style buffering avoids dummy paragraph begin/ends.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/*
 * Apply a pending style change, if any: calls actually_set_style(me) only
 * when me->style_change is set.  Expands against a parser object named `me`.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement:
 * `if (x) UPDATE_STYLE; else ...` now compiles and the `else` binds to the
 * caller's `if`, not to the `if` hidden inside the macro.  Invoke it with a
 * trailing semicolon.
 */
#define UPDATE_STYLE do { if (me->style_change) { actually_set_style(me); } } while (0)
|
|
Packit |
f574b8 |
#endif /* Lynx_HTML_Handler */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
 * NOTE(review): presumably lower-cases the string `i` in place (non-const
 * pointer, void return) - confirm against the definition.
 */
extern void strtolower(char *i);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* P U B L I C
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* HTConverter to present HTML
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern HTStream *HTMLToPlain(HTPresentation *pres,
|
|
Packit |
f574b8 |
HTParentAnchor *anchor,
|
|
Packit |
f574b8 |
HTStream *sink);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
extern HTStream *HTMLParsedPresent(HTPresentation *pres,
|
|
Packit |
f574b8 |
HTParentAnchor *anchor,
|
|
Packit |
f574b8 |
HTStream *sink);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
extern HTStream *HTMLToC(HTPresentation *pres,
|
|
Packit |
f574b8 |
HTParentAnchor *anchor,
|
|
Packit |
f574b8 |
HTStream *sink);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
extern HTStream *HTMLPresent(HTPresentation *pres,
|
|
Packit |
f574b8 |
HTParentAnchor *anchor,
|
|
Packit |
f574b8 |
HTStream *sink);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
extern HTStructured *HTML_new(HTParentAnchor *anchor,
|
|
Packit |
f574b8 |
HTFormat format_out,
|
|
Packit |
f574b8 |
HTStream *target);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Record error message as a hypertext object.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* The error message should be marked as an error so that it can be reloaded
|
|
Packit |
f574b8 |
* later. This implementation just throws up an error message and leaves the
|
|
Packit |
f574b8 |
* document unloaded.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On entry,
|
|
Packit |
f574b8 |
* sink is a stream to the output device if any
|
|
Packit |
f574b8 |
* number is the HTTP error number
|
|
Packit |
f574b8 |
* message is the human readable message.
|
|
Packit |
f574b8 |
* On exit,
|
|
Packit |
f574b8 |
*	a return code like HT_LOADED if object exists else < 0
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/*
 * Report a load error: `sink` is the output stream (if any), `number` the
 * HTTP error number and `message` the human-readable text.  Returns an int
 * status code.
 */
extern int HTLoadError(HTStream *sink,
                       int number,
                       const char *message);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifdef __cplusplus
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
#endif /* HTML_H */
|