Blame WWW/Library/Implementation/SGML.h

Packit f574b8
/*
Packit f574b8
 * $LynxId: SGML.h,v 1.46 2012/02/10 18:32:26 tom Exp $
Packit f574b8
 *			       SGML parse and stream definition for libwww
Packit f574b8
 *                             SGML AND STRUCTURED STREAMS
Packit f574b8
 *
Packit f574b8
 * The SGML parser is a state machine.	It is called for every character
Packit f574b8
 * of the input stream.	 The DTD data structure contains pointers
Packit f574b8
 * to functions which are called to implement the actual effect of the
Packit f574b8
 * text read. When these functions are called, the attribute structures pointed to by the
Packit f574b8
 * DTD are valid, and the function is passed a pointer to the current tag structure, and an
Packit f574b8
 * "element stack" which represents the state of nesting within SGML elements.
Packit f574b8
 *
Packit f574b8
 * The following aspects are from Dan Connolly's suggestions:  Binary search,
Packit f574b8
 * Structured object scheme basically, SGML content enum type.
Packit f574b8
 *
Packit f574b8
 * (c) Copyright CERN 1991 - See Copyright.html
Packit f574b8
 *
Packit f574b8
 */
Packit f574b8
#ifndef SGML_H
Packit f574b8
#define SGML_H
Packit f574b8
Packit f574b8
#include <HTStream.h>
Packit f574b8
#include <HTAnchor.h>
Packit f574b8
#include <LYJustify.h>
Packit f574b8
Packit f574b8
#ifdef __cplusplus
Packit f574b8
extern "C" {
Packit f574b8
#endif
Packit f574b8
/*
Packit f574b8
 *
Packit f574b8
 * SGML content types
Packit f574b8
 *
 * Each value names a content model, i.e. how much markup is recognized
 * inside an element.  A tag records its model in the `contents' member
 * of struct _tag.  Several models are deliberately treated like another
 * one; the per-value comments say which.
 * Note: the identifier SGML_LITTERAL really is spelled with a double T.
 *
Packit f574b8
 */ typedef enum {
Packit f574b8
	SGML_EMPTY,		/* No content. */
Packit f574b8
	SGML_LITTERAL,		/* Literal character data.  Recognize exact close tag only.
Packit f574b8
				   Old www server compatibility only!  Not SGML */
Packit f574b8
	SGML_CDATA,		/* Character data.  Recognize </ only.
Packit f574b8
				   (But we treat it just as SGML_LITTERAL.) */
Packit f574b8
	SGML_SCRIPT,		/* Like CDATA, but allow it to be a comment */
Packit f574b8
	SGML_RCDATA,		/* Replaceable character data. Should recognize </ and &ref;
Packit f574b8
				   (but we treat it like SGML_MIXED for old times' sake). */
Packit f574b8
	SGML_MIXED,		/* Elements and parsed character data.
Packit f574b8
				   Recognize all markup. */
Packit f574b8
	SGML_ELEMENT,		/* Any data found should be regarded as an error.
Packit f574b8
				   (But we treat it just like SGML_MIXED.) */
Packit f574b8
	SGML_PCDATA		/* Should contain no elements but &ref; is parsed.
Packit f574b8
				   (We treat it like SGML_CDATA wrt. contained tags
Packit f574b8
				   i.e. pass them on literally, i.e. like we should
Packit f574b8
				   treat SGML_RCDATA) (added by KW). */
Packit f574b8
    } SGMLContent;
Packit f574b8
Packit f574b8
    /*
     * Describes one attribute that a tag may carry.  The `type' member
     * only exists when USE_PRETTYSRC is defined, so the size and layout
     * of this struct depend on that build option.
     */
    typedef struct {
Packit f574b8
	const char *name;	/* The name of the attribute */
Packit f574b8
#ifdef USE_PRETTYSRC
Packit f574b8
	char type;		/* code of the type of the attribute. Code
Packit f574b8
				   values are in HTMLDTD.h */
Packit f574b8
#endif
Packit f574b8
    } attr;
Packit f574b8
Packit f574b8
    /* Pointer to read-only attr entries; the type of HTTag.attributes
       and of AttrType.list. */
    typedef const attr *AttrList;
Packit f574b8
Packit f574b8
    /*
     * Pairs a name with an attribute list.  HTTag.attr_types points to
     * entries of this type.
     * NOTE(review): `name' presumably labels a group of related
     * attributes -- confirm against the tables in HTMLDTD.
     */
    typedef struct {
Packit f574b8
	const char *name;	/* name associated with `list' */
Packit f574b8
	AttrList list;		/* the attr entries themselves */
Packit f574b8
    } AttrType;
Packit f574b8
Packit f574b8
    /*
     * Bit mask of element classes.  Every Tgc_* constant below occupies
     * a distinct single bit (0x00001 .. 0x80000), so classes can be
     * combined with bitwise OR.  HTTag uses this type for its tagclass,
     * contains, icontains, contained, icontained and canclose members.
     */
    typedef int TagClass;
Packit f574b8
Packit f574b8
    /* textflow */
Packit f574b8
#define Tgc_FONTlike	0x00001	/* S,STRIKE,I,B,TT,U,BIG,SMALL,STYLE,BLINK;BR,TAB */
Packit f574b8
#define Tgc_EMlike	0x00002	/* EM,STRONG,DFN,CODE,SAMP,KBD,VAR,CITE,Q,INS,DEL,SPAN,.. */
Packit f574b8
#define Tgc_MATHlike	0x00004	/* SUB,SUP,MATH,COMMENT */
Packit f574b8
#define Tgc_Alike	0x00008	/* A */
Packit f574b8
#define Tgc_formula	0x00010	/* not used until math is supported better... */
Packit f574b8
    /* used for special structures: forms, tables,... */
Packit f574b8
#define Tgc_TRlike	0x00020	/* TR and similar */
Packit f574b8
#define Tgc_SELECTlike	0x00040	/* SELECT,INPUT,TEXTAREA(,...) */
Packit f574b8
    /* structure */
Packit f574b8
#define Tgc_FORMlike	0x00080	/* FORM itself */
Packit f574b8
#define Tgc_Plike	0x00100	/* P,H1..H6,... structures containing text or
Packit f574b8
				   insertion but not other structures */
Packit f574b8
#define Tgc_DIVlike	0x00200	/* ADDRESS,FIG,BDO,NOTE,FN,DIV,CENTER;FIG
Packit f574b8
				   structures which can contain other structures */
Packit f574b8
#define Tgc_LIlike	0x00400	/* LH,LI,DT,DD;TH,TD structure-like, only valid
Packit f574b8
				   within certain other structures */
Packit f574b8
#define Tgc_ULlike	0x00800	/* UL,OL,DL,DIR,MENU;TABLE;XMP,LISTING
Packit f574b8
				   special in some way, cannot contain (parsed)
Packit f574b8
				   text directly */
Packit f574b8
    /* insertions */
Packit f574b8
#define Tgc_BRlike	0x01000	/* BR,IMG,TAB allowed in any text */
Packit f574b8
#define Tgc_APPLETlike	0x02000	/* APPLET,OBJECT,EMBED,SCRIPT;BUTTON */
Packit f574b8
#define Tgc_HRlike	0x04000	/* HR,MARQUEE can contain all kinds of things
Packit f574b8
				   and/or are not allowed (?) in running text */
Packit f574b8
#define Tgc_MAPlike	0x08000	/* MAP,AREA some specials that never contain
Packit f574b8
				   (directly or indirectly) other things than
Packit f574b8
				   special insertions */
Packit f574b8
#define Tgc_outer	0x10000	/* HTML,FRAMESET,FRAME,PLAINTEXT; */
Packit f574b8
#define Tgc_BODYlike	0x20000	/* BODY,BODYTEXT,NOFRAMES,TEXTFLOW; */
Packit f574b8
#define Tgc_HEADstuff	0x40000	/* HEAD,BASE,STYLE,TITLE; */
Packit f574b8
    /* special relations */
Packit f574b8
#define Tgc_same	0x80000	/* NOTE(review): meaning is not documented in this
				   header -- presumably "same class as this
				   element"; confirm where it is used */
Packit f574b8
Packit f574b8
/*
Packit f574b8
 * Groups for contains-data.
 *
 * Each group is the bitwise OR of several single-bit Tgc_* classes, so
 * a group can be used wherever a TagClass mask is expected.
 * Tgc_BLOCKlike is built on Tgc_LISTlike and therefore covers
 * Tgc_DIVlike, Tgc_LIlike and Tgc_ULlike.
Packit f574b8
 */
Packit f574b8
#define Tgc_INLINElike	(Tgc_Alike | Tgc_APPLETlike | Tgc_BRlike | Tgc_EMlike | Tgc_FONTlike | Tgc_SELECTlike)
Packit f574b8
#define Tgc_LISTlike	(Tgc_LIlike | Tgc_ULlike)
Packit f574b8
#define Tgc_BLOCKlike	(Tgc_DIVlike | Tgc_LISTlike)
Packit f574b8
Packit f574b8
/* Some more properties of tags (or rather, elements) and rules how
Packit f574b8
   to deal with them. - kw
   Each Tgf_* constant below is a distinct single bit (0x00001 .. 0x00040);
   the combination for one element is kept in the `flags' member of
   struct _tag. */
Packit f574b8
    typedef int TagFlags;
Packit f574b8
Packit f574b8
#define Tgf_endO	0x00001	/* end tag can be Omitted */
Packit f574b8
#define Tgf_startO	0x00002	/* start tag can be Omitted */
Packit f574b8
#define Tgf_mafse	0x00004	/* Make Attribute-Free Start-tag End instead
Packit f574b8
				   (if found invalid) */
Packit f574b8
#define Tgf_strict	0x00008	/* Ignore contained invalid elements,
Packit f574b8
				   don't pass them on; or other variant
Packit f574b8
				   handling for some content types */
Packit f574b8
#define Tgf_nreie	0x00010	/* Not Really Empty If Empty,
Packit f574b8
				   used by color style code */
Packit f574b8
#define Tgf_frecyc	0x00020	/* Pass element content on in a form that
Packit f574b8
				   allows recycling, i.e. don't translate to
Packit f574b8
				   output (display) character set yet (treat
Packit f574b8
				   content similar to attribute values) */
Packit f574b8
#define Tgf_nolyspcl	0x00040	/* Don't generate lynx special characters
Packit f574b8
				   for soft hyphen and various spaces (nbsp,
Packit f574b8
				   ensp,..) */
Packit f574b8
Packit f574b8
/*		A tag structure describes an SGML element.
Packit f574b8
 *		-----------------------------------------
Packit f574b8
 *
Packit f574b8
 *
Packit f574b8
 *	name		is the string which comes after the tag opener "<".
Packit f574b8
 *
Packit f574b8
 *	attributes	points to a zero-terminated array
Packit f574b8
 *			of attribute names.
 *			number_of_attributes holds the count of
 *			possible attributes.
Packit f574b8
 *
Packit f574b8
 *	contents	determines how the SGML engine parses the characters
Packit f574b8
 *			within the element (see SGMLContent).  For the literal
Packit f574b8
 *			models, tag openers are ignored except for that which
 *			opens a matching closing tag.
 *			(This comment used to call the member `litteral'; the
 *			struct below has no member of that name.)
Packit f574b8
 *
 *	The layout depends on the build: name_len is present only with
 *	USE_COLOR_STYLE and can_justify only with USE_JUSTIFY_ELTS, so any
 *	positional initializer must be written under the same options.
 *
Packit f574b8
 */
Packit f574b8
    typedef struct _tag HTTag;
Packit f574b8
    struct _tag {
Packit f574b8
	const char *name;	/* The name of the tag */
Packit f574b8
#ifdef USE_COLOR_STYLE
Packit f574b8
	unsigned name_len;	/* The length of the name */
Packit f574b8
#endif
Packit f574b8
#ifdef USE_JUSTIFY_ELTS
Packit f574b8
	BOOL can_justify;	/* justification allowed? */
Packit f574b8
#endif
Packit f574b8
	AttrList attributes;	/* The list of acceptable attributes */
Packit f574b8
	int number_of_attributes;	/* Number of possible attributes */
Packit f574b8
	const AttrType *attr_types;	/* AttrType entries for this tag;
					   NOTE(review): how the array is
					   terminated is not shown here */
Packit f574b8
	SGMLContent contents;	/* content model, see SGMLContent
				   (original note: End only on end tag @@) */
Packit f574b8
	TagClass tagclass;	/* Tgc_* bits; presumably the class(es) this
				   element itself belongs to -- confirm */
Packit f574b8
	TagClass contains;	/* which classes of elements this one can contain directly */
Packit f574b8
	TagClass icontains;	/* which classes of elements this one can contain indirectly */
Packit f574b8
	TagClass contained;	/* in which classes can this tag be contained ? */
Packit f574b8
	TagClass icontained;	/* in which classes can this tag be indirectly contained ? */
Packit f574b8
	TagClass canclose;	/* which classes of elements can this one close
Packit f574b8
				   if something looks wrong ? */
Packit f574b8
	TagFlags flags;		/* Tgf_* bits for this element */
Packit f574b8
    };
Packit f574b8
Packit f574b8
/*		DTD Information
Packit f574b8
 *		---------------
Packit f574b8
 *
Packit f574b8
 *  Not the whole DTD, but all this parser uses of it.
 *
 *  Both tables must be sorted in strcmp order by name; the comment at the
 *  top of this file lists binary search among the design points, which is
 *  presumably why the ordering is required.
 *  Note that the two counts have different types (int vs. size_t).
Packit f574b8
 */
Packit f574b8
    typedef struct {
Packit f574b8
	HTTag *tags;		/* Must be in strcmp order by name */
Packit f574b8
	int number_of_tags;	/* presumably the number of entries in `tags' */
Packit f574b8
	STRING2PTR entity_names;	/* Must be in strcmp order by name */
Packit f574b8
	size_t number_of_entities;	/* presumably the number of entries
					   in `entity_names' */
Packit f574b8
	/*  "entity_names" table probably unused,
Packit f574b8
	 *  see comments in HTMLDTD.c near the top
Packit f574b8
	 */
Packit f574b8
    } SGML_dtd;
Packit f574b8
Packit f574b8
/*	SGML context passed to parsers
 *
 *	Opaque handle: struct _HTSGMLContext is only named here, never
 *	defined in this header, so users of the header cannot look inside.
 *	Note that the typedef itself is a pointer type.
Packit f574b8
*/
Packit f574b8
    typedef struct _HTSGMLContext *HTSGMLContext;	/* Hidden */
Packit f574b8
Packit f574b8
/*__________________________________________________________________________
Packit f574b8
*/
Packit f574b8
Packit f574b8
/*
Packit f574b8
Packit f574b8
Structured Object definition
Packit f574b8
Packit f574b8
   A structured object is something which can reasonably be represented
Packit f574b8
   in SGML.  I'll rephrase that.  A structured object is an ordered
Packit f574b8
   tree-structured arrangement of data which is representable as text.
Packit f574b8
   The SGML parser outputs to a Structured object.  A Structured object
Packit f574b8
   can output its contents to another Structured Object.  It's a kind of
Packit f574b8
   typed stream.  The architecture is largely Dan Connolly's.  Elements and
Packit f574b8
   entities are passed to the sob by number, implying a knowledge of the
Packit f574b8
   DTD.	 Knowledge of the SGML syntax is not here, though.
Packit f574b8
Packit f574b8
   Superclass: HTStream
Packit f574b8
Packit f574b8
   The creation methods will vary on the type of Structured Object.
Packit f574b8
   Maybe the callerData is enough info to pass along.
Packit f574b8
Packit f574b8
 */
Packit f574b8
    /* Structured-object instance type.  Only declared here; the struct
       body is not part of this header. */
    typedef struct _HTStructured HTStructured;
Packit f574b8
Packit f574b8
    /*
     * Table of function pointers that a structured object provides.
     * The members up to put_block are marked below as the HTStreamClass
     * part; start_element, end_element and put_entity are the additions
     * for structured output.  Every function receives the object itself
     * as `me'.
     */
    typedef struct _HTStructuredClass {
Packit f574b8
Packit f574b8
	const char *name;	/* Just for diagnostics */
Packit f574b8
Packit f574b8
	/* NOTE(review): presumably releases the object -- confirm */
	void (*_free) (HTStructured * me);
Packit f574b8
Packit f574b8
	/* NOTE(review): presumably abandons processing because of error `e' */
	void (*_abort) (HTStructured * me, HTError e);
Packit f574b8
Packit f574b8
	/* Character data: one character (passed as int) ... */
	void (*put_character) (HTStructured * me, int ch);
Packit f574b8
Packit f574b8
	/* ... a string (presumably NUL-terminated, no length is passed) ... */
	void (*put_string) (HTStructured * me, const char *str);
Packit f574b8
Packit f574b8
	/* ... or a block of `len' chars starting at `str'. */
	void (*put_block) (HTStructured * me, const char *str, int len);
Packit f574b8
Packit f574b8
	/* HTStreamClass ends here */
Packit f574b8
Packit f574b8
	/*
	 * Elements are passed by number rather than by name.
	 * NOTE(review): element_number is presumably an index into
	 * SGML_dtd.tags, and attribute_present / attribute_value are
	 * presumably parallel arrays indexed by attribute number; the
	 * meaning of the int result and of `include' is not stated in
	 * this header -- confirm in the implementations.
	 */
	int (*start_element) (HTStructured * me, int element_number,
Packit f574b8
			      const BOOL *attribute_present,
Packit f574b8
			      STRING2PTR attribute_value,
Packit f574b8
			      int charset,
Packit f574b8
			      char **include);
Packit f574b8
Packit f574b8
	/* Counterpart of start_element for the end of an element. */
	int (*end_element) (HTStructured * me, int element_number,
Packit f574b8
			    char **include);
Packit f574b8
Packit f574b8
	/* Entities are passed by number as well. */
	int (*put_entity) (HTStructured * me, int entity_number);
Packit f574b8
Packit f574b8
    } HTStructuredClass;
Packit f574b8
Packit f574b8
/*
Packit f574b8
  Equivalents to the following functions possibly could be generalised
Packit f574b8
  into additional HTStructuredClass members.  For now they don't do
Packit f574b8
  anything target-specific. - kw

  Only the prototypes are given here; the definitions live elsewhere.
  NOTE(review): LYCheckForCSI and LYDoCSI take char ** arguments (url, csi),
  presumably so that they can store or replace a string for the caller --
  confirm ownership of those strings in the implementation.
Packit f574b8
  */
Packit f574b8
    extern BOOLEAN LYCheckForCSI(HTParentAnchor *anchor, char **url);
Packit f574b8
    extern void LYDoCSI(char *url, const char *comment, char **csi);
Packit f574b8
    extern BOOLEAN LYCommentHacks(HTParentAnchor *anchor, const char *comment);
Packit f574b8
Packit f574b8
/*
Packit f574b8
Packit f574b8
Find a Tag by Name
Packit f574b8
Packit f574b8
   Returns a pointer to the tag within the DTD.

   dtd     the DTD whose tags are searched (not modified: const)
   string  the tag name to look for

   NOTE(review): the result for an unknown name is not stated here
   (presumably NULL), nor is case sensitivity -- confirm in SGML.c.
Packit f574b8
Packit f574b8
 */
Packit f574b8
    extern HTTag *SGMLFindTag(const SGML_dtd * dtd,
Packit f574b8
			      const char *string);
Packit f574b8
Packit f574b8
/*
Packit f574b8
 * Return the current offset within the file that SGML is parsing
 *
 * Takes no parser argument, so the value is not tied to a particular
 * HTStream by the caller.
 * NOTE(review): the unit (bytes vs. characters) is not stated here.
Packit f574b8
 */
Packit f574b8
    extern int SGML_offset(void);
Packit f574b8
Packit f574b8
/*
Packit f574b8
Packit f574b8
Create an SGML parser
Packit f574b8
Packit f574b8
 */
Packit f574b8
/*
Packit f574b8
 * On entry,
Packit f574b8
 *	dtd		must point to a DTD structure as defined above
Packit f574b8
 *	anchor		parent anchor handed to the parser
 *			(NOTE(review): presumably the document being
 *			parsed -- confirm in SGML.c)
Packit f574b8
 *	target		structured object (HTStructured) handed to the parser
 *			(NOTE(review): presumably receives the parsed
 *			elements and text -- confirm in SGML.c)
Packit f574b8
 * On exit,
Packit f574b8
 *		The default tag starter has been processed.
 *		Returns a pointer to an HTStream.
 *
 * This comment formerly documented `callbacks' and `callData' parameters;
 * the prototype below has neither, so those entries were replaced.
Packit f574b8
 */
Packit f574b8
    extern HTStream *SGML_new(const SGML_dtd * dtd,
Packit f574b8
			      HTParentAnchor *anchor,
Packit f574b8
			      HTStructured * target);
Packit f574b8
Packit f574b8
    /* Read-only HTStreamClass object for the SGML parser, defined elsewhere.
       NOTE(review): presumably the class of the streams that SGML_new
       returns -- confirm in SGML.c. */
    extern const HTStreamClass SGMLParser;
Packit f574b8
Packit f574b8
#ifdef __cplusplus
Packit f574b8
}
Packit f574b8
#endif
Packit f574b8
#endif				/* SGML_H */