|
Packit |
f574b8 |
/*
 * $LynxId: SGML.h,v 1.46 2012/02/10 18:32:26 tom Exp $
 * SGML parse and stream definition for libwww
 * SGML AND STRUCTURED STREAMS
 *
 * The SGML parser is a state machine.  It is called for every character
 * of the input stream.  The DTD data structure contains pointers
 * to functions which are called to implement the actual effect of the
 * text read.  When these functions are called, the attribute structures pointed to by the
 * DTD are valid, and the function is passed a pointer to the current tag structure, and an
 * "element stack" which represents the state of nesting within SGML elements.
 *
 * The following aspects are from Dan Connolly's suggestions:  Binary search,
 * Structured object scheme basically, SGML content enum type.
 *
 * (c) Copyright CERN 1991 - See Copyright.html
 *
 */
|
|
Packit |
f574b8 |
#ifndef SGML_H
|
|
Packit |
f574b8 |
#define SGML_H
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#include <HTStream.h>
|
|
Packit |
f574b8 |
#include <HTAnchor.h>
|
|
Packit |
f574b8 |
#include <LYJustify.h>
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifdef __cplusplus
|
|
Packit |
f574b8 |
extern "C" {
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
/*
 *
 * SGML content types
 *
 * Each enumerator names one way of treating the characters found inside
 * an element.  The values are assigned implicitly (SGML_EMPTY is 0 and
 * each following enumerator is one larger), so reordering the list
 * changes the numeric values.
 *
 */
typedef enum {
    SGML_EMPTY,         /* No content. */
    SGML_LITTERAL,      /* Literal character data.  Recognize exact close tag only.
                           Old www server compatibility only!  Not SGML */
    SGML_CDATA,         /* Character data.  Recognize </ only.
                           (But we treat it just as SGML_LITTERAL.) */
    SGML_SCRIPT,        /* Like CDATA, but allow it to be a comment */
    SGML_RCDATA,        /* Replaceable character data.  Should recognize </ and &ref;
                           (but we treat it like SGML_MIXED for old times' sake). */
    SGML_MIXED,         /* Elements and parsed character data.
                           Recognize all markup. */
    SGML_ELEMENT,       /* Any data found should be regarded as an error.
                           (But we treat it just like SGML_MIXED.) */
    SGML_PCDATA         /* Should contain no elements but &ref; is parsed.
                           (We treat it like SGML_CDATA wrt. contained tags
                           i.e. pass them on literally, i.e. like we should
                           treat SGML_RCDATA) (added by KW). */
} SGMLContent;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
 * Description of a single attribute.  The `type' member exists only when
 * the header is compiled with USE_PRETTYSRC defined.
 */
typedef struct {
    const char *name;           /* The name of the attribute */
#ifdef USE_PRETTYSRC
    char type;                  /* code of the type of the attribute.  Code
                                   values are in HTMLDTD.h */
#endif
} attr;

/* Pointer to read-only attribute descriptions. */
typedef const attr *AttrList;

/* Pairs a name with an attribute list. */
typedef struct {
    const char *name;
    AttrList list;
} AttrType;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
 * A TagClass value is a set of the Tgc_* bits defined below, combined
 * with bitwise OR (see the Tgc_*like groups at the end of this list).
 */
typedef int TagClass;

/* textflow */
#define Tgc_FONTlike    0x00001 /* S,STRIKE,I,B,TT,U,BIG,SMALL,STYLE,BLINK;BR,TAB */
#define Tgc_EMlike      0x00002 /* EM,STRONG,DFN,CODE,SAMP,KBD,VAR,CITE,Q,INS,DEL,SPAN,.. */
#define Tgc_MATHlike    0x00004 /* SUB,SUP,MATH,COMMENT */
#define Tgc_Alike       0x00008 /* A */
#define Tgc_formula     0x00010 /* not used until math is supported better... */
/* used for special structures: forms, tables,... */
#define Tgc_TRlike      0x00020 /* TR and similar */
#define Tgc_SELECTlike  0x00040 /* SELECT,INPUT,TEXTAREA(,...) */
/* structure */
#define Tgc_FORMlike    0x00080 /* FORM itself */
#define Tgc_Plike       0x00100 /* P,H1..H6,... structures containing text or
                                   insertion but not other structures */
#define Tgc_DIVlike     0x00200 /* ADDRESS,FIG,BDO,NOTE,FN,DIV,CENTER;FIG
                                   structures which can contain other structures */
#define Tgc_LIlike      0x00400 /* LH,LI,DT,DD;TH,TD structure-like, only valid
                                   within certain other structures */
#define Tgc_ULlike      0x00800 /* UL,OL,DL,DIR,MENU;TABLE;XMP,LISTING
                                   special in some way, cannot contain (parsed)
                                   text directly */
/* insertions */
#define Tgc_BRlike      0x01000 /* BR,IMG,TAB allowed in any text */
#define Tgc_APPLETlike  0x02000 /* APPLET,OBJECT,EMBED,SCRIPT;BUTTON */
#define Tgc_HRlike      0x04000 /* HR,MARQUEE can contain all kinds of things
                                   and/or are not allowed (?) in running text */
#define Tgc_MAPlike     0x08000 /* MAP,AREA some specials that never contain
                                   (directly or indirectly) other things than
                                   special insertions */
#define Tgc_outer       0x10000 /* HTML,FRAMESET,FRAME,PLAINTEXT; */
#define Tgc_BODYlike    0x20000 /* BODY,BODYTEXT,NOFRAMES,TEXTFLOW; */
#define Tgc_HEADstuff   0x40000 /* HEAD,BASE,STYLE,TITLE; */
/* special relations */
#define Tgc_same        0x80000

/*
 * Groups for contains-data.
 */
#define Tgc_INLINElike  (Tgc_Alike | Tgc_APPLETlike | Tgc_BRlike | Tgc_EMlike | Tgc_FONTlike | Tgc_SELECTlike)
#define Tgc_LISTlike    (Tgc_LIlike | Tgc_ULlike)
#define Tgc_BLOCKlike   (Tgc_DIVlike | Tgc_LISTlike)
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Some more properties of tags (or rather, elements) and rules how
   to deal with them. - kw
   A TagFlags value holds any combination of the Tgf_* bits below. */
typedef int TagFlags;

#define Tgf_endO        0x00001 /* end tag can be Omitted */
#define Tgf_startO      0x00002 /* start tag can be Omitted */
#define Tgf_mafse       0x00004 /* Make Attribute-Free Start-tag End instead
                                   (if found invalid) */
#define Tgf_strict      0x00008 /* Ignore contained invalid elements,
                                   don't pass them on; or other variant
                                   handling for some content types */
#define Tgf_nreie       0x00010 /* Not Really Empty If Empty,
                                   used by color style code */
#define Tgf_frecyc      0x00020 /* Pass element content on in a form that
                                   allows recycling, i.e. don't translate to
                                   output (display) character set yet (treat
                                   content similar to attribute values) */
#define Tgf_nolyspcl    0x00040 /* Don't generate lynx special characters
                                   for soft hyphen and various spaces (nbsp,
                                   ensp,..) */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* A tag structure describes an SGML element.
|
|
Packit |
f574b8 |
* -----------------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* name is the string which comes after the tag opener "<".
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* attributes points to a zero-terminated array
|
|
Packit |
f574b8 |
* of attribute names.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* litteral determines how the SGML engine parses the characters
|
|
Packit |
f574b8 |
* within the element. If set, tag openers are ignored
|
|
Packit |
f574b8 |
* except for that which opens a matching closing tag.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
typedef struct _tag HTTag;
|
|
Packit |
f574b8 |
struct _tag {
|
|
Packit |
f574b8 |
const char *name; /* The name of the tag */
|
|
Packit |
f574b8 |
#ifdef USE_COLOR_STYLE
|
|
Packit |
f574b8 |
unsigned name_len; /* The length of the name */
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
#ifdef USE_JUSTIFY_ELTS
|
|
Packit |
f574b8 |
BOOL can_justify; /* justification allowed? */
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
AttrList attributes; /* The list of acceptable attributes */
|
|
Packit |
f574b8 |
int number_of_attributes; /* Number of possible attributes */
|
|
Packit |
f574b8 |
const AttrType *attr_types;
|
|
Packit |
f574b8 |
SGMLContent contents; /* End only on end tag @@ */
|
|
Packit |
f574b8 |
TagClass tagclass;
|
|
Packit |
f574b8 |
TagClass contains; /* which classes of elements this one can contain directly */
|
|
Packit |
f574b8 |
TagClass icontains; /* which classes of elements this one can contain indirectly */
|
|
Packit |
f574b8 |
TagClass contained; /* in which classes can this tag be contained ? */
|
|
Packit |
f574b8 |
TagClass icontained; /* in which classes can this tag be indirectly contained ? */
|
|
Packit |
f574b8 |
TagClass canclose; /* which classes of elements can this one close
|
|
Packit |
f574b8 |
if something looks wrong ? */
|
|
Packit |
f574b8 |
TagFlags flags;
|
|
Packit |
f574b8 |
};
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* DTD Information
|
|
Packit |
f574b8 |
* ---------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* Not the whole DTD, but all this parser uses of it.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
typedef struct {
|
|
Packit |
f574b8 |
HTTag *tags; /* Must be in strcmp order by name */
|
|
Packit |
f574b8 |
int number_of_tags;
|
|
Packit |
f574b8 |
STRING2PTR entity_names; /* Must be in strcmp order by name */
|
|
Packit |
f574b8 |
size_t number_of_entities;
|
|
Packit |
f574b8 |
/* "entity_names" table probably unused,
|
|
Packit |
f574b8 |
* see comments in HTMLDTD.c near the top
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
} SGML_dtd;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*      SGML context passed to parsers
 *
 *  Only a pointer to the incomplete type struct _HTSGMLContext is
 *  declared here; the structure itself is not defined in this header.
 */
typedef struct _HTSGMLContext *HTSGMLContext;   /* Hidden */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*__________________________________________________________________________
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
Structured Object definition
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
A structured object is something which can reasonably be represented
|
|
Packit |
f574b8 |
in SGML. I'll rephrase that. A structured object is an ordered
|
|
Packit |
f574b8 |
tree-structured arrangement of data which is representable as text.
|
|
Packit |
f574b8 |
The SGML parser outputs to a Structured object. A Structured object
|
|
Packit |
f574b8 |
can output its contents to another Structured Object. It's a kind of
|
|
Packit |
f574b8 |
typed stream. The architecture is largely Dan Conolly's. Elements and
|
|
Packit |
f574b8 |
entities are passed to the sob by number, implying a knowledge of the
|
|
Packit |
f574b8 |
DTD. Knowledge of the SGML syntax is not here, though.
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
Superclass: HTStream
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
The creation methods will vary on the type of Structured Object.
|
|
Packit |
f574b8 |
Maybe the callerData is enough info to pass along.
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
typedef struct _HTStructured HTStructured;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
typedef struct _HTStructuredClass {
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
const char *name; /* Just for diagnostics */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
void (*_free) (HTStructured * me);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
void (*_abort) (HTStructured * me, HTError e);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
void (*put_character) (HTStructured * me, int ch);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
void (*put_string) (HTStructured * me, const char *str);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
void (*put_block) (HTStructured * me, const char *str, int len);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* HTStreamClass ends here */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
int (*start_element) (HTStructured * me, int element_number,
|
|
Packit |
f574b8 |
const BOOL *attribute_present,
|
|
Packit |
f574b8 |
STRING2PTR attribute_value,
|
|
Packit |
f574b8 |
int charset,
|
|
Packit |
f574b8 |
char **include);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
int (*end_element) (HTStructured * me, int element_number,
|
|
Packit |
f574b8 |
char **include);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
int (*put_entity) (HTStructured * me, int entity_number);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
} HTStructuredClass;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
Equivalents to the following functions possibly could be generalised
|
|
Packit |
f574b8 |
into additional HTStructuredClass members. For now they don't do
|
|
Packit |
f574b8 |
anything target-specific. - kw
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern BOOLEAN LYCheckForCSI(HTParentAnchor *anchor, char **url);
|
|
Packit |
f574b8 |
extern void LYDoCSI(char *url, const char *comment, char **csi);
|
|
Packit |
f574b8 |
extern BOOLEAN LYCommentHacks(HTParentAnchor *anchor, const char *comment);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
Find a Tag by Name
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
Returns a pointer to the tag within the DTD.
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern HTTag *SGMLFindTag(const SGML_dtd * dtd,
|
|
Packit |
f574b8 |
const char *string);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
 * Return the current offset within the file that SGML is parsing
 */
extern int SGML_offset(void);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
Create an SGML parser
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* On entry,
|
|
Packit |
f574b8 |
* dtd must point to a DTD structure as defined above
|
|
Packit |
f574b8 |
* callbacks must point to user routines.
|
|
Packit |
f574b8 |
* callData is returned in callbacks transparently.
|
|
Packit |
f574b8 |
* On exit,
|
|
Packit |
f574b8 |
* The default tag starter has been processed.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern HTStream *SGML_new(const SGML_dtd * dtd,
|
|
Packit |
f574b8 |
HTParentAnchor *anchor,
|
|
Packit |
f574b8 |
HTStructured * target);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
extern const HTStreamClass SGMLParser;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifdef __cplusplus
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
#endif /* SGML_H */
|