Blame WWW/Library/Implementation/HTMLDTD.c

Packit f574b8
/*
Packit f574b8
 * $LynxId: HTMLDTD.c,v 1.57 2010/09/25 00:30:56 tom Exp $
Packit f574b8
 *
Packit f574b8
 *		Our Static DTD for HTML
Packit f574b8
 *		-----------------------
Packit f574b8
 */
Packit f574b8
Packit f574b8
/* Implements:
Packit f574b8
*/
Packit f574b8
Packit f574b8
#include <HTUtils.h>
Packit f574b8
#include <HTMLDTD.h>
Packit f574b8
#include <LYLeaks.h>
Packit f574b8
#include <LYJustify.h>
Packit f574b8
Packit f574b8
/*
Packit f574b8
 * Character entities like &nbsp are now excluded from our DTD tables; they are
 * mapped to Unicode and handled directly by the chartrans code, in the same
 * way as numeric entities like &#123.  See src/chrtrans/entities.h for the
 * real mapping.
Packit f574b8
 */
Packit f574b8
Packit f574b8
/*	Entity Names
 *	------------
 *
 *	This table must be matched exactly with ALL the translation tables
 *		(this is an obsolete translation mechanism, probably unused,
 *		currently replaced with Unicode chartrans in most cases...)
 *
 *	The names are kept in ascending strcmp() order; keep new entries
 *	sorted the same way.
 */
static const char *entities[] =
{
    "AElig",			/* capital AE diphthong (ligature) */
    "Aacute",			/* capital A, acute accent */
    "Acirc",			/* capital A, circumflex accent */
    "Agrave",			/* capital A, grave accent */
    "Aring",			/* capital A, ring */
    "Atilde",			/* capital A, tilde */
    "Auml",			/* capital A, dieresis or umlaut mark */
    "Ccedil",			/* capital C, cedilla */
    "Dstrok",			/* capital Eth, Icelandic */
    "ETH",			/* capital Eth, Icelandic */
    "Eacute",			/* capital E, acute accent */
    "Ecirc",			/* capital E, circumflex accent */
    "Egrave",			/* capital E, grave accent */
    "Euml",			/* capital E, dieresis or umlaut mark */
    "Iacute",			/* capital I, acute accent */
    "Icirc",			/* capital I, circumflex accent */
    "Igrave",			/* capital I, grave accent */
    "Iuml",			/* capital I, dieresis or umlaut mark */
    "Ntilde",			/* capital N, tilde */
    "Oacute",			/* capital O, acute accent */
    "Ocirc",			/* capital O, circumflex accent */
    "Ograve",			/* capital O, grave accent */
    "Oslash",			/* capital O, slash */
    "Otilde",			/* capital O, tilde */
    "Ouml",			/* capital O, dieresis or umlaut mark */
    "THORN",			/* capital THORN, Icelandic */
    "Uacute",			/* capital U, acute accent */
    "Ucirc",			/* capital U, circumflex accent */
    "Ugrave",			/* capital U, grave accent */
    "Uuml",			/* capital U, dieresis or umlaut mark */
    "Yacute",			/* capital Y, acute accent */
    "aacute",			/* small a, acute accent */
    "acirc",			/* small a, circumflex accent */
    "acute",			/* spacing acute */
    "aelig",			/* small ae diphthong (ligature) */
    "agrave",			/* small a, grave accent */
    "amp",			/* ampersand */
    "aring",			/* small a, ring */
    "atilde",			/* small a, tilde */
    "auml",			/* small a, dieresis or umlaut mark */
    "brkbar",			/* broken vertical bar */
    "brvbar",			/* broken vertical bar */
    "ccedil",			/* small c, cedilla */
    "cedil",			/* spacing cedilla */
    "cent",			/* cent sign */
    "copy",			/* copyright sign */
    "curren",			/* currency sign */
    "deg",			/* degree sign */
    "die",			/* spacing dieresis */
    "divide",			/* division sign */
    "eacute",			/* small e, acute accent */
    "ecirc",			/* small e, circumflex accent */
    "egrave",			/* small e, grave accent */
    "emdash",			/* dash the width of emsp */
    "emsp",			/* em space - not collapsed */
    "endash",			/* dash the width of ensp */
    "ensp",			/* en space - not collapsed */
    "eth",			/* small eth, Icelandic */
    "euml",			/* small e, dieresis or umlaut mark */
    "frac12",			/* fraction 1/2 */
    "frac14",			/* fraction 1/4 */
    "frac34",			/* fraction 3/4 */
    "gt",			/* greater than */
    "hibar",			/* spacing macron */
    "iacute",			/* small i, acute accent */
    "icirc",			/* small i, circumflex accent */
    "iexcl",			/* inverted exclamation mark */
    "igrave",			/* small i, grave accent */
    "iquest",			/* inverted question mark */
    "iuml",			/* small i, dieresis or umlaut mark */
    "laquo",			/* angle quotation mark, left */
    "lt",			/* less than */
    "macr",			/* spacing macron */
    "mdash",			/* dash the width of emsp */
    "micro",			/* micro sign */
    "middot",			/* middle dot */
    "nbsp",			/* non breaking space */
    "ndash",			/* dash the width of ensp */
    "not",			/* negation sign */
    "ntilde",			/* small n, tilde */
    "oacute",			/* small o, acute accent */
    "ocirc",			/* small o, circumflex accent */
    "ograve",			/* small o, grave accent */
    "ordf",			/* feminine ordinal indicator */
    "ordm",			/* masculine ordinal indicator */
    "oslash",			/* small o, slash */
    "otilde",			/* small o, tilde */
    "ouml",			/* small o, dieresis or umlaut mark */
    "para",			/* paragraph sign */
    "plusmn",			/* plus-or-minus sign */
    "pound",			/* pound sign */
    "quot",			/* quote '"' */
    "raquo",			/* angle quotation mark, right */
    "reg",			/* circled R registered sign */
    "sect",			/* section sign */
    "shy",			/* soft hyphen */
    "sup1",			/* superscript 1 */
    "sup2",			/* superscript 2 */
    "sup3",			/* superscript 3 */
    "szlig",			/* small sharp s, German (sz ligature) */
    "thinsp",			/* thin space (not collapsed) */
    "thorn",			/* small thorn, Icelandic */
    "times",			/* multiplication sign */
    "trade",			/* trade mark sign (U+2122) */
    "uacute",			/* small u, acute accent */
    "ucirc",			/* small u, circumflex accent */
    "ugrave",			/* small u, grave accent */
    "uml",			/* spacing dieresis */
    "uuml",			/* small u, dieresis or umlaut mark */
    "yacute",			/* small y, acute accent */
    "yen",			/* yen sign */
    "yuml",			/* small y, dieresis or umlaut mark */
};
Packit f574b8
Packit f574b8
/*		Attribute Lists
Packit f574b8
 *		---------------
Packit f574b8
 *
Packit f574b8
 *	Lists must be in alphabetical order by attribute name
Packit f574b8
 *	The tag elements contain the number of attributes
Packit f574b8
 */
Packit f574b8
Packit f574b8
/* From Peter Flynn's intro to the HTML Pro DTD:
Packit f574b8
Packit f574b8
   %structure;
Packit f574b8
Packit f574b8
   DIV, CENTER, H1 to H6, P, UL, OL, DL, DIR, MENU, PRE, XMP, LISTING, BLOCKQUOTE, BQ,
Packit f574b8
   2	1	2     2   1  8	 8   8	 8    8     8	 8    8        4	   4
Packit f574b8
   MULTICOL,?NOBR, FORM, TABLE, ADDRESS, FIG, BDO, NOTE, and FN; plus?WBR, LI, and LH
Packit f574b8
   8 n	    ?1 n   8	 8	2	 2    2    2	     2	    ?1 nE  4	   4
Packit f574b8
Packit f574b8
   %insertions;
Packit f574b8
Packit f574b8
   Elements which usually contain special-purpose material, or no text material at all.
Packit f574b8
Packit f574b8
   BASEFONT, APPLET, OBJECT, EMBED, SCRIPT, MAP, MARQUEE, HR, ISINDEX, BGSOUND, TAB,?IMG,
Packit f574b8
   1 e?      2	     2 l     1 e    2 l     8	 4	  4 E 1? E     1 E	! E ?1 E
Packit f574b8
   IMAGE, BR, plus NOEMBED, SERVER, SPACER, AUDIOSCOPE, and SIDEBAR; ?area
Packit f574b8
   1 n	  1 E	     n	      n	      n	      n		      n	      8 E
Packit f574b8
Packit f574b8
   %text;
Packit f574b8
Packit f574b8
   Elements within the %structure; which directly contain running text.
Packit f574b8
Packit f574b8
   Descriptive or analytic markup: EM, STRONG, DFN, CODE, SAMP, KBD, VAR, CITE, Q, LANG, AU,
Packit f574b8
				   2   2       2    2	  2	2    2	  2	2  2 n	 2
Packit f574b8
   AUTHOR, PERSON, ACRONYM, ABBR, INS, DEL, and SPAN
Packit f574b8
   2	   2 n	   2	    2	    2	 2	  2
Packit f574b8
   Visual markup:S, STRIKE, I, B, TT, U,?NOBR,?WBR, BR, BIG, SMALL, FONT, STYLE, BLINK, TAB,
Packit f574b8
		 1  1	    1  1  1   1  ?1 n ?1nE? 1 E  1   1	    1	  1 l	 1	1 E?
Packit f574b8
   BLACKFACE, LIMITTEXT, NOSMARTQUOTES, and SHADOW
Packit f574b8
   1 n	      1 n	 1 n		    1 n
Packit f574b8
   Hypertext and graphics: A and?IMG
Packit f574b8
			   8	?8 E
Packit f574b8
   Mathematical: SUB, SUP, and MATH
Packit f574b8
		 4    4        4 l
Packit f574b8
   Documentary: COMMENT, ENTITY, ELEMENT, and ATTRIB
Packit f574b8
		4	 4 n	 4 n	      4 n
Packit f574b8
   %formula;
Packit f574b8
 */
Packit f574b8
Packit f574b8
/*	Elements
Packit f574b8
 *	--------
Packit f574b8
 *
Packit f574b8
 *	Must match definitions in HTMLDTD.html!
Packit f574b8
 *	Must be in alphabetical order.
Packit f574b8
 *
Packit f574b8
 *  The T_* extra info is listed here, even though most fields are not used
Packit f574b8
 *  in SGML.c if Old_DTD is set (with the exception of some Tgf_* flags).
Packit f574b8
 *  This simplifies comparison of the tags_table0[] table (otherwise unchanged
Packit f574b8
 *  from original Lynx treatment) with the tags_table1[] table below. - kw
Packit f574b8
 *
Packit f574b8
 *    Name*,	Attributes,	No. of attributes,     content,   extra info...
Packit f574b8
 */
Packit f574b8
Packit f574b8
#include <src0_HTMLDTD.h>
Packit f574b8
#include <src1_HTMLDTD.h>
Packit f574b8
Packit f574b8
/* Dummy space, will be filled with the contents of either tags_table1
Packit f574b8
   or tags_table0 on calling HTSwitchDTD - kw */
Packit f574b8
Packit f574b8
static HTTag tags[HTML_ALL_ELEMENTS];
Packit f574b8
Packit f574b8
const SGML_dtd HTML_dtd =
Packit f574b8
{
Packit f574b8
    tags,
Packit f574b8
    HTML_ELEMENTS,
Packit f574b8
    entities,			/* probably unused */
Packit f574b8
    TABLESIZE(entities),
Packit f574b8
};
Packit f574b8
Packit f574b8
/* This function fills the "tags" part of the HTML_dtd structure with
Packit f574b8
   what we want to use, either tags_table0 or tags_table1.  Note that it
Packit f574b8
   has to be called at least once before HTML_dtd is used, otherwise
Packit f574b8
   the HTML_dtd contents will be invalid!  This could be coded in a way
Packit f574b8
   that would make an initialisation call unnecessary, but my C knowledge
Packit f574b8
   is limited and I didn't want to list the whole tags_table1 table
Packit f574b8
   twice... - kw */
Packit f574b8
void HTSwitchDTD(int new_flag)
Packit f574b8
{
Packit f574b8
    if (TRACE)
Packit f574b8
	CTRACE((tfp,
Packit f574b8
		"HTMLDTD: Copying %s DTD element info of size %d, %d * %d\n",
Packit f574b8
		new_flag ? "strict" : "tagsoup",
Packit f574b8
		(int) (new_flag ? sizeof(tags_table1) : sizeof(tags_table0)),
Packit f574b8
		HTML_ALL_ELEMENTS,
Packit f574b8
		(int) sizeof(HTTag)));
Packit f574b8
    if (new_flag)
Packit f574b8
	MemCpy(tags, tags_table1, HTML_ALL_ELEMENTS * sizeof(HTTag));
Packit f574b8
    else
Packit f574b8
	MemCpy(tags, tags_table0, HTML_ALL_ELEMENTS * sizeof(HTTag));
Packit f574b8
}
Packit f574b8
Packit f574b8
HTTag HTTag_unrecognized =
Packit f574b8
Packit f574b8
{NULL_HTTag, NULL, 0, 0, SGML_EMPTY, T__UNREC_};
Packit f574b8
Packit f574b8
/*
Packit f574b8
 *	Utility Routine:  Useful for people building HTML objects.
Packit f574b8
 */
Packit f574b8
Packit f574b8
/*	Start anchor element
 *	--------------------
 *
 *	It is kinda convenient to have a particular routine for
 *	starting an anchor element, as everything else for HTML is
 *	simple anyway.
 */
Packit f574b8
struct _HTStructured {
Packit f574b8
    HTStructuredClass *isa;
Packit f574b8
    /* ... */
Packit f574b8
};
Packit f574b8
Packit f574b8
/*
 *	Start an A element on a structured object.
 *
 *	obj	object whose start_element method receives the tag
 *	name	NAME attribute value; skipped when NULL or empty
 *	href	HREF attribute value; skipped only when NULL (an empty
 *		string is still passed on)
 *
 *	start_element is called with HTML_A, a charset argument of -1
 *	and a final argument of 0.
 */
void HTStartAnchor(HTStructured * obj, const char *name,
		   const char *href)
{
    BOOL present[HTML_A_ATTRIBUTES];
    const char *value[HTML_A_ATTRIBUTES];
    int i;

    /* Clear both arrays so that absent attributes carry a NULL value
       instead of an indeterminate pointer. */
    for (i = 0; i < HTML_A_ATTRIBUTES; i++) {
	present[i] = NO;
	value[i] = NULL;
    }

    if (name && *name) {
	present[HTML_A_NAME] = YES;
	value[HTML_A_NAME] = name;
    }
    if (href) {
	present[HTML_A_HREF] = YES;
	value[HTML_A_HREF] = href;
    }

    (*obj->isa->start_element) (obj, HTML_A, present, value, -1, 0);
}
Packit f574b8
Packit f574b8
/*
 *	Start an A element with NAME, HREF and TYPE attributes.
 *
 *	obj		object whose start_element method receives the tag
 *	name		NAME attribute value; skipped when NULL or empty
 *	href		HREF attribute value; skipped when NULL or empty
 *	linktype	TYPE attribute value; skipped when NULL or empty
 *	tag_charset	handed to start_element unchanged
 *
 *	start_element is called with HTML_A and a final argument of 0.
 */
void HTStartAnchor5(HTStructured * obj, const char *name,
		    const char *href,
		    const char *linktype,
		    int tag_charset)
{
    BOOL present[HTML_A_ATTRIBUTES];
    const char *value[HTML_A_ATTRIBUTES];
    int i;

    /* Clear both arrays so that absent attributes carry a NULL value
       instead of an indeterminate pointer. */
    for (i = 0; i < HTML_A_ATTRIBUTES; i++) {
	present[i] = NO;
	value[i] = NULL;
    }

    if (name && *name) {
	present[HTML_A_NAME] = YES;
	value[HTML_A_NAME] = name;
    }
    if (href && *href) {
	present[HTML_A_HREF] = YES;
	value[HTML_A_HREF] = href;
    }
    if (linktype && *linktype) {
	present[HTML_A_TYPE] = YES;
	value[HTML_A_TYPE] = linktype;
    }

    (*obj->isa->start_element) (obj, HTML_A, present, value, tag_charset, 0);
}
Packit f574b8
Packit f574b8
/*
 *	Start an ISINDEX element on a structured object.
 *
 *	obj	object whose start_element method receives the tag
 *	prompt	PROMPT attribute value; skipped when NULL or empty
 *	href	HREF attribute value; skipped only when NULL (an empty
 *		string is still passed on)
 *
 *	start_element is called with HTML_ISINDEX, a charset argument
 *	of -1 and a final argument of 0.
 */
void HTStartIsIndex(HTStructured * obj, const char *prompt,
		    const char *href)
{
    BOOL present[HTML_ISINDEX_ATTRIBUTES];
    const char *value[HTML_ISINDEX_ATTRIBUTES];
    int i;

    /* Clear both arrays so that absent attributes carry a NULL value
       instead of an indeterminate pointer. */
    for (i = 0; i < HTML_ISINDEX_ATTRIBUTES; i++) {
	present[i] = NO;
	value[i] = NULL;
    }

    if (prompt && *prompt) {
	present[HTML_ISINDEX_PROMPT] = YES;
	value[HTML_ISINDEX_PROMPT] = prompt;
    }
    if (href) {
	present[HTML_ISINDEX_HREF] = YES;
	value[HTML_ISINDEX_HREF] = href;
    }

    (*obj->isa->start_element) (obj, HTML_ISINDEX, present, value, -1, 0);
}