/*
 * entities.c
 *
 * Copyright (c) Chris Putnam 2003-2018
 *
 * Source code released under the GPL version 2
 *
 */
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include "entities.h"

/* HTML 4.0 entities */

/*
 * One named character entity.
 *
 * html    - the entity exactly as it appears in text, including the
 *           leading '&' and the trailing ';' (NUL-terminated)
 * unicode - the code point the entity decodes to
 */
typedef struct entities {
	char html[20];
	unsigned int unicode;
} entities;

/*
 * Lookup table used by decode_html_entity().
 *
 * Names are case-sensitive in HTML: several entries differ only in case
 * (e.g. "&Alpha;"/"&alpha;", "&dagger;"/"&Dagger;", "&larr;"/"&lArr;").
 */
entities html_entities[] = {
	/* Special Entities */
	{ "&quot;",     34 },  /* quotation mark */
	{ "&amp;",      38 },  /* ampersand */
	{ "&apos;",     39 },  /* apostrophe (note not defined in HTML) */
	{ "&lpar;",     40 },  /* left parenthesis */
	{ "&rpar;",     41 },  /* right parenthesis */
	{ "&hyphen;",   45 },  /* hyphen */
	{ "&lt;",       60 },  /* less-than sign */
	{ "&gt;",       62 },  /* greater-than sign */
	{ "&quest;",    63 },  /* question mark */
	{ "&OElig;",   338 },  /* Latin cap ligature OE */
	{ "&oelig;",   339 },  /* Latin small ligature OE */
	{ "&Scaron;",  352 },  /* Latin cap S with caron */
	{ "&scaron;",  353 },  /* Latin small s with caron */
	{ "&Yuml;",    376 },  /* Latin cap Y with diaeresis */
	{ "&circ;",    710 },  /* modifier letter circumflex */
	{ "&tilde;",   732 },  /* small tilde */
	{ "&ensp;",   8194 },  /* en space */
	{ "&emsp;",   8195 },  /* em space */
	{ "&thinsp;", 8201 },  /* thin space */
	{ "&zwnj;",   8204 },  /* zero width non-joiner */
	{ "&zwj;",    8205 },  /* zero width joiner */
	{ "&lrm;",    8206 },  /* left-to-right mark */
	{ "&rlm;",    8207 },  /* right-to-left mark */
	{ "&ndash;",  8211 },  /* en dash */
	{ "&mdash;",  8212 },  /* em dash */
	{ "&lsquo;",  8216 },  /* left single quotation mark */
	{ "&rsquo;",  8217 },  /* right single quot. mark */
	{ "&sbquo;",  8218 },  /* single low-9 quot. mark */
	{ "&ldquo;",  8220 },  /* left double quot. mark */
	{ "&rdquo;",  8221 },  /* right double quot. mark */
	{ "&bdquo;",  8222 },  /* double low-9 quot. mark */
	{ "&dagger;", 8224 },  /* dagger */
	{ "&Dagger;", 8225 },  /* double dagger */
	{ "&permil;", 8240 },  /* per mille sign */
	{ "&lsaquo;", 8249 },  /* sin. left angle quot mark */
	{ "&rsaquo;", 8250 },  /* sin. right angle quot mark */
	{ "&euro;",   8364 },  /* euro sign */
	/* Symbols and Greek characters */
	{ "&fnof;",     402 }, /* small f with hook = function */
	{ "&Alpha;",    913 }, /* capital alpha */
	{ "&Beta;",     914 }, /* capital beta */
	{ "&Gamma;",    915 }, /* capital gamma */
	{ "&Delta;",    916 }, /* capital delta */
	{ "&Epsilon;",  917 }, /* capital epsilon */
	{ "&Zeta;",     918 }, /* capital zeta */
	{ "&Eta;",      919 }, /* capital eta */
	{ "&Theta;",    920 }, /* capital theta */
	{ "&Iota;",     921 }, /* capital iota */
	{ "&Kappa;",    922 }, /* capital kappa */
	{ "&Lambda;",   923 }, /* capital lambda */
	{ "&Mu;",       924 }, /* capital mu */
	{ "&Nu;",       925 }, /* capital nu */
	{ "&Xi;",       926 }, /* capital xi */
	{ "&Omicron;",  927 }, /* capital omicron */
	{ "&Pi;",       928 }, /* capital pi */
	{ "&Rho;",      929 }, /* capital rho */
	{ "&Sigma;",    931 }, /* capital sigma */
	{ "&Tau;",      932 }, /* capital tau */
	{ "&Upsilon;",  933 }, /* capital upsilon */
	{ "&Phi;",      934 }, /* capital phi */
	{ "&Chi;",      935 }, /* capital chi */
	{ "&Psi;",      936 }, /* capital psi */
	{ "&Omega;",    937 }, /* capital omega */
	{ "&alpha;",    945 }, /* small alpha */
	{ "&beta;",     946 }, /* small beta */
	{ "&gamma;",    947 }, /* small gamma */
	{ "&delta;",    948 }, /* small delta */
	{ "&epsilon;",  949 }, /* small epsilon */
	{ "&zeta;",     950 }, /* small zeta */
	{ "&eta;",      951 }, /* small eta */
	{ "&theta;",    952 }, /* small theta */
	{ "&iota;",     953 }, /* small iota */
	{ "&kappa;",    954 }, /* small kappa */
	{ "&lambda;",   955 }, /* small lambda */
	{ "&mu;",       956 }, /* small mu */
	{ "&nu;",       957 }, /* small nu */
	{ "&xi;",       958 }, /* small xi */
	{ "&omicron;",  959 }, /* small omicron */
	{ "&pi;",       960 }, /* small pi */
	{ "&rho;",      961 }, /* small rho */
	{ "&sigmaf;",   962 }, /* small final sigma */
	{ "&sigma;",    963 }, /* small sigma */
	{ "&tau;",      964 }, /* small tau */
	{ "&upsilon;",  965 }, /* small upsilon */
	{ "&phi;",      966 }, /* small phi */
	{ "&chi;",      967 }, /* small chi */
	{ "&psi;",      968 }, /* small psi */
	{ "&omega;",    969 }, /* small omega */
	{ "&thetasym;", 977 }, /* small theta symbol */
	{ "&upsih;",    978 }, /* upsilon with hook symbol */
	{ "&piv;",      982 }, /* pi symbol */
	{ "&bull;",    8226 }, /* bullet = small blk circle */
	{ "&hellip;",  8230 }, /* horizontal ellipsis */
	{ "&prime;",   8242 }, /* prime = minutes = feet */
	{ "&Prime;",   8243 }, /* double prime */
	{ "&oline;",   8254 }, /* overline */
	{ "&frasl;",   8260 }, /* fraction slash */
	{ "&weierp;",  8472 }, /* Weierstrass p = power set */
	{ "&image;",   8465 }, /* imaginary part-black cap I */
	{ "&real;",    8476 }, /* real part-black cap R */
	{ "&trade;",   8482 }, /* trademark sign */
	{ "&alefsym;", 8501 }, /* alef symbol */
	{ "&larr;",    8592 }, /* left arrow */
	{ "&uarr;",    8593 }, /* up arrow */
	{ "&rarr;",    8594 }, /* right arrow */
	{ "&darr;",    8595 }, /* down arrow */
	{ "&harr;",    8596 }, /* left/right arrow */
	{ "&crarr;",   8629 }, /* down arrow with corner left */
	{ "&lArr;",    8656 }, /* left double arrow */
	{ "&uArr;",    8657 }, /* up double arrow */
	{ "&rArr;",    8658 }, /* right double arrow */
	{ "&dArr;",    8659 }, /* down double arrow */
	{ "&hArr;",    8660 }, /* left/right double arrow */
	{ "&forall;",  8704 }, /* for all */
	{ "&part;",    8706 }, /* partial differential */
	{ "&exist;",   8707 }, /* there exists */
	{ "&empty;",   8709 }, /* empty set */
	{ "&nabla;",   8711 }, /* nabla=backwards difference */
	{ "&isin;",    8712 }, /* element of */
	{ "&notin;",   8713 }, /* not an element of */
	{ "&ni;",      8715 }, /* contains as member */
	{ "&prod;",    8719 }, /* n-ary product */
	{ "&sum;",     8721 }, /* n-ary summation */
	{ "&minus;",   8722 }, /* minus sign */
	{ "&lowast;",  8727 }, /* asterisk operator */
	{ "&radic;",   8730 }, /* square root */
	{ "&prop;",    8733 }, /* proportional to */
	{ "&infin;",   8734 }, /* infinity */
	{ "&ang;",     8736 }, /* angle */
	{ "&and;",     8743 }, /* logical and */
	{ "&or;",      8744 }, /* logical or */
	{ "&cap;",     8745 }, /* intersection */
	{ "&cup;",     8746 }, /* union */
	{ "&int;",     8747 }, /* integral */
	{ "&there4;",  8756 }, /* therefore */
	{ "&sim;",     8764 }, /* tilde operator */
	{ "&cong;",    8773 }, /* approximately equal to */
	{ "&asymp;",   8776 }, /* asymptotic to */
	{ "&ne;",      8800 }, /* not equal to */
	{ "&equiv;",   8801 }, /* identical to */
	{ "&le;",      8804 }, /* less-than or equal to */
	{ "&ge;",      8805 }, /* greater-than or equal to */
	{ "&sub;",     8834 }, /* subset of */
	{ "&sup;",     8835 }, /* superset of */
	{ "&nsub;",    8836 }, /* not a subset of */
	{ "&sube;",    8838 }, /* subset of or equal to */
	{ "&supe;",    8839 }, /* superset of or equal to */
	{ "&oplus;",   8853 }, /* circled plus = direct sum */
	{ "&otimes;",  8855 }, /* circled times = vec prod */
	{ "&perp;",    8869 }, /* perpendicular */
	{ "&sdot;",    8901 }, /* dot operator */
	{ "&lceil;",   8968 }, /* left ceiling */
	{ "&rceil;",   8969 }, /* right ceiling */
	{ "&lfloor;",  8970 }, /* left floor */
	{ "&rfloor;",  8971 }, /* right floor */
	{ "&lang;",    9001 }, /* left angle bracket */
	{ "&rang;",    9002 }, /* right angle bracket */
	{ "&loz;",     9674 }, /* lozenge */
	{ "&spades;",  9824 }, /* spades */
	{ "&clubs;",   9827 }, /* clubs */
	{ "&hearts;",  9829 }, /* hearts */
	{ "&diams;",   9830 }, /* diamonds */
	/* Latin-1 */
	{ "&nbsp;",    32 },   /* non-breaking space (decoded to a plain space, not 160) */
	{ "&iexcl;",  161 },   /* inverted exclamation mark */
	{ "&cent;",   162 },   /* cent sign */
	{ "&pound;",  163 },   /* pound sign */
	{ "&curren;", 164 },   /* currency sign */
	{ "&yen;",    165 },   /* yen sign */
	{ "&brvbar;", 166 },   /* broken vertical bar */
	{ "&sect;",   167 },   /* section sign */
	{ "&uml;",    168 },   /* diaeresis - spacing diaeresis */
	{ "&copy;",   169 },   /* copyright sign */
	{ "&ordf;",   170 },   /* feminine ordinal indicator */
	{ "&laquo;",  171 },   /* left-pointing guillemet */
	{ "&not;",    172 },   /* not sign */
	{ "&shy;",    173 },   /* soft (discretionary) hyphen */
	{ "&reg;",    174 },   /* registered sign */
	{ "&macr;",   175 },   /* macron = overline */
	{ "&deg;",    176 },   /* degree sign */
	{ "&plusmn;", 177 },   /* plus-minus sign */
	{ "&sup2;",   178 },   /* superscript two */
	{ "&sup3;",   179 },   /* superscript three */
	{ "&acute;",  180 },   /* acute accent = spacing acute */
	{ "&micro;",  181 },   /* micro sign */
	{ "&para;",   182 },   /* pilcrow (paragraph) sign */
	{ "&middot;", 183 },   /* middle dot (georgian comma) */
	{ "&cedil;",  184 },   /* cedilla = spacing cedilla */
	{ "&sup1;",   185 },   /* superscript one */
	{ "&ordm;",   186 },   /* masculine ordinal indicator */
	{ "&raquo;",  187 },   /* right pointing guillemet */
	{ "&frac14;", 188 },   /* 1/4 */
	{ "&frac12;", 189 },   /* 1/2 */
	{ "&frac34;", 190 },   /* 3/4 */
	{ "&iquest;", 191 },   /* inverted question mark */
	{ "&Agrave;", 192 },   /* cap A with grave */
	{ "&Aacute;", 193 },   /* cap A with acute */
	{ "&Acirc;",  194 },   /* cap A with circumflex */
	{ "&Atilde;", 195 },   /* cap A with tilde */
	{ "&Auml;",   196 },   /* cap A with diaeresis */
	{ "&Aring;",  197 },   /* cap A with ring */
	{ "&AElig;",  198 },   /* cap AE ligature */
	{ "&Ccedil;", 199 },   /* cap C with cedilla */
	{ "&Egrave;", 200 },   /* cap E with grave */
	{ "&Eacute;", 201 },   /* cap E with acute */
	{ "&Ecirc;",  202 },   /* cap E with circumflex */
	{ "&Euml;",   203 },   /* cap E with diaeresis */
	{ "&Igrave;", 204 },   /* cap I with grave */
	{ "&Iacute;", 205 },   /* cap I with acute */
	{ "&Icirc;",  206 },   /* cap I with circumflex */
	{ "&Iuml;",   207 },   /* cap I with diaeresis */
	{ "&ETH;",    208 },   /* cap letter ETH */
	{ "&Ntilde;", 209 },   /* cap N with tilde */
	{ "&Ograve;", 210 },   /* cap O with grave */
	{ "&Oacute;", 211 },   /* cap O with acute */
	{ "&Ocirc;",  212 },   /* cap O with circumflex */
	{ "&Otilde;", 213 },   /* cap O with tilde */
	{ "&Ouml;",   214 },   /* cap O with diaeresis */
	{ "&times;",  215 },   /* multiplication sign */
	{ "&Oslash;", 216 },   /* cap O with stroke */
	{ "&Ugrave;", 217 },   /* cap U with grave */
	{ "&Uacute;", 218 },   /* cap U with acute */
	{ "&Ucirc;",  219 },   /* cap U with circumflex */
	{ "&Uuml;",   220 },   /* cap U with diaeresis */
	{ "&Yacute;", 221 },   /* cap Y with acute */
	{ "&THORN;",  222 },   /* cap letter THORN */
	{ "&szlig;",  223 },   /* small sharp s = ess-zed */
	{ "&agrave;", 224 },   /* small a with grave */
	{ "&aacute;", 225 },   /* small a with acute */
	{ "&acirc;",  226 },   /* small a with circumflex */
	{ "&atilde;", 227 },   /* small a with tilde */
	{ "&auml;",   228 },   /* small a with diaeresis */
	{ "&aring;",  229 },   /* small a with ring */
	{ "&aelig;",  230 },   /* small ligature ae */
	{ "&ccedil;", 231 },   /* small c with cedilla */
	{ "&egrave;", 232 },   /* small e with grave */
	{ "&eacute;", 233 },   /* small e with acute */
	{ "&ecirc;",  234 },   /* small e with circumflex */
	{ "&euml;",   235 },   /* small e with diaeresis */
	{ "&igrave;", 236 },   /* small i with grave */
	{ "&iacute;", 237 },   /* small i with acute */
	{ "&icirc;",  238 },   /* small i with circumflex */
	{ "&iuml;",   239 },   /* small i with diaeresis */
	{ "&eth;",    240 },   /* latin small letter eth */
	{ "&ntilde;", 241 },   /* small n with tilde */
	{ "&ograve;", 242 },   /* small o with grave */
	{ "&oacute;", 243 },   /* small o with acute */
	{ "&ocirc;",  244 },   /* small o with circumflex */
	{ "&otilde;", 245 },   /* small o with tilde */
	{ "&ouml;",   246 },   /* small o with diaeresis */
	{ "&divide;", 247 },   /* division sign */
	{ "&oslash;", 248 },   /* small o with slash */
	{ "&ugrave;", 249 },   /* small u with grave */
	{ "&uacute;", 250 },   /* small u with acute */
	{ "&ucirc;",  251 },   /* small u with circumflex */
	{ "&uuml;",   252 },   /* small u with diaeresis */
	{ "&yacute;", 253 },   /* small y with acute */
	{ "&thorn;",  254 },   /* latin small letter thorn */
	{ "&yuml;",   255 },   /* small y with diaeresis */
};

static unsigned int
|
|
Packit |
89ede9 |
decode_html_entity( char *s, unsigned int *pi, int *err )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int nhtml_entities = sizeof( html_entities ) / sizeof( entities );
|
|
Packit |
89ede9 |
char *e;
|
|
Packit |
89ede9 |
int i, n=-1, len;
|
|
Packit |
89ede9 |
for ( i=0; i
|
|
Packit |
89ede9 |
e = &(html_entities[i].html[0]);
|
|
Packit |
89ede9 |
len = strlen( e );
|
|
Packit |
89ede9 |
if ( !strncasecmp( &(s[*pi]), e, len ) ) {
|
|
Packit |
89ede9 |
n = i;
|
|
Packit |
89ede9 |
*pi += len;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( n==-1 ) {
|
|
Packit |
89ede9 |
*err = 1;
|
|
Packit |
89ede9 |
return '&';
|
|
Packit |
89ede9 |
} else {
|
|
Packit |
89ede9 |
*err = 0;
|
|
Packit |
89ede9 |
return html_entities[n].unicode;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*
 * decode decimal entity
 *
 * extract a decimal entity from &#NNNN;
 * s[*pi] points to the '&' character
 *
 * On success *pi is moved past the terminating ';' and the decoded value
 * is returned; *err is left untouched (the caller clears it beforehand).
 *
 * *err is set to 1 and *pi is left unchanged when
 *   - there are no digits at all ("&#;"),
 *   - the digits are not followed by ';', or
 *   - the value exceeds 0x10FFFF, the largest Unicode code point
 *     (this also keeps the accumulator from wrapping around).
 */
static unsigned int
decode_decimal_entity( char *s, unsigned int *pi, int *err )
{
	const unsigned int max_code_point = 0x10FFFF;
	unsigned int c = 0, d;
	int i = *pi, j = 2;     /* j = 2 skips the leading "&#" */
	int ndigits = 0, overflow = 0;

	while ( isdigit( (unsigned char)s[i+j] ) ) {
		d = (unsigned int)( s[i+j] - '0' );
		if ( !overflow ) {
			/* c <= 0x10FFFF here, so 10*c+d cannot wrap */
			c = 10 * c + d;
			if ( c > max_code_point ) overflow = 1;
		}
		ndigits++;
		j++;
	}

	if ( s[i+j]!=';' || ndigits==0 || overflow ) *err = 1;
	else *pi = i+j+1;

	return c;
}

/*
 * decode hex entity
 *
 * extract a hex entity from &#xNNNN;
 * s[*pi] points to the '&' character
 *
 * On success *pi is moved past the terminating ';' and the decoded value
 * is returned; *err is left untouched (the caller clears it beforehand).
 *
 * *err is set to 1 and *pi is left unchanged when
 *   - there are no hex digits at all ("&#x;"),
 *   - the digits are not followed by ';', or
 *   - the value exceeds 0x10FFFF, the largest Unicode code point
 *     (this also keeps the accumulator from wrapping around).
 */
static unsigned int
decode_hex_entity( char *s, unsigned int *pi, int *err )
{
	const unsigned int max_code_point = 0x10FFFF;
	unsigned int c = 0, d;
	int i = *pi, j = 3;     /* j = 3 skips the leading "&#x" */
	int ndigits = 0, overflow = 0;

	while ( isxdigit( (unsigned char)s[i+j] ) ) {
		if ( isdigit( (unsigned char)s[i+j] ) )
			d = (unsigned int)( s[i+j] - '0' );
		else
			d = (unsigned int)( toupper( (unsigned char)s[i+j] ) - 'A' + 10 );
		if ( !overflow ) {
			/* c <= 0x10FFFF here, so 16*c+d cannot wrap */
			c = 16 * c + d;
			if ( c > max_code_point ) overflow = 1;
		}
		ndigits++;
		j++;
	}

	if ( s[i+j]!=';' || ndigits==0 || overflow ) *err = 1;
	else *pi = i+j+1;

	return c;
}

/*
 * decode numeric entity
 *
 * extract a numeric entity from &#NNN; or &#xNNNN;
 * s[*pi] points to the '&' character
 *
 * In XML, the "x" in hexadecimal entries should be lowercase,
 * but we'll be generous and accept "X" as well.
 *
 * On success *err is 0, *pi has been moved past the ';' and the code
 * point is returned.
 *
 * On failure *err is 1 and '&' is returned with *pi still pointing at
 * the '&'.  *pi is deliberately NOT advanced here: decode_entity()
 * already steps past the '&' when *err is set, and advancing in both
 * places skipped the '#' that follows a malformed "&#".
 */
static unsigned int
decode_numeric_entity( char *s, unsigned int *pi, int *err )
{
	unsigned int c;
	int is_hex = ( s[*pi+2]=='x' || s[*pi+2]=='X' );

	*err = 0;
	if ( is_hex ) c = decode_hex_entity( s, pi, err );
	else c = decode_decimal_entity( s, pi, err );

	if ( *err ) c = '&';

	return c;
}

/*
 * decode entity
 *
 * extract entity from &mmmm;
 *
 * where &mmmm; is one of
 * - &#nnnn; is code point in decimal form
 * - &#xhhhh; is code point in hexadecimal form (note "x" is lowercase in XML)
 * - &mmmm; corresponds to a pre-defined XML entity, e.g. &quot; for quotations
 *
 * s       - text being scanned
 * pi      - in: index of the character to decode (expected to be '&');
 *           out: index at which the caller should resume scanning
 * unicode - set to 1 when the named-entity table was consulted,
 *           0 otherwise
 * err     - set to 0 when an entity was decoded, 1 otherwise
 *
 * Returns the decoded value.  When s[*pi] is not '&', that character is
 * returned (as an unsigned char value); when it is '&' but no entity
 * could be decoded, '&' is returned.  Whenever *err is set, *pi is
 * advanced by one so that the caller keeps making progress.
 */
unsigned int
decode_entity( char *s, unsigned int *pi, int *unicode, int *err )
{
	unsigned int c = '&';

	*unicode = 0;

	if ( s[*pi]!='&' ) {
		*err = 1;  /* need to start with ampersand */
		/* go through unsigned char so bytes >= 0x80 are not sign-extended */
		c = (unsigned char) s[*pi];
	} else if ( s[*pi+1]=='#' ) {
		c = decode_numeric_entity( s, pi, err );
	} else {
		c = decode_html_entity( s, pi, err );
		*unicode = 1;
	}

	if ( *err ) *pi = *pi + 1;

	return c;
}