|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* $LynxId: HTParse.h,v 1.22 2016/11/23 21:06:50 tom Exp $
|
|
Packit |
f574b8 |
* HTParse: URL parsing in the WWW Library
|
|
Packit |
f574b8 |
* HTPARSE
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This module of the WWW library contains code to parse URLs and various
|
|
Packit |
f574b8 |
* related things.
|
|
Packit |
f574b8 |
* Implemented by HTParse.c .
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
#ifndef HTPARSE_H
|
|
Packit |
f574b8 |
#define HTPARSE_H
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifndef HTUTILS_H
|
|
Packit |
f574b8 |
#include <HTUtils.h>
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifdef __cplusplus
|
|
Packit |
f574b8 |
extern "C" {
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
 * True if c is an RFC 3986 "unreserved" character: an alphanumeric or one
 * of "-._~".  NUL is rejected explicitly, because strchr() treats the
 * string terminator as part of the string and would otherwise report a
 * match for c == 0 (the GEN_DELIMS/SUB_DELIMS macros guard the same way).
 * Note: the argument is evaluated more than once.
 */
#define RFC_3986_UNRESERVED(c) ((c) != 0 && (isalnum(UCH(c)) || strchr("-._~", UCH(c)) != 0))
|
|
Packit |
f574b8 |
/*
 * True (1) if c is one of the RFC 3986 "gen-delims" characters listed in
 * the string below, false (0) otherwise; NUL never matches.
 * Note: the argument is evaluated twice.
 */
#define RFC_3986_GEN_DELIMS(c) (((c) == 0) ? 0 : (strchr(":/?#[]@", UCH(c)) != NULL))
|
|
Packit |
f574b8 |
/*
 * True (1) if c is one of the RFC 3986 "sub-delims" characters listed in
 * the string below, false (0) otherwise; NUL never matches.
 * Note: the argument is evaluated twice.
 */
#define RFC_3986_SUB_DELIMS(c) (((c) == 0) ? 0 : (strchr("!$&'()*+,;=", UCH(c)) != NULL))
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* The following are flag bits which may be ORed together to form
|
|
Packit |
f574b8 |
* a number to give the 'wanted' argument to HTParse.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/* HTParse() 'wanted' flag bit, value 16 (presumably the access-scheme part; confirm in HTParse.c). */
#define PARSE_ACCESS 0x10
|
|
Packit |
f574b8 |
/* HTParse() 'wanted' flag bit, value 8 (presumably the host part; confirm in HTParse.c). */
#define PARSE_HOST 0x08
|
|
Packit |
f574b8 |
/* HTParse() 'wanted' flag bit, value 4: the path together with a possible query component. */
#define PARSE_PATH 0x04
|
|
Packit |
f574b8 |
/* HTParse() 'wanted' flag bit, value 2 (presumably the anchor/fragment part; confirm in HTParse.c). */
#define PARSE_ANCHOR 0x02
|
|
Packit |
f574b8 |
/* HTParse() 'wanted' flag bit, value 1 (presumably keeps the separating punctuation; confirm in HTParse.c). */
#define PARSE_PUNCTUATION 0x01
|
|
Packit |
f574b8 |
/*
 * Value 31 == 16|8|4|2|1, i.e. the five single-bit PARSE_* flags defined
 * just above ORed together.  It does not include the PARSE_STRICTPATH (32)
 * or PARSE_QUERY (64) bits.
 */
#define PARSE_ALL 0x1F
|
|
Packit |
f574b8 |
/* PARSE_ALL with the PARSE_ANCHOR bit cleared (31 without 2, i.e. 29). */
#define PARSE_ALL_WITHOUT_ANCHOR (PARSE_ALL & ~PARSE_ANCHOR)
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Additional flag bits for more details on components already
|
|
Packit |
f574b8 |
* covered by the above. The PARSE_PATH above doesn't really
|
|
Packit |
f574b8 |
* strictly refer to the path component in the sense of the URI
|
|
Packit |
f574b8 |
* specs only, but rather to that combined with a possible query
|
|
Packit |
f574b8 |
* component. - kw
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/* Additional 'wanted' flag bit, value 32 (presumably the path alone, without the query; confirm in HTParse.c). */
#define PARSE_STRICTPATH 0x20
|
|
Packit |
f574b8 |
/* Additional 'wanted' flag bit, value 64 (presumably the query component alone; confirm in HTParse.c). */
#define PARSE_QUERY 0x40
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* The following are valid mask values. The terms are the BNF names
|
|
Packit |
f574b8 |
* in the URL document.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/* Mask value 1 (presumably for the 'mask' argument of HTEscape()/HTEscapeSP(); confirm in HTParse.c). */
#define URL_XALPHAS UCH(0x01)
|
|
Packit |
f574b8 |
/* Mask value 2 (presumably for the 'mask' argument of HTEscape()/HTEscapeSP(); confirm in HTParse.c). */
#define URL_XPALPHAS UCH(0x02)
|
|
Packit |
f574b8 |
/* Mask value 4 (presumably for the 'mask' argument of HTEscape()/HTEscapeSP(); confirm in HTParse.c). */
#define URL_PATH UCH(0x04)
|
|
Packit |
f574b8 |
/* Strip white space off a string. HTStrip()
|
|
Packit |
f574b8 |
* -------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On exit,
|
|
Packit |
f574b8 |
* Return value points to first non-white character, or to 0 if none.
|
|
Packit |
f574b8 |
* All trailing white space is OVERWRITTEN with zero.
|
|
Packit |
f574b8 |
*/ extern char *HTStrip(char *s);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Parse a port number
|
|
Packit |
f574b8 |
* -------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On entry,
|
|
Packit |
f574b8 |
* host A pointer to hostname possibly followed by port
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On exit,
|
|
Packit |
f574b8 |
* returns A pointer to the ":" before the port
|
|
Packit |
f574b8 |
* sets the port number via the pointer portp.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern char *HTParsePort(char *host, int *portp);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Parse a Name relative to another name. HTParse()
|
|
Packit |
f574b8 |
* --------------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This returns those parts of a name which are given (and requested)
|
|
Packit |
f574b8 |
* substituting bits from the related name where necessary.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On entry,
|
|
Packit |
f574b8 |
* aName A filename given
|
|
Packit |
f574b8 |
* relatedName A name relative to which aName is to be parsed
|
|
Packit |
f574b8 |
* wanted A mask for the bits which are wanted.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On exit,
|
|
Packit |
f574b8 |
* returns A pointer to a malloc'd string which MUST BE FREED
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern char *HTParse(const char *aName,
|
|
Packit |
f574b8 |
const char *relatedName,
|
|
Packit |
f574b8 |
int wanted);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* HTParseAnchor(), fast HTParse() specialization
|
|
Packit |
f574b8 |
* ----------------------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On exit,
|
|
Packit |
f574b8 |
* returns A pointer within input string (probably to its end '\0')
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern const char *HTParseAnchor(const char *aName);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Simplify a filename. HTSimplify()
|
|
Packit |
f574b8 |
* --------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* A unix-style file is allowed to contain the sequence xxx/../ which may
|
|
Packit |
f574b8 |
* be replaced by "" , and the sequence "/./" which may be replaced by "/".
|
|
Packit |
f574b8 |
* Simplification helps us recognize duplicate filenames.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* Thus, /etc/junk/../fred becomes /etc/fred
|
|
Packit |
f574b8 |
* /etc/junk/./fred becomes /etc/junk/fred
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* but we should NOT change
|
|
Packit |
f574b8 |
* http://fred.xxx.edu/../..
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* or ../../albert.html
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern void HTSimplify(char *filename);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Make Relative Name. HTRelative()
|
|
Packit |
f574b8 |
* -------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This function creates and returns a string which gives an expression of
|
|
Packit |
f574b8 |
* one address as related to another. Where there is no relation, an absolute
|
|
Packit |
f574b8 |
* address is returned.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On entry,
|
|
Packit |
f574b8 |
* Both names must be absolute, fully qualified names of nodes
|
|
Packit |
f574b8 |
* (no anchor bits)
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On exit,
|
|
Packit |
f574b8 |
* The return result points to a newly allocated name which, if
|
|
Packit |
f574b8 |
* parsed by HTParse relative to relatedName, will yield aName.
|
|
Packit |
f574b8 |
* The caller is responsible for freeing the resulting name later.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern char *HTRelative(const char *aName,
|
|
Packit |
f574b8 |
const char *relatedName);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Escape undesirable characters using % HTEscape()
|
|
Packit |
f574b8 |
* -------------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This function takes a pointer to a string in which
|
|
Packit |
f574b8 |
* some characters that may be unacceptable are unescaped.
|
|
Packit |
f574b8 |
* It returns a string which has these characters
|
|
Packit |
f574b8 |
* represented by a '%' character followed by two hex digits.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* Unlike HTUnEscape(), this routine returns a malloc'd string.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern char *HTEscape(const char *str,
|
|
Packit |
f574b8 |
unsigned mask);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Escape unsafe characters using % HTEscapeUnsafe()
|
|
Packit |
f574b8 |
* --------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This function takes a pointer to a string in which
|
|
Packit |
f574b8 |
* some characters that may be unsafe are unescaped.
|
|
Packit |
f574b8 |
* It returns a string which has these characters
|
|
Packit |
f574b8 |
* represented by a '%' character followed by two hex digits.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* Unlike HTUnEscape(), this routine returns a malloc'd string.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern char *HTEscapeUnsafe(const char *str);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Escape undesirable characters using % but space to +. HTEscapeSP()
|
|
Packit |
f574b8 |
* -----------------------------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This function takes a pointer to a string in which
|
|
Packit |
f574b8 |
* some characters that may be unacceptable are unescaped.
|
|
Packit |
f574b8 |
* It returns a string which has these characters
|
|
Packit |
f574b8 |
* represented by a '%' character followed by two hex digits,
|
|
Packit |
f574b8 |
* except that spaces are converted to '+' instead of %20.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* Unlike HTUnEscape(), this routine returns a malloc'd string.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern char *HTEscapeSP(const char *str,
|
|
Packit |
f574b8 |
unsigned mask);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Decode %xx escaped characters. HTUnEscape()
|
|
Packit |
f574b8 |
* ------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This function takes a pointer to a string in which some
|
|
Packit |
f574b8 |
* characters may have been encoded in %xy form, where xy is
|
|
Packit |
f574b8 |
* the ASCII hex code for character 16x+y.
|
|
Packit |
f574b8 |
* The string is converted in place, as it will never grow.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern char *HTUnEscape(char *str);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Decode some %xx escaped characters. HTUnEscapeSome()
|
|
Packit |
f574b8 |
* ----------------------------------- Klaus Weide
|
|
Packit |
f574b8 |
* (kweide@tezcat.com)
|
|
Packit |
f574b8 |
* This function takes a pointer to a string in which some
|
|
Packit |
f574b8 |
* characters may have been encoded in %xy form, where xy is
|
|
Packit |
f574b8 |
* the ASCII hex code for character 16x+y, and a pointer to
|
|
Packit |
f574b8 |
* a second string containing one or more characters which
|
|
Packit |
f574b8 |
* should be unescaped if escaped in the first string.
|
|
Packit |
f574b8 |
* The first string is converted in place, as it will never grow.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern char *HTUnEscapeSome(char *str,
|
|
Packit |
f574b8 |
const char *do_trans);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Turn a string which is not an RFC 822 token into a quoted-string. - KW
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
extern void HTMake822Word(char **str,
|
|
Packit |
f574b8 |
int quoted);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifdef __cplusplus
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
#endif /* HTPARSE_H */
|