|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* $LynxId: HTParse.c,v 1.78 2016/11/24 15:29:50 tom Exp $
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* Parse HyperText Document Address HTParse.c
|
|
Packit |
f574b8 |
* ================================
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#include <HTUtils.h>
|
|
Packit |
f574b8 |
#include <HTParse.h>
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#include <LYUtils.h>
|
|
Packit |
f574b8 |
#include <LYLeaks.h>
|
|
Packit |
f574b8 |
#include <LYStrings.h>
|
|
Packit |
f574b8 |
#include <LYCharUtils.h>
|
|
Packit |
f574b8 |
#include <LYGlobalDefs.h>
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifdef HAVE_ALLOCA_H
|
|
Packit |
f574b8 |
#include <alloca.h>
|
|
Packit |
f574b8 |
#else
|
|
Packit |
f574b8 |
#ifdef __MINGW32__
|
|
Packit |
f574b8 |
#include <malloc.h>
|
|
Packit |
f574b8 |
#endif /* __MINGW32__ */
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifdef USE_IDNA
|
|
Packit |
f574b8 |
#include <idna.h>
|
|
Packit |
f574b8 |
#include <idn-free.h>
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#define HEX_ESCAPE '%'
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
 * The pieces of a URL as located by scan().  Each member is either NULL
 * or points at a zero-terminated substring of the buffer that was scanned;
 * nothing here is separately allocated.
 */
struct struct_parts {
    char *access;		/* scheme: text before the first ':' */
    char *host;			/* text following a leading "//" */
    char *absolute;		/* path that followed a '/' */
    char *relative;		/* path that did not start with '/' */
    char *search;		/* treated normally as part of path */
    char *anchor;		/* fragment: text after the first '#' */
};
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#if 0 /* for debugging */
|
|
Packit |
f574b8 |
static void show_parts(const char *name, struct struct_parts *parts, int line)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
if (TRACE) {
|
|
Packit |
f574b8 |
CTRACE((tfp, "struct_parts(%s) %s@%d\n", name, __FILE__, line));
|
|
Packit |
f574b8 |
CTRACE((tfp, " access '%s'\n", NONNULL(parts->access)));
|
|
Packit |
f574b8 |
CTRACE((tfp, " host '%s'\n", NONNULL(parts->host)));
|
|
Packit |
f574b8 |
CTRACE((tfp, " absolute '%s'\n", NONNULL(parts->absolute)));
|
|
Packit |
f574b8 |
CTRACE((tfp, " relative '%s'\n", NONNULL(parts->relative)));
|
|
Packit |
f574b8 |
CTRACE((tfp, " search '%s'\n", NONNULL(parts->search)));
|
|
Packit |
f574b8 |
CTRACE((tfp, " anchor '%s'\n", NONNULL(parts->anchor)));
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
#define SHOW_PARTS(name) show_parts(#name, &name, __LINE__)
|
|
Packit |
f574b8 |
#else
|
|
Packit |
f574b8 |
#define SHOW_PARTS(name) /* nothing */
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*	Strip white space off a string.				HTStrip()
 *	-------------------------------
 *
 * On entry,
 *	s	points to a writable, zero-terminated string.
 *
 * On exit,
 *	Return value points to the first non-white character of s, or to
 *	the terminating zero if the string holds nothing but white space.
 *	All trailing white space is OVERWRITTEN with zero.
 *
 * Only blank, tab and newline are treated as white space.
 */
char *HTStrip(char *s)
{
#define SPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n'))
    char *end = s + strlen(s);	/* the terminating zero */

    /*
     * Zap trailing blanks, walking back from the terminator.  Looking at
     * end[-1] only while end > s means no pointer before the start of the
     * buffer is ever formed, even for an empty or all-blank string.
     */
    while (end > s && SPACE(end[-1]))
	*--end = '\0';

    while (SPACE(*s))
	s++;			/* Strip leading blanks */
    return s;
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Scan a filename for its constituents. scan()
|
|
Packit |
f574b8 |
* -------------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On entry,
|
|
Packit |
f574b8 |
* name points to a document name which may be incomplete.
|
|
Packit |
f574b8 |
* On exit,
|
|
Packit |
f574b8 |
* absolute or relative may be nonzero (but not both).
|
|
Packit |
f574b8 |
* host, anchor and access may be nonzero if they were specified.
|
|
Packit |
f574b8 |
* Any which are nonzero point to zero terminated strings.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
static void scan(char *name,
|
|
Packit |
f574b8 |
struct struct_parts *parts)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
char *after_access;
|
|
Packit |
f574b8 |
char *p;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
parts->access = NULL;
|
|
Packit |
f574b8 |
parts->host = NULL;
|
|
Packit |
f574b8 |
parts->absolute = NULL;
|
|
Packit |
f574b8 |
parts->relative = NULL;
|
|
Packit |
f574b8 |
parts->search = NULL; /* normally not used - kw */
|
|
Packit |
f574b8 |
parts->anchor = NULL;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Scan left-to-right for a scheme (access).
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
after_access = name;
|
|
Packit |
f574b8 |
for (p = name; *p; p++) {
|
|
Packit |
f574b8 |
if (*p == ':') {
|
|
Packit |
f574b8 |
*p = '\0';
|
|
Packit |
f574b8 |
parts->access = name; /* Access name has been specified */
|
|
Packit |
f574b8 |
after_access = (p + 1);
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (*p == '/' || *p == '#' || *p == ';' || *p == '?')
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Scan left-to-right for a fragment (anchor).
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
for (p = after_access; *p; p++) {
|
|
Packit |
f574b8 |
if (*p == '#') {
|
|
Packit |
f574b8 |
parts->anchor = (p + 1);
|
|
Packit |
f574b8 |
*p = '\0'; /* terminate the rest */
|
|
Packit |
f574b8 |
break; /* leave things after first # alone - kw */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Scan left-to-right for a host or absolute path.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
p = after_access;
|
|
Packit |
f574b8 |
if (*p == '/') {
|
|
Packit |
f574b8 |
if (p[1] == '/') {
|
|
Packit |
f574b8 |
parts->host = (p + 2); /* host has been specified */
|
|
Packit |
f574b8 |
*p = '\0'; /* Terminate access */
|
|
Packit |
f574b8 |
p = StrChr(parts->host, '/'); /* look for end of host name if any */
|
|
Packit |
f574b8 |
if (p != NULL) {
|
|
Packit |
f574b8 |
*p = '\0'; /* Terminate host */
|
|
Packit |
f574b8 |
parts->absolute = (p + 1); /* Root has been found */
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
p = StrChr(parts->host, '?');
|
|
Packit |
f574b8 |
if (p != NULL) {
|
|
Packit |
f574b8 |
*p = '\0'; /* Terminate host */
|
|
Packit |
f574b8 |
parts->search = (p + 1);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
parts->absolute = (p + 1); /* Root found but no host */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
parts->relative = (*after_access) ?
|
|
Packit |
f574b8 |
after_access : NULL; /* NULL for "" */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Check schemes that commonly have unescaped hashes.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if (parts->access && parts->anchor &&
|
|
Packit |
f574b8 |
/* optimize */ StrChr("lnsdLNSD", *parts->access) != NULL) {
|
|
Packit |
f574b8 |
if ((!parts->host && strcasecomp(parts->access, "lynxcgi")) ||
|
|
Packit |
f574b8 |
!strcasecomp(parts->access, "nntp") ||
|
|
Packit |
f574b8 |
!strcasecomp(parts->access, "snews") ||
|
|
Packit |
f574b8 |
!strcasecomp(parts->access, "news") ||
|
|
Packit |
f574b8 |
!strcasecomp(parts->access, "data")) {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Access specified but no host and not a lynxcgi URL, so the
|
|
Packit |
f574b8 |
* anchor may not really be one, e.g., news:j462#36487@foo.bar, or
|
|
Packit |
f574b8 |
* it's an nntp or snews URL, or news URL with a host. Restore the
|
|
Packit |
f574b8 |
* '#' in the address.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/* but only if we have found a path component of which this will
|
|
Packit |
f574b8 |
* become part. - kw */
|
|
Packit |
f574b8 |
if (parts->relative || parts->absolute) {
|
|
Packit |
f574b8 |
*(parts->anchor - 1) = '#';
|
|
Packit |
f574b8 |
parts->anchor = NULL;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
} /*scan */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#if defined(HAVE_ALLOCA) && !defined(LY_FIND_LEAKS)
|
|
Packit |
f574b8 |
#define LYalloca(x) alloca(x)
|
|
Packit |
f574b8 |
#define LYalloca_free(x) {}
|
|
Packit |
f574b8 |
#else
|
|
Packit |
f574b8 |
#define LYalloca(x) malloc(x)
|
|
Packit |
f574b8 |
#define LYalloca_free(x) free(x)
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
 * Like StrChr(), except that when the character does not occur the result
 * is a pointer to the string's terminating zero rather than a null pointer.
 */
static char *strchr_or_end(char *string, int ch)
{
    char *found = StrChr(string, ch);

    return (found != 0) ? found : (string + strlen(string));
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Given a host specification that may end with a port number, e.g.,
|
|
Packit |
f574b8 |
* foobar:123
|
|
Packit |
f574b8 |
* point to the ':' which begins the ":port" to make it simple to handle the
|
|
Packit |
f574b8 |
* substring.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* If no port is found (or a syntax error), return null.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
char *HTParsePort(char *host, int *portp)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
int brackets = 0;
|
|
Packit |
f574b8 |
char *result = NULL;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
*portp = 0;
|
|
Packit |
f574b8 |
if (host != NULL) {
|
|
Packit |
f574b8 |
while (*host != '\0' && result == 0) {
|
|
Packit |
f574b8 |
switch (*host++) {
|
|
Packit |
f574b8 |
case ':':
|
|
Packit |
f574b8 |
if (brackets == 0 && isdigit(UCH(*host))) {
|
|
Packit |
f574b8 |
char *next = NULL;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
*portp = (int) strtol(host, &next, 10);
|
|
Packit |
f574b8 |
if (next != 0 && next != host && *next == '\0') {
|
|
Packit |
f574b8 |
result = (host - 1);
|
|
Packit |
f574b8 |
CTRACE((tfp, "HTParsePort %d\n", *portp));
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case '[': /* for ipv6 */
|
|
Packit |
f574b8 |
++brackets;
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case ']': /* for ipv6 */
|
|
Packit |
f574b8 |
--brackets;
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
return result;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#ifdef USE_IDNA
|
|
Packit |
f574b8 |
/*
 * Return the value (0..15) of a single hexadecimal digit in either case,
 * or -1 if the character is not a hexadecimal digit.
 */
static int hex_decode(int ch)
{
    if (ch >= '0' && ch <= '9')
	return ch - '0';
    if (ch >= 'a' && ch <= 'f')
	return 10 + (ch - 'a');
    if (ch >= 'A' && ch <= 'F')
	return 10 + (ch - 'A');
    return -1;
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Convert in-place the given hostname to IDNA form. That requires up to 64
|
|
Packit |
f574b8 |
* characters, and we've allowed for that, with MIN_PARSE.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
static void convert_to_idna(char *host)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
size_t length = strlen(host);
|
|
Packit |
f574b8 |
char *endhost = host + length;
|
|
Packit |
f574b8 |
char *buffer = malloc(length + 1);
|
|
Packit |
f574b8 |
char *params = malloc(length + 1);
|
|
Packit |
f574b8 |
char *output = NULL;
|
|
Packit |
f574b8 |
char *src, *dst;
|
|
Packit |
f574b8 |
int code;
|
|
Packit |
f574b8 |
int hi, lo;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (buffer != NULL && params != NULL) {
|
|
Packit |
f574b8 |
code = TRUE;
|
|
Packit |
f574b8 |
*params = '\0';
|
|
Packit |
f574b8 |
for (dst = buffer, src = host; src < endhost; ++dst) {
|
|
Packit |
f574b8 |
int ch = *src++;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (RFC_3986_GEN_DELIMS(ch)) {
|
|
Packit |
f574b8 |
strcpy(params, src - 1);
|
|
Packit |
f574b8 |
*dst = '\0';
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
} else if (ch == HEX_ESCAPE) {
|
|
Packit |
f574b8 |
if ((src + 1) < endhost
|
|
Packit |
f574b8 |
&& (hi = hex_decode(src[0])) >= 0
|
|
Packit |
f574b8 |
&& (lo = hex_decode(src[1])) >= 0) {
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
*dst = (char) ((hi << 4) | lo);
|
|
Packit |
f574b8 |
src += 2;
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
CTRACE((tfp, "convert_to_idna: `%s' is malformed\n", host));
|
|
Packit |
f574b8 |
code = FALSE;
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
*dst = (char) ch;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (code) {
|
|
Packit |
f574b8 |
*dst = '\0';
|
|
Packit |
f574b8 |
code = idna_to_ascii_8z(buffer, &output, IDNA_USE_STD3_ASCII_RULES);
|
|
Packit |
f574b8 |
if (code == IDNA_SUCCESS) {
|
|
Packit |
f574b8 |
strcpy(host, output);
|
|
Packit |
f574b8 |
strcat(host, params);
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
CTRACE((tfp, "convert_to_idna: `%s': %s\n",
|
|
Packit |
f574b8 |
buffer,
|
|
Packit |
f574b8 |
idna_strerror((Idna_rc) code)));
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (output)
|
|
Packit |
f574b8 |
idn_free(output);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
free(buffer);
|
|
Packit |
f574b8 |
free(params);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
#define MIN_PARSE 80
|
|
Packit |
f574b8 |
#else
|
|
Packit |
f574b8 |
#define MIN_PARSE 8
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Parse a Name relative to another name. HTParse()
|
|
Packit |
f574b8 |
* --------------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This returns those parts of a name which are given (and requested)
|
|
Packit |
f574b8 |
* substituting bits from the related name where necessary.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On entry,
|
|
Packit |
f574b8 |
* aName A filename given
|
|
Packit |
f574b8 |
* relatedName A name relative to which aName is to be parsed
|
|
Packit |
f574b8 |
* wanted A mask for the bits which are wanted.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On exit,
|
|
Packit |
f574b8 |
* returns A pointer to a malloc'd string which MUST BE FREED
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
char *HTParse(const char *aName,
|
|
Packit |
f574b8 |
const char *relatedName,
|
|
Packit |
f574b8 |
int wanted)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
char *result = NULL;
|
|
Packit |
f574b8 |
char *tail = NULL; /* a pointer to the end of the 'result' string */
|
|
Packit |
f574b8 |
char *return_value = NULL;
|
|
Packit |
f574b8 |
size_t len, len1, len2;
|
|
Packit |
f574b8 |
size_t need;
|
|
Packit |
f574b8 |
char *name = NULL;
|
|
Packit |
f574b8 |
char *rel = NULL;
|
|
Packit |
f574b8 |
char *p, *q;
|
|
Packit |
f574b8 |
char *acc_method;
|
|
Packit |
f574b8 |
struct struct_parts given, related;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
CTRACE((tfp, "HTParse: aName:`%s'\n", aName));
|
|
Packit |
f574b8 |
CTRACE((tfp, " relatedName:`%s'\n", relatedName));
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (wanted & (PARSE_STRICTPATH | PARSE_QUERY)) { /* if detail wanted... */
|
|
Packit |
f574b8 |
if ((wanted & (PARSE_STRICTPATH | PARSE_QUERY))
|
|
Packit |
f574b8 |
== (PARSE_STRICTPATH | PARSE_QUERY)) /* if strictpath AND query */
|
|
Packit |
f574b8 |
wanted |= PARSE_PATH; /* then treat as if PARSE_PATH wanted */
|
|
Packit |
f574b8 |
if (wanted & PARSE_PATH) /* if PARSE_PATH wanted */
|
|
Packit |
f574b8 |
wanted &= ~(PARSE_STRICTPATH | PARSE_QUERY); /* ignore details */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
/* *INDENT-OFF* */
|
|
Packit |
f574b8 |
CTRACE((tfp, " want:%s%s%s%s%s%s%s\n",
|
|
Packit |
f574b8 |
wanted & PARSE_PUNCTUATION ? " punc" : "",
|
|
Packit |
f574b8 |
wanted & PARSE_ANCHOR ? " anchor" : "",
|
|
Packit |
f574b8 |
wanted & PARSE_PATH ? " path" : "",
|
|
Packit |
f574b8 |
wanted & PARSE_HOST ? " host" : "",
|
|
Packit |
f574b8 |
wanted & PARSE_ACCESS ? " access" : "",
|
|
Packit |
f574b8 |
wanted & PARSE_STRICTPATH ? " PATH" : "",
|
|
Packit |
f574b8 |
wanted & PARSE_QUERY ? " QUERY" : ""));
|
|
Packit |
f574b8 |
/* *INDENT-ON* */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Allocate the temporary string. Optimized.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
len1 = strlen(aName) + 1;
|
|
Packit |
f574b8 |
len2 = strlen(relatedName) + 1;
|
|
Packit |
f574b8 |
len = len1 + len2 + MIN_PARSE; /* Lots of space: more than enough */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
need = (len * 2 + len1 + len2);
|
|
Packit |
f574b8 |
if (need > (size_t) max_uri_size ||
|
|
Packit |
f574b8 |
(int) need < (int) len1 ||
|
|
Packit |
f574b8 |
(int) need < (int) len2)
|
|
Packit |
f574b8 |
return StrAllocCopy(return_value, "");
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
result = tail = (char *) LYalloca(need);
|
|
Packit |
f574b8 |
if (result == NULL) {
|
|
Packit |
f574b8 |
outofmem(__FILE__, "HTParse");
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
*result = '\0';
|
|
Packit |
f574b8 |
name = result + len;
|
|
Packit |
f574b8 |
rel = name + len1;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Make working copy of the input string to cut up.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
MemCpy(name, aName, len1);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Cut up the string into URL fields.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
scan(name, &given);
|
|
Packit |
f574b8 |
SHOW_PARTS(given);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Now related string.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if ((given.access && given.host && given.absolute) || !*relatedName) {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Inherit nothing!
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
related.access = NULL;
|
|
Packit |
f574b8 |
related.host = NULL;
|
|
Packit |
f574b8 |
related.absolute = NULL;
|
|
Packit |
f574b8 |
related.relative = NULL;
|
|
Packit |
f574b8 |
related.search = NULL;
|
|
Packit |
f574b8 |
related.anchor = NULL;
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
MemCpy(rel, relatedName, len2);
|
|
Packit |
f574b8 |
scan(rel, &related);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
SHOW_PARTS(related);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Handle the scheme (access) field.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if (given.access && given.host && !given.relative && !given.absolute) {
|
|
Packit |
f574b8 |
if (!strcmp(given.access, "http") ||
|
|
Packit |
f574b8 |
!strcmp(given.access, "https") ||
|
|
Packit |
f574b8 |
!strcmp(given.access, "ftp")) {
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Assume root.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
given.absolute = empty_string;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
acc_method = given.access ? given.access : related.access;
|
|
Packit |
f574b8 |
if (wanted & PARSE_ACCESS) {
|
|
Packit |
f574b8 |
if (acc_method) {
|
|
Packit |
f574b8 |
strcpy(tail, acc_method);
|
|
Packit |
f574b8 |
tail += strlen(tail);
|
|
Packit |
f574b8 |
if (wanted & PARSE_PUNCTUATION) {
|
|
Packit |
f574b8 |
*tail++ = ':';
|
|
Packit |
f574b8 |
*tail = '\0';
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* If different schemes, inherit nothing.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* We'll try complying with RFC 1808 and the Fielding draft, and inherit
|
|
Packit |
f574b8 |
* nothing if both schemes are given, rather than only when they differ,
|
|
Packit |
f574b8 |
* except for file URLs - FM
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* After trying it for a while, it's still premature, IHMO, to go along
|
|
Packit |
f574b8 |
* with it, so this is back to inheriting for identical schemes whether or
|
|
Packit |
f574b8 |
* not they are "file". If you want to try it again yourself, uncomment
|
|
Packit |
f574b8 |
* the strcasecomp() below. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if ((given.access && related.access) &&
|
|
Packit |
f574b8 |
( /* strcasecomp(given.access, "file") || */
|
|
Packit |
f574b8 |
strcmp(given.access, related.access))) {
|
|
Packit |
f574b8 |
related.host = NULL;
|
|
Packit |
f574b8 |
related.absolute = NULL;
|
|
Packit |
f574b8 |
related.relative = NULL;
|
|
Packit |
f574b8 |
related.search = NULL;
|
|
Packit |
f574b8 |
related.anchor = NULL;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Handle the host field.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if (wanted & PARSE_HOST) {
|
|
Packit |
f574b8 |
if (given.host || related.host) {
|
|
Packit |
f574b8 |
if (wanted & PARSE_PUNCTUATION) {
|
|
Packit |
f574b8 |
*tail++ = '/';
|
|
Packit |
f574b8 |
*tail++ = '/';
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
strcpy(tail, given.host ? given.host : related.host);
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Ignore default port numbers, and trailing dots on FQDNs, which
|
|
Packit |
f574b8 |
* will only cause identical addresses to look different. (related
|
|
Packit |
f574b8 |
* is already a clean url).
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
char *p2, *h;
|
|
Packit |
f574b8 |
int portnumber;
|
|
Packit |
f574b8 |
int gen_delims = 0;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if ((p2 = HTSkipToAt(result, &gen_delims)) != NULL
|
|
Packit |
f574b8 |
&& gen_delims == 0) {
|
|
Packit |
f574b8 |
tail = (p2 + 1);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
p2 = HTParsePort(result, &portnumber);
|
|
Packit |
f574b8 |
if (p2 != NULL && acc_method != NULL) {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Port specified.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
#define ACC_METHOD(a,b) (!strcmp(acc_method, a) && (portnumber == b))
|
|
Packit |
f574b8 |
if (ACC_METHOD("http", 80) ||
|
|
Packit |
f574b8 |
ACC_METHOD("https", 443) ||
|
|
Packit |
f574b8 |
ACC_METHOD("gopher", 70) ||
|
|
Packit |
f574b8 |
ACC_METHOD("ftp", 21) ||
|
|
Packit |
f574b8 |
ACC_METHOD("wais", 210) ||
|
|
Packit |
f574b8 |
ACC_METHOD("nntp", 119) ||
|
|
Packit |
f574b8 |
ACC_METHOD("news", 119) ||
|
|
Packit |
f574b8 |
ACC_METHOD("newspost", 119) ||
|
|
Packit |
f574b8 |
ACC_METHOD("newsreply", 119) ||
|
|
Packit |
f574b8 |
ACC_METHOD("snews", 563) ||
|
|
Packit |
f574b8 |
ACC_METHOD("snewspost", 563) ||
|
|
Packit |
f574b8 |
ACC_METHOD("snewsreply", 563) ||
|
|
Packit |
f574b8 |
ACC_METHOD("finger", 79) ||
|
|
Packit |
f574b8 |
ACC_METHOD("telnet", 23) ||
|
|
Packit |
f574b8 |
ACC_METHOD("tn3270", 23) ||
|
|
Packit |
f574b8 |
ACC_METHOD("rlogin", 513) ||
|
|
Packit |
f574b8 |
ACC_METHOD("cso", 105))
|
|
Packit |
f574b8 |
*p2 = '\0'; /* It is the default: ignore it */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (p2 == NULL) {
|
|
Packit |
f574b8 |
int len3 = (int) strlen(tail);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (len3 > 0) {
|
|
Packit |
f574b8 |
h = tail + len3 - 1; /* last char of hostname */
|
|
Packit |
f574b8 |
if (*h == '.')
|
|
Packit |
f574b8 |
*h = '\0'; /* chop final . */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
} else if (p2 != result) {
|
|
Packit |
f574b8 |
h = p2;
|
|
Packit |
f574b8 |
h--; /* End of hostname */
|
|
Packit |
f574b8 |
if (*h == '.') {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Slide p2 over h.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
while (*p2 != '\0')
|
|
Packit |
f574b8 |
*h++ = *p2++;
|
|
Packit |
f574b8 |
*h = '\0'; /* terminate */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
#ifdef USE_IDNA
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Depending on locale-support, we could have a literal UTF-8
|
|
Packit |
f574b8 |
* string as a host name, or a URL-encoded form of that.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
convert_to_idna(tail);
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Trim any blanks from the result so far - there's no excuse for blanks
|
|
Packit |
f574b8 |
* in a hostname. Also update the tail here.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
tail = LYRemoveBlanks(result);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* If host in given or related was ended directly with a '?' (no slash),
|
|
Packit |
f574b8 |
* fake the search part into absolute. This is the only case search is
|
|
Packit |
f574b8 |
* returned from scan. A host must have been present. this restores the
|
|
Packit |
f574b8 |
* '?' at which the host part had been truncated in scan, we have to do
|
|
Packit |
f574b8 |
* this after host part handling is done. - kw
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if (given.search && *(given.search - 1) == '\0') {
|
|
Packit |
f574b8 |
given.absolute = given.search - 1;
|
|
Packit |
f574b8 |
given.absolute[0] = '?';
|
|
Packit |
f574b8 |
} else if (related.search && !related.absolute &&
|
|
Packit |
f574b8 |
*(related.search - 1) == '\0') {
|
|
Packit |
f574b8 |
related.absolute = related.search - 1;
|
|
Packit |
f574b8 |
related.absolute[0] = '?';
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* If different hosts, inherit no path.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if (given.host && related.host)
|
|
Packit |
f574b8 |
if (strcmp(given.host, related.host) != 0) {
|
|
Packit |
f574b8 |
related.absolute = NULL;
|
|
Packit |
f574b8 |
related.relative = NULL;
|
|
Packit |
f574b8 |
related.anchor = NULL;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Handle the path.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if (wanted & (PARSE_PATH | PARSE_STRICTPATH | PARSE_QUERY)) {
|
|
Packit |
f574b8 |
int want_detail = (wanted & (PARSE_STRICTPATH | PARSE_QUERY));
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (acc_method && !given.absolute && given.relative) {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Treat all given nntp or snews paths, or given paths for news
|
|
Packit |
f574b8 |
* URLs with a host, as absolute.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
switch (*acc_method) {
|
|
Packit |
f574b8 |
case 'N':
|
|
Packit |
f574b8 |
case 'n':
|
|
Packit |
f574b8 |
if (!strcasecomp(acc_method, "nntp") ||
|
|
Packit |
f574b8 |
(!strcasecomp(acc_method, "news") &&
|
|
Packit |
f574b8 |
!strncasecomp(result, "news://", 7))) {
|
|
Packit |
f574b8 |
given.absolute = given.relative;
|
|
Packit |
f574b8 |
given.relative = NULL;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case 'S':
|
|
Packit |
f574b8 |
case 's':
|
|
Packit |
f574b8 |
if (!strcasecomp(acc_method, "snews")) {
|
|
Packit |
f574b8 |
given.absolute = given.relative;
|
|
Packit |
f574b8 |
given.relative = NULL;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (given.absolute) { /* All is given */
|
|
Packit |
f574b8 |
if (wanted & PARSE_PUNCTUATION)
|
|
Packit |
f574b8 |
*tail++ = '/';
|
|
Packit |
f574b8 |
strcpy(tail, given.absolute);
|
|
Packit |
f574b8 |
CTRACE((tfp, "HTParse: (ABS)\n"));
|
|
Packit |
f574b8 |
} else if (related.absolute) { /* Adopt path not name */
|
|
Packit |
f574b8 |
char *base = tail;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
*tail++ = '/';
|
|
Packit |
f574b8 |
strcpy(tail, related.absolute);
|
|
Packit |
f574b8 |
if (given.relative) {
|
|
Packit |
f574b8 |
/* RFC 1808 part 4 step 5 (if URL path is empty) */
|
|
Packit |
f574b8 |
/* a) if given has params, add/replace that */
|
|
Packit |
f574b8 |
if (given.relative[0] == ';') {
|
|
Packit |
f574b8 |
strcpy(strchr_or_end(tail, ';'), given.relative);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
/* b) if given has query, add/replace that */
|
|
Packit |
f574b8 |
else if (given.relative[0] == '?') {
|
|
Packit |
f574b8 |
strcpy(strchr_or_end(tail, '?'), given.relative);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
/* otherwise fall through to RFC 1808 part 4 step 6 */
|
|
Packit |
f574b8 |
else {
|
|
Packit |
f574b8 |
p = StrChr(tail, '?'); /* Search part? */
|
|
Packit |
f574b8 |
if (p == NULL)
|
|
Packit |
f574b8 |
p = (tail + strlen(tail) - 1);
|
|
Packit |
f574b8 |
for (; *p != '/'; p--) ; /* last / */
|
|
Packit |
f574b8 |
p[1] = '\0'; /* Remove filename */
|
|
Packit |
f574b8 |
strcat(p, given.relative); /* Add given one */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
HTSimplify(base);
|
|
Packit |
f574b8 |
if (*base == '\0')
|
|
Packit |
f574b8 |
strcpy(base, "/");
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
CTRACE((tfp, "HTParse: (Related-ABS)\n"));
|
|
Packit |
f574b8 |
} else if (given.relative) {
|
|
Packit |
f574b8 |
strcpy(tail, given.relative); /* what we've got */
|
|
Packit |
f574b8 |
CTRACE((tfp, "HTParse: (REL)\n"));
|
|
Packit |
f574b8 |
} else if (related.relative) {
|
|
Packit |
f574b8 |
strcpy(tail, related.relative);
|
|
Packit |
f574b8 |
CTRACE((tfp, "HTParse: (Related-REL)\n"));
|
|
Packit |
f574b8 |
} else { /* No inheritance */
|
|
Packit |
f574b8 |
if (!isLYNXCGI(aName) &&
|
|
Packit |
f574b8 |
!isLYNXEXEC(aName) &&
|
|
Packit |
f574b8 |
!isLYNXPROG(aName)) {
|
|
Packit |
f574b8 |
*tail++ = '/';
|
|
Packit |
f574b8 |
*tail = '\0';
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (!strcmp(result, "news:/"))
|
|
Packit |
f574b8 |
result[5] = '*';
|
|
Packit |
f574b8 |
CTRACE((tfp, "HTParse: (No inheritance)\n"));
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (want_detail) {
|
|
Packit |
f574b8 |
p = StrChr(tail, '?'); /* Search part? */
|
|
Packit |
f574b8 |
if (p) {
|
|
Packit |
f574b8 |
if (PARSE_STRICTPATH) {
|
|
Packit |
f574b8 |
*p = '\0';
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
if (!(wanted & PARSE_PUNCTUATION))
|
|
Packit |
f574b8 |
p++;
|
|
Packit |
f574b8 |
do {
|
|
Packit |
f574b8 |
*tail++ = *p;
|
|
Packit |
f574b8 |
} while (*p++);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
if (wanted & PARSE_QUERY)
|
|
Packit |
f574b8 |
*tail = '\0';
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Handle the fragment (anchor). Never inherit.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if (wanted & PARSE_ANCHOR) {
|
|
Packit |
f574b8 |
if (given.anchor && *given.anchor) {
|
|
Packit |
f574b8 |
tail += strlen(tail);
|
|
Packit |
f574b8 |
if (wanted & PARSE_PUNCTUATION)
|
|
Packit |
f574b8 |
*tail++ = '#';
|
|
Packit |
f574b8 |
strcpy(tail, given.anchor);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* If there are any blanks remaining in the string, escape them as needed.
|
|
Packit |
f574b8 |
* See the discussion in LYLegitimizeHREF() for example.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if ((p = StrChr(result, ' ')) != 0) {
|
|
Packit |
f574b8 |
switch (is_url(result)) {
|
|
Packit |
f574b8 |
case UNKNOWN_URL_TYPE:
|
|
Packit |
f574b8 |
CTRACE((tfp, "HTParse: ignore:`%s'\n", result));
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case LYNXEXEC_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXPROG_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXCGI_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXPRINT_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXHIST_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXDOWNLOAD_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXKEYMAP_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXIMGMAP_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXCOOKIE_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXCACHE_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXDIRED_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXOPTIONS_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXCFG_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXCOMPILE_OPTS_URL_TYPE:
|
|
Packit |
f574b8 |
case LYNXMESSAGES_URL_TYPE:
|
|
Packit |
f574b8 |
CTRACE((tfp, "HTParse: spaces:`%s'\n", result));
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case NOT_A_URL_TYPE:
|
|
Packit |
f574b8 |
default:
|
|
Packit |
f574b8 |
CTRACE((tfp, "HTParse: encode:`%s'\n", result));
|
|
Packit |
f574b8 |
do {
|
|
Packit |
f574b8 |
q = p + strlen(p) + 2;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
while (q != p + 1) {
|
|
Packit |
f574b8 |
q[0] = q[-2];
|
|
Packit |
f574b8 |
--q;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
p[0] = HEX_ESCAPE;
|
|
Packit |
f574b8 |
p[1] = '2';
|
|
Packit |
f574b8 |
p[2] = '0';
|
|
Packit |
f574b8 |
} while ((p = StrChr(result, ' ')) != 0);
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
CTRACE((tfp, "HTParse: result:`%s'\n", result));
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
StrAllocCopy(return_value, result);
|
|
Packit |
f574b8 |
LYalloca_free(result);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* FIXME: could be optimized using HTParse() internals */
|
|
Packit |
f574b8 |
if (*relatedName &&
|
|
Packit |
f574b8 |
((wanted & PARSE_ALL_WITHOUT_ANCHOR) == PARSE_ALL_WITHOUT_ANCHOR)) {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Check whether to fill in localhost. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
LYFillLocalFileURL(&return_value, relatedName);
|
|
Packit |
f574b8 |
CTRACE((tfp, "pass LYFillLocalFile:`%s'\n", return_value));
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
return return_value; /* exactly the right length */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* HTParseAnchor(), fast HTParse() specialization
|
|
Packit |
f574b8 |
* ----------------------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On exit,
|
|
Packit |
f574b8 |
* returns A pointer within input string (probably to its end '\0')
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
const char *HTParseAnchor(const char *aName)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
const char *p = aName;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
for (; *p && *p != '#'; p++) {
|
|
Packit |
f574b8 |
;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (*p == '#') {
|
|
Packit |
f574b8 |
/* the safe way based on HTParse() -
|
|
Packit |
f574b8 |
* keeping in mind scan() peculiarities on schemes:
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
struct struct_parts given;
|
|
Packit |
f574b8 |
size_t need = ((unsigned) ((p - aName) + (int) strlen(p) + 1));
|
|
Packit |
f574b8 |
char *name;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (need > (size_t) max_uri_size) {
|
|
Packit |
f574b8 |
p += strlen(p);
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
name = (char *) LYalloca(need);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (name == NULL) {
|
|
Packit |
f574b8 |
outofmem(__FILE__, "HTParseAnchor");
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
strcpy(name, aName);
|
|
Packit |
f574b8 |
scan(name, &given);
|
|
Packit |
f574b8 |
LYalloca_free(name);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
p++; /*next to '#' */
|
|
Packit |
f574b8 |
if (given.anchor == NULL) {
|
|
Packit |
f574b8 |
for (; *p; p++) /*scroll to end '\0' */
|
|
Packit |
f574b8 |
;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
return p;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*	Simplify a filename.				HTSimplify()
 *	--------------------
 *
 * A unix-style file is allowed to contain the sequence xxx/../ which may
 * be replaced by "" , and the sequence "/./" which may be replaced by "/".
 * Simplification helps us recognize duplicate filenames.
 *
 *	Thus,	/etc/junk/../fred	becomes	/etc/fred
 *		/etc/junk/./fred	becomes	/etc/junk/fred
 *
 *	but we should NOT change
 *		http://fred.xxx.edu/../..
 *
 *	or	../../albert.html
 *
 * On entry,
 *	filename	NUL-terminated string, edited in place (may be NULL).
 *
 * On exit,
 *	The string is the same length or shorter.  Nothing is done for NULL,
 *	for strings shorter than two characters, for strings with a '?'
 *	among the first three characters, or for strings without a '/'.
 *
 * All scanning stays inside the string: the search for the previous slash
 * never looks at, or writes to, a byte in front of filename.
 */
void HTSimplify(char *filename)
{
    char *p;
    char *q, *q1;

    if (filename == NULL)
        return;

    if (!(filename[0] && filename[1]) ||
        filename[0] == '?' || filename[1] == '?' || filename[2] == '?')
        return;

    if (StrChr(filename, '/') == NULL)
        return;

    p = filename + 2;
    while (*p != '\0') {
        if (*p == '?') {
            /*
             * We're still treating a ?searchpart as part of the path in
             * HTParse() and scan(), but if we encounter a '?' here, assume
             * it's the delimiter and break.  We also could check for a
             * parameter delimiter (';') here, but the current Fielding
             * draft (wisely or ill-advisedly :) says that it should be
             * ignored and collapsing be allowed in its value).  The only
             * defined parameter at present is ;type=[A, I, or D] for ftp
             * URLs, so if there's a "/..", "/../", "/./", or terminal '.'
             * following the ';', it must be due to the ';' being an
             * unescaped path character and not actually a parameter
             * delimiter.  - FM
             */
            break;
        }
        if (*p == '/') {
            if ((p[1] == '.') && (p[2] == '.') &&
                (p[3] == '/' || p[3] == '?' || p[3] == '\0')) {
                /*
                 * Handle "../", "..?" or "..".
                 *
                 * Walk back to just after the previous slash, or to the
                 * beginning of the string when there is none.
                 */
                for (q = p; (q > filename) && (q[-1] != '/'); q--)
                    ;
                if (q > filename) {
                    q--;        /* q is now on the previous slash */
                    if (StrNCmp(q, "/../", 4) &&
                        StrNCmp(q, "/..?", 4) &&
                        !(q > filename + 1 && q[-1] == '/')) {
                        /*
                         * Not at beginning of string or in a host field,
                         * so remove the "/xxx/..".
                         */
                        q1 = (p + 3);
                        p = q;
                        while (*q1 != '\0')
                            *p++ = *q1++;
                        *p = '\0';      /* terminate */
                        /*
                         * Start again with previous slash.
                         */
                        p = q;
                        continue;
                    }
                }
            } else if (p[1] == '.' && p[2] == '/') {
                /*
                 * Handle "./" by removing both characters, then look at
                 * the same slash again.
                 */
                q = p;
                q1 = (p + 2);
                while (*q1 != '\0')
                    *q++ = *q1++;
                *q = '\0';      /* terminate */
                continue;
            } else if (p[1] == '.' && p[2] == '?') {
                /*
                 * Handle ".?" by removing the dot, then look at the same
                 * slash again.
                 */
                q = (p + 1);
                q1 = (p + 2);
                while (*q1 != '\0')
                    *q++ = *q1++;
                *q = '\0';      /* terminate */
                continue;
            } else if (p[1] == '.' && p[2] == '\0') {
                /*
                 * Handle terminal "." by removing the character.
                 */
                p[1] = '\0';
            }
        }
        p++;
    }

    /*
     * The loop starts at filename + 2, so a dot segment that sits directly
     * in front of the '?' near the start of the string is handled here.
     */
    if (p >= filename + 2 && *p == '?' && *(p - 1) == '.') {
        if (*(p - 2) == '/') {
            /*
             * Handle "/.?" by removing the dot.
             */
            q = p - 1;
            q1 = p;
            while (*q1 != '\0')
                *q++ = *q1++;
            *q = '\0';
        } else if (*(p - 2) == '.' &&
                   p >= filename + 4 && *(p - 3) == '/' &&
                   (*(p - 4) != '/' ||
                    (p > filename + 4 && *(p - 5) != ':'))) {
            /*
             * Handle "xxx/..?"
             *
             * Back up to previous slash or beginning of string.
             */
            for (q = (p - 4); (q > filename) && (*q != '/'); q--)
                ;
            if (*q == '/') {
                /*
                 * NOTE(review): because *(q - 1) is already known to be
                 * '/', this test only fires for a leading "//"
                 * (q == filename + 1); a "://" prefix does not reach the
                 * return.  Kept as is - confirm the intent.
                 */
                if (q > filename && *(q - 1) == '/' &&
                    !(q > filename + 1 && *(q - 1) != ':'))
                    return;
                q++;
            }
            if (StrNCmp(q, "../", 3) && StrNCmp(q, "./", 2)) {
                /*
                 * Not after "//" at beginning of string or after "://",
                 * and xxx is not ".." or ".", so remove the "xxx/..".
                 */
                q1 = p;
                p = q;
                while (*q1 != '\0')
                    *p++ = *q1++;
                *p = '\0';      /* terminate */
            }
        }
    }
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Make Relative Name. HTRelative()
|
|
Packit |
f574b8 |
* -------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This function creates and returns a string which gives an expression of
|
|
Packit |
f574b8 |
* one address as related to another. Where there is no relation, an absolute
|
|
Packit |
f574b8 |
* address is returned.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On entry,
|
|
Packit |
f574b8 |
* Both names must be absolute, fully qualified names of nodes
|
|
Packit |
f574b8 |
* (no anchor bits)
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On exit,
|
|
Packit |
f574b8 |
* The return result points to a newly allocated name which, if
|
|
Packit |
f574b8 |
* parsed by HTParse relative to relatedName, will yield aName.
|
|
Packit |
f574b8 |
* The caller is responsible for freeing the resulting name later.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
char *HTRelative(const char *aName,
|
|
Packit |
f574b8 |
const char *relatedName)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
char *result = NULL;
|
|
Packit |
f574b8 |
const char *p = aName;
|
|
Packit |
f574b8 |
const char *q = relatedName;
|
|
Packit |
f574b8 |
const char *after_access = NULL;
|
|
Packit |
f574b8 |
const char *path = NULL;
|
|
Packit |
f574b8 |
const char *last_slash = NULL;
|
|
Packit |
f574b8 |
int slashes = 0;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
for (; *p; p++, q++) { /* Find extent of match */
|
|
Packit |
f574b8 |
if (*p != *q)
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
if (*p == ':')
|
|
Packit |
f574b8 |
after_access = p + 1;
|
|
Packit |
f574b8 |
if (*p == '/') {
|
|
Packit |
f574b8 |
last_slash = p;
|
|
Packit |
f574b8 |
slashes++;
|
|
Packit |
f574b8 |
if (slashes == 3)
|
|
Packit |
f574b8 |
path = p;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* q, p point to the first non-matching character or zero */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (!after_access) { /* Different access */
|
|
Packit |
f574b8 |
StrAllocCopy(result, aName);
|
|
Packit |
f574b8 |
} else if (slashes < 3) { /* Different nodes */
|
|
Packit |
f574b8 |
StrAllocCopy(result, after_access);
|
|
Packit |
f574b8 |
} else if (slashes == 3) { /* Same node, different path */
|
|
Packit |
f574b8 |
StrAllocCopy(result, path);
|
|
Packit |
f574b8 |
} else { /* Some path in common */
|
|
Packit |
f574b8 |
unsigned levels = 0;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
for (; *q && (*q != '#'); q++)
|
|
Packit |
f574b8 |
if (*q == '/')
|
|
Packit |
f574b8 |
levels++;
|
|
Packit |
f574b8 |
result = typecallocn(char, 3 * levels + strlen(last_slash) + 1);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (result == NULL)
|
|
Packit |
f574b8 |
outofmem(__FILE__, "HTRelative");
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
result[0] = '\0';
|
|
Packit |
f574b8 |
for (; levels; levels--)
|
|
Packit |
f574b8 |
strcat(result, "../");
|
|
Packit |
f574b8 |
strcat(result, last_slash + 1);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
CTRACE((tfp,
|
|
Packit |
f574b8 |
"HTparse: `%s' expressed relative to\n `%s' is\n `%s'.\n",
|
|
Packit |
f574b8 |
aName, relatedName, result));
|
|
Packit |
f574b8 |
return result;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
 * Allocate (via typecallocn) a char buffer holding the span base..next plus
 * "extra" more characters.  Every argument is parenthesized so that
 * expressions such as "s + 1" can be passed safely.
 */
#define AlloCopy(next,base,extra) \
        typecallocn(char, (((next) - (base)) + ((int) (extra))))
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Escape undesirable characters using % HTEscape()
|
|
Packit |
f574b8 |
* -------------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This function takes a pointer to a string in which
|
|
Packit |
f574b8 |
* some characters may be unacceptable unescaped.
|
|
Packit |
f574b8 |
* It returns a string which has these characters
|
|
Packit |
f574b8 |
* represented by a '%' character followed by two hex digits.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* Unlike HTUnEscape(), this routine returns a calloc'd string.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/* *INDENT-OFF* */
/*
 * Acceptability flags for the ASCII codes 0x20..0x7F, indexed by (code - 32).
 *
 *	Bit 0	xalpha		-- see HTFile.h
 *	Bit 1	xpalpha		-- as xalpha but with plus.
 *	Bit 2 ...	path		-- as xpalphas but with /
 */
static const unsigned char isAcceptable[96] =
    /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
    {    0,0,0,0,0,0,0,0,0,0,7,6,0,7,7,4,       /* 2x   !"#$%&'()*+,-./  */
         7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,       /* 3x  0123456789:;<=>?  */
         7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,       /* 4x  @ABCDEFGHIJKLMNO  */
         7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,       /* 5X  PQRSTUVWXYZ[\]^_  */
         0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,       /* 6x  `abcdefghijklmno  */
         7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 };     /* 7X  pqrstuvwxyz{|}~  DEL */
/* *INDENT-ON* */

/* Digits used when writing a %XX escape (upper-case hex). */
static const char *hex = "0123456789ABCDEF";

/*
 * Nonzero when the ASCII code "a" is in 32..127 and its table entry has a
 * bit in common with "mask".  "mask" is taken from the scope where the macro
 * is used.  The argument is parenthesized, but it is evaluated up to three
 * times, so it must not have side effects.
 */
#define ACCEPTABLE(a)   ((a) >= 32 && (a) < 128 && ((isAcceptable[(a) - 32]) & mask))
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
char *HTEscape(const char *str,
|
|
Packit |
f574b8 |
unsigned mask)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
const char *p;
|
|
Packit |
f574b8 |
char *q;
|
|
Packit |
f574b8 |
char *result;
|
|
Packit |
f574b8 |
size_t unacceptable = 0;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
for (p = str; *p; p++)
|
|
Packit |
f574b8 |
if (!ACCEPTABLE(UCH(TOASCII(*p))))
|
|
Packit |
f574b8 |
unacceptable++;
|
|
Packit |
f574b8 |
result = AlloCopy(p, str, (unacceptable * 2) + 1);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (result == NULL)
|
|
Packit |
f574b8 |
outofmem(__FILE__, "HTEscape");
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
for (q = result, p = str; *p; p++) {
|
|
Packit |
f574b8 |
unsigned char a = UCH(TOASCII(*p));
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (!ACCEPTABLE(a)) {
|
|
Packit |
f574b8 |
*q++ = HEX_ESCAPE; /* Means hex coming */
|
|
Packit |
f574b8 |
*q++ = hex[a >> 4];
|
|
Packit |
f574b8 |
*q++ = hex[a & 15];
|
|
Packit |
f574b8 |
} else
|
|
Packit |
f574b8 |
*q++ = *p;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
*q = '\0'; /* Terminate */
|
|
Packit |
f574b8 |
return result;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Escape unsafe characters using % HTEscapeUnsafe()
|
|
Packit |
f574b8 |
* --------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This function takes a pointer to a string in which
|
|
Packit |
f574b8 |
* some characters may be that may be unsafe are unescaped.
|
|
Packit |
f574b8 |
* It returns a string which has these characters
|
|
Packit |
f574b8 |
* represented by a '%' character followed by two hex digits.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* Unlike HTUnEscape(), this routine returns a malloc'd string.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
#define UNSAFE(ch) (((ch) <= 32) || ((ch) >= 127))
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
char *HTEscapeUnsafe(const char *str)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
const char *p;
|
|
Packit |
f574b8 |
char *q;
|
|
Packit |
f574b8 |
char *result;
|
|
Packit |
f574b8 |
size_t unacceptable = 0;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
for (p = str; *p; p++)
|
|
Packit |
f574b8 |
if (UNSAFE(UCH(TOASCII(*p))))
|
|
Packit |
f574b8 |
unacceptable++;
|
|
Packit |
f574b8 |
result = AlloCopy(p, str, (unacceptable * 2) + 1);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (result == NULL)
|
|
Packit |
f574b8 |
outofmem(__FILE__, "HTEscapeUnsafe");
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
for (q = result, p = str; *p; p++) {
|
|
Packit |
f574b8 |
unsigned char a = UCH(TOASCII(*p));
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (UNSAFE(a)) {
|
|
Packit |
f574b8 |
*q++ = HEX_ESCAPE; /* Means hex coming */
|
|
Packit |
f574b8 |
*q++ = hex[a >> 4];
|
|
Packit |
f574b8 |
*q++ = hex[a & 15];
|
|
Packit |
f574b8 |
} else
|
|
Packit |
f574b8 |
*q++ = *p;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
*q = '\0'; /* Terminate */
|
|
Packit |
f574b8 |
return result;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Escape undesirable characters using % but space to +. HTEscapeSP()
|
|
Packit |
f574b8 |
* -----------------------------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This function takes a pointer to a string in which
|
|
Packit |
f574b8 |
* some characters may be unacceptable unescaped.
|
|
Packit |
f574b8 |
* It returns a string which has these characters
|
|
Packit |
f574b8 |
* represented by a '%' character followed by two hex digits,
|
|
Packit |
f574b8 |
* except that spaces are converted to '+' instead of %2B.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* Unlike HTUnEscape(), this routine returns a calloced string.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
char *HTEscapeSP(const char *str,
|
|
Packit |
f574b8 |
unsigned mask)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
const char *p;
|
|
Packit |
f574b8 |
char *q;
|
|
Packit |
f574b8 |
char *result;
|
|
Packit |
f574b8 |
size_t unacceptable = 0;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
for (p = str; *p; p++)
|
|
Packit |
f574b8 |
if (!(*p == ' ' || ACCEPTABLE(UCH(TOASCII(*p)))))
|
|
Packit |
f574b8 |
unacceptable++;
|
|
Packit |
f574b8 |
result = AlloCopy(p, str, (unacceptable * 2) + 1);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (result == NULL)
|
|
Packit |
f574b8 |
outofmem(__FILE__, "HTEscape");
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
for (q = result, p = str; *p; p++) {
|
|
Packit |
f574b8 |
unsigned char a = UCH(TOASCII(*p));
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (a == 32) {
|
|
Packit |
f574b8 |
*q++ = '+';
|
|
Packit |
f574b8 |
} else if (!ACCEPTABLE(a)) {
|
|
Packit |
f574b8 |
*q++ = HEX_ESCAPE; /* Means hex coming */
|
|
Packit |
f574b8 |
*q++ = hex[a >> 4];
|
|
Packit |
f574b8 |
*q++ = hex[a & 15];
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
*q++ = *p;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
*q = '\0'; /* Terminate */
|
|
Packit |
f574b8 |
return result;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Decode %xx escaped characters. HTUnEscape()
|
|
Packit |
f574b8 |
* ------------------------------
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* This function takes a pointer to a string in which some
|
|
Packit |
f574b8 |
* characters may have been encoded in %xy form, where xy is
|
|
Packit |
f574b8 |
* the ASCII hex code for character 16x+y.
|
|
Packit |
f574b8 |
* The string is converted in place, as it will never grow.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/*
 * Value of one hex digit.  Callers are expected to have checked the
 * character with isxdigit(); anything that is not '0'..'9' or 'A'..'F'
 * is treated as a lower-case letter.
 */
static char from_hex(int c)
{
    int value;

    if (c >= '0' && c <= '9') {
        value = c - '0';
    } else if (c >= 'A' && c <= 'F') {
        value = c - 'A' + 10;
    } else {
        value = c - 'a' + 10;   /* accept small letters just in case */
    }
    return (char) value;
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
char *HTUnEscape(char *str)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
char *p = str;
|
|
Packit |
f574b8 |
char *q = str;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (!(p && *p))
|
|
Packit |
f574b8 |
return str;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
while (*p != '\0') {
|
|
Packit |
f574b8 |
if (*p == HEX_ESCAPE &&
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Tests shouldn't be needed, but better safe than sorry.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
p[1] && p[2] &&
|
|
Packit |
f574b8 |
isxdigit(UCH(p[1])) &&
|
|
Packit |
f574b8 |
isxdigit(UCH(p[2]))) {
|
|
Packit |
f574b8 |
p++;
|
|
Packit |
f574b8 |
if (*p)
|
|
Packit |
f574b8 |
*q = (char) (from_hex(*p++) * 16);
|
|
Packit |
f574b8 |
if (*p) {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Careful! FROMASCII() may evaluate its arg more than once!
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/* S/390 -- gil -- 0221 */
|
|
Packit |
f574b8 |
*q = (char) (*q + from_hex(*p++));
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
*q = FROMASCII(*q);
|
|
Packit |
f574b8 |
q++;
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
*q++ = *p++;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
*q = '\0';
|
|
Packit |
f574b8 |
return str;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
} /* HTUnEscape */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* Decode some %xx escaped characters. HTUnEscapeSome()
|
|
Packit |
f574b8 |
* ----------------------------------- Klaus Weide
|
|
Packit |
f574b8 |
* (kweide@tezcat.com)
|
|
Packit |
f574b8 |
* This function takes a pointer to a string in which some
|
|
Packit |
f574b8 |
* characters may have been encoded in %xy form, where xy is
|
|
Packit |
f574b8 |
* the ASCII hex code for character 16x+y, and a pointer to
|
|
Packit |
f574b8 |
* a second string containing one or more characters which
|
|
Packit |
f574b8 |
* should be unescaped if escaped in the first string.
|
|
Packit |
f574b8 |
* The first string is converted in place, as it will never grow.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
char *HTUnEscapeSome(char *str,
|
|
Packit |
f574b8 |
const char *do_trans)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
char *p = str;
|
|
Packit |
f574b8 |
char *q = str;
|
|
Packit |
f574b8 |
char testcode;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (p == NULL || *p == '\0' || do_trans == NULL || *do_trans == '\0')
|
|
Packit |
f574b8 |
return str;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
while (*p != '\0') {
|
|
Packit |
f574b8 |
if (*p == HEX_ESCAPE &&
|
|
Packit |
f574b8 |
p[1] && p[2] && /* tests shouldn't be needed, but.. */
|
|
Packit |
f574b8 |
isxdigit(UCH(p[1])) &&
|
|
Packit |
f574b8 |
isxdigit(UCH(p[2])) &&
|
|
Packit |
f574b8 |
(testcode = (char) FROMASCII(from_hex(p[1]) * 16 +
|
|
Packit |
f574b8 |
from_hex(p[2]))) && /* %00 no good */
|
|
Packit |
f574b8 |
StrChr(do_trans, testcode)) { /* it's one of the ones we want */
|
|
Packit |
f574b8 |
*q++ = testcode;
|
|
Packit |
f574b8 |
p += 3;
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
*q++ = *p++;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
*q = '\0';
|
|
Packit |
f574b8 |
return str;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
} /* HTUnEscapeSome */
|
|
Packit |
f574b8 |
/* *INDENT-OFF* */
/*
 * RFC 822 quoting flags for the ASCII codes 0x20..0x7F, indexed by
 * (code - 32), as used by HTMake822Word():
 *
 *	Bit 0	-- need "quoting"
 *	Bit 1	-- need \escape if quoted
 */
static const unsigned char crfc[96] = {
    /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
       1,0,3,0,0,0,0,0,1,1,0,0,1,0,1,0,         /* 2x   !"#$%&'()*+,-./  */
       0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0,         /* 3x  0123456789:;<=>?  */
       1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,         /* 4x  @ABCDEFGHIJKLMNO  */
       0,0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,         /* 5X  PQRSTUVWXYZ[\]^_  */
       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,         /* 6x  `abcdefghijklmno  */
       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3          /* 7X  pqrstuvwxyz{|}~  DEL */
};
/* *INDENT-ON* */

/* ASCII codes, written in octal so they do not depend on the host charset */
#define ASCII_TAB       '\011'
#define ASCII_LF        '\012'
#define ASCII_CR        '\015'
#define ASCII_SPC       '\040'
#define ASCII_BAK       '\134'
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Turn a string which is not a RFC 822 token into a quoted-string. - KW
|
|
Packit |
f574b8 |
* The "quoted" parameter tells whether we need the beginning/ending quote
|
|
Packit |
f574b8 |
* marks. If not, the caller will provide them -TD
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
void HTMake822Word(char **str,
|
|
Packit |
f574b8 |
int quoted)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
const char *p;
|
|
Packit |
f574b8 |
char *q;
|
|
Packit |
f574b8 |
char *result;
|
|
Packit |
f574b8 |
unsigned char a;
|
|
Packit |
f574b8 |
unsigned added = 0;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (isEmpty(*str)) {
|
|
Packit |
f574b8 |
StrAllocCopy(*str, quoted ? "\"\"" : "");
|
|
Packit |
f574b8 |
return;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
for (p = *str; *p; p++) {
|
|
Packit |
f574b8 |
a = UCH(TOASCII(*p)); /* S/390 -- gil -- 0240 */
|
|
Packit |
f574b8 |
if (a < 32 || a >= 128 ||
|
|
Packit |
f574b8 |
((crfc[a - 32]) & 1)) {
|
|
Packit |
f574b8 |
if (!added)
|
|
Packit |
f574b8 |
added = 2;
|
|
Packit |
f574b8 |
if (a >= 160 || a == '\t')
|
|
Packit |
f574b8 |
continue;
|
|
Packit |
f574b8 |
if (a == '\r' || a == '\n')
|
|
Packit |
f574b8 |
added += 2;
|
|
Packit |
f574b8 |
else if ((a & 127) < 32 || ((crfc[a - 32]) & 2))
|
|
Packit |
f574b8 |
added++;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (!added)
|
|
Packit |
f574b8 |
return;
|
|
Packit |
f574b8 |
result = AlloCopy(p, *str, added + 1);
|
|
Packit |
f574b8 |
if (result == NULL)
|
|
Packit |
f574b8 |
outofmem(__FILE__, "HTMake822Word");
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
q = result;
|
|
Packit |
f574b8 |
if (quoted)
|
|
Packit |
f574b8 |
*q++ = '"';
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Having converted the character to ASCII, we can't use symbolic
|
|
Packit |
f574b8 |
* escape codes, since they're in the host character set, which
|
|
Packit |
f574b8 |
* is not necessarily ASCII. Thus we use octal escape codes instead.
|
|
Packit |
f574b8 |
* -- gil (Paul Gilmartin) <pg@sweng.stortek.com>
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/* S/390 -- gil -- 0268 */
|
|
Packit |
f574b8 |
for (p = *str; *p; p++) {
|
|
Packit |
f574b8 |
a = UCH(TOASCII(*p));
|
|
Packit |
f574b8 |
if ((a != ASCII_TAB) &&
|
|
Packit |
f574b8 |
((a & 127) < ASCII_SPC ||
|
|
Packit |
f574b8 |
(a < 128 && ((crfc[a - 32]) & 2))))
|
|
Packit |
f574b8 |
*q++ = ASCII_BAK;
|
|
Packit |
f574b8 |
*q++ = *p;
|
|
Packit |
f574b8 |
if (a == ASCII_LF ||
|
|
Packit |
f574b8 |
(a == ASCII_CR && (TOASCII(*(p + 1)) != ASCII_LF)))
|
|
Packit |
f574b8 |
*q++ = ' ';
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (quoted)
|
|
Packit |
f574b8 |
*q++ = '"';
|
|
Packit |
f574b8 |
*q = '\0'; /* Terminate */
|
|
Packit |
f574b8 |
FREE(*str);
|
|
Packit |
f574b8 |
*str = result;
|
|
Packit |
f574b8 |
}
|