/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* APR portability layer: file/directory I/O, pool strings, ctype helpers. */
#include "apr.h"
#include "apr_file_io.h"
#include "apr_strings.h"
#include "apr_lib.h"

/* Request the string function declarations from apr_want.h. */
#define APR_WANT_STRFUNC
#include "apr_want.h"

/* Enables the "same basename, different extension" fallback matching. */
#define WANT_BASENAME_MATCH

/* httpd core API. */
#include "httpd.h"
#include "http_core.h"
#include "http_config.h"
#include "http_request.h"
#include "http_log.h"

/* mod_speling.c - by Alexei Kosut <akosut@organic.com> June, 1996
 *
 * This module is transparent, and simple. It attempts to correct
 * misspellings of URLs that users might have entered, namely by checking
 * capitalizations. If it finds a match, it sends a redirect.
 *
 * Sep-1999 Hugo Haas <hugo@w3.org>
 * o Added a CheckCaseOnly option to check only miscapitalized words.
 *
 * 08-Aug-1997 <Martin.Kraemer@Mch.SNI.De>
 * o Upgraded module interface to apache_1.3a2-dev API (more NULL's in
 *   speling_module).
 * o Integrated tcsh's "spelling correction" routine which allows one
 *   misspelling (character insertion/omission/typo/transposition).
 *   Rewrote it to ignore case as well. This ought to catch the majority
 *   of misspelled requests.
 * o Commented out the second pass where files' suffixes are stripped.
 *   Given the better hit rate of the first pass, this rather ugly
 *   (request index.html, receive index.db ?!?!) solution can be
 *   omitted.
 * o wrote a "kind of" html page for mod_speling
 *
 * Activate it with "CheckSpelling On"
 */

/* Forward declaration; the module record itself is defined at end of file. */
module AP_MODULE_DECLARE_DATA speling_module;

/* Per-server / per-directory settings for this module. */
typedef struct {
    int enabled;    /* set by the "CheckSpelling" flag directive */
    int case_only;  /* set by the "CheckCaseOnly" flag directive */
} spconfig;

/*
|
|
Packit |
90a5c9 |
* Create a configuration specific to this module for a server or directory
|
|
Packit |
90a5c9 |
* location, and fill it with the default settings.
|
|
Packit |
90a5c9 |
*
|
|
Packit |
90a5c9 |
* The API says that in the absence of a merge function, the record for the
|
|
Packit |
90a5c9 |
* closest ancestor is used exclusively. That's what we want, so we don't
|
|
Packit |
90a5c9 |
* bother to have such a function.
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
static void *mkconfig(apr_pool_t *p)
|
|
Packit |
90a5c9 |
{
|
|
Packit |
90a5c9 |
spconfig *cfg = apr_pcalloc(p, sizeof(spconfig));
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
cfg->enabled = 0;
|
|
Packit |
90a5c9 |
cfg->case_only = 0;
|
|
Packit |
90a5c9 |
return cfg;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* Respond to a callback to create configuration record for a server or
|
|
Packit |
90a5c9 |
* vhost environment.
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
static void *create_mconfig_for_server(apr_pool_t *p, server_rec *s)
|
|
Packit |
90a5c9 |
{
|
|
Packit |
90a5c9 |
return mkconfig(p);
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* Respond to a callback to create a config record for a specific directory.
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
static void *create_mconfig_for_directory(apr_pool_t *p, char *dir)
|
|
Packit |
90a5c9 |
{
|
|
Packit |
90a5c9 |
return mkconfig(p);
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* Define the directives specific to this module. This structure is referenced
|
|
Packit |
90a5c9 |
* later by the 'module' structure.
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
static const command_rec speling_cmds[] =
|
|
Packit |
90a5c9 |
{
|
|
Packit |
90a5c9 |
AP_INIT_FLAG("CheckSpelling", ap_set_flag_slot,
|
|
Packit |
90a5c9 |
(void*)APR_OFFSETOF(spconfig, enabled), OR_OPTIONS,
|
|
Packit |
90a5c9 |
"whether or not to fix miscapitalized/misspelled requests"),
|
|
Packit |
90a5c9 |
AP_INIT_FLAG("CheckCaseOnly", ap_set_flag_slot,
|
|
Packit |
90a5c9 |
(void*)APR_OFFSETOF(spconfig, case_only), OR_OPTIONS,
|
|
Packit |
90a5c9 |
"whether or not to fix only miscapitalized requests"),
|
|
Packit |
90a5c9 |
{ NULL }
|
|
Packit |
90a5c9 |
};
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
 * Classification of how a directory entry differs from the requested name.
 * Lower values are better matches; the numeric value is used both as a sort
 * key and as an index into sp_reason_str[].
 */
typedef enum {
    SP_IDENTICAL      = 0,
    SP_MISCAPITALIZED = 1,
    SP_TRANSPOSITION  = 2,
    SP_MISSINGCHAR    = 3,
    SP_EXTRACHAR      = 4,
    SP_SIMPLETYPO     = 5,
    SP_VERYDIFFERENT  = 6
} sp_reason;

/* Human-readable text for each sp_reason value, indexed by that value. */
static const char *sp_reason_str[] =
{
    "identical",                /* SP_IDENTICAL */
    "miscapitalized",           /* SP_MISCAPITALIZED */
    "transposed characters",    /* SP_TRANSPOSITION */
    "character missing",        /* SP_MISSINGCHAR */
    "extra character",          /* SP_EXTRACHAR */
    "mistyped character",       /* SP_SIMPLETYPO */
    "common basename",          /* SP_VERYDIFFERENT */
};

typedef struct {
|
|
Packit |
90a5c9 |
const char *name;
|
|
Packit |
90a5c9 |
sp_reason quality;
|
|
Packit |
90a5c9 |
} misspelled_file;
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* spdist() is taken from Kernighan & Pike,
|
|
Packit |
90a5c9 |
* _The_UNIX_Programming_Environment_
|
|
Packit |
90a5c9 |
* and adapted somewhat to correspond better to psychological reality.
|
|
Packit |
90a5c9 |
* (Note the changes to the return values)
|
|
Packit |
90a5c9 |
*
|
|
Packit |
90a5c9 |
* According to Pollock and Zamora, CACM April 1984 (V. 27, No. 4),
|
|
Packit |
90a5c9 |
* page 363, the correct order for this is:
|
|
Packit |
90a5c9 |
* OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
|
|
Packit |
90a5c9 |
* thus, it was exactly backwards in the old version. -- PWP
|
|
Packit |
90a5c9 |
*
|
|
Packit |
90a5c9 |
* This routine was taken out of tcsh's spelling correction code
|
|
Packit |
90a5c9 |
* (tcsh-6.07.04) and re-converted to apache data types ("char" type
|
|
Packit |
90a5c9 |
* instead of tcsh's NLS'ed "Char"). Plus it now ignores the case
|
|
Packit |
90a5c9 |
* during comparisons, so is a "approximate strcasecmp()".
|
|
Packit |
90a5c9 |
* NOTE that is still allows only _one_ real "typo",
|
|
Packit |
90a5c9 |
* it does NOT try to correct multiple errors.
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
static sp_reason spdist(const char *s, const char *t)
|
|
Packit |
90a5c9 |
{
|
|
Packit |
90a5c9 |
for (; apr_tolower(*s) == apr_tolower(*t); t++, s++) {
|
|
Packit |
90a5c9 |
if (*t == '\0') {
|
|
Packit |
90a5c9 |
return SP_MISCAPITALIZED; /* exact match (sans case) */
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
if (*s) {
|
|
Packit |
90a5c9 |
if (*t) {
|
|
Packit |
90a5c9 |
if (s[1] && t[1] && apr_tolower(*s) == apr_tolower(t[1])
|
|
Packit |
90a5c9 |
&& apr_tolower(*t) == apr_tolower(s[1])
|
|
Packit |
90a5c9 |
&& strcasecmp(s + 2, t + 2) == 0) {
|
|
Packit |
90a5c9 |
return SP_TRANSPOSITION; /* transposition */
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
if (strcasecmp(s + 1, t + 1) == 0) {
|
|
Packit |
90a5c9 |
return SP_SIMPLETYPO; /* 1 char mismatch */
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
if (strcasecmp(s + 1, t) == 0) {
|
|
Packit |
90a5c9 |
return SP_EXTRACHAR; /* extra character */
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
if (*t && strcasecmp(s, t + 1) == 0) {
|
|
Packit |
90a5c9 |
return SP_MISSINGCHAR; /* missing character */
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
return SP_VERYDIFFERENT; /* distance too large to fix. */
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
static int sort_by_quality(const void *left, const void *rite)
|
|
Packit |
90a5c9 |
{
|
|
Packit |
90a5c9 |
return (int) (((misspelled_file *) left)->quality)
|
|
Packit |
90a5c9 |
- (int) (((misspelled_file *) rite)->quality);
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
static int check_speling(request_rec *r)
|
|
Packit |
90a5c9 |
{
|
|
Packit |
90a5c9 |
spconfig *cfg;
|
|
Packit |
90a5c9 |
char *good, *bad, *postgood, *url;
|
|
Packit |
90a5c9 |
apr_finfo_t dirent;
|
|
Packit |
90a5c9 |
int filoc, dotloc, urlen, pglen;
|
|
Packit |
90a5c9 |
apr_array_header_t *candidates = NULL;
|
|
Packit |
90a5c9 |
apr_dir_t *dir;
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
cfg = ap_get_module_config(r->per_dir_config, &speling_module);
|
|
Packit |
90a5c9 |
if (!cfg->enabled) {
|
|
Packit |
90a5c9 |
return DECLINED;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* We only want to worry about GETs */
|
|
Packit |
90a5c9 |
if (r->method_number != M_GET) {
|
|
Packit |
90a5c9 |
return DECLINED;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* We've already got a file of some kind or another */
|
|
Packit |
90a5c9 |
if (r->finfo.filetype != APR_NOFILE) {
|
|
Packit |
90a5c9 |
return DECLINED;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* Not a file request */
|
|
Packit |
90a5c9 |
if (r->proxyreq || !r->filename) {
|
|
Packit |
90a5c9 |
return DECLINED;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* This is a sub request - don't mess with it */
|
|
Packit |
90a5c9 |
if (r->main) {
|
|
Packit |
90a5c9 |
return DECLINED;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* The request should end up looking like this:
|
|
Packit |
90a5c9 |
* r->uri: /correct-url/mispelling/more
|
|
Packit |
90a5c9 |
* r->filename: /correct-file/mispelling r->path_info: /more
|
|
Packit |
90a5c9 |
*
|
|
Packit |
90a5c9 |
* So we do this in steps. First break r->filename into two pieces
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
filoc = ap_rind(r->filename, '/');
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* Don't do anything if the request doesn't contain a slash, or
|
|
Packit |
90a5c9 |
* requests "/"
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
if (filoc == -1 || strcmp(r->uri, "/") == 0) {
|
|
Packit |
90a5c9 |
return DECLINED;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* good = /correct-file */
|
|
Packit |
90a5c9 |
good = apr_pstrndup(r->pool, r->filename, filoc);
|
|
Packit |
90a5c9 |
/* bad = mispelling */
|
|
Packit |
90a5c9 |
bad = apr_pstrdup(r->pool, r->filename + filoc + 1);
|
|
Packit |
90a5c9 |
/* postgood = mispelling/more */
|
|
Packit |
90a5c9 |
postgood = apr_pstrcat(r->pool, bad, r->path_info, NULL);
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
urlen = strlen(r->uri);
|
|
Packit |
90a5c9 |
pglen = strlen(postgood);
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* Check to see if the URL pieces add up */
|
|
Packit |
90a5c9 |
if (strcmp(postgood, r->uri + (urlen - pglen))) {
|
|
Packit |
90a5c9 |
return DECLINED;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* url = /correct-url */
|
|
Packit |
90a5c9 |
url = apr_pstrndup(r->pool, r->uri, (urlen - pglen));
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* Now open the directory and do ourselves a check... */
|
|
Packit |
90a5c9 |
if (apr_dir_open(&dir, good, r->pool) != APR_SUCCESS) {
|
|
Packit |
90a5c9 |
/* Oops, not a directory... */
|
|
Packit |
90a5c9 |
return DECLINED;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
candidates = apr_array_make(r->pool, 2, sizeof(misspelled_file));
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
dotloc = ap_ind(bad, '.');
|
|
Packit |
90a5c9 |
if (dotloc == -1) {
|
|
Packit |
90a5c9 |
dotloc = strlen(bad);
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
while (apr_dir_read(&dirent, APR_FINFO_DIRENT, dir) == APR_SUCCESS) {
|
|
Packit |
90a5c9 |
sp_reason q;
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* If we end up with a "fixed" URL which is identical to the
|
|
Packit |
90a5c9 |
* requested one, we must have found a broken symlink or some such.
|
|
Packit |
90a5c9 |
* Do _not_ try to redirect this, it causes a loop!
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
if (strcmp(bad, dirent.name) == 0) {
|
|
Packit |
90a5c9 |
apr_dir_close(dir);
|
|
Packit |
90a5c9 |
return OK;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* miscapitalization errors are checked first (like, e.g., lower case
|
|
Packit |
90a5c9 |
* file, upper case request)
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
else if (strcasecmp(bad, dirent.name) == 0) {
|
|
Packit |
90a5c9 |
misspelled_file *sp_new;
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
sp_new = (misspelled_file *) apr_array_push(candidates);
|
|
Packit |
90a5c9 |
sp_new->name = apr_pstrdup(r->pool, dirent.name);
|
|
Packit |
90a5c9 |
sp_new->quality = SP_MISCAPITALIZED;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* simple typing errors are checked next (like, e.g.,
|
|
Packit |
90a5c9 |
* missing/extra/transposed char)
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
else if ((cfg->case_only == 0)
|
|
Packit |
90a5c9 |
&& ((q = spdist(bad, dirent.name)) != SP_VERYDIFFERENT)) {
|
|
Packit |
90a5c9 |
misspelled_file *sp_new;
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
sp_new = (misspelled_file *) apr_array_push(candidates);
|
|
Packit |
90a5c9 |
sp_new->name = apr_pstrdup(r->pool, dirent.name);
|
|
Packit |
90a5c9 |
sp_new->quality = q;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* The spdist() should have found the majority of the misspelled
|
|
Packit |
90a5c9 |
* requests. It is of questionable use to continue looking for
|
|
Packit |
90a5c9 |
* files with the same base name, but potentially of totally wrong
|
|
Packit |
90a5c9 |
* type (index.html <-> index.db).
|
|
Packit |
90a5c9 |
* I would propose to not set the WANT_BASENAME_MATCH define.
|
|
Packit |
90a5c9 |
* 08-Aug-1997 <Martin.Kraemer@Mch.SNI.De>
|
|
Packit |
90a5c9 |
*
|
|
Packit |
90a5c9 |
* However, Alexei replied giving some reasons to add it anyway:
|
|
Packit |
90a5c9 |
* > Oh, by the way, I remembered why having the
|
|
Packit |
90a5c9 |
* > extension-stripping-and-matching stuff is a good idea:
|
|
Packit |
90a5c9 |
* >
|
|
Packit |
90a5c9 |
* > If you're using MultiViews, and have a file named foobar.html,
|
|
Packit |
90a5c9 |
* > which you refer to as "foobar", and someone tried to access
|
|
Packit |
90a5c9 |
* > "Foobar", mod_speling won't find it, because it won't find
|
|
Packit |
90a5c9 |
* > anything matching that spelling. With the extension-munging,
|
|
Packit |
90a5c9 |
* > it would locate "foobar.html". Not perfect, but I ran into
|
|
Packit |
90a5c9 |
* > that problem when I first wrote the module.
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
else {
|
|
Packit |
90a5c9 |
#ifdef WANT_BASENAME_MATCH
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* Okay... we didn't find anything. Now we take out the hard-core
|
|
Packit |
90a5c9 |
* power tools. There are several cases here. Someone might have
|
|
Packit |
90a5c9 |
* entered a wrong extension (.htm instead of .html or vice
|
|
Packit |
90a5c9 |
* versa) or the document could be negotiated. At any rate, now
|
|
Packit |
90a5c9 |
* we just compare stuff before the first dot. If it matches, we
|
|
Packit |
90a5c9 |
* figure we got us a match. This can result in wrong things if
|
|
Packit |
90a5c9 |
* there are files of different content types but the same prefix
|
|
Packit |
90a5c9 |
* (e.g. foo.gif and foo.html) This code will pick the first one
|
|
Packit |
90a5c9 |
* it finds. Better than a Not Found, though.
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
int entloc = ap_ind(dirent.name, '.');
|
|
Packit |
90a5c9 |
if (entloc == -1) {
|
|
Packit |
90a5c9 |
entloc = strlen(dirent.name);
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
if ((dotloc == entloc)
|
|
Packit |
90a5c9 |
&& !strncasecmp(bad, dirent.name, dotloc)) {
|
|
Packit |
90a5c9 |
misspelled_file *sp_new;
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
sp_new = (misspelled_file *) apr_array_push(candidates);
|
|
Packit |
90a5c9 |
sp_new->name = apr_pstrdup(r->pool, dirent.name);
|
|
Packit |
90a5c9 |
sp_new->quality = SP_VERYDIFFERENT;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
#endif
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
apr_dir_close(dir);
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
if (candidates->nelts != 0) {
|
|
Packit |
90a5c9 |
/* Wow... we found us a mispelling. Construct a fixed url */
|
|
Packit |
90a5c9 |
char *nuri;
|
|
Packit |
90a5c9 |
const char *ref;
|
|
Packit |
90a5c9 |
misspelled_file *variant = (misspelled_file *) candidates->elts;
|
|
Packit |
90a5c9 |
int i;
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
ref = apr_table_get(r->headers_in, "Referer");
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
qsort((void *) candidates->elts, candidates->nelts,
|
|
Packit |
90a5c9 |
sizeof(misspelled_file), sort_by_quality);
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* Conditions for immediate redirection:
|
|
Packit |
90a5c9 |
* a) the first candidate was not found by stripping the suffix
|
|
Packit |
90a5c9 |
* AND b) there exists only one candidate OR the best match is not
|
|
Packit |
90a5c9 |
* ambiguous
|
|
Packit |
90a5c9 |
* then return a redirection right away.
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
if (variant[0].quality != SP_VERYDIFFERENT
|
|
Packit |
90a5c9 |
&& (candidates->nelts == 1
|
|
Packit |
90a5c9 |
|| variant[0].quality != variant[1].quality)) {
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
nuri = ap_escape_uri(r->pool, apr_pstrcat(r->pool, url,
|
|
Packit |
90a5c9 |
variant[0].name,
|
|
Packit |
90a5c9 |
r->path_info, NULL));
|
|
Packit |
90a5c9 |
if (r->parsed_uri.query)
|
|
Packit |
90a5c9 |
nuri = apr_pstrcat(r->pool, nuri, "?", r->parsed_uri.query, NULL);
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
apr_table_setn(r->headers_out, "Location",
|
|
Packit |
90a5c9 |
ap_construct_url(r->pool, nuri, r));
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
ap_log_rerror(APLOG_MARK, APLOG_INFO, APR_SUCCESS,
|
|
Packit |
90a5c9 |
r,
|
|
Packit |
90a5c9 |
ref ? APLOGNO(03224) "Fixed spelling: %s to %s from %s"
|
|
Packit |
90a5c9 |
: APLOGNO(03225) "Fixed spelling: %s to %s%s",
|
|
Packit |
90a5c9 |
r->uri, nuri,
|
|
Packit |
90a5c9 |
(ref ? ref : ""));
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
return HTTP_MOVED_PERMANENTLY;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* Otherwise, a "[300] Multiple Choices" list with the variants is
|
|
Packit |
90a5c9 |
* returned.
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
else {
|
|
Packit |
90a5c9 |
apr_pool_t *p;
|
|
Packit |
90a5c9 |
apr_table_t *notes;
|
|
Packit |
90a5c9 |
apr_pool_t *sub_pool;
|
|
Packit |
90a5c9 |
apr_array_header_t *t;
|
|
Packit |
90a5c9 |
apr_array_header_t *v;
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
if (r->main == NULL) {
|
|
Packit |
90a5c9 |
p = r->pool;
|
|
Packit |
90a5c9 |
notes = r->notes;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
else {
|
|
Packit |
90a5c9 |
p = r->main->pool;
|
|
Packit |
90a5c9 |
notes = r->main->notes;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
if (apr_pool_create(&sub_pool, p) != APR_SUCCESS)
|
|
Packit |
90a5c9 |
return DECLINED;
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
t = apr_array_make(sub_pool, candidates->nelts * 8 + 8,
|
|
Packit |
90a5c9 |
sizeof(char *));
|
|
Packit |
90a5c9 |
v = apr_array_make(sub_pool, candidates->nelts * 5,
|
|
Packit |
90a5c9 |
sizeof(char *));
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* Generate the response text. */
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) =
|
|
Packit |
90a5c9 |
"The document name you requested (";
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) = ap_escape_html(sub_pool, r->uri);
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) =
|
|
Packit |
90a5c9 |
") could not be found on this server.\n"
|
|
Packit |
90a5c9 |
"However, we found documents with names similar "
|
|
Packit |
90a5c9 |
"to the one you requested."
|
|
Packit |
90a5c9 |
"Available documents:\n
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
for (i = 0; i < candidates->nelts; ++i) {
|
|
Packit |
90a5c9 |
char *vuri;
|
|
Packit |
90a5c9 |
const char *reason;
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
reason = sp_reason_str[(int) (variant[i].quality)];
|
|
Packit |
90a5c9 |
/* The format isn't very neat... */
|
|
Packit |
90a5c9 |
vuri = apr_pstrcat(sub_pool, url, variant[i].name, r->path_info,
|
|
Packit |
90a5c9 |
(r->parsed_uri.query != NULL) ? "?" : "",
|
|
Packit |
90a5c9 |
(r->parsed_uri.query != NULL)
|
|
Packit |
90a5c9 |
? r->parsed_uri.query : "",
|
|
Packit |
90a5c9 |
NULL);
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(v) = "\"";
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(v) = ap_escape_uri(sub_pool, vuri);
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(v) = "\";\"";
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(v) = reason;
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(v) = "\"";
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) = "
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) = ap_escape_uri(sub_pool, vuri);
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) = "\">";
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) = ap_escape_html(sub_pool, vuri);
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) = " (";
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) = reason;
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) = ")\n";
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/*
|
|
Packit |
90a5c9 |
* when we have printed the "close matches" and there are
|
|
Packit |
90a5c9 |
* more "distant matches" (matched by stripping the suffix),
|
|
Packit |
90a5c9 |
* then we insert an additional separator text to suggest
|
|
Packit |
90a5c9 |
* that the user LOOK CLOSELY whether these are really the
|
|
Packit |
90a5c9 |
* files she wanted.
|
|
Packit |
90a5c9 |
*/
|
|
Packit |
90a5c9 |
if (i > 0 && i < candidates->nelts - 1
|
|
Packit |
90a5c9 |
&& variant[i].quality != SP_VERYDIFFERENT
|
|
Packit |
90a5c9 |
&& variant[i + 1].quality == SP_VERYDIFFERENT) {
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) =
|
|
Packit |
90a5c9 |
"\nFurthermore, the following related "
|
|
Packit |
90a5c9 |
"documents were found:\n
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) = "\n";
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* If we know there was a referring page, add a note: */
|
|
Packit |
90a5c9 |
if (ref != NULL) {
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) =
|
|
Packit |
90a5c9 |
"Please consider informing the owner of the "
|
|
Packit |
90a5c9 |
"referring page <tt>";
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) = ap_escape_html(sub_pool, ref);
|
|
Packit |
90a5c9 |
*(const char **)apr_array_push(t) =
|
|
Packit |
90a5c9 |
"</tt> about the broken link.\n";
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* Pass our apr_table_t to http_protocol.c (see mod_negotiation): */
|
|
Packit |
90a5c9 |
apr_table_setn(notes, "variant-list", apr_array_pstrcat(p, t, 0));
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
apr_table_mergen(r->subprocess_env, "VARIANTS",
|
|
Packit |
90a5c9 |
apr_array_pstrcat(p, v, ','));
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
apr_pool_destroy(sub_pool);
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
|
|
Packit |
90a5c9 |
ref ? APLOGNO(03226) "Spelling fix: %s: %d candidates from %s"
|
|
Packit |
90a5c9 |
: APLOGNO(03227) "Spelling fix: %s: %d candidates%s",
|
|
Packit |
90a5c9 |
r->uri, candidates->nelts,
|
|
Packit |
90a5c9 |
(ref ? ref : ""));
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
return HTTP_MULTIPLE_CHOICES;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
return OK;
|
|
Packit |
90a5c9 |
}
|
|
Packit |
90a5c9 |
|
|
Packit |
90a5c9 |
/* Install check_speling as a fixups hook, ordered APR_HOOK_LAST. */
static void register_hooks(apr_pool_t *p)
{
    ap_hook_fixups(check_speling, NULL, NULL, APR_HOOK_LAST);
}

AP_DECLARE_MODULE(speling) =
|
|
Packit |
90a5c9 |
{
|
|
Packit |
90a5c9 |
STANDARD20_MODULE_STUFF,
|
|
Packit |
90a5c9 |
create_mconfig_for_directory, /* create per-dir config */
|
|
Packit |
90a5c9 |
NULL, /* merge per-dir config */
|
|
Packit |
90a5c9 |
create_mconfig_for_server, /* server config */
|
|
Packit |
90a5c9 |
NULL, /* merge server config */
|
|
Packit |
90a5c9 |
speling_cmds, /* command apr_table_t */
|
|
Packit |
90a5c9 |
register_hooks /* register hooks */
|
|
Packit |
90a5c9 |
};
|