Blame server/util_regex.c

Packit 90a5c9
/* Licensed to the Apache Software Foundation (ASF) under one or more
Packit 90a5c9
 * contributor license agreements.  See the NOTICE file distributed with
Packit 90a5c9
 * this work for additional information regarding copyright ownership.
Packit 90a5c9
 * The ASF licenses this file to You under the Apache License, Version 2.0
Packit 90a5c9
 * (the "License"); you may not use this file except in compliance with
Packit 90a5c9
 * the License.  You may obtain a copy of the License at
Packit 90a5c9
 *
Packit 90a5c9
 *     http://www.apache.org/licenses/LICENSE-2.0
Packit 90a5c9
 *
Packit 90a5c9
 * Unless required by applicable law or agreed to in writing, software
Packit 90a5c9
 * distributed under the License is distributed on an "AS IS" BASIS,
Packit 90a5c9
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Packit 90a5c9
 * See the License for the specific language governing permissions and
Packit 90a5c9
 * limitations under the License.
Packit 90a5c9
 */
Packit 90a5c9
Packit 90a5c9
#include "apr.h"
Packit 90a5c9
#include "apr_lib.h"
Packit 90a5c9
#include "apr_pools.h"
Packit 90a5c9
#include "apr_strings.h"
Packit 90a5c9
#include "ap_config.h"
Packit 90a5c9
#include "ap_regex.h"
Packit 90a5c9
#include "httpd.h"
Packit 90a5c9
Packit 90a5c9
static apr_status_t rxplus_cleanup(void *preg)
Packit 90a5c9
{
Packit 90a5c9
    ap_regfree((ap_regex_t *) preg);
Packit 90a5c9
    return APR_SUCCESS;
Packit 90a5c9
}
Packit 90a5c9
Packit 90a5c9
/* Compile a perl-style pattern into an ap_rxplus_t allocated from pool.
 *
 * Recognised forms (more can be added as and when wanted):
 *   substitute: s/rx/subs/flags
 *   match:      m/rx/flags  or just  /rx/flags
 * Any non-alphanumeric character may serve as the delimiter.  If no
 * closing delimiter is found, the whole of pattern is compiled as a plain
 * regex with no flags, no replacement and no match array.
 *
 * pool     pool for the result, its strings, and the regex cleanup
 * pattern  NUL-terminated pattern text (if this is ever used with
 *          non-string input an extra check will be needed)
 *
 * Returns the compiled object, or NULL when the regex does not compile or
 * a substitution has no (terminated) replacement string.
 */
AP_DECLARE(ap_rxplus_t*) ap_rxplus_compile(apr_pool_t *pool,
                                           const char *pattern)
{
    ap_rxplus_t *rxp = apr_pcalloc(pool, sizeof(ap_rxplus_t));
    const char *cursor = pattern;   /* start of the piece being parsed */
    const char *term = NULL;        /* delimiter that ends that piece */
    const char *body;               /* private copy of the regex text */
    const char *scan;
    const char first = pattern[0];
    char sep = 0;
    int substitute = 0;

    /* Work out the delimiter and whether this is a match or a substitute */
    if (apr_isalnum(first)) {
        if ((first == 's' || first == 'm') && !apr_isalnum(pattern[1])) {
            substitute = (first == 's');
            sep = pattern[1];
            cursor += 2;
        }
    }
    else {
        sep = first;
        cursor++;
    }
    /* TODO: support perl's after/before */
    /* FIXME: fix these simpleminded delims */

    /* We think there's a delimiter, but allow for it not to be one if it
     * is never matched by a closing delimiter.
     */
    if (sep != 0) {
        term = ap_strchr_c(cursor, sep);
    }
    if (term == NULL) {
        /* no delimiter and no flags: treat the input as a bare regex */
        if (ap_regcomp(&rxp->rx, pattern, 0) != 0) {
            return NULL;
        }
        apr_pool_cleanup_register(pool, &rxp->rx, rxplus_cleanup,
                                  apr_pool_cleanup_null);
        return rxp;
    }

    /* The text between the delimiters is the regexp */
    body = apr_pstrmemdup(pool, cursor, term - cursor);

    /* A substitution also needs the replacement string.
     * TODO: possible future enhancement - support other parsing
     * in the replacement string.
     */
    if (substitute) {
        cursor = term + 1;
        if (*cursor == '\0') {
            return NULL;    /* nothing at all after the regex */
        }
        term = ap_strchr_c(cursor, sep);
        if (term == NULL) {
            return NULL;    /* replacement is not terminated */
        }
        rxp->subs = apr_pstrmemdup(pool, cursor, term - cursor);
    }

    /* Everything after the final delimiter is a flag character */
    for (scan = term + 1; *scan != '\0'; ++scan) {
        switch (*scan) {
        case 'i':
            rxp->flags |= AP_REG_ICASE;
            break;
        case 'm':
            rxp->flags |= AP_REG_NEWLINE;
            break;
        case 'n':
            rxp->flags |= AP_REG_NOMEM;
            break;
        case 'g':
            rxp->flags |= AP_REG_MULTI;
            break;
        case 's':
            rxp->flags |= AP_REG_DOTALL;
            break;
        case '^':
            rxp->flags |= AP_REG_NOTBOL;
            break;
        case '$':
            rxp->flags |= AP_REG_NOTEOL;
            break;
        default:
            /* unknown flags are ignored; we should probably be stricter */
            break;
        }
    }

    if (ap_regcomp(&rxp->rx, body, rxp->flags) != 0) {
        return NULL;
    }
    apr_pool_cleanup_register(pool, &rxp->rx, rxplus_cleanup,
                              apr_pool_cleanup_null);

    if (rxp->flags & AP_REG_NOMEM) {
        /* caller asked for no match memory: nmatch stays 0, pmatch NULL */
        return rxp;
    }

    /* Size the match array: one slot for the whole match plus one per
     * unescaped '('.  A simpleminded count is fine because regcomp has
     * already checked the syntax.
     */
    rxp->nmatch = 1;
    for (scan = body; *scan != '\0'; ++scan) {
        if (*scan == '\\') {
            /* next char is escaped - skip it (unless it is the end) */
            if (scan[1] != '\0') {
                ++scan;
            }
        }
        else if (*scan == '(') {
            ++rxp->nmatch;
        }
    }
    rxp->pmatch = apr_palloc(pool, rxp->nmatch * sizeof(ap_regmatch_t));

    return rxp;
}
Packit 90a5c9
Packit 90a5c9
/* Match a compiled rx against a string and, when rx carries a replacement
 * (rx->subs), build the substituted string.  Logic originally borrowed
 * from process_regexp in mod_headers.
 *
 * pool        pool used for every string built here
 * rx          compiled pattern; rx->match is set to pattern on a match and
 *             to NULL otherwise, and rx->pmatch (when present) is
 *             overwritten with the match offsets
 * pattern     NUL-terminated string to match against
 * newpattern  written only when rx->subs is set and the regex matched:
 *             receives the substituted string, or NULL if ap_pregsub failed
 *
 * Returns 0 when nothing matched or ap_pregsub failed.  Otherwise returns
 * 1 for a plain match, or the number of substitutions performed (more than
 * 1 is only possible with AP_REG_MULTI).
 *
 * With AP_REG_MULTI the function recurses on the text after each match, so
 * on return rx->match and rx->pmatch describe the innermost call, not the
 * first match.
 */
AP_DECLARE(int) ap_rxplus_exec(apr_pool_t *pool, ap_rxplus_t *rx,
                               const char *pattern, char **newpattern)
{
    int ret = 1;
    int startl, oldl;
    int skip = 0;               /* 1 if a literal char follows an empty match */
    apr_size_t newl, taill;
    const char *remainder;
    const char *tail;
    char *further = NULL;       /* result of substituting in the remainder */
    char *out;
    apr_size_t nmatch = rx->nmatch;
    ap_regmatch_t *pmatch = rx->pmatch;
    ap_regmatch_t whole;

    /* A substitution always needs the offsets of the whole match.  If the
     * rx has no match array (e.g. it was compiled with the 'n' flag), use
     * a local slot instead of dereferencing a NULL rx->pmatch below.
     */
    if (rx->subs && (nmatch == 0 || pmatch == NULL)) {
        pmatch = &whole;
        nmatch = 1;
    }

    if (ap_regexec(&rx->rx, pattern, nmatch, pmatch, rx->flags) != 0) {
        rx->match = NULL;
        return 0; /* no match, nothing to do */
    }
    rx->match = pattern;
    if (!rx->subs) {
        return ret;
    }

    *newpattern = ap_pregsub(pool, rx->subs, pattern, nmatch, pmatch);
    if (!*newpattern) {
        return 0; /* FIXME - should we do more to handle error? */
    }

    /* Copy the offsets out now: a recursive call may overwrite pmatch */
    startl = pmatch[0].rm_so;
    oldl = pmatch[0].rm_eo - startl;
    newl = strlen(*newpattern);
    remainder = pattern + startl + oldl;
    tail = remainder;

    if (rx->flags & AP_REG_MULTI) {
        int recurse = 1;

        if (oldl == 0) {
            /* An empty match does not advance remainder, so recursing on
             * it as-is would never terminate.  Step over one literal
             * character, or stop if the end of the string was reached.
             */
            if (*remainder == '\0') {
                recurse = 0;
            }
            else {
                skip = 1;
            }
        }
        if (recurse) {
            tail = remainder + skip;
            /* recurse to do any further matches */
            ret += ap_rxplus_exec(pool, rx, tail, &further);
            if (ret > 1) {
                /* a further substitution happened */
                tail = further;
            }
        }
    }

    /* Assemble: text before the match, the replacement, the character
     * stepped over after an empty match (if any), then the rest.
     */
    taill = strlen(tail);
    out = apr_palloc(pool, startl + newl + skip + taill + 1);
    memcpy(out, pattern, startl);
    memcpy(out + startl, *newpattern, newl);
    if (skip) {
        out[startl + newl] = *remainder;
    }
    memcpy(out + startl + newl + skip, tail, taill + 1); /* copies the NUL */
    *newpattern = out;

    return ret;
}
Packit 90a5c9
#ifdef DOXYGEN
/* Number of match slots usable after the last ap_rxplus_exec:
 * rx->nmatch when rx->match is set, 0 when it is NULL.
 * This function definition is only compiled when DOXYGEN is defined;
 * presumably the header supplies ap_rxplus_nmatch for normal builds
 * (ap_rxplus_match below calls it unconditionally) - confirm there.
 */
AP_DECLARE(int) ap_rxplus_nmatch(ap_rxplus_t *rx)
{
    if (rx->match == NULL) {
        return 0;
    }
    return rx->nmatch;
}
#endif
Packit 90a5c9
Packit 90a5c9
/* If this blows up on you, see the notes in the header/apidoc
Packit 90a5c9
 * rx->match is a pointer and it's your responsibility to ensure
Packit 90a5c9
 * it hasn't gone out-of-scope since the last ap_rxplus_exec
Packit 90a5c9
 */
Packit 90a5c9
/* Look up match number n from the last ap_rxplus_exec.
 *
 * rx     compiled pattern that was last executed
 * n      index into rx->pmatch (0 is the whole match)
 * len    receives the match length (rm_eo - rm_so), or -1 if n is not a
 *        valid index
 * match  receives a pointer into the string rx->match at offset rm_so, or
 *        NULL if n is not a valid index; nothing is copied
 */
AP_DECLARE(void) ap_rxplus_match(ap_rxplus_t *rx, int n, int *len,
                                 const char **match)
{
    const ap_regmatch_t *slot;

    if (n < 0 || n >= ap_rxplus_nmatch(rx)) {
        /* out of range, or nothing matched */
        *len = -1;
        *match = NULL;
        return;
    }

    slot = &rx->pmatch[n];
    *match = rx->match + slot->rm_so;
    *len = slot->rm_eo - slot->rm_so;
}
Packit 90a5c9
/* Fetch match number n via ap_rxplus_match and duplicate it into pool
 * with apr_pstrndup.
 *
 * pool  pool that owns the returned copy
 * rx    compiled pattern that was last executed
 * n     index of the match wanted
 *
 * For an invalid n, ap_rxplus_match yields NULL / -1 and those values are
 * passed straight to apr_pstrndup; the result is whatever it returns for
 * them.
 */
AP_DECLARE(char*) ap_rxplus_pmatch(apr_pool_t *pool, ap_rxplus_t *rx, int n)
{
    const char *start;
    int length;

    ap_rxplus_match(rx, n, &length, &start);
    return apr_pstrndup(pool, start, length);
}