Blame apache2/libinjection/libinjection_sqli.c

/**
 * Copyright 2012,2016  Nick Galbreath
 * nickg@client9.com
 * BSD License -- see COPYING.txt for details
 *
 * https://libinjection.client9.com/
 *
 */
Packit 284210
Packit 284210
#include <string.h>
Packit 284210
#include <stdlib.h>
Packit 284210
#include <stdio.h>
Packit 284210
#include <ctype.h>
Packit 284210
#include <assert.h>
Packit 284210
#include <stddef.h>
Packit 284210
Packit 284210
#include "libinjection.h"
Packit 284210
#include "libinjection_sqli.h"
Packit 284210
#include "libinjection_sqli_data.h"
Packit 284210
Packit 284210
#define LIBINJECTION_VERSION "3.9.2"

/* size in bytes of the `val` member of a stoken_t */
#define LIBINJECTION_SQLI_TOKEN_SIZE  sizeof(((stoken_t*)(0))->val)
#define LIBINJECTION_SQLI_MAX_TOKENS  5

#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif

#define CHAR_NULL    '\0'
#define CHAR_SINGLE  '\''
#define CHAR_DOUBLE  '"'
#define CHAR_TICK    '`'

/* faster than calling out to libc isdigit */
#define ISDIGIT(a) ((unsigned)((a) - '0') <= 9)

/* flip the "#if 0" to "#if 1" to get a printf trace wherever FOLD_DEBUG is used */
#if 0
#define FOLD_DEBUG printf("%d \t more=%d  pos=%d left=%d\n", __LINE__, more, (int)pos, (int)left);
#else
#define FOLD_DEBUG
#endif
Packit 284210
/*
 * Token type codes.  Apart from TYPE_NONE, each code is the value of a
 * printable ASCII character.
 *
 * not making public just yet
 */
typedef enum {
    TYPE_NONE           = 0,
    TYPE_KEYWORD        = (int)'k',
    TYPE_UNION          = (int)'U',
    TYPE_GROUP          = (int)'B',
    TYPE_EXPRESSION     = (int)'E',
    TYPE_SQLTYPE        = (int)'t',
    TYPE_FUNCTION       = (int)'f',
    TYPE_BAREWORD       = (int)'n',
    TYPE_NUMBER         = (int)'1',
    TYPE_VARIABLE       = (int)'v',
    TYPE_STRING         = (int)'s',
    TYPE_OPERATOR       = (int)'o',
    TYPE_LOGIC_OPERATOR = (int)'&',
    TYPE_COMMENT        = (int)'c',
    TYPE_COLLATE        = (int)'A',
    TYPE_LEFTPARENS     = (int)'(',
    TYPE_RIGHTPARENS    = (int)')',  /* not used? */
    TYPE_LEFTBRACE      = (int)'{',
    TYPE_RIGHTBRACE     = (int)'}',
    TYPE_DOT            = (int)'.',
    TYPE_COMMA          = (int)',',
    TYPE_COLON          = (int)':',
    TYPE_SEMICOLON      = (int)';',
    TYPE_TSQL           = (int)'T',  /* TSQL start */
    TYPE_UNKNOWN        = (int)'?',
    TYPE_EVIL           = (int)'X',  /* unparsable, abort  */
    TYPE_FINGERPRINT    = (int)'F',  /* not really a token */
    TYPE_BACKSLASH      = (int)'\\'
} sqli_token_types;
Packit 284210
Packit 284210
/**
Packit 284210
 * Initializes parsing state
Packit 284210
 *
Packit 284210
 */
Packit 284210
static char flag2delim(int flag)
Packit 284210
{
Packit 284210
    if (flag & FLAG_QUOTE_SINGLE) {
Packit 284210
        return CHAR_SINGLE;
Packit 284210
    } else if (flag & FLAG_QUOTE_DOUBLE) {
Packit 284210
        return CHAR_DOUBLE;
Packit 284210
    } else {
Packit 284210
        return CHAR_NULL;
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
/* memchr2 finds a string of 2 characters inside another string
 * This is a specialized version of "memmem" or "memchr".
 * 'memmem' doesn't exist on all platforms
 *
 * haystack      start of the memory to search (need not be NUL terminated)
 * haystack_len  number of bytes available at haystack
 * c0, c1        the two adjacent bytes to look for
 *
 * Returns a pointer to the first occurrence of c0 immediately followed
 * by c1, or NULL if there is none (always NULL when haystack_len < 2).
 *
 * Porting notes: this is just a special version of
 *    astring.find("AB")
 *
 */
static const char *
memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
{
    const char *cur;
    const char *last;

    if (haystack_len < 2) {
        return NULL;
    }

    /* computed only after the length check so that no pointer before
     * the start of the buffer is ever formed
     */
    last = haystack + haystack_len - 1;

    for (cur = haystack; cur < last; ++cur) {
        /* safe since cur < last, so cur[1] is still inside the buffer */
        if (cur[0] == c0 && cur[1] == c1) {
            return cur;
        }
    }

    return NULL;
}
Packit 284210
Packit 284210
/**
 * memmem might not exist on some systems
 *
 * @param haystack  memory to search; must not be NULL
 * @param hlen      number of bytes available at haystack
 * @param needle    bytes to look for; must not be NULL
 * @param nlen      length of needle; must be greater than 1
 * @return pointer to the first occurrence of needle inside haystack,
 *         or NULL if it does not occur (including when hlen < nlen)
 */
static const char *
my_memmem(const char* haystack, size_t hlen, const char* needle, size_t nlen)
{
    const char* cur;
    const char* last;
    assert(haystack);
    assert(needle);
    assert(nlen > 1);

    /* a needle longer than the haystack can never match; bail out before
     * computing 'last', which would otherwise point before the buffer
     */
    if (hlen < nlen) {
        return NULL;
    }

    last = haystack + hlen - nlen;
    for (cur = haystack; cur <= last; ++cur) {
        /* cheap first-byte test before the full comparison */
        if (cur[0] == needle[0] && memcmp(cur, needle, nlen) == 0) {
            return cur;
        }
    }
    return NULL;
}
Packit 284210
Packit 284210
/** Find largest string containing certain characters.
 *
 * C Standard library 'strspn' only works for 'c-strings' (null terminated)
 * This works on arbitrary length.
 *
 * @param s       memory to scan (need not be NUL terminated)
 * @param len     number of bytes available at s
 * @param accept  NUL terminated set of accepted characters
 * @return length of the leading run of s made only of characters from
 *         accept; len if every byte is accepted
 *
 * A NUL byte inside s is never accepted.  strchr() also matches the
 * terminator of accept, so without the explicit test a NUL byte in the
 * input would be counted as a member of every set.
 *
 * Performance notes:
 *   not critical
 *
 * Porting notes:
 *   if accept is 'ABC', then this function would be similar to
 *   a_regexp.match(a_str, '[ABC]*'),
 */
static size_t
strlenspn(const char *s, size_t len, const char *accept)
{
    size_t i;
    for (i = 0; i < len; ++i) {
        /* likely we can do better by inlining this function
         * but this works for now
         */
        if (s[i] == '\0' || strchr(accept, s[i]) == NULL) {
            return i;
        }
    }
    return len;
}
Packit 284210
Packit 284210
/** Length of the leading run of s that contains none of the given characters.
 *
 * Counterpart of strlenspn; like 'strcspn' but for arbitrary length memory.
 *
 * @param s       memory to scan (need not be NUL terminated)
 * @param len     number of bytes available at s
 * @param accept  NUL terminated set of characters that end the run
 * @return index of the first byte of s found in accept, or len if none is
 *
 * A NUL byte inside s always ends the run, because strchr() matches the
 * terminator of accept.
 */
static size_t
strlencspn(const char *s, size_t len, const char *accept)
{
    size_t i;
    for (i = 0; i < len; ++i) {
        /* likely we can do better by inlining this function
         * but this works for now
         */
        if (strchr(accept, s[i]) != NULL) {
            return i;
        }
    }
    return len;
}
Packit 284210
/**
 * Test whether a byte is one of the characters listed below.
 *
 * @param ch  byte to classify
 * @return 1 if ch is in the list, 0 otherwise
 */
static int char_is_white(char ch) {
    /* ' '  space is 0x20
       '\t' 0x09 \011 horizontal tab
       '\n' 0x0a \012 new line
       '\v' 0x0b \013 vertical tab
       '\f' 0x0c \014 new page
       '\r' 0x0d \015 carriage return
            0x00 \000 null (oracle)
            0xa0 \240 is Latin-1

       NUL matches because strchr() also finds the string terminator.
    */
    return strchr(" \t\n\v\f\r\240\000", ch) != NULL;
}
Packit 284210
Packit 284210
/* DANGER DANGER
 * This is -very specialized function-
 *
 * this compares a ALL_UPPER CASE C STRING
 * with a *arbitrary memory* + length
 *
 * Sane people would just make a copy, up-case
 * and use a hash table.
 *
 * Required since libc version uses the current locale
 * and is much slower.
 *
 * a  NUL terminated string, expected to be upper case already
 * b  memory of at least n bytes; 'a'..'z' are folded to upper case
 * n  number of bytes of b to compare
 *
 * Returns 0 only when the first n bytes of b match a and a ends exactly
 * there.  Returns the byte difference (*a - folded *b) at the first
 * mismatch, -1 if a matching NUL is reached within the first n bytes,
 * and 1 if a is longer than n.
 */
static int cstrcasecmp(const char *a, const char *b, size_t n)
{
    char cb;

    for (; n > 0; a++, b++, n--) {
        cb = *b;
        /* ASCII-only upper-casing, independent of the locale */
        if (cb >= 'a' && cb <= 'z') {
            cb -= 0x20;
        }
        if (*a != cb) {
            return *a - cb;
        } else if (*a == '\0') {
            /* both sides hold a NUL before n bytes were consumed */
            return -1;
        }
    }

    /* all n bytes matched; equal only if a has nothing left */
    return (*a == 0) ? 0 : 1;
}
Packit 284210
Packit 284210
/**
 * Case sensitive string compare.
 *  Here only to make code more readable
 *
 * @param a  NUL terminated string
 * @param b  NUL terminated string
 * @return 1 if the strings are equal, 0 otherwise
 */
static int streq(const char *a, const char *b)
{
    return strcmp(a, b) == 0;
}
Packit 284210
Packit 284210
/**
Packit 284210
 *
Packit 284210
 *
Packit 284210
 *
Packit 284210
 * Porting Notes:
Packit 284210
 *  given a mapping/hash of string to char
Packit 284210
 *  this is just
Packit 284210
 *    typecode = mapping[key.upper()]
Packit 284210
 */
Packit 284210
Packit 284210
static char bsearch_keyword_type(const char *key, size_t len,
Packit 284210
                                 const keyword_t * keywords, size_t numb)
Packit 284210
{
Packit 284210
    size_t pos;
Packit 284210
    size_t left = 0;
Packit 284210
    size_t right = numb - 1;
Packit 284210
Packit 284210
    while (left < right) {
Packit 284210
        pos = (left + right) >> 1;
Packit 284210
Packit 284210
        /* arg0 = upper case only, arg1 = mixed case */
Packit 284210
        if (cstrcasecmp(keywords[pos].word, key, len) < 0) {
Packit 284210
            left = pos + 1;
Packit 284210
        } else {
Packit 284210
            right = pos;
Packit 284210
        }
Packit 284210
    }
Packit 284210
    if ((left == right) && cstrcasecmp(keywords[left].word, key, len) == 0) {
Packit 284210
        return keywords[left].type;
Packit 284210
    } else {
Packit 284210
        return CHAR_NULL;
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
static char is_keyword(const char* key, size_t len)
Packit 284210
{
Packit 284210
    return bsearch_keyword_type(key, len, sql_keywords, sql_keywords_sz);
Packit 284210
}
Packit 284210
Packit 284210
/* st_token methods
 *
 * The following functions manipulate the stoken_t type
 *
 *
 */
Packit 284210
Packit 284210
/** Reset every byte of a token to zero. */
static void st_clear(stoken_t * st)
{
    memset(st, 0, sizeof(stoken_t));
}
Packit 284210
Packit 284210
/**
 * Fill a token with a single character value.
 *
 * @param st     token to write
 * @param stype  type code stored in st->type
 * @param pos    position stored in st->pos
 * @param len    ignored; the stored length is always 1
 * @param value  the character stored as the token value
 */
static void st_assign_char(stoken_t * st, const char stype, size_t pos, size_t len,
                           const char value)
{
    /* done to eliminate unused warning */
    (void)len;
    st->type = (char) stype;
    st->pos = pos;
    st->len = 1;
    st->val[0] = value;
    st->val[1] = CHAR_NULL;
}
Packit 284210
Packit 284210
/**
 * Fill a token with a copy of a run of bytes.
 *
 * At most LIBINJECTION_SQLI_TOKEN_SIZE - 1 bytes are copied; longer values
 * are truncated, and st->len records the copied length.  The stored value
 * is always NUL terminated.
 *
 * @param st     token to write
 * @param stype  type code stored in st->type
 * @param pos    position stored in st->pos
 * @param len    number of bytes available at value
 * @param value  bytes to copy
 */
static void st_assign(stoken_t * st, const char stype,
                      size_t pos, size_t len, const char* value)
{
    const size_t MSIZE = LIBINJECTION_SQLI_TOKEN_SIZE;
    /* leave room for the terminator */
    size_t last = len < MSIZE ? len : (MSIZE - 1);
    st->type = (char) stype;
    st->pos = pos;
    st->len = last;
    memcpy(st->val, value, last);
    st->val[last] = CHAR_NULL;
}
Packit 284210
Packit 284210
/** Copy the whole token src over dest (byte-wise). */
static void st_copy(stoken_t * dest, const stoken_t * src)
{
    memcpy(dest, src, sizeof(stoken_t));
}
Packit 284210
Packit 284210
static int st_is_arithmetic_op(const stoken_t* st)
Packit 284210
{
Packit 284210
    const char ch = st->val[0];
Packit 284210
    return (st->type == TYPE_OPERATOR && st->len == 1 &&
Packit 284210
            (ch == '*' || ch == '/' || ch == '-' || ch == '+' || ch == '%'));
Packit 284210
}
Packit 284210
Packit 284210
static int st_is_unary_op(const stoken_t * st)
Packit 284210
{
Packit 284210
    const char* str = st->val;
Packit 284210
    const size_t len = st->len;
Packit 284210
Packit 284210
    if (st->type != TYPE_OPERATOR) {
Packit 284210
        return FALSE;
Packit 284210
    }
Packit 284210
Packit 284210
    switch (len) {
Packit 284210
    case 1:
Packit 284210
        return *str == '+' || *str == '-' || *str == '!' || *str == '~';
Packit 284210
    case 2:
Packit 284210
        return str[0] == '!' && str[1] == '!';
Packit 284210
    case 3:
Packit 284210
        return cstrcasecmp("NOT", str, 3) == 0;
Packit 284210
    default:
Packit 284210
        return FALSE;
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
/* Parsers
 *
 *
 */
Packit 284210
Packit 284210
static size_t parse_white(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    return sf->pos + 1;
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_operator1(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    const char *cs = sf->s;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
Packit 284210
    st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, cs[pos]);
Packit 284210
    return pos + 1;
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_other(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    const char *cs = sf->s;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
Packit 284210
    st_assign_char(sf->current, TYPE_UNKNOWN, pos, 1, cs[pos]);
Packit 284210
    return pos + 1;
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_char(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    const char *cs = sf->s;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
Packit 284210
    st_assign_char(sf->current, cs[pos], pos, 1, cs[pos]);
Packit 284210
    return pos + 1;
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_eol_comment(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    const char *cs = sf->s;
Packit 284210
    const size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
Packit 284210
    const char *endpos =
Packit 284210
        (const char *) memchr((const void *) (cs + pos), '\n', slen - pos);
Packit 284210
    if (endpos == NULL) {
Packit 284210
        st_assign(sf->current, TYPE_COMMENT, pos, slen - pos, cs + pos);
Packit 284210
        return slen;
Packit 284210
    } else {
Packit 284210
        st_assign(sf->current, TYPE_COMMENT, pos, (size_t)(endpos - cs) - pos, cs + pos);
Packit 284210
        return (size_t)((endpos - cs) + 1);
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
/** In ANSI mode, hash is an operator
Packit 284210
 *  In MYSQL mode, it's a EOL comment like '--'
Packit 284210
 */
Packit 284210
static size_t parse_hash(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    sf->stats_comment_hash += 1;
Packit 284210
    if (sf->flags & FLAG_SQL_MYSQL) {
Packit 284210
        sf->stats_comment_hash += 1;
Packit 284210
        return parse_eol_comment(sf);
Packit 284210
    } else {
Packit 284210
        st_assign_char(sf->current, TYPE_OPERATOR, sf->pos, 1, '#');
Packit 284210
        return sf->pos + 1;
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_dash(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    const char *cs = sf->s;
Packit 284210
    const size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
Packit 284210
    /*
Packit 284210
     * five cases
Packit 284210
     * 1) --[white]  this is always a SQL comment
Packit 284210
     * 2) --[EOF]    this is a comment
Packit 284210
     * 3) --[notwhite] in MySQL this is NOT a comment but two unary operators
Packit 284210
     * 4) --[notwhite] everyone else thinks this is a comment
Packit 284210
     * 5) -[not dash]  '-' is a unary operator
Packit 284210
     */
Packit 284210
Packit 284210
    if (pos + 2 < slen && cs[pos + 1] == '-' && char_is_white(cs[pos+2]) ) {
Packit 284210
        return parse_eol_comment(sf);
Packit 284210
    } else if (pos +2 == slen && cs[pos + 1] == '-') {
Packit 284210
        return parse_eol_comment(sf);
Packit 284210
    } else if (pos + 1 < slen && cs[pos + 1] == '-' && (sf->flags & FLAG_SQL_ANSI)) {
Packit 284210
        /* --[not-white] not-white case:
Packit 284210
         *
Packit 284210
         */
Packit 284210
        sf->stats_comment_ddx += 1;
Packit 284210
        return parse_eol_comment(sf);
Packit 284210
    } else {
Packit 284210
        st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, '-');
Packit 284210
        return pos + 1;
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
Packit 284210
/** This detects MySQL comments, comments that
 * start with /x!   We just ban these now but
 * previously we attempted to parse the inside
 *
 * For reference:
 * the form of /x![anything]x/ or /x!12345[anything] x/
 *
 * Mysql 3 (maybe 4), allowed this:
 *    /x!0selectx/ 1;
 * where 0 could be any number.
 *
 * The last version of MySQL 3 was in 2003.
 *
 * It is unclear if the MySQL 3 syntax was allowed
 * in MySQL 4.  The last version of MySQL 4 was in 2008
 *
 * (in the text above 'x' stands for an asterisk)
 *
 * @param cs   input buffer
 * @param len  number of bytes in cs
 * @param pos  position of the '/' that opens the comment
 * @return 1 if the byte at pos + 2 exists and is '!', 0 otherwise
 */
static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
{
    /* so far...
     * cs[pos] == '/' && cs[pos+1] == '*'
     */

    if (pos + 2 >= len) {
        /* not a mysql comment */
        return 0;
    }

    if (cs[pos + 2] != '!') {
        /* not a mysql comment */
        return 0;
    }

    /*
     * this is a mysql comment
     *  got "/x!"
     */
    return 1;
}
Packit 284210
Packit 284210
static size_t parse_slash(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    const char* ptr;
Packit 284210
    size_t clen;
Packit 284210
    const char *cs = sf->s;
Packit 284210
    const size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
    const char* cur = cs + pos;
Packit 284210
    char ctype = TYPE_COMMENT;
Packit 284210
    size_t pos1 = pos + 1;
Packit 284210
    if (pos1 == slen || cs[pos1] != '*') {
Packit 284210
        return parse_operator1(sf);
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
     * skip over initial '/x'
Packit 284210
     */
Packit 284210
    ptr = memchr2(cur + 2, slen - (pos + 2), '*', '/');
Packit 284210
Packit 284210
    /*
Packit 284210
     * (ptr == NULL) causes false positive in cppcheck 1.61
Packit 284210
     * casting to type seems to fix it
Packit 284210
     */
Packit 284210
    if (ptr == (const char*) NULL) {
Packit 284210
        /* till end of line */
Packit 284210
        clen = slen - pos;
Packit 284210
    } else {
Packit 284210
        clen = (size_t)(ptr + 2 - cur);
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
     * postgresql allows nested comments which makes
Packit 284210
     * this is incompatible with parsing so
Packit 284210
     * if we find a '/x' inside the coment, then
Packit 284210
     * make a new token.
Packit 284210
     *
Packit 284210
     * Also, Mysql's "conditional" comments for version
Packit 284210
     *  are an automatic black ban!
Packit 284210
     */
Packit 284210
Packit 284210
    if (memchr2(cur + 2, (size_t)(ptr - (cur + 1)), '/', '*') !=  NULL) {
Packit 284210
        ctype = TYPE_EVIL;
Packit 284210
    } else if (is_mysql_comment(cs, slen, pos)) {
Packit 284210
        ctype = TYPE_EVIL;
Packit 284210
    }
Packit 284210
Packit 284210
    st_assign(sf->current, ctype, pos, clen, cs + pos);
Packit 284210
    return pos + clen;
Packit 284210
}
Packit 284210
Packit 284210
Packit 284210
static size_t parse_backslash(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    const char *cs = sf->s;
Packit 284210
    const size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
Packit 284210
    /*
Packit 284210
     * Weird MySQL alias for NULL, "\N" (capital N only)
Packit 284210
     */
Packit 284210
    if (pos + 1 < slen && cs[pos +1] == 'N') {
Packit 284210
        st_assign(sf->current, TYPE_NUMBER, pos, 2, cs + pos);
Packit 284210
        return pos + 2;
Packit 284210
    } else {
Packit 284210
        st_assign_char(sf->current, TYPE_BACKSLASH, pos, 1, cs[pos]);
Packit 284210
        return pos + 1;
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_operator2(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    char ch;
Packit 284210
    const char *cs = sf->s;
Packit 284210
    const size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
Packit 284210
    if (pos + 1 >= slen) {
Packit 284210
        return parse_operator1(sf);
Packit 284210
    }
Packit 284210
Packit 284210
    if (pos + 2 < slen &&
Packit 284210
        cs[pos] == '<' &&
Packit 284210
        cs[pos + 1] == '=' &&
Packit 284210
        cs[pos + 2] == '>') {
Packit 284210
        /*
Packit 284210
         * special 3-char operator
Packit 284210
         */
Packit 284210
        st_assign(sf->current, TYPE_OPERATOR, pos, 3, cs + pos);
Packit 284210
        return pos + 3;
Packit 284210
    }
Packit 284210
Packit 284210
    ch = sf->lookup(sf, LOOKUP_OPERATOR, cs + pos, 2);
Packit 284210
    if (ch != CHAR_NULL) {
Packit 284210
        st_assign(sf->current, ch, pos, 2, cs+pos);
Packit 284210
        return pos + 2;
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
     * not an operator.. what to do with the two
Packit 284210
     * characters we got?
Packit 284210
     */
Packit 284210
Packit 284210
    if (cs[pos] == ':') {
Packit 284210
        /* ':' is not an operator */
Packit 284210
        st_assign(sf->current, TYPE_COLON, pos, 1, cs+pos);
Packit 284210
        return pos + 1;
Packit 284210
    } else {
Packit 284210
        /*
Packit 284210
         * must be a single char operator
Packit 284210
         */
Packit 284210
        return parse_operator1(sf);
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
/*
 * Ok!   "  \"   "  one backslash = escaped!
 *       " \\"   "  two backslash = not escaped!
 *       "\\\"   "  three backslash = escaped!
 *
 * end    last character to inspect (the one just before the quote)
 * start  first character that may be inspected
 *
 * Counts the unbroken run of backslashes that ends at 'end', walking
 * backwards but never before 'start'.  Returns 1 if that count is odd,
 * 0 if it is even (including when end is before start).
 */
static int is_backslash_escaped(const char* end, const char* start)
{
    const char* ptr = end;
    size_t run = 0;

    while (ptr >= start && *ptr == '\\') {
        run += 1;
        if (ptr == start) {
            /* stop here so the pointer is never moved before 'start' */
            break;
        }
        ptr -= 1;
    }

    /* if number of backslashes is odd, it is escaped */
    return (int)(run & 1);
}
Packit 284210
Packit 284210
/**
 * @param cur  position of a delimiter character
 * @param end  one past the last readable character
 * @return 1 if the character right after cur lies before end and equals
 *         the character at cur, 0 otherwise
 */
static size_t is_double_delim_escaped(const char* cur,  const char* end)
{
    return  ((cur + 1) < end) && *(cur+1) == *cur;
}
Packit 284210
Packit 284210
/* Look forward for doubling of delimiter
Packit 284210
 *
Packit 284210
 * case 'foo''bar' --> foo''bar
Packit 284210
 *
Packit 284210
 * ending quote isn't duplicated (i.e. escaped)
Packit 284210
 * since it's the wrong char or EOL
Packit 284210
 *
Packit 284210
 */
Packit 284210
static size_t parse_string_core(const char *cs, const size_t len, size_t pos,
Packit 284210
                                stoken_t * st, char delim, size_t offset)
Packit 284210
{
Packit 284210
    /*
Packit 284210
     * offset is to skip the perhaps first quote char
Packit 284210
     */
Packit 284210
    const char *qpos =
Packit 284210
        (const char *) memchr((const void *) (cs + pos + offset), delim,
Packit 284210
                              len - pos - offset);
Packit 284210
Packit 284210
    /*
Packit 284210
     * then keep string open/close info
Packit 284210
     */
Packit 284210
    if (offset > 0) {
Packit 284210
        /*
Packit 284210
         * this is real quote
Packit 284210
         */
Packit 284210
        st->str_open = delim;
Packit 284210
    } else {
Packit 284210
        /*
Packit 284210
         * this was a simulated quote
Packit 284210
         */
Packit 284210
        st->str_open = CHAR_NULL;
Packit 284210
    }
Packit 284210
Packit 284210
    while (TRUE) {
Packit 284210
        if (qpos == NULL) {
Packit 284210
            /*
Packit 284210
             * string ended with no trailing quote
Packit 284210
             * assign what we have
Packit 284210
             */
Packit 284210
            st_assign(st, TYPE_STRING, pos + offset, len - pos - offset, cs + pos + offset);
Packit 284210
            st->str_close = CHAR_NULL;
Packit 284210
            return len;
Packit 284210
        } else if ( is_backslash_escaped(qpos - 1, cs + pos + offset)) {
Packit 284210
            /* keep going, move ahead one character */
Packit 284210
            qpos =
Packit 284210
                (const char *) memchr((const void *) (qpos + 1), delim,
Packit 284210
                                      (size_t)((cs + len) - (qpos + 1)));
Packit 284210
            continue;
Packit 284210
        } else if (is_double_delim_escaped(qpos, cs + len)) {
Packit 284210
            /* keep going, move ahead two characters */
Packit 284210
            qpos =
Packit 284210
                (const char *) memchr((const void *) (qpos + 2), delim,
Packit 284210
                                      (size_t)((cs + len) - (qpos + 2)));
Packit 284210
            continue;
Packit 284210
        } else {
Packit 284210
            /* hey it's a normal string */
Packit 284210
            st_assign(st, TYPE_STRING, pos + offset,
Packit 284210
                      (size_t)(qpos - (cs + pos + offset)), cs + pos + offset);
Packit 284210
            st->str_close = delim;
Packit 284210
            return (size_t)(qpos - cs + 1);
Packit 284210
        }
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
/**
Packit 284210
 * Used when first char is a ' or "
Packit 284210
 */
Packit 284210
static size_t parse_string(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    const char *cs = sf->s;
Packit 284210
    const size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
Packit 284210
    /*
Packit 284210
     * assert cs[pos] == single or double quote
Packit 284210
     */
Packit 284210
    return parse_string_core(cs, slen, pos, sf->current, cs[pos], 1);
Packit 284210
}
Packit 284210
Packit 284210
/**
Packit 284210
 * Used when first char is:
Packit 284210
 *    N or n:  mysql "National Character set"
Packit 284210
 *    E     :  psql  "Escaped String"
Packit 284210
 */
Packit 284210
static size_t parse_estring(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    const char *cs = sf->s;
Packit 284210
    const size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
Packit 284210
    if (pos + 2 >= slen || cs[pos+1] != CHAR_SINGLE) {
Packit 284210
        return parse_word(sf);
Packit 284210
    }
Packit 284210
    return parse_string_core(cs, slen, pos, sf->current, CHAR_SINGLE, 2);
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_ustring(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    const char *cs = sf->s;
Packit 284210
    size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
Packit 284210
    if (pos + 2 < slen && cs[pos+1] == '&' && cs[pos+2] == '\'') {
Packit 284210
        sf->pos += 2;
Packit 284210
        pos = parse_string(sf);
Packit 284210
        sf->current->str_open = 'u';
Packit 284210
        if (sf->current->str_close == '\'') {
Packit 284210
            sf->current->str_close = 'u';
Packit 284210
        }
Packit 284210
        return pos;
Packit 284210
    } else {
Packit 284210
        return parse_word(sf);
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_qstring_core(struct libinjection_sqli_state * sf, size_t offset)
Packit 284210
{
Packit 284210
    char ch;
Packit 284210
    const char *strend;
Packit 284210
    const char *cs = sf->s;
Packit 284210
    size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos + offset;
Packit 284210
Packit 284210
    /* if we are already at end of string..
Packit 284210
       if current char is not q or Q
Packit 284210
       if we don't have 2 more chars
Packit 284210
       if char2 != a single quote
Packit 284210
       then, just treat as word
Packit 284210
    */
Packit 284210
    if (pos >= slen ||
Packit 284210
        (cs[pos] != 'q' && cs[pos] != 'Q') ||
Packit 284210
        pos + 2 >= slen ||
Packit 284210
        cs[pos + 1] != '\'') {
Packit 284210
        return parse_word(sf);
Packit 284210
    }
Packit 284210
Packit 284210
    ch = cs[pos + 2];
Packit 284210
Packit 284210
    /* the ch > 127 is un-needed since
Packit 284210
     * we assume char is signed
Packit 284210
     */
Packit 284210
    if (ch < 33 /* || ch > 127 */) {
Packit 284210
        return parse_word(sf);
Packit 284210
    }
Packit 284210
    switch (ch) {
Packit 284210
    case '(' : ch = ')'; break;
Packit 284210
    case '[' : ch = ']'; break;
Packit 284210
    case '{' : ch = '}'; break;
Packit 284210
    case '<' : ch = '>'; break;
Packit 284210
    }
Packit 284210
Packit 284210
    strend = memchr2(cs + pos + 3, slen - pos - 3, ch, '\'');
Packit 284210
    if (strend == NULL) {
Packit 284210
        st_assign(sf->current, TYPE_STRING, pos + 3, slen - pos - 3, cs + pos + 3);
Packit 284210
        sf->current->str_open = 'q';
Packit 284210
        sf->current->str_close = CHAR_NULL;
Packit 284210
        return slen;
Packit 284210
    } else {
Packit 284210
        st_assign(sf->current, TYPE_STRING, pos + 3, (size_t)(strend - cs) - pos -  3, cs + pos + 3);
Packit 284210
        sf->current->str_open = 'q';
Packit 284210
        sf->current->str_close = 'q';
Packit 284210
        return (size_t)(strend - cs + 2);
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
/*
 * Oracle's q string
 *
 * The q/Q is expected at the current position (offset 0).
 */
static size_t parse_qstring(struct libinjection_sqli_state * sf)
{
    return parse_qstring_core(sf, 0);
}
Packit 284210
Packit 284210
/*
Packit 284210
 * mysql's N'STRING' or
Packit 284210
 * ...  Oracle's nq string
Packit 284210
 */
Packit 284210
static size_t parse_nqstring(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
    if (pos + 2 < slen && sf->s[pos+1] == CHAR_SINGLE) {
Packit 284210
        return parse_estring(sf);
Packit 284210
    }
Packit 284210
    return parse_qstring_core(sf, 1);
Packit 284210
}
Packit 284210
Packit 284210
/*
Packit 284210
 * binary literal string
Packit 284210
 * re: [bB]'[01]*'
Packit 284210
 */
Packit 284210
static size_t parse_bstring(struct libinjection_sqli_state *sf)
Packit 284210
{
Packit 284210
    size_t wlen;
Packit 284210
    const char *cs = sf->s;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
    size_t slen = sf->slen;
Packit 284210
Packit 284210
    /* need at least 2 more characters
Packit 284210
     * if next char isn't a single quote, then
Packit 284210
     * continue as normal word
Packit 284210
     */
Packit 284210
    if (pos + 2 >= slen || cs[pos+1] !=  '\'') {
Packit 284210
        return parse_word(sf);
Packit 284210
    }
Packit 284210
Packit 284210
    wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "01");
Packit 284210
    if (pos + 2 + wlen  >= slen || cs[pos + 2 + wlen] != '\'') {
Packit 284210
        return parse_word(sf);
Packit 284210
    }
Packit 284210
    st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
Packit 284210
    return pos + 2 + wlen + 1;
Packit 284210
}
Packit 284210
Packit 284210
/*
Packit 284210
 * hex literal string
Packit 284210
 * re: [xX]'[0123456789abcdefABCDEF]*'
Packit 284210
 * mysql has requirement of having EVEN number of chars,
Packit 284210
 *  but pgsql does not
Packit 284210
 */
Packit 284210
static size_t parse_xstring(struct libinjection_sqli_state *sf)
Packit 284210
{
Packit 284210
    size_t wlen;
Packit 284210
    const char *cs = sf->s;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
    size_t slen = sf->slen;
Packit 284210
Packit 284210
    /* need at least 2 more characters
Packit 284210
     * if next char isn't a single quote, then
Packit 284210
     * continue as normal word
Packit 284210
     */
Packit 284210
    if (pos + 2 >= slen || cs[pos+1] !=  '\'') {
Packit 284210
        return parse_word(sf);
Packit 284210
    }
Packit 284210
Packit 284210
    wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "0123456789ABCDEFabcdef");
Packit 284210
    if (pos + 2 + wlen  >= slen || cs[pos + 2 + wlen] != '\'') {
Packit 284210
        return parse_word(sf);
Packit 284210
    }
Packit 284210
    st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
Packit 284210
    return pos + 2 + wlen + 1;
Packit 284210
}
Packit 284210
Packit 284210
/**
Packit 284210
 * This handles MS SQLSERVER bracket words
Packit 284210
 * http://stackoverflow.com/questions/3551284/sql-serverwhat-do-brackets-mean-around-column-name
Packit 284210
 *
Packit 284210
 */
Packit 284210
static size_t parse_bword(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    const char *cs = sf->s;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
    const char* endptr = (const char*) memchr(cs + pos, ']', sf->slen - pos);
Packit 284210
    if (endptr == NULL) {
Packit 284210
        st_assign(sf->current, TYPE_BAREWORD, pos, sf->slen - pos, cs + pos);
Packit 284210
        return sf->slen;
Packit 284210
    } else {
Packit 284210
        st_assign(sf->current, TYPE_BAREWORD, pos, (size_t)(endptr - cs) - pos + 1, cs + pos);
Packit 284210
        return (size_t)((endptr - cs) + 1);
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_word(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    char ch;
Packit 284210
    char delim;
Packit 284210
    size_t i;
Packit 284210
    const char *cs = sf->s;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
    size_t wlen = strlencspn(cs + pos, sf->slen - pos,
Packit 284210
                             " []{}<>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r\"\240\000");
Packit 284210
Packit 284210
    st_assign(sf->current, TYPE_BAREWORD, pos, wlen, cs + pos);
Packit 284210
Packit 284210
    /* now we need to look inside what we good for "." and "`"
Packit 284210
     * and see if what is before is a keyword or not
Packit 284210
     */
Packit 284210
    for (i =0; i < sf->current->len; ++i) {
Packit 284210
        delim = sf->current->val[i];
Packit 284210
        if (delim == '.' || delim == '`') {
Packit 284210
            ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, i);
Packit 284210
            if (ch != TYPE_NONE && ch != TYPE_BAREWORD) {
Packit 284210
                /* needed for swig */
Packit 284210
                st_clear(sf->current);
Packit 284210
                /*
Packit 284210
                 * we got something like "SELECT.1"
Packit 284210
                 * or SELECT`column`
Packit 284210
                 */
Packit 284210
                st_assign(sf->current, ch, pos, i, cs + pos);
Packit 284210
                return pos + i;
Packit 284210
            }
Packit 284210
        }
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
     * do normal lookup with word including '.'
Packit 284210
     */
Packit 284210
    if (wlen < LIBINJECTION_SQLI_TOKEN_SIZE) {
Packit 284210
Packit 284210
        ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, wlen);
Packit 284210
        if (ch == CHAR_NULL) {
Packit 284210
            ch = TYPE_BAREWORD;
Packit 284210
        }
Packit 284210
        sf->current->type = ch;
Packit 284210
    }
Packit 284210
    return pos + wlen;
Packit 284210
}
Packit 284210
Packit 284210
/* MySQL backticks are a cross between string and
Packit 284210
 * and a bare word.
Packit 284210
 *
Packit 284210
 */
Packit 284210
static size_t parse_tick(struct libinjection_sqli_state* sf)
Packit 284210
{
Packit 284210
    size_t pos =  parse_string_core(sf->s, sf->slen, sf->pos, sf->current, CHAR_TICK, 1);
Packit 284210
Packit 284210
    /* we could check to see if start and end of
Packit 284210
     * of string are both "`", i.e. make sure we have
Packit 284210
     * matching set.  `foo` vs. `foo
Packit 284210
     * but I don't think it matters much
Packit 284210
     */
Packit 284210
Packit 284210
    /* check value of string to see if it's a keyword,
Packit 284210
     * function, operator, etc
Packit 284210
     */
Packit 284210
    char ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, sf->current->len);
Packit 284210
    if (ch == TYPE_FUNCTION) {
Packit 284210
        /* if it's a function, then convert token */
Packit 284210
        sf->current->type = TYPE_FUNCTION;
Packit 284210
    } else {
Packit 284210
        /* otherwise it's a 'n' type -- mysql treats
Packit 284210
         * everything as a bare word
Packit 284210
         */
Packit 284210
        sf->current->type = TYPE_BAREWORD;
Packit 284210
    }
Packit 284210
    return pos;
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_var(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    size_t xlen;
Packit 284210
    const char *cs = sf->s;
Packit 284210
    const size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos + 1;
Packit 284210
Packit 284210
    /*
Packit 284210
     * var_count is only used to reconstruct
Packit 284210
     * the input.  It counts the number of '@'
Packit 284210
     * seen 0 in the case of NULL, 1 or 2
Packit 284210
     */
Packit 284210
Packit 284210
    /*
Packit 284210
     * move past optional other '@'
Packit 284210
     */
Packit 284210
    if (pos < slen && cs[pos] == '@') {
Packit 284210
        pos += 1;
Packit 284210
        sf->current->count = 2;
Packit 284210
    } else {
Packit 284210
        sf->current->count = 1;
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
     * MySQL allows @@`version`
Packit 284210
     */
Packit 284210
    if (pos < slen) {
Packit 284210
        if (cs[pos] == '`') {
Packit 284210
            sf->pos = pos;
Packit 284210
            pos = parse_tick(sf);
Packit 284210
            sf->current->type = TYPE_VARIABLE;
Packit 284210
            return pos;
Packit 284210
        } else if (cs[pos] == CHAR_SINGLE || cs[pos] == CHAR_DOUBLE) {
Packit 284210
            sf->pos = pos;
Packit 284210
            pos = parse_string(sf);
Packit 284210
            sf->current->type = TYPE_VARIABLE;
Packit 284210
            return pos;
Packit 284210
        }
Packit 284210
    }
Packit 284210
Packit 284210
Packit 284210
    xlen = strlencspn(cs + pos, slen - pos,
Packit 284210
                     " <>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r'`\"");
Packit 284210
    if (xlen == 0) {
Packit 284210
        st_assign(sf->current, TYPE_VARIABLE, pos, 0, cs + pos);
Packit 284210
        return pos;
Packit 284210
    } else {
Packit 284210
        st_assign(sf->current, TYPE_VARIABLE, pos, xlen, cs + pos);
Packit 284210
        return pos + xlen;
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_money(struct libinjection_sqli_state *sf)
Packit 284210
{
Packit 284210
    size_t xlen;
Packit 284210
    const char* strend;
Packit 284210
    const char *cs = sf->s;
Packit 284210
    const size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
Packit 284210
    if (pos + 1 == slen) {
Packit 284210
        /* end of line */
Packit 284210
        st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
Packit 284210
        return slen;
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
     * $1,000.00 or $1.000,00 ok!
Packit 284210
     * This also parses $....,,,111 but that's ok
Packit 284210
     */
Packit 284210
Packit 284210
    xlen = strlenspn(cs + pos + 1, slen - pos - 1, "0123456789.,");
Packit 284210
    if (xlen == 0) {
Packit 284210
        if (cs[pos + 1] == '$') {
Packit 284210
            /* we have $$ .. find ending $$ and make string */
Packit 284210
            strend = memchr2(cs + pos + 2, slen - pos -2, '$', '$');
Packit 284210
            if (strend == NULL) {
Packit 284210
                /* fell off edge */
Packit 284210
                st_assign(sf->current, TYPE_STRING, pos + 2, slen - (pos + 2), cs + pos + 2);
Packit 284210
                sf->current->str_open = '$';
Packit 284210
                sf->current->str_close = CHAR_NULL;
Packit 284210
                return slen;
Packit 284210
            } else {
Packit 284210
                st_assign(sf->current, TYPE_STRING, pos + 2,
Packit 284210
                          (size_t)(strend - (cs + pos + 2)), cs + pos + 2);
Packit 284210
                sf->current->str_open = '$';
Packit 284210
                sf->current->str_close = '$';
Packit 284210
                return (size_t)(strend - cs + 2);
Packit 284210
            }
Packit 284210
        } else {
Packit 284210
            /* ok it's not a number or '$$', but maybe it's pgsql "$ quoted strings" */
Packit 284210
            xlen = strlenspn(cs + pos + 1, slen - pos - 1, "abcdefghjiklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
Packit 284210
            if (xlen == 0) {
Packit 284210
                /* hmm it's "$" _something_ .. just add $ and keep going*/
Packit 284210
                st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
Packit 284210
                return pos + 1;
Packit 284210
            }
Packit 284210
            /* we have $foobar????? */
Packit 284210
            /* is it $foobar$ */
Packit 284210
            if (pos + xlen + 1 == slen || cs[pos+xlen+1] != '$') {
Packit 284210
                /* not $foobar$, or fell off edge */
Packit 284210
                st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
Packit 284210
                return pos + 1;
Packit 284210
            }
Packit 284210
Packit 284210
            /* we have $foobar$ ... find it again */
Packit 284210
            strend = my_memmem(cs+xlen+2, slen - (pos+xlen+2), cs + pos, xlen+2);
Packit 284210
Packit 284210
            if (strend == NULL || ((size_t)(strend - cs) < (pos+xlen+2))) {
Packit 284210
                /* fell off edge */
Packit 284210
                st_assign(sf->current, TYPE_STRING, pos+xlen+2, slen - pos - xlen - 2, cs+pos+xlen+2);
Packit 284210
                sf->current->str_open = '$';
Packit 284210
                sf->current->str_close = CHAR_NULL;
Packit 284210
                return slen;
Packit 284210
            } else {
Packit 284210
                /* got one */
Packit 284210
                st_assign(sf->current, TYPE_STRING, pos+xlen+2,
Packit 284210
                          (size_t)(strend - (cs + pos + xlen + 2)), cs+pos+xlen+2);
Packit 284210
                sf->current->str_open = '$';
Packit 284210
                sf->current->str_close = '$';
Packit 284210
                return (size_t)((strend + xlen + 2) - cs);
Packit 284210
            }
Packit 284210
        }
Packit 284210
    } else if (xlen == 1 && cs[pos + 1] == '.') {
Packit 284210
        /* $. should parsed as a word */
Packit 284210
        return parse_word(sf);
Packit 284210
    } else {
Packit 284210
        st_assign(sf->current, TYPE_NUMBER, pos, 1 + xlen, cs + pos);
Packit 284210
        return pos + 1 + xlen;
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
static size_t parse_number(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    size_t xlen;
Packit 284210
    size_t start;
Packit 284210
    const char* digits = NULL;
Packit 284210
    const char *cs = sf->s;
Packit 284210
    const size_t slen = sf->slen;
Packit 284210
    size_t pos = sf->pos;
Packit 284210
    int have_e = 0;
Packit 284210
    int have_exp = 0;
Packit 284210
Packit 284210
    /* cs[pos] == '0' has 1/10 chance of being true,
Packit 284210
     * while pos+1< slen is almost always true
Packit 284210
     */
Packit 284210
    if (cs[pos] == '0' && pos + 1 < slen) {
Packit 284210
        if (cs[pos + 1] == 'X' || cs[pos + 1] == 'x') {
Packit 284210
            digits = "0123456789ABCDEFabcdef";
Packit 284210
        } else if (cs[pos + 1] == 'B' || cs[pos + 1] == 'b') {
Packit 284210
            digits = "01";
Packit 284210
        }
Packit 284210
Packit 284210
        if (digits) {
Packit 284210
            xlen = strlenspn(cs + pos + 2, slen - pos - 2, digits);
Packit 284210
            if (xlen == 0) {
Packit 284210
                st_assign(sf->current, TYPE_BAREWORD, pos, 2, cs + pos);
Packit 284210
                return pos + 2;
Packit 284210
            } else {
Packit 284210
                st_assign(sf->current, TYPE_NUMBER, pos, 2 + xlen, cs + pos);
Packit 284210
                return pos + 2 + xlen;
Packit 284210
            }
Packit 284210
        }
Packit 284210
    }
Packit 284210
Packit 284210
    start = pos;
Packit 284210
    while (pos < slen && ISDIGIT(cs[pos])) {
Packit 284210
        pos += 1;
Packit 284210
    }
Packit 284210
Packit 284210
    if (pos < slen && cs[pos] == '.') {
Packit 284210
        pos += 1;
Packit 284210
        while (pos < slen && ISDIGIT(cs[pos])) {
Packit 284210
            pos += 1;
Packit 284210
        }
Packit 284210
        if (pos - start == 1) {
Packit 284210
            /* only one character read so far */
Packit 284210
            st_assign_char(sf->current, TYPE_DOT, start, 1, '.');
Packit 284210
            return pos;
Packit 284210
        }
Packit 284210
    }
Packit 284210
Packit 284210
    if (pos < slen) {
Packit 284210
        if (cs[pos] == 'E' || cs[pos] == 'e') {
Packit 284210
            have_e = 1;
Packit 284210
            pos += 1;
Packit 284210
            if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) {
Packit 284210
                pos += 1;
Packit 284210
            }
Packit 284210
            while (pos < slen && ISDIGIT(cs[pos])) {
Packit 284210
                have_exp = 1;
Packit 284210
                pos += 1;
Packit 284210
            }
Packit 284210
        }
Packit 284210
    }
Packit 284210
Packit 284210
    /* oracle's ending float or double suffix
Packit 284210
     * http://docs.oracle.com/cd/B19306_01/server.102/b14200/sql_elements003.htm#i139891
Packit 284210
     */
Packit 284210
    if (pos < slen && (cs[pos] == 'd' || cs[pos] == 'D' || cs[pos] == 'f' || cs[pos] == 'F')) {
Packit 284210
        if (pos + 1 == slen) {
Packit 284210
            /* line ends evaluate "... 1.2f$" as '1.2f' */
Packit 284210
            pos += 1;
Packit 284210
        } else if ((char_is_white(cs[pos+1]) || cs[pos+1] == ';')) {
Packit 284210
            /*
Packit 284210
             * easy case, evaluate "... 1.2f ... as '1.2f'
Packit 284210
             */
Packit 284210
            pos += 1;
Packit 284210
        } else if (cs[pos+1] == 'u' || cs[pos+1] == 'U') {
Packit 284210
            /*
Packit 284210
             * a bit of a hack but makes '1fUNION' parse as '1f UNION'
Packit 284210
             */
Packit 284210
            pos += 1;
Packit 284210
        } else {
Packit 284210
            /* it's like "123FROM" */
Packit 284210
            /* parse as "123" only */
Packit 284210
        }
Packit 284210
    }
Packit 284210
Packit 284210
    if (have_e == 1 && have_exp == 0) {
Packit 284210
        /* very special form of
Packit 284210
         * "1234.e"
Packit 284210
         * "10.10E"
Packit 284210
         * ".E"
Packit 284210
         * this is a WORD not a number!! */
Packit 284210
        st_assign(sf->current, TYPE_BAREWORD, start, pos - start, cs + start);
Packit 284210
    } else {
Packit 284210
        st_assign(sf->current, TYPE_NUMBER, start, pos - start, cs + start);
Packit 284210
    }
Packit 284210
    return pos;
Packit 284210
}
Packit 284210
Packit 284210
/*
Packit 284210
 * API to return version.  This allows us to increment the version
Packit 284210
 * without having to regenerated the SWIG (or other binding) in minor
Packit 284210
 * releases.
Packit 284210
 */
Packit 284210
const char* libinjection_version()
Packit 284210
{
Packit 284210
    return LIBINJECTION_VERSION;
Packit 284210
}
Packit 284210
Packit 284210
int libinjection_sqli_tokenize(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    pt2Function fnptr;
Packit 284210
    size_t *pos = &sf->pos;
Packit 284210
    stoken_t *current = sf->current;
Packit 284210
    const char *s = sf->s;
Packit 284210
    const size_t slen = sf->slen;
Packit 284210
Packit 284210
    if (slen == 0) {
Packit 284210
        return FALSE;
Packit 284210
    }
Packit 284210
Packit 284210
    st_clear(current);
Packit 284210
    sf->current = current;
Packit 284210
Packit 284210
    /*
Packit 284210
     * if we are at beginning of string
Packit 284210
     *  and in single-quote or double quote mode
Packit 284210
     *  then pretend the input starts with a quote
Packit 284210
     */
Packit 284210
    if (*pos == 0 && (sf->flags & (FLAG_QUOTE_SINGLE | FLAG_QUOTE_DOUBLE))) {
Packit 284210
        *pos = parse_string_core(s, slen, 0, current, flag2delim(sf->flags), 0);
Packit 284210
        sf->stats_tokens += 1;
Packit 284210
        return TRUE;
Packit 284210
    }
Packit 284210
Packit 284210
    while (*pos < slen) {
Packit 284210
Packit 284210
        /*
Packit 284210
         * get current character
Packit 284210
         */
Packit 284210
        const unsigned char ch = (unsigned char) (s[*pos]);
Packit 284210
Packit 284210
        /*
Packit 284210
         * look up the parser, and call it
Packit 284210
         *
Packit 284210
         * Porting Note: this is mapping of char to function
Packit 284210
         *   charparsers[ch]()
Packit 284210
         */
Packit 284210
        fnptr = char_parse_map[ch];
Packit 284210
Packit 284210
        *pos = (*fnptr) (sf);
Packit 284210
Packit 284210
        /*
Packit 284210
         *
Packit 284210
         */
Packit 284210
        if (current->type != CHAR_NULL) {
Packit 284210
            sf->stats_tokens += 1;
Packit 284210
            return TRUE;
Packit 284210
        }
Packit 284210
    }
Packit 284210
    return FALSE;
Packit 284210
}
Packit 284210
Packit 284210
void libinjection_sqli_init(struct libinjection_sqli_state * sf, const char *s, size_t len, int flags)
Packit 284210
{
Packit 284210
    if (flags == 0) {
Packit 284210
        flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
Packit 284210
    }
Packit 284210
Packit 284210
    memset(sf, 0, sizeof(struct libinjection_sqli_state));
Packit 284210
    sf->s        = s;
Packit 284210
    sf->slen     = len;
Packit 284210
    sf->lookup   = libinjection_sqli_lookup_word;
Packit 284210
    sf->userdata = 0;
Packit 284210
    sf->flags    = flags;
Packit 284210
    sf->current  = &(sf->tokenvec[0]);
Packit 284210
}
Packit 284210
Packit 284210
void libinjection_sqli_reset(struct libinjection_sqli_state * sf, int flags)
Packit 284210
{
Packit 284210
    void *userdata = sf->userdata;
Packit 284210
    ptr_lookup_fn lookup = sf->lookup;;
Packit 284210
Packit 284210
    if (flags == 0) {
Packit 284210
        flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
Packit 284210
    }
Packit 284210
    libinjection_sqli_init(sf, sf->s, sf->slen, flags);
Packit 284210
    sf->lookup = lookup;
Packit 284210
    sf->userdata = userdata;
Packit 284210
}
Packit 284210
Packit 284210
/*
 * Install a custom word-lookup callback together with its userdata.
 * Passing fn == NULL restores the built-in lookup and clears userdata
 * (the userdata argument is ignored in that case).
 */
void libinjection_sqli_callback(struct libinjection_sqli_state * sf, ptr_lookup_fn fn, void* userdata)
{
    if (fn != NULL) {
        sf->lookup = fn;
        sf->userdata = userdata;
        return;
    }

    sf->lookup = libinjection_sqli_lookup_word;
    sf->userdata = (void*)(NULL);
}
Packit 284210
Packit 284210
/** See if two tokens can be merged since they are compound SQL phrases.
Packit 284210
 *
Packit 284210
 * This takes two tokens, and, if they are the right type,
Packit 284210
 * merges their values together.  Then checks to see if the
Packit 284210
 * new value is special using the PHRASES mapping.
Packit 284210
 *
Packit 284210
 * Example: "UNION" + "ALL" ==> "UNION ALL"
Packit 284210
 *
Packit 284210
 * C Security Notes: this is safe to use C-strings (null-terminated)
Packit 284210
 *  since the types involved by definition do not have embedded nulls
Packit 284210
 *  (e.g. there is no keyword with embedded null)
Packit 284210
 *
Packit 284210
 * Porting Notes: since this is C, it's oddly complicated.
Packit 284210
 *  This is just:  multikeywords[token.value + ' ' + token2.value]
Packit 284210
 *
Packit 284210
 */
Packit 284210
static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a, stoken_t * b)
Packit 284210
{
Packit 284210
    size_t sz1;
Packit 284210
    size_t sz2;
Packit 284210
    size_t sz3;
Packit 284210
    char tmp[LIBINJECTION_SQLI_TOKEN_SIZE];
Packit 284210
    char ch;
Packit 284210
Packit 284210
    /* first token is of right type? */
Packit 284210
    if (!
Packit 284210
        (a->type == TYPE_KEYWORD ||
Packit 284210
         a->type == TYPE_BAREWORD ||
Packit 284210
         a->type == TYPE_OPERATOR ||
Packit 284210
         a->type == TYPE_UNION ||
Packit 284210
         a->type == TYPE_FUNCTION ||
Packit 284210
         a->type == TYPE_EXPRESSION ||
Packit 284210
         a->type == TYPE_TSQL ||
Packit 284210
         a->type == TYPE_SQLTYPE)) {
Packit 284210
        return FALSE;
Packit 284210
    }
Packit 284210
Packit 284210
    if (!
Packit 284210
        (b->type == TYPE_KEYWORD ||
Packit 284210
         b->type == TYPE_BAREWORD ||
Packit 284210
         b->type == TYPE_OPERATOR ||
Packit 284210
         b->type == TYPE_UNION ||
Packit 284210
         b->type == TYPE_FUNCTION ||
Packit 284210
         b->type == TYPE_EXPRESSION ||
Packit 284210
         b->type == TYPE_TSQL ||
Packit 284210
         b->type == TYPE_SQLTYPE ||
Packit 284210
         b->type == TYPE_LOGIC_OPERATOR)) {
Packit 284210
        return FALSE;
Packit 284210
    }
Packit 284210
Packit 284210
    sz1 = a->len;
Packit 284210
    sz2 = b->len;
Packit 284210
    sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */
Packit 284210
    if (sz3 >= LIBINJECTION_SQLI_TOKEN_SIZE) { /* make sure there is room for ending null */
Packit 284210
        return FALSE;
Packit 284210
    }
Packit 284210
    /*
Packit 284210
     * oddly annoying  last.val + ' ' + current.val
Packit 284210
     */
Packit 284210
    memcpy(tmp, a->val, sz1);
Packit 284210
    tmp[sz1] = ' ';
Packit 284210
    memcpy(tmp + sz1 + 1, b->val, sz2);
Packit 284210
    tmp[sz3] = CHAR_NULL;
Packit 284210
    ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3);
Packit 284210
Packit 284210
    if (ch != CHAR_NULL) {
Packit 284210
        st_assign(a, ch, a->pos, sz3, tmp);
Packit 284210
        return TRUE;
Packit 284210
    } else {
Packit 284210
        return FALSE;
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
Packit 284210
{
Packit 284210
    stoken_t last_comment;
Packit 284210
Packit 284210
    /* POS is the position of where the NEXT token goes */
Packit 284210
    size_t pos = 0;
Packit 284210
Packit 284210
    /* LEFT is a count of how many tokens that are already
Packit 284210
       folded or processed (i.e. part of the fingerprint) */
Packit 284210
    size_t left =  0;
Packit 284210
Packit 284210
    int more = 1;
Packit 284210
Packit 284210
    st_clear(&last_comment);
Packit 284210
Packit 284210
    /* Skip all initial comments, left-parens (, SQL types and unary operators
Packit 284210
     *
Packit 284210
     */
Packit 284210
    sf->current = &(sf->tokenvec[0]);
Packit 284210
    while (more) {
Packit 284210
        more = libinjection_sqli_tokenize(sf);
Packit 284210
        if ( ! (sf->current->type == TYPE_COMMENT ||
Packit 284210
                sf->current->type == TYPE_LEFTPARENS ||
Packit 284210
                sf->current->type == TYPE_SQLTYPE ||
Packit 284210
                st_is_unary_op(sf->current))) {
Packit 284210
            break;
Packit 284210
        }
Packit 284210
    }
Packit 284210
Packit 284210
    if (! more) {
Packit 284210
        /* If input was only comments, unary or (, then exit */
Packit 284210
        return 0;
Packit 284210
    } else {
Packit 284210
        /* it's some other token */
Packit 284210
        pos += 1;
Packit 284210
    }
Packit 284210
Packit 284210
    while (1) {
Packit 284210
        FOLD_DEBUG;
Packit 284210
Packit 284210
        /* do we have all the max number of tokens?  if so do
Packit 284210
         * some special cases for 5 tokens
Packit 284210
         */
Packit 284210
        if (pos >= LIBINJECTION_SQLI_MAX_TOKENS) {
Packit 284210
            if (
Packit 284210
                (
Packit 284210
                    sf->tokenvec[0].type == TYPE_NUMBER &&
Packit 284210
                    (sf->tokenvec[1].type == TYPE_OPERATOR || sf->tokenvec[1].type == TYPE_COMMA) &&
Packit 284210
                    sf->tokenvec[2].type == TYPE_LEFTPARENS &&
Packit 284210
                    sf->tokenvec[3].type == TYPE_NUMBER &&
Packit 284210
                    sf->tokenvec[4].type == TYPE_RIGHTPARENS
Packit 284210
                    ) ||
Packit 284210
                (
Packit 284210
                    sf->tokenvec[0].type == TYPE_BAREWORD &&
Packit 284210
                    sf->tokenvec[1].type == TYPE_OPERATOR &&
Packit 284210
                    sf->tokenvec[2].type == TYPE_LEFTPARENS &&
Packit 284210
                    (sf->tokenvec[3].type == TYPE_BAREWORD || sf->tokenvec[3].type == TYPE_NUMBER) &&
Packit 284210
                    sf->tokenvec[4].type == TYPE_RIGHTPARENS
Packit 284210
                    ) ||
Packit 284210
                (
Packit 284210
                    sf->tokenvec[0].type == TYPE_NUMBER &&
Packit 284210
                    sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
Packit 284210
                    sf->tokenvec[2].type == TYPE_COMMA &&
Packit 284210
                    sf->tokenvec[3].type == TYPE_LEFTPARENS &&
Packit 284210
                    sf->tokenvec[4].type == TYPE_NUMBER
Packit 284210
                    ) ||
Packit 284210
                (
Packit 284210
                    sf->tokenvec[0].type == TYPE_BAREWORD &&
Packit 284210
                    sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
Packit 284210
                    sf->tokenvec[2].type == TYPE_OPERATOR &&
Packit 284210
                    sf->tokenvec[3].type == TYPE_LEFTPARENS &&
Packit 284210
                    sf->tokenvec[4].type == TYPE_BAREWORD
Packit 284210
                    )
Packit 284210
                )
Packit 284210
            {
Packit 284210
                if (pos > LIBINJECTION_SQLI_MAX_TOKENS) {
Packit 284210
		    st_copy(&(sf->tokenvec[1]), &(sf->tokenvec[LIBINJECTION_SQLI_MAX_TOKENS]));
Packit 284210
                    pos = 2;
Packit 284210
                    left = 0;
Packit 284210
                } else {
Packit 284210
                    pos = 1;
Packit 284210
                    left = 0;
Packit 284210
                }
Packit 284210
            }
Packit 284210
        }
Packit 284210
Packit 284210
        if (! more || left >= LIBINJECTION_SQLI_MAX_TOKENS) {
Packit 284210
            left = pos;
Packit 284210
            break;
Packit 284210
        }
Packit 284210
Packit 284210
        /* get up to two tokens */
Packit 284210
        while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && (pos - left) < 2) {
Packit 284210
            sf->current = &(sf->tokenvec[pos]);
Packit 284210
            more = libinjection_sqli_tokenize(sf);
Packit 284210
            if (more) {
Packit 284210
                if (sf->current->type == TYPE_COMMENT) {
Packit 284210
                    st_copy(&last_comment, sf->current);
Packit 284210
                } else {
Packit 284210
                    last_comment.type = CHAR_NULL;
Packit 284210
                    pos += 1;
Packit 284210
                }
Packit 284210
            }
Packit 284210
        }
Packit 284210
        FOLD_DEBUG;
Packit 284210
        /* did we get 2 tokens? if not then we are done */
Packit 284210
        if (pos - left < 2) {
Packit 284210
            left = pos;
Packit 284210
            continue;
Packit 284210
        }
Packit 284210
Packit 284210
        /* FOLD: "ss" -> "s"
Packit 284210
         * "foo" "bar" is valid SQL
Packit 284210
         * just ignore second string
Packit 284210
         */
Packit 284210
        if (sf->tokenvec[left].type == TYPE_STRING && sf->tokenvec[left+1].type == TYPE_STRING) {
Packit 284210
            pos -= 1;
Packit 284210
            sf->stats_folds += 1;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_SEMICOLON && sf->tokenvec[left+1].type == TYPE_SEMICOLON) {
Packit 284210
            /* not sure how various engines handle
Packit 284210
             * 'select 1;;drop table foo' or
Packit 284210
             * 'select 1; /x foo x/; drop table foo'
Packit 284210
             * to prevent surprises, just fold away repeated semicolons
Packit 284210
             */
Packit 284210
            pos -= 1;
Packit 284210
            sf->stats_folds += 1;
Packit 284210
            continue;
Packit 284210
        } else if ((sf->tokenvec[left].type == TYPE_OPERATOR ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) &&
Packit 284210
                   (st_is_unary_op(&sf->tokenvec[left+1]) ||
Packit 284210
                    sf->tokenvec[left+1].type == TYPE_SQLTYPE)) {
Packit 284210
            pos -= 1;
Packit 284210
            sf->stats_folds += 1;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
Packit 284210
                   st_is_unary_op(&sf->tokenvec[left+1])) {
Packit 284210
            pos -= 1;
Packit 284210
            sf->stats_folds += 1;
Packit 284210
            if (left > 0) {
Packit 284210
                left -= 1;
Packit 284210
            }
Packit 284210
            continue;
Packit 284210
        } else if (syntax_merge_words(sf, &sf->tokenvec[left], &sf->tokenvec[left+1])) {
Packit 284210
            pos -= 1;
Packit 284210
            sf->stats_folds += 1;
Packit 284210
            if (left > 0) {
Packit 284210
                left -= 1;
Packit 284210
            }
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
Packit 284210
                   sf->tokenvec[left+1].type == TYPE_FUNCTION &&
Packit 284210
		   (sf->tokenvec[left+1].val[0] == 'I' ||
Packit 284210
		    sf->tokenvec[left+1].val[0] == 'i' ) &&
Packit 284210
		   (sf->tokenvec[left+1].val[1] == 'F' ||
Packit 284210
                    sf->tokenvec[left+1].val[1] == 'f' )) {
Packit 284210
            /* IF is normally a function, except in Transact-SQL where it can be used as a
Packit 284210
             * standalone control flow operator, e.g. ; IF 1=1 ...
Packit 284210
             * if found after a semicolon, convert from 'f' type to 'T' type
Packit 284210
             */
Packit 284210
            sf->tokenvec[left+1].type = TYPE_TSQL;
Packit 284210
            /* left += 2; */
Packit 284210
            continue; /* reparse everything, but we probably can advance left, and pos */
Packit 284210
        } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) &&
Packit 284210
                   sf->tokenvec[left+1].type == TYPE_LEFTPARENS && (
Packit 284210
                       /* TSQL functions but common enough to be column names */
Packit 284210
                       cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
                       cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
Packit 284210
                       /* Function in MYSQL */
Packit 284210
                       cstrcasecmp("DATABASE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
                       cstrcasecmp("PASSWORD", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
                       cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
Packit 284210
                       /* Mysql words that act as a variable and are a function */
Packit 284210
Packit 284210
                       /* TSQL current_users is fake-variable */
Packit 284210
                       /* http://msdn.microsoft.com/en-us/library/ms176050.aspx */
Packit 284210
                       cstrcasecmp("CURRENT_USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
                       cstrcasecmp("CURRENT_DATE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
                       cstrcasecmp("CURRENT_TIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
                       cstrcasecmp("CURRENT_TIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
                       cstrcasecmp("LOCALTIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
                       cstrcasecmp("LOCALTIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
Packit 284210
                       )) {
Packit 284210
Packit 284210
            /* pos is the same
Packit 284210
             * other conversions need to go here... for instance
Packit 284210
             * password CAN be a function, coalesce CAN be a function
Packit 284210
             */
Packit 284210
            sf->tokenvec[left].type = TYPE_FUNCTION;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_KEYWORD && (
Packit 284210
                       cstrcasecmp("IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
                       cstrcasecmp("NOT IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
Packit 284210
                       )) {
Packit 284210
Packit 284210
            if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
Packit 284210
                /* got .... IN ( ...  (or 'NOT IN')
Packit 284210
                 * it's an operator
Packit 284210
                 */
Packit 284210
                sf->tokenvec[left].type = TYPE_OPERATOR;
Packit 284210
            } else {
Packit 284210
                /*
Packit 284210
                 * it's a nothing
Packit 284210
                 */
Packit 284210
                sf->tokenvec[left].type = TYPE_BAREWORD;
Packit 284210
            }
Packit 284210
Packit 284210
            /* "IN" can be used as "IN BOOLEAN MODE" for mysql
Packit 284210
             *  in which case merging of words can be done later
Packit 284210
             * other wise it acts as an equality operator __ IN (values..)
Packit 284210
             *
Packit 284210
             * here we got "IN" "(" so it's an operator.
Packit 284210
             * also back track to handle "NOT IN"
Packit 284210
             * might need to do the same with like
Packit 284210
             * two use cases   "foo" LIKE "BAR" (normal operator)
Packit 284210
             *  "foo" = LIKE(1,2)
Packit 284210
             */
Packit 284210
            continue;
Packit 284210
        } else if ((sf->tokenvec[left].type == TYPE_OPERATOR) && (
Packit 284210
                       cstrcasecmp("LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
Packit 284210
                       cstrcasecmp("NOT LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0)) {
Packit 284210
            if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
Packit 284210
                /* SELECT LIKE(...
Packit 284210
                 * it's a function
Packit 284210
                 */
Packit 284210
                sf->tokenvec[left].type = TYPE_FUNCTION;
Packit 284210
            }
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_SQLTYPE &&
Packit 284210
                   (sf->tokenvec[left+1].type == TYPE_BAREWORD ||
Packit 284210
                    sf->tokenvec[left+1].type == TYPE_NUMBER ||
Packit 284210
                    sf->tokenvec[left+1].type == TYPE_SQLTYPE ||
Packit 284210
                    sf->tokenvec[left+1].type == TYPE_LEFTPARENS ||
Packit 284210
                    sf->tokenvec[left+1].type == TYPE_FUNCTION ||
Packit 284210
                    sf->tokenvec[left+1].type == TYPE_VARIABLE ||
Packit 284210
                    sf->tokenvec[left+1].type == TYPE_STRING))  {
Packit 284210
            st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
Packit 284210
            pos -= 1;
Packit 284210
            sf->stats_folds += 1;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_COLLATE &&
Packit 284210
                   sf->tokenvec[left+1].type == TYPE_BAREWORD) {
Packit 284210
            /*
Packit 284210
             * there are too many collation types.. so if the bareword has a "_"
Packit 284210
             * then it's TYPE_SQLTYPE
Packit 284210
             */
Packit 284210
            if (strchr(sf->tokenvec[left+1].val, '_') != NULL) {
Packit 284210
                sf->tokenvec[left+1].type = TYPE_SQLTYPE;
Packit 284210
                left = 0;
Packit 284210
            }
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_BACKSLASH) {
Packit 284210
            if (st_is_arithmetic_op(&(sf->tokenvec[left+1]))) {
Packit 284210
                /* very weird case in TSQL where '\%1' is parsed as '0 % 1', etc */
Packit 284210
                sf->tokenvec[left].type = TYPE_NUMBER;
Packit 284210
            } else {
Packit 284210
                /* just ignore it.. Again T-SQL seems to parse \1 as "1" */
Packit 284210
                st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
Packit 284210
                pos -= 1;
Packit 284210
                sf->stats_folds += 1;
Packit 284210
            }
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
Packit 284210
                   sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
Packit 284210
            pos -= 1;
Packit 284210
            left = 0;
Packit 284210
            sf->stats_folds += 1;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_RIGHTPARENS &&
Packit 284210
                   sf->tokenvec[left+1].type == TYPE_RIGHTPARENS) {
Packit 284210
            pos -= 1;
Packit 284210
            left = 0;
Packit 284210
            sf->stats_folds += 1;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_LEFTBRACE &&
Packit 284210
                   sf->tokenvec[left+1].type == TYPE_BAREWORD) {
Packit 284210
Packit 284210
            /*
Packit 284210
             * MySQL Degenerate case --
Packit 284210
             *
Packit 284210
             *   select { ``.``.id };  -- valid !!!
Packit 284210
             *   select { ``.``.``.id };  -- invalid
Packit 284210
             *   select ``.``.id; -- invalid
Packit 284210
             *   select { ``.id }; -- invalid
Packit 284210
             *
Packit 284210
             * so it appears {``.``.id} is a magic case
Packit 284210
             * I suspect this is "current database, current table, field id"
Packit 284210
             *
Packit 284210
             * The folding code can't look at more than 3 tokens, and
Packit 284210
             * I don't want to make two passes.
Packit 284210
             *
Packit 284210
             * Since "{ ``" so rare, we are just going to blacklist it.
Packit 284210
             *
Packit 284210
             * Highly likely this will need revisiting!
Packit 284210
             *
Packit 284210
             * CREDIT @rsalgado 2013-11-25
Packit 284210
             */
Packit 284210
            if (sf->tokenvec[left+1].len == 0) {
Packit 284210
                sf->tokenvec[left+1].type = TYPE_EVIL;
Packit 284210
                return (int)(left+2);
Packit 284210
            }
Packit 284210
            /* weird ODBC / MYSQL  {foo expr} --> expr
Packit 284210
             * but for this rule we just strip away the "{ foo" part
Packit 284210
             */
Packit 284210
            left = 0;
Packit 284210
            pos -= 2;
Packit 284210
            sf->stats_folds += 2;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left+1].type == TYPE_RIGHTBRACE) {
Packit 284210
            pos -= 1;
Packit 284210
            left = 0;
Packit 284210
            sf->stats_folds += 1;
Packit 284210
            continue;
Packit 284210
        }
Packit 284210
Packit 284210
        /* all cases of handling 2 tokens are done
Packit 284210
           and nothing matched.  Get one more token
Packit 284210
        */
Packit 284210
        FOLD_DEBUG;
Packit 284210
        while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && pos - left < 3) {
Packit 284210
            sf->current = &(sf->tokenvec[pos]);
Packit 284210
            more = libinjection_sqli_tokenize(sf);
Packit 284210
            if (more) {
Packit 284210
                if (sf->current->type == TYPE_COMMENT) {
Packit 284210
                    st_copy(&last_comment, sf->current);
Packit 284210
                } else {
Packit 284210
                    last_comment.type = CHAR_NULL;
Packit 284210
                    pos += 1;
Packit 284210
                }
Packit 284210
            }
Packit 284210
        }
Packit 284210
Packit 284210
        /* do we have three tokens? If not then we are done */
Packit 284210
        if (pos -left < 3) {
Packit 284210
            left = pos;
Packit 284210
            continue;
Packit 284210
        }
Packit 284210
Packit 284210
        /*
Packit 284210
         * now look for three token folding
Packit 284210
         */
Packit 284210
        if (sf->tokenvec[left].type == TYPE_NUMBER &&
Packit 284210
            sf->tokenvec[left+1].type == TYPE_OPERATOR &&
Packit 284210
            sf->tokenvec[left+2].type == TYPE_NUMBER) {
Packit 284210
            pos -= 2;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_OPERATOR &&
Packit 284210
                   sf->tokenvec[left+1].type != TYPE_LEFTPARENS &&
Packit 284210
                   sf->tokenvec[left+2].type == TYPE_OPERATOR) {
Packit 284210
            left = 0;
Packit 284210
            pos -= 2;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR &&
Packit 284210
                   sf->tokenvec[left+2].type == TYPE_LOGIC_OPERATOR) {
Packit 284210
            pos -= 2;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_VARIABLE &&
Packit 284210
                   sf->tokenvec[left+1].type == TYPE_OPERATOR &&
Packit 284210
                   (sf->tokenvec[left+2].type == TYPE_VARIABLE ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_NUMBER ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
Packit 284210
            pos -= 2;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_NUMBER ) &&
Packit 284210
                   sf->tokenvec[left+1].type == TYPE_OPERATOR &&
Packit 284210
                   (sf->tokenvec[left+2].type == TYPE_NUMBER ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
Packit 284210
            pos -= 2;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_NUMBER ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_VARIABLE ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_STRING) &&
Packit 284210
                   sf->tokenvec[left+1].type == TYPE_OPERATOR &&
Packit 284210
                   streq(sf->tokenvec[left+1].val, "::") &&
Packit 284210
                   sf->tokenvec[left+2].type == TYPE_SQLTYPE) {
Packit 284210
            pos -= 2;
Packit 284210
            left = 0;
Packit 284210
            sf->stats_folds += 2;
Packit 284210
            continue;
Packit 284210
        } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_NUMBER ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_STRING ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_VARIABLE) &&
Packit 284210
                   sf->tokenvec[left+1].type == TYPE_COMMA &&
Packit 284210
                   (sf->tokenvec[left+2].type == TYPE_NUMBER ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_BAREWORD ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_STRING ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_VARIABLE)) {
Packit 284210
            pos -= 2;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_GROUP ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_COMMA) &&
Packit 284210
                   st_is_unary_op(&sf->tokenvec[left+1]) &&
Packit 284210
                   sf->tokenvec[left+2].type == TYPE_LEFTPARENS) {
Packit 284210
            /* got something like SELECT + (, LIMIT + (
Packit 284210
             * remove unary operator
Packit 284210
             */
Packit 284210
            st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
Packit 284210
            pos -= 1;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if ((sf->tokenvec[left].type == TYPE_KEYWORD ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_EXPRESSION ||
Packit 284210
                    sf->tokenvec[left].type == TYPE_GROUP )  &&
Packit 284210
                   st_is_unary_op(&sf->tokenvec[left+1]) &&
Packit 284210
                   (sf->tokenvec[left+2].type == TYPE_NUMBER ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_BAREWORD ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_VARIABLE ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_STRING ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_FUNCTION )) {
Packit 284210
            /* remove unary operators
Packit 284210
             * select - 1
Packit 284210
             */
Packit 284210
            st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
Packit 284210
            pos -= 1;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_COMMA &&
Packit 284210
                   st_is_unary_op(&sf->tokenvec[left+1]) &&
Packit 284210
                   (sf->tokenvec[left+2].type == TYPE_NUMBER ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_BAREWORD ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_VARIABLE ||
Packit 284210
                    sf->tokenvec[left+2].type == TYPE_STRING)) {
Packit 284210
            /*
Packit 284210
             * interesting case    turn ", -1"  ->> ",1" PLUS we need to back up
Packit 284210
             * one token if possible to see if more folding can be done
Packit 284210
             * "1,-1" --> "1"
Packit 284210
             */
Packit 284210
            st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
Packit 284210
            left = 0;
Packit 284210
            /* pos is >= 3 so this is safe */
Packit 284210
            assert(pos >= 3);
Packit 284210
            pos -= 3;
Packit 284210
            continue;
Packit 284210
        } else if (sf->tokenvec[left].type == TYPE_COMMA &&
Packit 284210
                   st_is_unary_op(&sf->tokenvec[left+1]) &&
Packit 284210
                   sf->tokenvec[left+2].type == TYPE_FUNCTION) {
Packit 284210
Packit 284210
            /* Separate case from above since you end up with
Packit 284210
             * 1,-sin(1) --> 1 (1)
Packit 284210
             * Here, just do
Packit 284210
             * 1,-sin(1) --> 1,sin(1)
Packit 284210
             * just remove unary operator
Packit 284210
             */
Packit 284210
            st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
Packit 284210
            pos -= 1;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if ((sf->tokenvec[left].type == TYPE_BAREWORD) &&
Packit 284210
                   (sf->tokenvec[left+1].type == TYPE_DOT) &&
Packit 284210
                   (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
Packit 284210
            /* ignore the '.n'
Packit 284210
             * typically is this databasename.table
Packit 284210
             */
Packit 284210
            assert(pos >= 3);
Packit 284210
            pos -= 2;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION) &&
Packit 284210
                   (sf->tokenvec[left+1].type == TYPE_DOT) &&
Packit 284210
                   (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
Packit 284210
            /* select . `foo` --> select `foo` */
Packit 284210
            st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
Packit 284210
            pos -= 1;
Packit 284210
            left = 0;
Packit 284210
            continue;
Packit 284210
        } else if ((sf->tokenvec[left].type == TYPE_FUNCTION) &&
Packit 284210
                   (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) &&
Packit 284210
                   (sf->tokenvec[left+2].type != TYPE_RIGHTPARENS)) {
Packit 284210
            /*
Packit 284210
             * whats going on here
Packit 284210
             * Some SQL functions like USER() have 0 args
Packit 284210
             * if we get User(foo), then User is not a function
Packit 284210
             * This should be expanded since it eliminated a lot of false
Packit 284210
             * positives. 
Packit 284210
             */
Packit 284210
            if  (cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0) {
Packit 284210
                sf->tokenvec[left].type = TYPE_BAREWORD;
Packit 284210
            }
Packit 284210
        }
Packit 284210
Packit 284210
        /* no folding -- assume left-most token is
Packit 284210
           good, now use the existing 2 tokens --
Packit 284210
           do not get another
Packit 284210
        */
Packit 284210
Packit 284210
        left += 1;
Packit 284210
Packit 284210
    } /* while(1) */
Packit 284210
Packit 284210
    /* if we have 4 or less tokens, and we had a comment token
Packit 284210
     * at the end, add it back
Packit 284210
     */
Packit 284210
Packit 284210
    if (left < LIBINJECTION_SQLI_MAX_TOKENS && last_comment.type == TYPE_COMMENT) {
Packit 284210
        st_copy(&sf->tokenvec[left], &last_comment);
Packit 284210
        left += 1;
Packit 284210
    }
Packit 284210
Packit 284210
    /* sometimes we grab a 6th token to help
Packit 284210
       determine the type of token 5.
Packit 284210
    */
Packit 284210
    if (left > LIBINJECTION_SQLI_MAX_TOKENS) {
Packit 284210
        left = LIBINJECTION_SQLI_MAX_TOKENS;
Packit 284210
    }
Packit 284210
Packit 284210
    return (int)left;
Packit 284210
}
Packit 284210
Packit 284210
/* secondary api: detects SQLi in a string, GIVEN a context.
Packit 284210
 *
Packit 284210
 * A context can be:
Packit 284210
 *   *  CHAR_NULL (\0), process as is
Packit 284210
 *   *  CHAR_SINGLE ('), process pretending input started with a
Packit 284210
 *          single quote.
Packit 284210
 *   *  CHAR_DOUBLE ("), process pretending input started with a
Packit 284210
 *          double quote.
Packit 284210
 *
Packit 284210
 */
Packit 284210
const char* libinjection_sqli_fingerprint(struct libinjection_sqli_state * sql_state, int flags)
Packit 284210
{
Packit 284210
    int i;
Packit 284210
    int tlen = 0;
Packit 284210
Packit 284210
    libinjection_sqli_reset(sql_state, flags);
Packit 284210
Packit 284210
    tlen = libinjection_sqli_fold(sql_state);
Packit 284210
Packit 284210
    /* Check for magic PHP backquote comment
Packit 284210
     * If:
Packit 284210
     * * last token is of type "bareword"
Packit 284210
     * * And is quoted in a backtick
Packit 284210
     * * And isn't closed
Packit 284210
     * * And it's empty?
Packit 284210
     * Then convert it to comment
Packit 284210
     */
Packit 284210
    if (tlen > 2 &&
Packit 284210
        sql_state->tokenvec[tlen-1].type == TYPE_BAREWORD &&
Packit 284210
        sql_state->tokenvec[tlen-1].str_open == CHAR_TICK &&
Packit 284210
        sql_state->tokenvec[tlen-1].len == 0 &&
Packit 284210
        sql_state->tokenvec[tlen-1].str_close == CHAR_NULL) {
Packit 284210
        sql_state->tokenvec[tlen-1].type = TYPE_COMMENT;
Packit 284210
    }
Packit 284210
Packit 284210
    for (i = 0; i < tlen; ++i) {
Packit 284210
        sql_state->fingerprint[i] = sql_state->tokenvec[i].type;
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
     * make the fingerprint pattern a c-string (null delimited)
Packit 284210
     */
Packit 284210
    sql_state->fingerprint[tlen] = CHAR_NULL;
Packit 284210
Packit 284210
    /*
Packit 284210
     * check for 'X' in pattern, and then
Packit 284210
     * clear out all tokens
Packit 284210
     *
Packit 284210
     * this means parsing could not be done
Packit 284210
     * accurately due to pgsql's double comments
Packit 284210
     * or other syntax that isn't consistent.
Packit 284210
     * Should be very rare false positive
Packit 284210
     */
Packit 284210
    if (strchr(sql_state->fingerprint, TYPE_EVIL)) {
Packit 284210
        /*  needed for SWIG */
Packit 284210
        memset((void*)sql_state->fingerprint, 0, LIBINJECTION_SQLI_MAX_TOKENS + 1);
Packit 284210
        memset((void*)sql_state->tokenvec[0].val, 0, LIBINJECTION_SQLI_TOKEN_SIZE);
Packit 284210
Packit 284210
        sql_state->fingerprint[0] = TYPE_EVIL;
Packit 284210
Packit 284210
        sql_state->tokenvec[0].type = TYPE_EVIL;
Packit 284210
        sql_state->tokenvec[0].val[0] = TYPE_EVIL;
Packit 284210
        sql_state->tokenvec[1].type = CHAR_NULL;
Packit 284210
    }
Packit 284210
Packit 284210
Packit 284210
    return sql_state->fingerprint;
Packit 284210
}
Packit 284210
Packit 284210
/*
 * Returns 1 when the current fingerprint is reported by
 * libinjection_sqli_blacklist AND libinjection_sqli_not_whitelist
 * also returns non-zero; returns 0 otherwise.
 *
 * libinjection_sqli_not_whitelist is only consulted when the
 * blacklist check succeeded.
 */
int libinjection_sqli_check_fingerprint(struct libinjection_sqli_state* sql_state)
{
    if (!libinjection_sqli_blacklist(sql_state)) {
        return 0;
    }

    return libinjection_sqli_not_whitelist(sql_state) != 0;
}
Packit 284210
Packit 284210
char libinjection_sqli_lookup_word(struct libinjection_sqli_state *sql_state, int lookup_type,
Packit 284210
                                   const char* str, size_t len)
Packit 284210
{
Packit 284210
    if (lookup_type == LOOKUP_FINGERPRINT) {
Packit 284210
        return libinjection_sqli_check_fingerprint(sql_state) ? 'X' : '\0';
Packit 284210
    } else {
Packit 284210
        return bsearch_keyword_type(str, len, sql_keywords, sql_keywords_sz);
Packit 284210
    }
Packit 284210
}
Packit 284210
Packit 284210
int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state)
Packit 284210
{
Packit 284210
    /*
Packit 284210
     * use minimum of 8 bytes to make sure gcc -fstack-protector
Packit 284210
     * works correctly
Packit 284210
     */
Packit 284210
    char fp2[8];
Packit 284210
    char ch;
Packit 284210
    size_t i;
Packit 284210
    size_t len = strlen(sql_state->fingerprint);
Packit 284210
    int patmatch;
Packit 284210
Packit 284210
    if (len < 1) {
Packit 284210
        sql_state->reason = __LINE__;
Packit 284210
        return FALSE;
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
      to keep everything compatible, convert the
Packit 284210
      v0 fingerprint pattern to v1
Packit 284210
      v0: up to 5 chars, mixed case
Packit 284210
      v1: 1 char is '0', up to 5 more chars, upper case
Packit 284210
    */
Packit 284210
Packit 284210
    fp2[0] = '0';
Packit 284210
    for (i = 0; i < len; ++i) {
Packit 284210
        ch = sql_state->fingerprint[i];
Packit 284210
        if (ch >= 'a' && ch <= 'z') {
Packit 284210
            ch -= 0x20;
Packit 284210
        }
Packit 284210
        fp2[i+1] = ch;
Packit 284210
    }
Packit 284210
    fp2[i+1] = '\0';
Packit 284210
Packit 284210
    patmatch = is_keyword(fp2, len + 1) == TYPE_FINGERPRINT;
Packit 284210
Packit 284210
    /*
Packit 284210
     * No match.
Packit 284210
     *
Packit 284210
     * Set sql_state->reason to current line number
Packit 284210
     * only for debugging purposes.
Packit 284210
     */
Packit 284210
    if (!patmatch) {
Packit 284210
        sql_state->reason = __LINE__;
Packit 284210
        return FALSE;
Packit 284210
    }
Packit 284210
Packit 284210
    return TRUE;
Packit 284210
}
Packit 284210
Packit 284210
/*
Packit 284210
 * return TRUE if SQLi, FALSE if benign
Packit 284210
 */
Packit 284210
int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
Packit 284210
{
Packit 284210
    /*
Packit 284210
     * We assume we got a SQLi match
Packit 284210
     * This next part just helps reduce false positives.
Packit 284210
     *
Packit 284210
     */
Packit 284210
    char ch;
Packit 284210
    size_t tlen = strlen(sql_state->fingerprint);
Packit 284210
Packit 284210
    if (tlen > 1 && sql_state->fingerprint[tlen-1] == TYPE_COMMENT) {
Packit 284210
        /*
Packit 284210
         * if the input ends with a comment and contains 'sp_password' then it's SQLi!
Packit 284210
         * MS Audit log apparently ignores anything with
Packit 284210
         * 'sp_password' in it. Unable to find primary reference to
Packit 284210
         * this "feature" of SQL Server but seems to be known SQLi
Packit 284210
         * technique
Packit 284210
         */
Packit 284210
        if (my_memmem(sql_state->s, sql_state->slen,
Packit 284210
                      "sp_password", strlen("sp_password"))) {
Packit 284210
            sql_state->reason = __LINE__;
Packit 284210
            return TRUE;
Packit 284210
        }
Packit 284210
    }
Packit 284210
Packit 284210
    switch (tlen) {
Packit 284210
    case 2:{
Packit 284210
        /*
Packit 284210
         * case 2 are "very small SQLi" which make them
Packit 284210
         * hard to tell from normal input...
Packit 284210
         */
Packit 284210
Packit 284210
        if (sql_state->fingerprint[1] == TYPE_UNION) {
Packit 284210
            if (sql_state->stats_tokens == 2) {
Packit 284210
                /* not sure why but 1U comes up in SQLi attack
Packit 284210
                 * likely part of parameter splitting/etc.
Packit 284210
                 * lots of reasons why "1 union" might be normal
Packit 284210
                 * input, so beep only if other SQLi things are present
Packit 284210
                 */
Packit 284210
                /* it really is a number and 'union'
Packit 284210
                 * other wise it has folding or comments
Packit 284210
                 */
Packit 284210
                sql_state->reason = __LINE__;
Packit 284210
                return FALSE;
Packit 284210
            } else {
Packit 284210
                sql_state->reason = __LINE__;
Packit 284210
                return TRUE;
Packit 284210
            }
Packit 284210
        }
Packit 284210
        /*
Packit 284210
         * if 'comment' is '#' ignore.. too many FP
Packit 284210
         */
Packit 284210
        if (sql_state->tokenvec[1].val[0] == '#') {
Packit 284210
            sql_state->reason = __LINE__;
Packit 284210
            return FALSE;
Packit 284210
        }
Packit 284210
Packit 284210
        /*
Packit 284210
         * for fingerprint like 'nc', only comments of /x are treated
Packit 284210
         * as SQL... ending comments of "--" and "#" are not SQLi
Packit 284210
         */
Packit 284210
        if (sql_state->tokenvec[0].type == TYPE_BAREWORD &&
Packit 284210
            sql_state->tokenvec[1].type == TYPE_COMMENT &&
Packit 284210
            sql_state->tokenvec[1].val[0] != '/') {
Packit 284210
                sql_state->reason = __LINE__;
Packit 284210
                return FALSE;
Packit 284210
        }
Packit 284210
Packit 284210
        /*
Packit 284210
         * if '1c' ends with '/x' then it's SQLi
Packit 284210
         */
Packit 284210
        if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
Packit 284210
            sql_state->tokenvec[1].type == TYPE_COMMENT &&
Packit 284210
            sql_state->tokenvec[1].val[0] == '/') {
Packit 284210
            return TRUE;
Packit 284210
        }
Packit 284210
Packit 284210
        /**
Packit 284210
         * there are some odd base64-looking query string values
Packit 284210
         * 1234-ABCDEFEhfhihwuefi--
Packit 284210
         * which evaluate to "1c"... these are not SQLi
Packit 284210
         * but 1234-- probably is.
Packit 284210
         * Make sure the "1" in "1c" is actually a true decimal number
Packit 284210
         *
Packit 284210
         * Need to check -original- string since the folding step
Packit 284210
         * may have merged tokens, e.g. "1+FOO" is folded into "1"
Packit 284210
         *
Packit 284210
         * Note: evasion: 1*1--
Packit 284210
         */
Packit 284210
        if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
Packit 284210
            sql_state->tokenvec[1].type == TYPE_COMMENT) {
Packit 284210
            if (sql_state->stats_tokens > 2) {
Packit 284210
                /* we have some folding going on, highly likely SQLi */
Packit 284210
                sql_state->reason = __LINE__;
Packit 284210
                return TRUE;
Packit 284210
            }
Packit 284210
            /*
Packit 284210
             * we check that next character after the number is either whitespace,
Packit 284210
             * or '/' or a '-' ==> SQLi.
Packit 284210
             */
Packit 284210
            ch = sql_state->s[sql_state->tokenvec[0].len];
Packit 284210
            if ( ch <= 32 ) {
Packit 284210
                /* next char was whitespace,e.g. "1234 --"
Packit 284210
                 * this isn't exactly correct.. ideally we should skip over all whitespace
Packit 284210
                 * but this seems to be ok for now
Packit 284210
                 */
Packit 284210
                return TRUE;
Packit 284210
            }
Packit 284210
            if (ch == '/' && sql_state->s[sql_state->tokenvec[0].len + 1] == '*') {
Packit 284210
                return TRUE;
Packit 284210
            }
Packit 284210
            if (ch == '-' && sql_state->s[sql_state->tokenvec[0].len + 1] == '-') {
Packit 284210
                return TRUE;
Packit 284210
            }
Packit 284210
Packit 284210
            sql_state->reason = __LINE__;
Packit 284210
            return FALSE;
Packit 284210
        }
Packit 284210
Packit 284210
        /*
Packit 284210
         * detect obvious SQLi scans.. many people put '--' in plain text
Packit 284210
         * so only detect if input ends with '--', e.g. 1-- but not 1-- foo
Packit 284210
         */
Packit 284210
        if ((sql_state->tokenvec[1].len > 2)
Packit 284210
            && sql_state->tokenvec[1].val[0] == '-') {
Packit 284210
            sql_state->reason = __LINE__;
Packit 284210
            return FALSE;
Packit 284210
        }
Packit 284210
Packit 284210
        break;
Packit 284210
    } /* case 2 */
Packit 284210
    case 3:{
Packit 284210
        /*
Packit 284210
         * ...foo' + 'bar...
Packit 284210
         * no opening quote, no closing quote
Packit 284210
         * and each string has data
Packit 284210
         */
Packit 284210
Packit 284210
        if (streq(sql_state->fingerprint, "sos")
Packit 284210
            || streq(sql_state->fingerprint, "s&s")) {
Packit 284210
Packit 284210
                if ((sql_state->tokenvec[0].str_open == CHAR_NULL)
Packit 284210
                    && (sql_state->tokenvec[2].str_close == CHAR_NULL)
Packit 284210
                    && (sql_state->tokenvec[0].str_close == sql_state->tokenvec[2].str_open)) {
Packit 284210
                    /*
Packit 284210
                     * if ....foo" + "bar....
Packit 284210
                     */
Packit 284210
                    sql_state->reason = __LINE__;
Packit 284210
                    return TRUE;
Packit 284210
                }
Packit 284210
                if (sql_state->stats_tokens == 3) {
Packit 284210
                    sql_state->reason = __LINE__;
Packit 284210
                    return FALSE;
Packit 284210
                }
Packit 284210
Packit 284210
                /*
Packit 284210
                 * not SQLi
Packit 284210
                 */
Packit 284210
                sql_state->reason = __LINE__;
Packit 284210
                return FALSE;
Packit 284210
        } else if (streq(sql_state->fingerprint, "s&n") ||
Packit 284210
                   streq(sql_state->fingerprint, "n&1") ||
Packit 284210
                   streq(sql_state->fingerprint, "1&1") ||
Packit 284210
                   streq(sql_state->fingerprint, "1&v") ||
Packit 284210
                   streq(sql_state->fingerprint, "1&s")) {
Packit 284210
            /* 'sexy and 17' not SQLi
Packit 284210
             * 'sexy and 17<18'  SQLi
Packit 284210
             */
Packit 284210
            if (sql_state->stats_tokens == 3) {
Packit 284210
                sql_state->reason = __LINE__;
Packit 284210
                return FALSE;
Packit 284210
            }
Packit 284210
        } else if (sql_state->tokenvec[1].type == TYPE_KEYWORD) {
Packit 284210
            if ((sql_state->tokenvec[1].len < 5) ||
Packit 284210
                cstrcasecmp("INTO", sql_state->tokenvec[1].val, 4)) {
Packit 284210
                /* if it's not "INTO OUTFILE", or "INTO DUMPFILE" (MySQL)
Packit 284210
                 * then treat as safe
Packit 284210
                 */
Packit 284210
                sql_state->reason = __LINE__;
Packit 284210
                return FALSE;
Packit 284210
            }
Packit 284210
        }
Packit 284210
        break;
Packit 284210
    }  /* case 3 */
Packit 284210
    case 4:
Packit 284210
    case 5: {
Packit 284210
        /* nothing right now */
Packit 284210
        break;
Packit 284210
    } /* case 5 */
Packit 284210
    } /* end switch */
Packit 284210
Packit 284210
    return TRUE;
Packit 284210
}
Packit 284210
Packit 284210
/**  Main API, detects SQLi in an input.
Packit 284210
 *
Packit 284210
 *
Packit 284210
 */
Packit 284210
static int reparse_as_mysql(struct libinjection_sqli_state * sql_state)
Packit 284210
{
Packit 284210
    return sql_state->stats_comment_ddx ||
Packit 284210
        sql_state->stats_comment_hash;
Packit 284210
}
Packit 284210
Packit 284210
/*
Packit 284210
 * This function is mostly use with SWIG
Packit 284210
 */
Packit 284210
struct libinjection_sqli_token*
Packit 284210
libinjection_sqli_get_token(struct libinjection_sqli_state * sql_state, int i)
Packit 284210
{
Packit 284210
    if (i < 0 || i > (int)LIBINJECTION_SQLI_MAX_TOKENS) {
Packit 284210
        return NULL;
Packit 284210
    }
Packit 284210
    return &(sql_state->tokenvec[i]);
Packit 284210
}
Packit 284210
Packit 284210
int libinjection_is_sqli(struct libinjection_sqli_state * sql_state)
Packit 284210
{
Packit 284210
    const char *s = sql_state->s;
Packit 284210
    size_t slen = sql_state->slen;
Packit 284210
Packit 284210
    /*
Packit 284210
     * no input? not SQLi
Packit 284210
     */
Packit 284210
    if (slen == 0) {
Packit 284210
        return FALSE;
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
     * test input "as-is"
Packit 284210
     */
Packit 284210
    libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_ANSI);
Packit 284210
    if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
Packit 284210
                          sql_state->fingerprint, strlen(sql_state->fingerprint))) {
Packit 284210
        return TRUE;
Packit 284210
    } else if (reparse_as_mysql(sql_state)) {
Packit 284210
        libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_MYSQL);
Packit 284210
        if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
Packit 284210
                              sql_state->fingerprint, strlen(sql_state->fingerprint))) {
Packit 284210
            return TRUE;
Packit 284210
        }
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
     * if input has a single_quote, then
Packit 284210
     * test as if input was actually '
Packit 284210
     * example: if input if "1' = 1", then pretend it's
Packit 284210
     *   "'1' = 1"
Packit 284210
     * Porting Notes: example the same as doing
Packit 284210
     *   is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg)
Packit 284210
     *
Packit 284210
     */
Packit 284210
    if (memchr(s, CHAR_SINGLE, slen)) {
Packit 284210
        libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_ANSI);
Packit 284210
        if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
Packit 284210
                              sql_state->fingerprint, strlen(sql_state->fingerprint))) {
Packit 284210
            return TRUE;
Packit 284210
        } else if (reparse_as_mysql(sql_state)) {
Packit 284210
            libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_MYSQL);
Packit 284210
            if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
Packit 284210
                                  sql_state->fingerprint, strlen(sql_state->fingerprint))) {
Packit 284210
                return TRUE;
Packit 284210
            }
Packit 284210
        }
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
     * same as above but with a double-quote "
Packit 284210
     */
Packit 284210
    if (memchr(s, CHAR_DOUBLE, slen)) {
Packit 284210
        libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_DOUBLE | FLAG_SQL_MYSQL);
Packit 284210
        if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
Packit 284210
                              sql_state->fingerprint, strlen(sql_state->fingerprint))) {
Packit 284210
            return TRUE;
Packit 284210
        }
Packit 284210
    }
Packit 284210
Packit 284210
    /*
Packit 284210
     * Hurray, input is not SQLi
Packit 284210
     */
Packit 284210
    return FALSE;
Packit 284210
}
Packit 284210
Packit 284210
int libinjection_sqli(const char* input, size_t slen, char fingerprint[])
Packit 284210
{
Packit 284210
    int issqli;
Packit 284210
    struct libinjection_sqli_state state;
Packit 284210
Packit 284210
    libinjection_sqli_init(&state, input, slen, 0);
Packit 284210
    issqli = libinjection_is_sqli(&state);
Packit 284210
    if (issqli) {
Packit 284210
        strcpy(fingerprint, state.fingerprint);
Packit 284210
    } else {
Packit 284210
        fingerprint[0] = '\0';
Packit 284210
    }
Packit 284210
    return issqli;
Packit 284210
}