Blame support/logresolve.c

Packit 90a5c9
/* Licensed to the Apache Software Foundation (ASF) under one or more
Packit 90a5c9
 * contributor license agreements.  See the NOTICE file distributed with
Packit 90a5c9
 * this work for additional information regarding copyright ownership.
Packit 90a5c9
 * The ASF licenses this file to You under the Apache License, Version 2.0
Packit 90a5c9
 * (the "License"); you may not use this file except in compliance with
Packit 90a5c9
 * the License.  You may obtain a copy of the License at
Packit 90a5c9
 *
Packit 90a5c9
 *     http://www.apache.org/licenses/LICENSE-2.0
Packit 90a5c9
 *
Packit 90a5c9
 * Unless required by applicable law or agreed to in writing, software
Packit 90a5c9
 * distributed under the License is distributed on an "AS IS" BASIS,
Packit 90a5c9
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Packit 90a5c9
 * See the License for the specific language governing permissions and
Packit 90a5c9
 * limitations under the License.
Packit 90a5c9
 */
Packit 90a5c9
Packit 90a5c9
/*
Packit 90a5c9
 * logresolve 2.0
Packit 90a5c9
 *
Packit 90a5c9
 * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
Packit 90a5c9
 * UUNET Canada, April 16, 1995
Packit 90a5c9
 *
Packit 90a5c9
 * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
Packit 90a5c9
 * Rewritten again, and ported to APR by Colm MacCarthaigh
Packit 90a5c9
 *
Packit 90a5c9
 * Usage: logresolve [-s filename] [-c] < access_log > new_log
Packit 90a5c9
 *
Packit 90a5c9
 * Arguments:
Packit 90a5c9
 *    -s filename     name of a file to record statistics
Packit 90a5c9
 *    -c              check the DNS for a matching A record for the host.
Packit 90a5c9
 *
Packit 90a5c9
 * Notes:             (For historical interest)
Packit 90a5c9
 *
Packit 90a5c9
 * To generate meaningful statistics from an HTTPD log file, it's good
Packit 90a5c9
 * to have the domain name of each machine that accessed your site, but
Packit 90a5c9
 * doing this on the fly can slow HTTPD down.
Packit 90a5c9
 *
Packit 90a5c9
 * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
Packit 90a5c9
 * resolution off. Before running your stats program, just run your log
Packit 90a5c9
 * file through this program (logresolve) and all of your IP numbers will
Packit 90a5c9
 * be resolved into hostnames (where possible).
Packit 90a5c9
 *
Packit 90a5c9
 * logresolve takes an HTTPD access log (in the COMMON log file format,
Packit 90a5c9
 * or any other format that has the IP number/domain name as the first
Packit 90a5c9
 * field for that matter), and outputs the same file with all of the
Packit 90a5c9
 * domain names looked up. Where no domain name can be found, the IP
Packit 90a5c9
 * number is left in.
Packit 90a5c9
 *
Packit 90a5c9
 * To minimize impact on your nameserver, logresolve has its very own
Packit 90a5c9
 * internal hash-table cache. This means that each IP number will only
Packit 90a5c9
 * be looked up the first time it is found in the log file.
Packit 90a5c9
 *
Packit 90a5c9
 * The -c option causes logresolve to apply the same check as httpd
Packit 90a5c9
 * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
Packit 90a5c9
 * address, it looks up the IP addresses for the hostname and checks
Packit 90a5c9
 * that one of these matches the original address.
Packit 90a5c9
 */
Packit 90a5c9
Packit 90a5c9
#include "apr.h"
Packit 90a5c9
#include "apr_lib.h"
Packit 90a5c9
#include "apr_hash.h"
Packit 90a5c9
#include "apr_getopt.h"
Packit 90a5c9
#include "apr_strings.h"
Packit 90a5c9
#include "apr_file_io.h"
Packit 90a5c9
#include "apr_network_io.h"
Packit 90a5c9
Packit 90a5c9
#if APR_HAVE_STDLIB_H
Packit 90a5c9
#include <stdlib.h>
Packit 90a5c9
#endif
Packit 90a5c9
Packit 90a5c9
/*
 * Sizes, in bytes, of the input file buffer, the output file buffer and the
 * buffer holding one log line.  The expansions are parenthesized so the
 * macros keep their value when used inside a larger expression
 * (e.g. "x / READ_BUF_SIZE").
 */
#define READ_BUF_SIZE  (128 * 1024)
#define WRITE_BUF_SIZE (128 * 1024)
#define LINE_BUF_SIZE  (128 * 1024)
Packit 90a5c9
Packit 90a5c9
static apr_file_t *errfile;
Packit 90a5c9
static const char *shortname = "logresolve";
Packit 90a5c9
static apr_hash_t *cache;
Packit 90a5c9
Packit 90a5c9
/* Statistics */
Packit 90a5c9
static int cachehits = 0;
Packit 90a5c9
static int cachesize = 0;
Packit 90a5c9
static int entries = 0;
Packit 90a5c9
static int resolves = 0;
Packit 90a5c9
static int withname = 0;
Packit 90a5c9
static int doublefailed = 0;
Packit 90a5c9
static int noreverse = 0;
Packit 90a5c9
Packit 90a5c9
/*
Packit 90a5c9
 * prints various statistics to output
Packit 90a5c9
 */
Packit 90a5c9
#define NL APR_EOL_STR
Packit 90a5c9
static void print_statistics (apr_file_t *output)
Packit 90a5c9
{
Packit 90a5c9
    apr_file_printf(output, "logresolve Statistics:" NL);
Packit 90a5c9
    apr_file_printf(output, "Entries: %d" NL, entries);
Packit 90a5c9
    apr_file_printf(output, "    With name   : %d" NL, withname);
Packit 90a5c9
    apr_file_printf(output, "    Resolves    : %d" NL, resolves);
Packit 90a5c9
Packit 90a5c9
    if (noreverse) {
Packit 90a5c9
        apr_file_printf(output, "    - No reverse : %d" NL,
Packit 90a5c9
                        noreverse);
Packit 90a5c9
    }
Packit 90a5c9
Packit 90a5c9
    if (doublefailed) {
Packit 90a5c9
        apr_file_printf(output, "    - Double lookup failed : %d" NL,
Packit 90a5c9
                        doublefailed);
Packit 90a5c9
    }
Packit 90a5c9
Packit 90a5c9
    apr_file_printf(output, "Cache hits      : %d" NL, cachehits);
Packit 90a5c9
    apr_file_printf(output, "Cache size      : %d" NL, cachesize);
Packit 90a5c9
}
Packit 90a5c9
Packit 90a5c9
/*
Packit 90a5c9
 * usage info
Packit 90a5c9
 */
Packit 90a5c9
static void usage(void)
Packit 90a5c9
{
Packit 90a5c9
    apr_file_printf(errfile,
Packit 90a5c9
    "%s -- Resolve IP-addresses to hostnames in Apache log files."           NL
Packit 90a5c9
    "Usage: %s [-s STATFILE] [-c]"                                           NL
Packit 90a5c9
                                                                             NL
Packit 90a5c9
    "Options:"                                                               NL
Packit 90a5c9
    "  -s   Record statistics to STATFILE when finished."                    NL
Packit 90a5c9
                                                                             NL
Packit 90a5c9
    "  -c   Perform double lookups when resolving IP addresses."             NL,
Packit 90a5c9
    shortname, shortname);
Packit 90a5c9
    exit(1);
Packit 90a5c9
}
Packit 90a5c9
#undef NL
Packit 90a5c9
Packit 90a5c9
int main(int argc, const char * const argv[])
Packit 90a5c9
{
Packit 90a5c9
    apr_file_t * outfile;
Packit 90a5c9
    apr_file_t * infile;
Packit 90a5c9
    apr_getopt_t * o;
Packit 90a5c9
    apr_pool_t * pool;
Packit 90a5c9
    apr_pool_t *pline;
Packit 90a5c9
    apr_status_t status;
Packit 90a5c9
    const char * arg;
Packit 90a5c9
    char * stats = NULL;
Packit 90a5c9
    char * inbuffer;
Packit 90a5c9
    char * outbuffer;
Packit 90a5c9
    char * line;
Packit 90a5c9
    int doublelookups = 0;
Packit 90a5c9
Packit 90a5c9
    if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
Packit 90a5c9
        return 1;
Packit 90a5c9
    }
Packit 90a5c9
    atexit(apr_terminate);
Packit 90a5c9
Packit 90a5c9
    if (argc) {
Packit 90a5c9
        shortname = apr_filepath_name_get(argv[0]);
Packit 90a5c9
    }
Packit 90a5c9
Packit 90a5c9
    if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
Packit 90a5c9
        return 1;
Packit 90a5c9
    }
Packit 90a5c9
    apr_file_open_stderr(&errfile, pool);
Packit 90a5c9
    apr_getopt_init(&o, pool, argc, argv);
Packit 90a5c9
Packit 90a5c9
    while (1) {
Packit 90a5c9
        char opt;
Packit 90a5c9
        status = apr_getopt(o, "s:c", &opt, &arg;;
Packit 90a5c9
        if (status == APR_EOF) {
Packit 90a5c9
            break;
Packit 90a5c9
        }
Packit 90a5c9
        else if (status != APR_SUCCESS) {
Packit 90a5c9
            usage();
Packit 90a5c9
        }
Packit 90a5c9
        else {
Packit 90a5c9
            switch (opt) {
Packit 90a5c9
            case 'c':
Packit 90a5c9
                if (doublelookups) {
Packit 90a5c9
                    usage();
Packit 90a5c9
                }
Packit 90a5c9
                doublelookups = 1;
Packit 90a5c9
                break;
Packit 90a5c9
            case 's':
Packit 90a5c9
                if (stats) {
Packit 90a5c9
                    usage();
Packit 90a5c9
                }
Packit 90a5c9
                stats = apr_pstrdup(pool, arg);
Packit 90a5c9
                break;
Packit 90a5c9
            } /* switch */
Packit 90a5c9
        } /* else */
Packit 90a5c9
    } /* while */
Packit 90a5c9
Packit 90a5c9
    apr_file_open_stdout(&outfile, pool);
Packit 90a5c9
    apr_file_open_stdin(&infile, pool);
Packit 90a5c9
Packit 90a5c9
    /* Allocate two new 10k file buffers */
Packit 90a5c9
    if (   (outbuffer = apr_palloc(pool, WRITE_BUF_SIZE)) == NULL
Packit 90a5c9
        || (inbuffer  = apr_palloc(pool, READ_BUF_SIZE))  == NULL
Packit 90a5c9
        || (line      = apr_palloc(pool, LINE_BUF_SIZE))  == NULL) {
Packit 90a5c9
        return 1;
Packit 90a5c9
    }
Packit 90a5c9
Packit 90a5c9
    /* Set the buffers */
Packit 90a5c9
    apr_file_buffer_set(infile, inbuffer, READ_BUF_SIZE);
Packit 90a5c9
    apr_file_buffer_set(outfile, outbuffer, WRITE_BUF_SIZE);
Packit 90a5c9
Packit 90a5c9
    cache = apr_hash_make(pool);
Packit 90a5c9
    if (apr_pool_create(&pline, pool) != APR_SUCCESS) {
Packit 90a5c9
        return 1;
Packit 90a5c9
    }
Packit 90a5c9
Packit 90a5c9
    while (apr_file_gets(line, LINE_BUF_SIZE, infile) == APR_SUCCESS) {
Packit 90a5c9
        char *hostname;
Packit 90a5c9
        char *space;
Packit 90a5c9
        apr_sockaddr_t *ip;
Packit 90a5c9
        apr_sockaddr_t *ipdouble;
Packit 90a5c9
        char dummy[] = " " APR_EOL_STR;
Packit 90a5c9
Packit 90a5c9
        if (line[0] == '\0') {
Packit 90a5c9
            continue;
Packit 90a5c9
        }
Packit 90a5c9
Packit 90a5c9
        /* Count our log entries */
Packit 90a5c9
        entries++;
Packit 90a5c9
Packit 90a5c9
        /* Check if this could even be an IP address */
Packit 90a5c9
        if (!apr_isxdigit(line[0]) && line[0] != ':') {
Packit 90a5c9
            withname++;
Packit 90a5c9
            apr_file_puts(line, outfile);
Packit 90a5c9
            continue;
Packit 90a5c9
        }
Packit 90a5c9
Packit 90a5c9
        /* Terminate the line at the next space */
Packit 90a5c9
        if ((space = strchr(line, ' ')) != NULL) {
Packit 90a5c9
            *space = '\0';
Packit 90a5c9
        }
Packit 90a5c9
        else {
Packit 90a5c9
            space = dummy;
Packit 90a5c9
        }
Packit 90a5c9
Packit 90a5c9
        /* See if we have it in our cache */
Packit 90a5c9
        hostname = (char *) apr_hash_get(cache, line, APR_HASH_KEY_STRING);
Packit 90a5c9
        if (hostname) {
Packit 90a5c9
            apr_file_printf(outfile, "%s %s", hostname, space + 1);
Packit 90a5c9
            cachehits++;
Packit 90a5c9
            continue;
Packit 90a5c9
        }
Packit 90a5c9
Packit 90a5c9
        /* Parse the IP address */
Packit 90a5c9
        status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC, 0, 0, pline);
Packit 90a5c9
        if (status != APR_SUCCESS) {
Packit 90a5c9
            /* Not an IP address */
Packit 90a5c9
            withname++;
Packit 90a5c9
            *space = ' ';
Packit 90a5c9
            apr_file_puts(line, outfile);
Packit 90a5c9
            continue;
Packit 90a5c9
        }
Packit 90a5c9
Packit 90a5c9
        /* This does not make much sense, but historically "resolves" means
Packit 90a5c9
         * "parsed as an IP address". It does not mean we actually resolved
Packit 90a5c9
         * the IP address into a hostname.
Packit 90a5c9
         */
Packit 90a5c9
        resolves++;
Packit 90a5c9
Packit 90a5c9
        /* From here on our we cache each result, even if it was not
Packit 90a5c9
         * successful
Packit 90a5c9
         */
Packit 90a5c9
        cachesize++;
Packit 90a5c9
Packit 90a5c9
        /* Try and perform a reverse lookup */
Packit 90a5c9
        status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS;
Packit 90a5c9
        if (status || hostname == NULL) {
Packit 90a5c9
            /* Could not perform a reverse lookup */
Packit 90a5c9
            *space = ' ';
Packit 90a5c9
            apr_file_puts(line, outfile);
Packit 90a5c9
            noreverse++;
Packit 90a5c9
Packit 90a5c9
            /* Add to cache */
Packit 90a5c9
            *space = '\0';
Packit 90a5c9
            apr_hash_set(cache, line, APR_HASH_KEY_STRING,
Packit 90a5c9
                         apr_pstrdup(apr_hash_pool_get(cache), line));
Packit 90a5c9
            continue;
Packit 90a5c9
        }
Packit 90a5c9
Packit 90a5c9
        /* Perform a double lookup */
Packit 90a5c9
        if (doublelookups) {
Packit 90a5c9
            /* Do a forward lookup on our hostname, and see if that matches our
Packit 90a5c9
             * original IP address.
Packit 90a5c9
             */
Packit 90a5c9
            status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
Packit 90a5c9
                                           0, pline);
Packit 90a5c9
            if (status != APR_SUCCESS ||
Packit 90a5c9
                memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) {
Packit 90a5c9
                /* Double-lookup failed  */
Packit 90a5c9
                *space = ' ';
Packit 90a5c9
                apr_file_puts(line, outfile);
Packit 90a5c9
                doublefailed++;
Packit 90a5c9
Packit 90a5c9
                /* Add to cache */
Packit 90a5c9
                *space = '\0';
Packit 90a5c9
                apr_hash_set(cache, line, APR_HASH_KEY_STRING,
Packit 90a5c9
                             apr_pstrdup(apr_hash_pool_get(cache), line));
Packit 90a5c9
                continue;
Packit 90a5c9
            }
Packit 90a5c9
        }
Packit 90a5c9
Packit 90a5c9
        /* Output the resolved name */
Packit 90a5c9
        apr_file_printf(outfile, "%s %s", hostname, space + 1);
Packit 90a5c9
Packit 90a5c9
        /* Store it in the cache */
Packit 90a5c9
        apr_hash_set(cache, line, APR_HASH_KEY_STRING,
Packit 90a5c9
                     apr_pstrdup(apr_hash_pool_get(cache), hostname));
Packit 90a5c9
Packit 90a5c9
        apr_pool_clear(pline);
Packit 90a5c9
    }
Packit 90a5c9
Packit 90a5c9
    /* Flush any remaining output */
Packit 90a5c9
    apr_file_flush(outfile);
Packit 90a5c9
Packit 90a5c9
    if (stats) {
Packit 90a5c9
        apr_file_t *statsfile;
Packit 90a5c9
        if (apr_file_open(&statsfile, stats,
Packit 90a5c9
                          APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
Packit 90a5c9
                          APR_OS_DEFAULT, pool) != APR_SUCCESS) {
Packit 90a5c9
            apr_file_printf(errfile, "%s: Could not open %s for writing.",
Packit 90a5c9
                            shortname, stats);
Packit 90a5c9
            return 1;
Packit 90a5c9
        }
Packit 90a5c9
        print_statistics(statsfile);
Packit 90a5c9
        apr_file_close(statsfile);
Packit 90a5c9
    }
Packit 90a5c9
Packit 90a5c9
    return 0;
Packit 90a5c9
}