Blame HTTPCache.cc

Packit a4aae4
Packit a4aae4
// -*- mode: c++; c-basic-offset:4 -*-
Packit a4aae4
Packit a4aae4
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
Packit a4aae4
// Access Protocol.
Packit a4aae4
Packit a4aae4
// Copyright (c) 2002,2003 OPeNDAP, Inc.
Packit a4aae4
// Author: James Gallagher <jgallagher@opendap.org>
Packit a4aae4
//
Packit a4aae4
// This library is free software; you can redistribute it and/or
Packit a4aae4
// modify it under the terms of the GNU Lesser General Public
Packit a4aae4
// License as published by the Free Software Foundation; either
Packit a4aae4
// version 2.1 of the License, or (at your option) any later version.
Packit a4aae4
//
Packit a4aae4
// This library is distributed in the hope that it will be useful,
Packit a4aae4
// but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit a4aae4
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit a4aae4
// Lesser General Public License for more details.
Packit a4aae4
//
Packit a4aae4
// You should have received a copy of the GNU Lesser General Public
Packit a4aae4
// License along with this library; if not, write to the Free Software
Packit a4aae4
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
Packit a4aae4
//
Packit a4aae4
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
Packit a4aae4
Packit a4aae4
#include "config.h"
Packit a4aae4
Packit a4aae4
// #define DODS_DEBUG
Packit a4aae4
// #define DODS_DEBUG2
Packit a4aae4
#undef USE_GETENV
Packit a4aae4
Packit a4aae4
#include <pthread.h>
Packit a4aae4
#include <limits.h>
Packit a4aae4
#include <unistd.h>   // for stat
Packit a4aae4
#include <sys/types.h>  // for stat and mkdir
Packit a4aae4
#include <sys/stat.h>
Packit a4aae4
Packit a4aae4
#include <cstring>
Packit a4aae4
#include <cerrno>
Packit a4aae4
Packit a4aae4
#include <iostream>
Packit a4aae4
#include <sstream>
Packit a4aae4
#include <algorithm>
Packit a4aae4
#include <iterator>
Packit a4aae4
#include <set>
Packit a4aae4
Packit a4aae4
#include "Error.h"
Packit a4aae4
#include "InternalErr.h"
Packit a4aae4
#include "ResponseTooBigErr.h"
Packit a4aae4
#ifndef WIN32
Packit a4aae4
#include "SignalHandler.h"
Packit a4aae4
#endif
Packit a4aae4
#include "HTTPCacheInterruptHandler.h"
Packit a4aae4
#include "HTTPCacheTable.h"
Packit a4aae4
#include "HTTPCache.h"
Packit a4aae4
#include "HTTPCacheMacros.h"
Packit a4aae4
#include "SignalHandlerRegisteredErr.h"
Packit a4aae4
Packit a4aae4
#include "util_mit.h"
Packit a4aae4
#include "debug.h"
Packit a4aae4
Packit a4aae4
using namespace std;
Packit a4aae4
Packit a4aae4
namespace libdap {
Packit a4aae4
Packit a4aae4
HTTPCache *HTTPCache::_instance = 0;
Packit a4aae4
Packit a4aae4
// instance_mutex is used to ensure that only one instance is created.
Packit a4aae4
// That is, it protects the body of the HTTPCache::instance() method. This
Packit a4aae4
// mutex is initialized from within the static function once_init_routine()
Packit a4aae4
// and the call to that takes place using pthread_once(), where the
Packit a4aae4
// once-control variable once_block ensures that call happens only once. All
Packit a4aae4
// of this ensures that no matter how many threads call the instance()
Packit a4aae4
// method, only one instance is ever made.
Packit a4aae4
static pthread_mutex_t instance_mutex;
Packit a4aae4
static pthread_once_t once_block = PTHREAD_ONCE_INIT;
Packit a4aae4
Packit a4aae4
Packit a4aae4
// Default lifetime, in seconds, of a response that carries no expiration
// information. Parenthesized so the macro expands safely inside larger
// expressions (e.g. 'x / NO_LM_EXPIRATION').
#define NO_LM_EXPIRATION (24*3600) // 24 hours

#define DUMP_FREQUENCY 10 // Dump index every x loads

// Number of bytes in one megabyte; the sizes below are given in megabytes
// and percentages.
#define MEGA 0x100000L
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
#define CACHE_GC_PCT 10  // 10% of cache size free after GC
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
Packit a4aae4
Packit a4aae4
static void
Packit a4aae4
once_init_routine()
Packit a4aae4
{
Packit a4aae4
    int status;
Packit a4aae4
    status = INIT(&instance_mutex);
Packit a4aae4
Packit a4aae4
    if (status != 0)
Packit a4aae4
        throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Get a pointer to the HTTP 1.1 compliant cache. If not already
Packit a4aae4
    instantiated, this creates an instance of the HTTP cache object and
Packit a4aae4
    initializes it to use \c cache_root as the location of the persistent
Packit a4aae4
    store. If there's an index (\c .index) file in that directory, it is read
Packit a4aae4
    as part of the initialization. If the cache has already been initialized,
Packit a4aae4
    this method returns a pointer to that instance. Note HTTPCache uses the
Packit a4aae4
    singleton pattern; A process may have only one instance of this object.
Packit a4aae4
    Also note that HTTPCache is MT-safe. However, if the \c force parameter
Packit a4aae4
    is set to true, it may be possible for two or more processes to access
Packit a4aae4
    the persistent store at the same time resulting in undefined behavior.
Packit a4aae4
Packit a4aae4
    Default values: is_cache_enabled(): true, is_cache_protected(): false,
Packit a4aae4
    is_expire_ignored(): false, the total size of the cache is 20M, 2M of that
Packit a4aae4
    is reserved for response headers, during GC the cache is reduced to at
Packit a4aae4
    least 18M (total size - 10% of the total size), and the max size for an
Packit a4aae4
    individual entry is 3M. It is possible to change the size of the cache,
Packit a4aae4
    but not to make it smaller than 5M. If expiration information is not sent
Packit a4aae4
    with a response, it is assumed to expire in 24 hours.
Packit a4aae4
Packit a4aae4
    @param cache_root The fully qualified pathname of the directory which
Packit a4aae4
    will hold the cache data (i.e., the persistent store).
Packit a4aae4
    @param force Force access to the persistent store if true. By default
Packit a4aae4
    false. Use this only if you're sure no one else is using the same cache
Packit a4aae4
    root! This is included so that programs may use a cache that was
Packit a4aae4
    left in an inconsistent state.
Packit a4aae4
    @return A pointer to the HTTPCache object.
Packit a4aae4
    @exception Error thrown if the cache root cannot set. */
Packit a4aae4
Packit a4aae4
HTTPCache *
Packit a4aae4
HTTPCache::instance(const string &cache_root, bool force)
Packit a4aae4
{
Packit a4aae4
    int status = pthread_once(&once_block, once_init_routine);
Packit a4aae4
    if (status != 0)
Packit a4aae4
        throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
Packit a4aae4
Packit a4aae4
    LOCK(&instance_mutex);
Packit a4aae4
Packit a4aae4
    DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" << "... ");
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
        if (!_instance) {
Packit a4aae4
            _instance = new HTTPCache(cache_root, force);
Packit a4aae4
Packit a4aae4
            DBG(cerr << "New instance: " << _instance << ", cache root: "
Packit a4aae4
                << _instance->d_cache_root << endl);
Packit a4aae4
Packit a4aae4
            atexit(delete_instance);
Packit a4aae4
Packit a4aae4
#ifndef WIN32
Packit a4aae4
            // Register the interrupt handler. If we've already registered
Packit a4aae4
            // one, barf. If this becomes a problem, hack SignalHandler so
Packit a4aae4
            // that we can chain these handlers... 02/10/04 jhrg
Packit a4aae4
            //
Packit a4aae4
            // Technically we're leaking memory here. However, since this
Packit a4aae4
            // class is a singleton, we know that only three objects will
Packit a4aae4
            // ever be created and they will all exist until the process
Packit a4aae4
            // exits. We can let this slide... 02/12/04 jhrg
Packit a4aae4
            EventHandler *old_eh = SignalHandler::instance()->register_handler(SIGINT, new HTTPCacheInterruptHandler, true);
Packit a4aae4
            if (old_eh) {
Packit a4aae4
                SignalHandler::instance()->register_handler(SIGINT, old_eh);
Packit a4aae4
                throw SignalHandlerRegisteredErr(
Packit a4aae4
                    "Could not register event handler for SIGINT without superseding an existing one.");
Packit a4aae4
            }
Packit a4aae4
Packit a4aae4
            old_eh = SignalHandler::instance()->register_handler(SIGPIPE, new HTTPCacheInterruptHandler, true);
Packit a4aae4
            if (old_eh) {
Packit a4aae4
                SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
Packit a4aae4
                throw SignalHandlerRegisteredErr(
Packit a4aae4
                    "Could not register event handler for SIGPIPE without superseding an existing one.");
Packit a4aae4
            }
Packit a4aae4
Packit a4aae4
            old_eh = SignalHandler::instance()->register_handler(SIGTERM, new HTTPCacheInterruptHandler, true);
Packit a4aae4
            if (old_eh) {
Packit a4aae4
                SignalHandler::instance()->register_handler(SIGTERM, old_eh);
Packit a4aae4
                throw SignalHandlerRegisteredErr(
Packit a4aae4
                    "Could not register event handler for SIGTERM without superseding an existing one.");
Packit a4aae4
            }
Packit a4aae4
#endif
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
    catch (...) {
Packit a4aae4
        DBG2(cerr << "The constructor threw an Error!" << endl);
Packit a4aae4
        UNLOCK(&instance_mutex);
Packit a4aae4
        throw;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    UNLOCK(&instance_mutex);
Packit a4aae4
    DBGN(cerr << "returning " << hex << _instance << dec << endl);
Packit a4aae4
Packit a4aae4
    return _instance;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** This static method is called using atexit(). It deletes the singleton;
Packit a4aae4
    see ~HTTPCache for all that implies. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::delete_instance()
Packit a4aae4
{
Packit a4aae4
    DBG(cerr << "Entering delete_instance()..." << endl);
Packit a4aae4
Packit a4aae4
    if (HTTPCache::_instance) {
Packit a4aae4
        DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
Packit a4aae4
        delete HTTPCache::_instance;
Packit a4aae4
        HTTPCache::_instance = 0;
Packit a4aae4
Packit a4aae4
        //Now remove the signal handlers
Packit a4aae4
        delete SignalHandler::instance()->remove_handler(SIGINT);
Packit a4aae4
        delete SignalHandler::instance()->remove_handler(SIGPIPE);
Packit a4aae4
        delete SignalHandler::instance()->remove_handler(SIGTERM);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    DBG(cerr << "Exiting delete_instance()" << endl);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Create an instance of the HTTP 1.1 compliant cache. This initializes the
Packit a4aae4
    both the cache root and the path to the index file. It then reads the
Packit a4aae4
    cache index file if one is present.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @note This assumes that the cache directory structure should be created!
Packit a4aae4
    @param cache_root The fully qualified pathname of the directory which
Packit a4aae4
    will hold the cache data.
Packit a4aae4
    @param force Force access to the persistent store!
Packit a4aae4
    @exception Error Thrown if the single user/process lock for the
Packit a4aae4
    persistent store cannot be obtained.
Packit a4aae4
    @see cache_index_read */
Packit a4aae4
Packit a4aae4
HTTPCache::HTTPCache(string cache_root, bool force) :
Packit a4aae4
        d_locked_open_file(0),
Packit a4aae4
        d_cache_enabled(false),
Packit a4aae4
        d_cache_protected(false),
Packit a4aae4
Packit a4aae4
        d_cache_disconnected(DISCONNECT_NONE),
Packit a4aae4
Packit a4aae4
        d_expire_ignored(false),
Packit a4aae4
        d_always_validate(false),
Packit a4aae4
        d_total_size(CACHE_TOTAL_SIZE * MEGA),
Packit a4aae4
        d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
Packit a4aae4
        d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
Packit a4aae4
        d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
Packit a4aae4
        d_default_expiration(NO_LM_EXPIRATION),
Packit a4aae4
        d_max_age(-1),
Packit a4aae4
        d_max_stale(-1),
Packit a4aae4
        d_min_fresh(-1),
Packit a4aae4
        d_http_cache_table(0)
Packit a4aae4
{
Packit a4aae4
    DBG(cerr << "Entering the constructor for " << this << "... ");
Packit a4aae4
#if 0
Packit a4aae4
	int status = pthread_once(&once_block, once_init_routine);
Packit a4aae4
	if (status != 0)
Packit a4aae4
		throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
Packit a4aae4
#endif
Packit a4aae4
	INIT(&d_cache_mutex);
Packit a4aae4
Packit a4aae4
	// This used to throw an Error object if we could not get the
Packit a4aae4
	// single user lock. However, that results in an invalid object. It's
Packit a4aae4
	// better to have an instance that has default values. If we cannot get
Packit a4aae4
	// the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
Packit a4aae4
	//
Packit a4aae4
	// I fixed this block so that the cache root is set before we try to get
Packit a4aae4
	// the single user lock. That was the fix for bug #661. To make that
Packit a4aae4
	// work, I had to move the call to create_cache_root out of
Packit a4aae4
	// set_cache_root(). 09/08/03 jhrg
Packit a4aae4
Packit a4aae4
	set_cache_root(cache_root);
Packit a4aae4
	int block_size;
Packit a4aae4
Packit a4aae4
	if (!get_single_user_lock(force))
Packit a4aae4
	    throw Error(internal_error, "Could not get single user lock for the cache");
Packit a4aae4
Packit a4aae4
#ifdef WIN32
Packit a4aae4
	//  Windows is unable to provide us this information.  4096 appears
Packit a4aae4
	//  a best guess.  It is likely to be in the range [2048, 8192] on
Packit a4aae4
	//  windows, but will the level of truth of that statement vary over
Packit a4aae4
	//  time ?
Packit a4aae4
	block_size = 4096;
Packit a4aae4
#else
Packit a4aae4
	struct stat s;
Packit a4aae4
	if (stat(cache_root.c_str(), &s) == 0)
Packit a4aae4
		block_size = s.st_blksize;
Packit a4aae4
	else
Packit a4aae4
		throw Error(internal_error, "Could not set file system block size.");
Packit a4aae4
#endif
Packit a4aae4
	d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
Packit a4aae4
	d_cache_enabled = true;
Packit a4aae4
Packit a4aae4
	DBGN(cerr << "exiting" << endl);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Destroy an instance of HTTPCache. This writes the cache index and frees
Packit a4aae4
    the in-memory cache table structure. The persistent cache (the response
Packit a4aae4
    headers and bodies and the index file) are not removed. To remove those,
Packit a4aae4
    either erase the directory that contains the cache using a file system
Packit a4aae4
    command or use the purge_cache() method (which leaves the cache directory
Packit a4aae4
    structure in place but removes all the cached information).
Packit a4aae4
Packit a4aae4
    This class uses the singleton pattern. Clients should \e never call this
Packit a4aae4
    method. The HTTPCache::instance() method arranges to call the
Packit a4aae4
    HTTPCache::delete_instance() using \c atexit(). If delete is called more
Packit a4aae4
    than once, the result will likely be an index file that is corrupt. */
Packit a4aae4
Packit a4aae4
HTTPCache::~HTTPCache()
Packit a4aae4
{
Packit a4aae4
    DBG(cerr << "Entering the destructor for " << this << "... ");
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
        if (startGC())
Packit a4aae4
            perform_garbage_collection();
Packit a4aae4
Packit a4aae4
        d_http_cache_table->cache_index_write();
Packit a4aae4
    }
Packit a4aae4
    catch (Error &e) {
Packit a4aae4
        // If the cache index cannot be written, we've got problems. However,
Packit a4aae4
        // unless we're debugging, still free up the cache table in memory.
Packit a4aae4
        // How should we let users know they cache index is not being
Packit a4aae4
        // written?? 10/03/02 jhrg
Packit a4aae4
        DBG(cerr << e.get_error_message() << endl);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    delete d_http_cache_table;
Packit a4aae4
Packit a4aae4
    release_single_user_lock();
Packit a4aae4
Packit a4aae4
    DBGN(cerr << "exiting destructor." << endl);
Packit a4aae4
    DESTROY(&d_cache_mutex);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
Packit a4aae4
/** @name Garbage collection
Packit a4aae4
    These private methods manage the garbage collection tasks for the cache. */
Packit a4aae4
//@{
Packit a4aae4
Packit a4aae4
/** Enough removed from cache? A private method.
Packit a4aae4
    @return True if enough has been removed from the cache. */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPCache::stopGC() const
Packit a4aae4
{
Packit a4aae4
    return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Is there too much in the cache. A private method.
Packit a4aae4
Packit a4aae4
    @todo Modify this method so that it does not count locked entries. See
Packit a4aae4
    the note for hits_gc().
Packit a4aae4
    @return True if garbage collection should be performed. */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPCache::startGC() const
Packit a4aae4
{
Packit a4aae4
    DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
Packit a4aae4
    return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Perform garbage collection on the cache. First, all expired responses are
Packit a4aae4
    removed. Then, if the size of the cache is still too large, the cache is
Packit a4aae4
    scanned for responses larger than the max_entry_size property. At the
Packit a4aae4
    same time, responses are removed based on the number of cache hits. This
Packit a4aae4
    process continues until the size of the cache has been reduced to 90% of
Packit a4aae4
    the max_size property value. Once the garbage collection is complete,
Packit a4aae4
    update the index file. Note that locked entries are not removed!
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @see stopGC
Packit a4aae4
    @see expired_gc
Packit a4aae4
    @see hits_gc */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::perform_garbage_collection()
Packit a4aae4
{
Packit a4aae4
    DBG(cerr << "Performing garbage collection" << endl);
Packit a4aae4
Packit a4aae4
    // Remove all the expired responses.
Packit a4aae4
    expired_gc();
Packit a4aae4
Packit a4aae4
    // Remove entries larger than max_entry_size.
Packit a4aae4
    too_big_gc();
Packit a4aae4
Packit a4aae4
    // Remove entries starting with zero hits, 1, ..., until stopGC()
Packit a4aae4
    // returns true.
Packit a4aae4
    hits_gc();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Scan the current cache table and remove anything that has expired. Don't
Packit a4aae4
    remove locked entries.
Packit a4aae4
Packit a4aae4
    A private method. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::expired_gc()
Packit a4aae4
{
Packit a4aae4
    if (!d_expire_ignored) {
Packit a4aae4
        d_http_cache_table->delete_expired_entries();
Packit a4aae4
    }
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Scan the cache for entires that are larger than max_entry_size. Also
Packit a4aae4
    start removing entires with low hit counts. Start looking for entries
Packit a4aae4
    with zero hits, then one, and so on. Stop when the method stopGC returns
Packit a4aae4
    true. Locked entries are never removed.
Packit a4aae4
Packit a4aae4
    @note Potential infinite loop. What if more than 80% of the cache holds
Packit a4aae4
    entries that are locked? One solution is to modify startGC() so that it
Packit a4aae4
    does not count locked entries.
Packit a4aae4
Packit a4aae4
    @todo Change this method to that it looks at the oldest entries first,
Packit a4aae4
    using the CacheEntry::date to determine entry age. Using the current
Packit a4aae4
    algorithm it's possible to remove the latest entry which is probably not
Packit a4aae4
    what we want.
Packit a4aae4
Packit a4aae4
    A private method. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::hits_gc()
Packit a4aae4
{
Packit a4aae4
    int hits = 0;
Packit a4aae4
Packit a4aae4
    if (startGC()) {
Packit a4aae4
		while (!stopGC()) {
Packit a4aae4
			d_http_cache_table->delete_by_hits(hits);
Packit a4aae4
			hits++;
Packit a4aae4
		}
Packit a4aae4
	}
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Scan the current cache table and remove anything that has is too big.
Packit a4aae4
 	Don't remove locked entries.
Packit a4aae4
Packit a4aae4
    A private method. */
Packit a4aae4
void HTTPCache::too_big_gc() {
Packit a4aae4
	if (startGC())
Packit a4aae4
		d_http_cache_table->delete_by_size(d_max_entry_size);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
//@} End of the garbage collection methods.
Packit a4aae4
Packit a4aae4
/** Lock the persistent store part of the cache. Return true if the cache lock
Packit a4aae4
    was acquired, false otherwise. This is a single user cache, so it
Packit a4aae4
    requires locking at the process level.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param force If True force access to the persistent store. False by
Packit a4aae4
    default.
Packit a4aae4
    @return True if the cache was locked for our use, False otherwise. */
Packit a4aae4
Packit a4aae4
bool HTTPCache::get_single_user_lock(bool force) 
Packit a4aae4
{
Packit a4aae4
    if (!d_locked_open_file) {
Packit a4aae4
	FILE * fp = NULL;
Packit a4aae4
Packit a4aae4
	try {
Packit a4aae4
	    // It's OK to call create_cache_root if the directory already
Packit a4aae4
	    // exists.
Packit a4aae4
	    create_cache_root(d_cache_root);
Packit a4aae4
	}
Packit a4aae4
	catch (Error &e) {
Packit a4aae4
	    // We need to catch and return false because this method is
Packit a4aae4
	    // called from a ctor and throwing at this point will result in a
Packit a4aae4
	    // partially constructed object. 01/22/04 jhrg
Packit a4aae4
	    DBG(cerr << "Failure to create the cache root" << endl);
Packit a4aae4
	    return false;
Packit a4aae4
	}
Packit a4aae4
Packit a4aae4
	// Try to read the lock file. If we can open for reading, it exists.
Packit a4aae4
	string lock = d_cache_root + CACHE_LOCK;
Packit a4aae4
	if ((fp = fopen(lock.c_str(), "r")) != NULL) {
Packit a4aae4
	    int res = fclose(fp);
Packit a4aae4
	    if (res) {
Packit a4aae4
		DBG(cerr << "Failed to close " << (void *)fp << endl);
Packit a4aae4
	    }
Packit a4aae4
	    if (force)
Packit a4aae4
		REMOVE(lock.c_str());
Packit a4aae4
	    else
Packit a4aae4
		return false;
Packit a4aae4
	}
Packit a4aae4
Packit a4aae4
	if ((fp = fopen(lock.c_str(), "w")) == NULL) {
Packit a4aae4
	    DBG(cerr << "Could not open for write access" << endl);
Packit a4aae4
	    return false;
Packit a4aae4
	}
Packit a4aae4
Packit a4aae4
	d_locked_open_file = fp;
Packit a4aae4
	return true;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    DBG(cerr << "locked_open_file is true" << endl);
Packit a4aae4
    return false;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Release the single user (process) lock. A private method. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::release_single_user_lock()
Packit a4aae4
{
Packit a4aae4
    if (d_locked_open_file) {
Packit a4aae4
        int res = fclose(d_locked_open_file);
Packit a4aae4
        if (res) {
Packit a4aae4
            DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
Packit a4aae4
        }
Packit a4aae4
        d_locked_open_file = 0;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    string lock = d_cache_root + CACHE_LOCK;
Packit a4aae4
    REMOVE(lock.c_str());
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** @name Accessors and Mutators for various properties. */
Packit a4aae4
//@{
Packit a4aae4
Packit a4aae4
/** Get the current cache root directory.
Packit a4aae4
    @return A string that contains the cache root directory. */
Packit a4aae4
Packit a4aae4
string
Packit a4aae4
HTTPCache::get_cache_root() const
Packit a4aae4
{
Packit a4aae4
    return d_cache_root;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
Packit a4aae4
/** Create the cache's root directory. This is the persistent store used by
Packit a4aae4
    the cache. Paths must always end in DIR_SEPARATOR_CHAR.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param cache_root The pathname to the desired cache root directory.
Packit a4aae4
    @exception Error Thrown if the given pathname cannot be created. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::create_cache_root(const string &cache_root)
Packit a4aae4
{
Packit a4aae4
#ifdef WIN32
Packit a4aae4
    string::size_type cur = cache_root[1] == ':' ? 3 : 1;
Packit a4aae4
    typedef int mode_t;
Packit a4aae4
Packit a4aae4
    while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
Packit a4aae4
        string dir = cache_root.substr(0, cur);
Packit a4aae4
        struct stat stat_info;
Packit a4aae4
        if (stat(dir.c_str(), &stat_info) == -1) {
Packit a4aae4
            DBG2(cerr << "Cache....... Creating " << dir << endl);
Packit a4aae4
            mode_t mask = UMASK(0);
Packit a4aae4
            if (MKDIR(dir.c_str(), 0777) < 0) {
Packit a4aae4
                DBG2(cerr << "Error: can't create." << endl);
Packit a4aae4
                UMASK(mask);
Packit a4aae4
                throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
Packit a4aae4
            }
Packit a4aae4
            UMASK(mask);
Packit a4aae4
        }
Packit a4aae4
        else {
Packit a4aae4
            DBG2(cerr << "Cache....... Found " << dir << endl);
Packit a4aae4
        }
Packit a4aae4
        cur++;
Packit a4aae4
    }
Packit a4aae4
#else
Packit a4aae4
    // OSX and Linux
Packit a4aae4
Packit a4aae4
    // Save the mask
Packit a4aae4
    mode_t mask = umask(0);
Packit a4aae4
Packit a4aae4
    // Ignore the error if the directory exists
Packit a4aae4
    errno = 0;
Packit a4aae4
    if (mkdir(cache_root.c_str(), 0777) < 0 && errno != EEXIST) {
Packit a4aae4
        umask(mask);
Packit a4aae4
        throw Error("Could not create the directory for the cache at '" + cache_root + "' (" + strerror(errno) + ").");
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    // Restore themask
Packit a4aae4
    umask(mask);
Packit a4aae4
Packit a4aae4
#endif
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Set the cache's root directory to the given path. If no path is given,
Packit a4aae4
    look at the DODS_CACHE, TMP and TEMP environment variables (in that
Packit a4aae4
    order) to guess at a good location. If those are all NULL, use \c /tmp.
Packit a4aae4
    If the cache root directory cannot be created, throw an exception.
Packit a4aae4
Packit a4aae4
    Note that in most cases callers should look for this path in the user's
Packit a4aae4
    .dodsrc file.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @see RCReader
Packit a4aae4
    @param root Set the cache root to this pathname. Defaults to "".
Packit a4aae4
    @exception Error Thrown if the path can neither be deduced nor created. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::set_cache_root(const string &root)
Packit a4aae4
{
Packit a4aae4
    if (root != "") {
Packit a4aae4
        d_cache_root = root;
Packit a4aae4
        // cache root should end in /.
Packit a4aae4
        if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
Packit a4aae4
            d_cache_root += DIR_SEPARATOR_CHAR;
Packit a4aae4
    }
Packit a4aae4
    else {
Packit a4aae4
        // If no cache root has been indicated then look for a suitable
Packit a4aae4
        // location.
Packit a4aae4
#ifdef USE_GETENV
Packit a4aae4
        char * cr = (char *) getenv("DODS_CACHE");
Packit a4aae4
        if (!cr) cr = (char *) getenv("TMP");
Packit a4aae4
        if (!cr) cr = (char *) getenv("TEMP");
Packit a4aae4
        if (!cr) cr = (char*)CACHE_LOCATION;
Packit a4aae4
        d_cache_root = cr;
Packit a4aae4
#else
Packit a4aae4
        d_cache_root = CACHE_LOCATION;
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
        if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
Packit a4aae4
            d_cache_root += DIR_SEPARATOR_CHAR;
Packit a4aae4
Packit a4aae4
        d_cache_root += CACHE_ROOT;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    // Test d_hhtp_cache_table because this method can be called before that
Packit a4aae4
    // instance is created and also can be called later to change the cache
Packit a4aae4
    // root. jhrg 05.14.08
Packit a4aae4
    if (d_http_cache_table)
Packit a4aae4
    	d_http_cache_table->set_cache_root(d_cache_root);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Enable or disable the cache. The cache can be temporarily suspended using
Packit a4aae4
    the enable/disable property. This does not prevent the cache from being
Packit a4aae4
    enabled/disable at a later point in time.
Packit a4aae4
Packit a4aae4
    Default: yes
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @param mode True if the cache should be enabled, False if it should be
Packit a4aae4
    disabled. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::set_cache_enabled(bool mode)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    d_cache_enabled = mode;
Packit a4aae4
Packit a4aae4
    unlock_cache_interface();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Is the cache currently enabled? */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPCache::is_cache_enabled() const
Packit a4aae4
{
Packit a4aae4
    DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
Packit a4aae4
         << endl);
Packit a4aae4
    return d_cache_enabled;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Set the cache's disconnected property. The cache can operate either
Packit a4aae4
    disconnected from the network or using a proxy cache (but tell that proxy
Packit a4aae4
    not to use the network).
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @param mode One of DISCONNECT_NONE, DISCONNECT_NORMAL or
Packit a4aae4
    DISCONNECT_EXTERNAL.
Packit a4aae4
    @see CacheDIsconnectedMode */
Packit a4aae4
void
Packit a4aae4
HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    d_cache_disconnected = mode;
Packit a4aae4
Packit a4aae4
    unlock_cache_interface();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Get the cache's disconnected mode property. */
Packit a4aae4
Packit a4aae4
CacheDisconnectedMode
Packit a4aae4
HTTPCache::get_cache_disconnected() const
Packit a4aae4
{
Packit a4aae4
    return d_cache_disconnected;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** How should the cache handle the Expires header?
Packit a4aae4
    Default: no
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @param mode True if a responses Expires header should be ignored, False
Packit a4aae4
    otherwise. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::set_expire_ignored(bool mode)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    d_expire_ignored = mode;
Packit a4aae4
Packit a4aae4
    unlock_cache_interface();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Is the cache ignoring Expires headers returned with responses that have
Packit a4aae4
   been cached? */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPCache::is_expire_ignored() const
Packit a4aae4
{
Packit a4aae4
    return d_expire_ignored;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Cache size management. The default cache size is 20M. The minimum size is
Packit a4aae4
    5M in order not to get into weird problems while writing the cache. The
Packit a4aae4
    size is indicated in Mega bytes. Note that reducing the size of the cache
Packit a4aae4
    may trigger a garbage collection operation.
Packit a4aae4
Packit a4aae4
    @note The maximum cache size is UINT_MAX bytes (usually 4294967295 for
Packit a4aae4
    32-bit computers). If \e size is larger the value will be truncated to
Packit a4aae4
    the value of that constant. It seems pretty unlikely that will happen
Packit a4aae4
    given that the parameter is an unsigned long. This is a fix for bug 689
Packit a4aae4
    which was reported when the parameter type was signed.
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @param size The maximum size of the cache in megabytes. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::set_max_size(unsigned long size)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
        unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
Packit a4aae4
                                 MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA;
Packit a4aae4
        unsigned long old_size = d_total_size;
Packit a4aae4
        d_total_size = new_size;
Packit a4aae4
        d_folder_size = d_total_size / CACHE_FOLDER_PCT;
Packit a4aae4
        d_gc_buffer = d_total_size / CACHE_GC_PCT;
Packit a4aae4
Packit a4aae4
        if (new_size < old_size && startGC()) {
Packit a4aae4
            perform_garbage_collection();
Packit a4aae4
            d_http_cache_table->cache_index_write();
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
    catch (...) {
Packit a4aae4
        unlock_cache_interface();
Packit a4aae4
        DBGN(cerr << "Unlocking interface." << endl);
Packit a4aae4
        throw;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    DBG2(cerr << "Cache....... Total cache size: " << d_total_size
Packit a4aae4
         << " with " << d_folder_size
Packit a4aae4
         << " bytes for meta information and folders and at least "
Packit a4aae4
         << d_gc_buffer << " bytes free after every gc" << endl);
Packit a4aae4
Packit a4aae4
    unlock_cache_interface();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** How big is the cache? The value returned is the size in megabytes. */
Packit a4aae4
Packit a4aae4
unsigned long
Packit a4aae4
HTTPCache::get_max_size() const
Packit a4aae4
{
Packit a4aae4
    return d_total_size / MEGA;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Set the maximum size for a single entry in the cache.
Packit a4aae4
Packit a4aae4
    Default: 3M
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @param size The size in megabytes. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::set_max_entry_size(unsigned long size)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
        unsigned long new_size = size * MEGA;
Packit a4aae4
        if (new_size > 0 && new_size < d_total_size - d_folder_size) {
Packit a4aae4
            unsigned long old_size = d_max_entry_size;
Packit a4aae4
            d_max_entry_size = new_size;
Packit a4aae4
            if (new_size < old_size && startGC()) {
Packit a4aae4
                perform_garbage_collection();
Packit a4aae4
                d_http_cache_table->cache_index_write();
Packit a4aae4
            }
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
    catch (...) {
Packit a4aae4
        unlock_cache_interface();
Packit a4aae4
        throw;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    DBG2(cerr << "Cache...... Max entry cache size is "
Packit a4aae4
         << d_max_entry_size << endl);
Packit a4aae4
Packit a4aae4
    unlock_cache_interface();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Get the maximum size of an individual entry in the cache.
Packit a4aae4
Packit a4aae4
    @return The maximum size in megabytes. */
Packit a4aae4
Packit a4aae4
unsigned long
Packit a4aae4
HTTPCache::get_max_entry_size() const
Packit a4aae4
{
Packit a4aae4
    return d_max_entry_size / MEGA;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Set the default expiration time. Use the default expiration
Packit a4aae4
    property to determine when a cached response becomes stale if the
Packit a4aae4
    response lacks the information necessary to compute a specific value.
Packit a4aae4
Packit a4aae4
    Default: 24 hours (86,400 seconds)
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @param exp_time The time in seconds. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::set_default_expiration(const int exp_time)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    d_default_expiration = exp_time;
Packit a4aae4
Packit a4aae4
    unlock_cache_interface();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Get the default expiration time used by the cache. */
Packit a4aae4
Packit a4aae4
int
Packit a4aae4
HTTPCache::get_default_expiration() const
Packit a4aae4
{
Packit a4aae4
    return d_default_expiration;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Should every cache entry be validated?
Packit a4aae4
    @param validate True if every cache entry should be validated before
Packit a4aae4
    being used. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::set_always_validate(bool validate)
Packit a4aae4
{
Packit a4aae4
    d_always_validate = validate;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Should every cache entry be validated before each use?
Packit a4aae4
    @return True if all cache entries require validation. */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPCache::get_always_validate() const
Packit a4aae4
{
Packit a4aae4
    return d_always_validate;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Set the request Cache-Control headers. If a request must be satisfied
Packit a4aae4
    using HTTP, these headers should be included in request since they might
Packit a4aae4
    be pertinent to a proxy cache.
Packit a4aae4
Packit a4aae4
    Ignored headers: no-transform, only-if-cached. These headers are not used
Packit a4aae4
    by HTTPCache and are not recorded. However, if present in the vector
Packit a4aae4
    passed to this method, they will be present in the vector returned by
Packit a4aae4
    get_cache_control.
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @param cc A vector of strings, each string holds one Cache-Control
Packit a4aae4
    header.
Packit a4aae4
    @exception InternalErr Thrown if one of the strings in \c cc does not
Packit a4aae4
    start with 'Cache-Control: '. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::set_cache_control(const vector<string> &cc)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
        d_cache_control = cc;
Packit a4aae4
Packit a4aae4
        vector<string>::const_iterator i;
Packit a4aae4
        for (i = cc.begin(); i != cc.end(); ++i) {
Packit a4aae4
            string header = (*i).substr(0, (*i).find(':'));
Packit a4aae4
            string value = (*i).substr((*i).find(": ") + 2);
Packit a4aae4
            if (header != "Cache-Control") {
Packit a4aae4
                throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
Packit a4aae4
            }
Packit a4aae4
            else {
Packit a4aae4
                if (value == "no-cache" || value == "no-store")
Packit a4aae4
                    d_cache_enabled = false;
Packit a4aae4
                else if (value.find("max-age") != string::npos) {
Packit a4aae4
                    string max_age = value.substr(value.find("=" + 1));
Packit a4aae4
                    d_max_age = parse_time(max_age.c_str());
Packit a4aae4
                }
Packit a4aae4
                else if (value == "max-stale")
Packit a4aae4
                    d_max_stale = 0; // indicates will take anything;
Packit a4aae4
                else if (value.find("max-stale") != string::npos) {
Packit a4aae4
                    string max_stale = value.substr(value.find("=" + 1));
Packit a4aae4
                    d_max_stale = parse_time(max_stale.c_str());
Packit a4aae4
                }
Packit a4aae4
                else if (value.find("min-fresh") != string::npos) {
Packit a4aae4
                    string min_fresh = value.substr(value.find("=" + 1));
Packit a4aae4
                    d_min_fresh = parse_time(min_fresh.c_str());
Packit a4aae4
                }
Packit a4aae4
            }
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
    catch (...) {
Packit a4aae4
        unlock_cache_interface();
Packit a4aae4
        throw;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    unlock_cache_interface();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
Packit a4aae4
/** Get the Cache-Control headers.
Packit a4aae4
Packit a4aae4
    @return A vector of strings, one string for each header. */
Packit a4aae4
Packit a4aae4
vector<string>
Packit a4aae4
HTTPCache::get_cache_control()
Packit a4aae4
{
Packit a4aae4
    return d_cache_control;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
//@}
Packit a4aae4
Packit a4aae4
/** Look in the cache for the given \c url. Is it in the cache table?
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
	@todo Remove this is broken.
Packit a4aae4
    @param url The url to look for.
Packit a4aae4
    @return True if \c url is found, otherwise False. */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPCache::is_url_in_cache(const string &url)
Packit a4aae4
{
Packit a4aae4
    DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
Packit a4aae4
Packit a4aae4
    HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
Packit a4aae4
    bool status = entry != 0;
Packit a4aae4
    if (entry) {
Packit a4aae4
        entry->unlock_read_response();
Packit a4aae4
    }
Packit a4aae4
    return  status;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Is the header a hop by hop header? If so, we're not supposed to store it
    in the cache. See RFC 2616, Section 13.5.1.

    Only the field name (the text before the first ':') is examined, and it
    must match one of the names below exactly. Searching the whole line, as
    was done previously, also flagged headers whose value or longer name
    merely contained one of these words (for example
    'Content-Disposition: attachment; filename=Upgrade.nc').

    @param header One complete header line, 'Name: value'.
    @return True if the header is, otherwise False. */

bool
is_hop_by_hop_header(const std::string &header)
{
    static const char *const hop_by_hop_names[] = {
        "Connection",
        "Keep-Alive",
        "Proxy-Authenticate",
        "Proxy-Authorization",
        "Transfer-Encoding",
        "Upgrade"
    };
    const unsigned int name_count = sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    // With no ':' present, find() yields npos and substr() returns the whole
    // string, so a bare field name is still recognized.
    const std::string field_name = header.substr(0, header.find(':'));

    for (unsigned int k = 0; k < name_count; ++k) {
        if (field_name == hop_by_hop_names[k])
            return true;
    }

    return false;
}
Packit a4aae4
Packit a4aae4
/** Dump the headers out to the meta data file. The file is truncated if it
Packit a4aae4
    already exists.
Packit a4aae4
Packit a4aae4
    @todo This code could be replaced with STL/iostream stuff.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param cachename Base name of file for meta data.
Packit a4aae4
    @param headers A vector of strings, one header per string.
Packit a4aae4
    @exception InternalErr Thrown if the file cannot be opened. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
Packit a4aae4
{
Packit a4aae4
    string fname = cachename + CACHE_META;
Packit a4aae4
    d_open_files.push_back(fname);
Packit a4aae4
Packit a4aae4
    FILE *dest = fopen(fname.c_str(), "w");
Packit a4aae4
    if (!dest) {
Packit a4aae4
        throw InternalErr(__FILE__, __LINE__,
Packit a4aae4
                          "Could not open named cache entry file.");
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    vector<string>::const_iterator i;
Packit a4aae4
    for (i = headers.begin(); i != headers.end(); ++i) {
Packit a4aae4
        if (!is_hop_by_hop_header(*i)) {
Packit a4aae4
            int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
Packit a4aae4
            if (s != 1) {
Packit a4aae4
                fclose(dest);
Packit a4aae4
            	throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
Packit a4aae4
            }
Packit a4aae4
            s = fwrite("\n", 1, 1, dest);
Packit a4aae4
            if (s != 1) {
Packit a4aae4
                fclose(dest);
Packit a4aae4
            	throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
Packit a4aae4
            }
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    int res = fclose(dest);
Packit a4aae4
    if (res) {
Packit a4aae4
        DBG(cerr << "HTTPCache::write_metadata - Failed to close "
Packit a4aae4
            << dest << endl);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    d_open_files.pop_back();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Read headers from a .meta.
Packit a4aae4
Packit a4aae4
    @todo This code could be replaced with STL/iostream code.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param cachename The name of the file in the persistent store.
Packit a4aae4
    @param headers The headers are returned using this parameter.
Packit a4aae4
    @exception InternalErr Thrown if the file cannot be opened. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
Packit a4aae4
{
Packit a4aae4
    FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
Packit a4aae4
    if (!md) {
Packit a4aae4
        throw InternalErr(__FILE__, __LINE__,
Packit a4aae4
                          "Could not open named cache entry meta data file.");
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    char line[1024];
Packit a4aae4
    while (!feof(md) && fgets(line, 1024, md)) {
Packit a4aae4
        line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
Packit a4aae4
        headers.push_back(string(line));
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    int res = fclose(md);
Packit a4aae4
    if (res) {
Packit a4aae4
        DBG(cerr << "HTTPCache::read_metadata - Failed to close "
Packit a4aae4
            << md << endl);
Packit a4aae4
    }
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Write the body of the HTTP response to the cache.
Packit a4aae4
Packit a4aae4
    This method used to throw ResponseTooBig if any response was larger than
Packit a4aae4
    max_entry_size. I've disabled that since perform_garbage_collection will
Packit a4aae4
    remove any such entry if it's causing problems. Note that if
Packit a4aae4
    parse_headers finds a Content-Length header that indicates a response is
Packit a4aae4
    too big, the response won't be cached. The idea here is that once we've
Packit a4aae4
    already written a bunch of bytes to the cache, we might as well continue.
Packit a4aae4
    If it overflows the cache, perform_garbage_collection() will remove it.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param cachename Write data to this file.
Packit a4aae4
    @param src Read data from this stream.
Packit a4aae4
    @return The total number of bytes written.
Packit a4aae4
    @exception InternalErr Thrown if the file cannot be opened or if an I/O
Packit a4aae4
    error was detected.
Packit a4aae4
    @exception ResponseTooBig Thrown if the response was found to be bigger
Packit a4aae4
    than the max_entry_size property. This is not longer thrown. 10/11/02
Packit a4aae4
    jhrg */
Packit a4aae4
Packit a4aae4
int
Packit a4aae4
HTTPCache::write_body(const string &cachename, const FILE *src)
Packit a4aae4
{
Packit a4aae4
    d_open_files.push_back(cachename);
Packit a4aae4
Packit a4aae4
    FILE *dest = fopen(cachename.c_str(), "wb");
Packit a4aae4
    if (!dest) {
Packit a4aae4
        throw InternalErr(__FILE__, __LINE__,
Packit a4aae4
                          "Could not open named cache entry file.");
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    // Read and write in 1k blocks; an attempt at doing this efficiently.
Packit a4aae4
    // 09/30/02 jhrg
Packit a4aae4
    char line[1024];
Packit a4aae4
    size_t n;
Packit a4aae4
    int total = 0;
Packit a4aae4
    while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
Packit a4aae4
        total += fwrite(line, 1, n, dest);
Packit a4aae4
        DBG2(sleep(3));
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
Packit a4aae4
        int res = fclose(dest);
Packit a4aae4
        res = res & unlink(cachename.c_str());
Packit a4aae4
        if (res) {
Packit a4aae4
            DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
Packit a4aae4
                << dest << endl);
Packit a4aae4
        }
Packit a4aae4
        throw InternalErr(__FILE__, __LINE__,
Packit a4aae4
                          "I/O error transferring data to the cache.");
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    rewind(const_cast<FILE *>(src));
Packit a4aae4
Packit a4aae4
    int res = fclose(dest);
Packit a4aae4
    if (res) {
Packit a4aae4
        DBG(cerr << "HTTPCache::write_body - Failed to close "
Packit a4aae4
            << dest << endl);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    d_open_files.pop_back();
Packit a4aae4
Packit a4aae4
    return total;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Get a pointer to file that contains the body of a cached response. The
Packit a4aae4
    returned FILE* can be used both for reading and for writing.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param cachename The name of the file that holds the response body.
Packit a4aae4
    @exception InternalErr Thrown if the file cannot be opened. */
Packit a4aae4
Packit a4aae4
FILE *
Packit a4aae4
HTTPCache::open_body(const string &cachename)
Packit a4aae4
{
Packit a4aae4
    DBG(cerr << "cachename: " << cachename << endl);
Packit a4aae4
Packit a4aae4
    FILE *src = fopen(cachename.c_str(), "rb"); // Read only
Packit a4aae4
    if (!src)
Packit a4aae4
	throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
Packit a4aae4
Packit a4aae4
    return src;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Add a new response to the cache, or replace an existing cached response
Packit a4aae4
    with new data. This method returns True if the information for \c url was
Packit a4aae4
    added to the cache. A response might not be cache-able; in that case this
Packit a4aae4
    method returns false. (For example, the response might contain the
Packit a4aae4
    'Cache-Control: no-cache' header.)
Packit a4aae4
Packit a4aae4
    Note that the FILE *body is rewound so that the caller can re-read it
Packit a4aae4
    without using fseek or rewind.
Packit a4aae4
Packit a4aae4
    If a response for \c url is already present in the cache, it will be
Packit a4aae4
    replaced by the new headers and body. To update a response in the cache
Packit a4aae4
    with new meta data, use update_response().
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @param url A string which holds the request URL.
Packit a4aae4
    @param request_time The time when the request was made, in seconds since
Packit a4aae4
    1 Jan 1970.
Packit a4aae4
    @param headers A vector of strings which hold the response headers.
Packit a4aae4
    @param body A FILE * to a file which holds the response body.
Packit a4aae4
    @return True if the response was cached, False if the response could not
Packit a4aae4
    be cached.
Packit a4aae4
    @exception InternalErr Thrown if there was a I/O error while writing to
Packit a4aae4
    the persistent store. */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPCache::cache_response(const string &url, time_t request_time,
Packit a4aae4
                          const vector<string> &headers, const FILE *body)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    DBG(cerr << "Caching url: " << url << "." << endl);
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
        // If this is not an http or https URL, don't cache.
Packit a4aae4
        if (url.find("http:") == string::npos &&
Packit a4aae4
            url.find("https:") == string::npos) {
Packit a4aae4
            unlock_cache_interface();
Packit a4aae4
            return false;
Packit a4aae4
        }
Packit a4aae4
Packit a4aae4
        // This does nothing if url is not already in the cache. It's
Packit a4aae4
        // more efficient to do this than to first check and see if the entry
Packit a4aae4
        // exists. 10/10/02 jhrg
Packit a4aae4
        d_http_cache_table->remove_entry_from_cache_table(url);
Packit a4aae4
Packit a4aae4
        HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url);
Packit a4aae4
        entry->lock_write_response();
Packit a4aae4
Packit a4aae4
        try {
Packit a4aae4
            d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
Packit a4aae4
            if (entry->is_no_cache()) {
Packit a4aae4
                DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
Packit a4aae4
                    << "(" << url << ")" << endl);
Packit a4aae4
                entry->unlock_write_response();
Packit a4aae4
                delete entry; entry = 0;
Packit a4aae4
                unlock_cache_interface();
Packit a4aae4
                return false;
Packit a4aae4
            }
Packit a4aae4
Packit a4aae4
            // corrected_initial_age, freshness_lifetime, response_time.
Packit a4aae4
            d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
Packit a4aae4
Packit a4aae4
            d_http_cache_table->create_location(entry); // cachename, cache_body_fd
Packit a4aae4
            // move these write function to cache table
Packit a4aae4
            entry->set_size(write_body(entry->get_cachename(), body));
Packit a4aae4
            write_metadata(entry->get_cachename(), headers);
Packit a4aae4
            d_http_cache_table->add_entry_to_cache_table(entry);
Packit a4aae4
            entry->unlock_write_response();
Packit a4aae4
        }
Packit a4aae4
        catch (ResponseTooBigErr &e) {
Packit a4aae4
            // Oops. Bummer. Clean up and exit.
Packit a4aae4
            DBG(cerr << e.get_error_message() << endl);
Packit a4aae4
            REMOVE(entry->get_cachename().c_str());
Packit a4aae4
            REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
Packit a4aae4
            DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
Packit a4aae4
                << ")" << endl);
Packit a4aae4
            entry->unlock_write_response();
Packit a4aae4
            delete entry; entry = 0;
Packit a4aae4
            unlock_cache_interface();
Packit a4aae4
            return false;
Packit a4aae4
        }
Packit a4aae4
Packit a4aae4
        if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
Packit a4aae4
            if (startGC())
Packit a4aae4
                perform_garbage_collection();
Packit a4aae4
Packit a4aae4
            d_http_cache_table->cache_index_write(); // resets new_entries
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
    catch (...) {
Packit a4aae4
        unlock_cache_interface();
Packit a4aae4
        throw;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    unlock_cache_interface();
Packit a4aae4
Packit a4aae4
    return true;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Build the headers to send along with a GET request to make that request
Packit a4aae4
    conditional. This method examines the headers for a given response in the
Packit a4aae4
    cache and formulates the correct headers for a valid HTTP 1.1 conditional
Packit a4aae4
    GET request. See RFC 2616, Section 13.3.4.
Packit a4aae4
Packit a4aae4
    Rules: If an ETag is present, it must be used. Use If-None-Match. If a
Packit a4aae4
    Last-Modified header is present, use it. Use If-Modified-Since. If both
Packit a4aae4
    are present, use both (this means that HTTP 1.0 daemons are more likely
Packit a4aae4
    to work). If a Last-Modified header is not present, use the value of the
Packit a4aae4
    Cache-Control max-age or Expires header(s). Note that a 'Cache-Control:
Packit a4aae4
    max-age' header overrides an Expires header (Sec 14.9.3).
Packit a4aae4
Packit a4aae4
    This method locks the cache interface and the cache entry.
Packit a4aae4
Packit a4aae4
    @param url Get the HTTPCacheTable::CacheEntry for this URL.
Packit a4aae4
    @return A vector of strings, one request header per string.
Packit a4aae4
    @exception Error Thrown if the \e url is not in the cache. */
Packit a4aae4
Packit a4aae4
vector<string>
Packit a4aae4
HTTPCache::get_conditional_request_headers(const string &url)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    HTTPCacheTable::CacheEntry *entry = 0;
Packit a4aae4
    vector<string> headers;
Packit a4aae4
Packit a4aae4
    DBG(cerr << "Getting conditional request headers for " << url << endl);
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
        entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
Packit a4aae4
        if (!entry)
Packit a4aae4
            throw Error(internal_error, "There is no cache entry for the URL: " + url);
Packit a4aae4
Packit a4aae4
        if (entry->get_etag() != "")
Packit a4aae4
            headers.push_back(string("If-None-Match: ") + entry->get_etag());
Packit a4aae4
Packit a4aae4
        if (entry->get_lm() > 0) {
Packit a4aae4
        	time_t lm = entry->get_lm();
Packit a4aae4
            headers.push_back(string("If-Modified-Since: ")
Packit a4aae4
                              + date_time_str(&lm);;
Packit a4aae4
        }
Packit a4aae4
        else if (entry->get_max_age() > 0) {
Packit a4aae4
        	time_t max_age = entry->get_max_age();
Packit a4aae4
            headers.push_back(string("If-Modified-Since: ")
Packit a4aae4
                              + date_time_str(&max_age));
Packit a4aae4
        }
Packit a4aae4
        else if (entry->get_expires() > 0) {
Packit a4aae4
        	time_t expires = entry->get_expires();
Packit a4aae4
            headers.push_back(string("If-Modified-Since: ")
Packit a4aae4
                              + date_time_str(&expires));
Packit a4aae4
        }
Packit a4aae4
        entry->unlock_read_response();
Packit a4aae4
        unlock_cache_interface();
Packit a4aae4
    }
Packit a4aae4
    catch (...) {
Packit a4aae4
	unlock_cache_interface();
Packit a4aae4
	if (entry) {
Packit a4aae4
	    entry->unlock_read_response();
Packit a4aae4
	}
Packit a4aae4
	throw;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    return headers;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Functor/Predicate which orders two MIME headers based on the header name
    only (discounting the value). The name is the text before the first ':';
    a string without a ':' is treated as being all name.

    std::binary_function, the former base class, is deprecated in C++11 and
    removed in C++17. The nested typedefs it supplied are declared here
    directly so that code which relies on them keeps compiling. */

struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    bool operator()(const std::string &s1, const std::string &s2) const {
        // Compare the two name prefixes in place. When find() returns npos
        // the length is clamped to the end of the string by compare().
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
Packit a4aae4
Packit a4aae4
/** Update the meta data for a response already in the cache. This method
Packit a4aae4
    provides a way to merge response headers returned from a conditional GET
Packit a4aae4
    request, for the given URL, with those already present.
Packit a4aae4
Packit a4aae4
    This method locks the class' interface and the cache entry.
Packit a4aae4
Packit a4aae4
    @param url Update the meta data for this cache entry.
Packit a4aae4
    @param request_time The time (Unix time, seconds since 1 Jan 1970) that
Packit a4aae4
    the conditional request was made.
Packit a4aae4
    @param headers New headers, one header per string, returned in the
Packit a4aae4
    response.
Packit a4aae4
    @exception Error Thrown if the \c url is not in the cache. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::update_response(const string &url, time_t request_time,
Packit a4aae4
                           const vector<string> &headers)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    HTTPCacheTable::CacheEntry *entry = 0;
Packit a4aae4
    DBG(cerr << "Updating the response headers for: " << url << endl);
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
        entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
Packit a4aae4
        if (!entry)
Packit a4aae4
            throw Error(internal_error, "There is no cache entry for the URL: " + url);
Packit a4aae4
Packit a4aae4
        // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
Packit a4aae4
        d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
Packit a4aae4
Packit a4aae4
        // Update corrected_initial_age, freshness_lifetime, response_time.
Packit a4aae4
        d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
Packit a4aae4
Packit a4aae4
        // Merge the new headers with those in the persistent store. How:
Packit a4aae4
        // Load the new headers into a set, then merge the old headers. Since
Packit a4aae4
        // set<> ignores duplicates, old headers with the same name as a new
Packit a4aae4
        // header will got into the bit bucket. Define a special compare
Packit a4aae4
        // functor to make sure that headers are compared using only their
Packit a4aae4
        // name and not their value too.
Packit a4aae4
        set<string, HeaderLess> merged_headers;
Packit a4aae4
Packit a4aae4
        // Load in the new headers
Packit a4aae4
        copy(headers.begin(), headers.end(),
Packit a4aae4
             inserter(merged_headers, merged_headers.begin()));
Packit a4aae4
Packit a4aae4
        // Get the old headers and load them in.
Packit a4aae4
        vector<string> old_headers;
Packit a4aae4
        read_metadata(entry->get_cachename(), old_headers);
Packit a4aae4
        copy(old_headers.begin(), old_headers.end(),
Packit a4aae4
             inserter(merged_headers, merged_headers.begin()));
Packit a4aae4
Packit a4aae4
        // Read the values back out. Use reverse iterators with back_inserter
Packit a4aae4
        // to preserve header order. NB: vector<> does not support push_front
Packit a4aae4
        // so we can't use front_inserter(). 01/09/03 jhrg
Packit a4aae4
        vector<string> result;
Packit a4aae4
        copy(merged_headers.rbegin(), merged_headers.rend(),
Packit a4aae4
             back_inserter(result));
Packit a4aae4
Packit a4aae4
        write_metadata(entry->get_cachename(), result);
Packit a4aae4
        entry->unlock_write_response();
Packit a4aae4
        unlock_cache_interface();
Packit a4aae4
    }
Packit a4aae4
    catch (...) {
Packit a4aae4
        if (entry) {
Packit a4aae4
            entry->unlock_read_response();
Packit a4aae4
        }
Packit a4aae4
        unlock_cache_interface();
Packit a4aae4
        throw;
Packit a4aae4
    }
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Look in the cache and return the status (validity) of the cached
Packit a4aae4
    response. This method should be used to determine if a cached response
Packit a4aae4
    requires validation.
Packit a4aae4
Packit a4aae4
    This method locks the class' interface and the cache entry.
Packit a4aae4
Packit a4aae4
    @param url Find the cached response associated with this URL.
Packit a4aae4
    @return True indicates that the response can be used, False indicates
Packit a4aae4
    that it must first be validated.
Packit a4aae4
    @exception Error Thrown if the URL's response is not in the cache. */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPCache::is_url_valid(const string &url)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    bool freshness;
Packit a4aae4
    HTTPCacheTable::CacheEntry *entry = 0;
Packit a4aae4
Packit a4aae4
    DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
        if (d_always_validate) {
Packit a4aae4
            unlock_cache_interface();
Packit a4aae4
            return false;  // force re-validation.
Packit a4aae4
        }
Packit a4aae4
Packit a4aae4
        entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
Packit a4aae4
        if (!entry)
Packit a4aae4
            throw Error(internal_error, "There is no cache entry for the URL: " + url);
Packit a4aae4
Packit a4aae4
        // If we supported range requests, we'd need code here to check if
Packit a4aae4
        // there was only a partial response in the cache. 10/02/02 jhrg
Packit a4aae4
Packit a4aae4
        // In case this entry is of type "must-revalidate" then we consider it
Packit a4aae4
        // invalid.
Packit a4aae4
        if (entry->get_must_revalidate()) {
Packit a4aae4
            entry->unlock_read_response();
Packit a4aae4
            unlock_cache_interface();
Packit a4aae4
            return false;
Packit a4aae4
        }
Packit a4aae4
Packit a4aae4
        time_t resident_time = time(NULL) - entry->get_response_time();
Packit a4aae4
        time_t current_age = entry->get_corrected_initial_age() + resident_time;
Packit a4aae4
Packit a4aae4
        // Check that the max-age, max-stale, and min-fresh directives
Packit a4aae4
        // given in the request cache control header is followed.
Packit a4aae4
        if (d_max_age >= 0 && current_age > d_max_age) {
Packit a4aae4
            DBG(cerr << "Cache....... Max-age validation" << endl);
Packit a4aae4
            entry->unlock_read_response();
Packit a4aae4
            unlock_cache_interface();
Packit a4aae4
            return false;
Packit a4aae4
        }
Packit a4aae4
        if (d_min_fresh >= 0
Packit a4aae4
            && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
Packit a4aae4
            DBG(cerr << "Cache....... Min-fresh validation" << endl);
Packit a4aae4
            entry->unlock_read_response();
Packit a4aae4
            unlock_cache_interface();
Packit a4aae4
            return false;
Packit a4aae4
        }
Packit a4aae4
Packit a4aae4
        freshness = (entry->get_freshness_lifetime()
Packit a4aae4
                     + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
Packit a4aae4
        entry->unlock_read_response();
Packit a4aae4
        unlock_cache_interface();
Packit a4aae4
    }
Packit a4aae4
    catch (...) {
Packit a4aae4
    	if (entry) {
Packit a4aae4
    	    entry->unlock_read_response();
Packit a4aae4
    	}
Packit a4aae4
    	unlock_cache_interface();
Packit a4aae4
        throw;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    return freshness;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Get information from the cache. For a given URL, get the headers, cache
Packit a4aae4
    object name and body
Packit a4aae4
    stored in the cache. Note that this method increments the hit counter for
Packit a4aae4
    url's entry and \e locks that entry. To release the lock,
Packit a4aae4
    the method release_cached_response() \e must be called. Methods that
Packit a4aae4
    block on a locked entry are: get_conditional_request_headers(),
Packit a4aae4
    update_response() and is_url_valid(). In addition, purge_cache() throws
Packit a4aae4
    Error if it's called and any entries are locked. The garbage collection
Packit a4aae4
    system will not reclaim locked entries (but works fine when some entries
Packit a4aae4
    are locked).
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    This method does \e not check to see that the response is valid, just
Packit a4aae4
    that it is in the cache. To see if a cached response is valid, use
Packit a4aae4
    is_url_valid(). The FILE* returned can be used for both reading and
Packit a4aae4
    writing. The latter allows a client to update the body of a cached
Packit a4aae4
    response without having to first dump it all to a separate file and then
Packit a4aae4
    copy it into the cache (using cache_response()).
Packit a4aae4
Packit a4aae4
    @param url Get response information for this URL.
Packit a4aae4
    @param headers Return the response headers in this parameter
Packit a4aae4
    @param cacheName A value-result parameter; the name of the cache file
Packit a4aae4
    @return A FILE * to the response body.
Packit a4aae4
    @exception Error Thrown if the URL's response is not in the cache.
Packit a4aae4
    @exception InternalErr Thrown if the persistent store cannot be opened. */
Packit a4aae4
Packit a4aae4
FILE * HTTPCache::get_cached_response(const string &url,
Packit a4aae4
		vector<string> &headers, string &cacheName) {
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    FILE *body = 0;
Packit a4aae4
    HTTPCacheTable::CacheEntry *entry = 0;
Packit a4aae4
Packit a4aae4
    DBG(cerr << "Getting the cached response for " << url << endl);
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
        entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
Packit a4aae4
        if (!entry) {
Packit a4aae4
        	unlock_cache_interface();
Packit a4aae4
        	return 0;
Packit a4aae4
        }
Packit a4aae4
Packit a4aae4
        cacheName = entry->get_cachename();
Packit a4aae4
        read_metadata(entry->get_cachename(), headers);
Packit a4aae4
Packit a4aae4
        DBG(cerr << "Headers just read from cache: " << endl);
Packit a4aae4
        DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
Packit a4aae4
Packit a4aae4
        body = open_body(entry->get_cachename());
Packit a4aae4
Packit a4aae4
        DBG(cerr << "Returning: " << url << " from the cache." << endl);
Packit a4aae4
Packit a4aae4
        d_http_cache_table->bind_entry_to_data(entry, body);
Packit a4aae4
    }
Packit a4aae4
    catch (...) {
Packit a4aae4
    	// Why make this unlock operation conditional on entry?
Packit a4aae4
        if (entry)
Packit a4aae4
        	unlock_cache_interface();
Packit a4aae4
        if (body != 0)
Packit a4aae4
            fclose(body);
Packit a4aae4
        throw;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    unlock_cache_interface();
Packit a4aae4
Packit a4aae4
    return body;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Get information from the cache. This is a convenience method that calls
Packit a4aae4
 	the three parameter version of get_cache_response().
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @param url Get response information for this URL.
Packit a4aae4
    @param headers Return the response headers in this parameter
Packit a4aae4
    @return A FILE * to the response body.
Packit a4aae4
    @exception Error Thrown if the URL's response is not in the cache.
Packit a4aae4
    @exception InternalErr Thrown if the persistent store cannot be opened. */
Packit a4aae4
Packit a4aae4
FILE *
Packit a4aae4
HTTPCache::get_cached_response(const string &url, vector<string> &headers)
Packit a4aae4
{
Packit a4aae4
	string discard_name;
Packit a4aae4
	return get_cached_response(url, headers, discard_name);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Get a pointer to a cached response body. This is a convenience method that
Packit a4aae4
 	calls the three parameter version of get_cache_response().
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @param url Find the body associated with this URL.
Packit a4aae4
    @return A FILE* that points to the response body.
Packit a4aae4
    @exception Error Thrown if the URL is not in the cache.
Packit a4aae4
    @exception InternalErr Thrown if an I/O error is detected. */
Packit a4aae4
Packit a4aae4
FILE *
Packit a4aae4
HTTPCache::get_cached_response(const string &url)
Packit a4aae4
{
Packit a4aae4
	string discard_name;
Packit a4aae4
	vector<string> discard_headers;
Packit a4aae4
	return get_cached_response(url, discard_headers, discard_name);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Call this method to inform the cache that a particular response is no
Packit a4aae4
    longer in use. When a response is accessed using get_cached_response(), it
Packit a4aae4
    is locked so that updates and removal (e.g., by the garbage collector)
Packit a4aae4
    are not possible. Calling this method frees that lock.
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @param body Release the lock on the response information associated with
Packit a4aae4
    this FILE *.
Packit a4aae4
    @exception Error Thrown if \c body does not belong to an entry in the
Packit a4aae4
    cache or if the entry was already released. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::release_cached_response(FILE *body)
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
    	// fclose(body); This results in a seg fault on linux jhrg 8/27/13
Packit a4aae4
    	d_http_cache_table->uncouple_entry_from_data(body);
Packit a4aae4
    }
Packit a4aae4
    catch (...) {
Packit a4aae4
        unlock_cache_interface();
Packit a4aae4
        throw;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    unlock_cache_interface();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Purge both the in-memory cache table and the contents of the cache on
Packit a4aae4
    disk. This method deletes every entry in the persistent store but leaves
Packit a4aae4
    the structure intact. The client of HTTPCache is responsible for making
Packit a4aae4
    sure that all threads have released any responses they pulled from the
Packit a4aae4
    cache. If this method is called when a response is still in use, it will
Packit a4aae4
    throw an Error object and not purge the cache.
Packit a4aae4
Packit a4aae4
    This method locks the class' interface.
Packit a4aae4
Packit a4aae4
    @exception Error Thrown if an attempt is made to purge the cache when
Packit a4aae4
    an entry is still in use. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCache::purge_cache()
Packit a4aae4
{
Packit a4aae4
    lock_cache_interface();
Packit a4aae4
Packit a4aae4
    try {
Packit a4aae4
        if (d_http_cache_table->is_locked_read_responses())
Packit a4aae4
            throw Error(internal_error, "Attempt to purge the cache with entries in use.");
Packit a4aae4
Packit a4aae4
        d_http_cache_table->delete_all_entries();
Packit a4aae4
    }
Packit a4aae4
    catch (...) {
Packit a4aae4
        unlock_cache_interface();
Packit a4aae4
        throw;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    unlock_cache_interface();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
} // namespace libdap