|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// -*- mode: c++; c-basic-offset:4 -*-
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
|
|
Packit |
a4aae4 |
// Access Protocol.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Copyright (c) 2002,2003 OPeNDAP, Inc.
|
|
Packit |
a4aae4 |
// Author: James Gallagher <jgallagher@opendap.org>
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// This library is free software; you can redistribute it and/or
|
|
Packit |
a4aae4 |
// modify it under the terms of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
// License as published by the Free Software Foundation; either
|
|
Packit |
a4aae4 |
// version 2.1 of the License, or (at your option) any later version.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// This library is distributed in the hope that it will be useful,
|
|
Packit |
a4aae4 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
a4aae4 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit |
a4aae4 |
// Lesser General Public License for more details.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// You should have received a copy of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
// License along with this library; if not, write to the Free Software
|
|
Packit |
a4aae4 |
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include "config.h"
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// #define DODS_DEBUG
|
|
Packit |
a4aae4 |
// #define DODS_DEBUG2
|
|
Packit |
a4aae4 |
#undef USE_GETENV
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <pthread.h>
|
|
Packit |
a4aae4 |
#include <limits.h>
|
|
Packit |
a4aae4 |
#include <unistd.h> // for stat
|
|
Packit |
a4aae4 |
#include <sys/types.h> // for stat and mkdir
|
|
Packit |
a4aae4 |
#include <sys/stat.h>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <cstring>
|
|
Packit |
a4aae4 |
#include <cerrno>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <iostream>
|
|
Packit |
a4aae4 |
#include <sstream>
|
|
Packit |
a4aae4 |
#include <algorithm>
|
|
Packit |
a4aae4 |
#include <iterator>
|
|
Packit |
a4aae4 |
#include <set>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include "Error.h"
|
|
Packit |
a4aae4 |
#include "InternalErr.h"
|
|
Packit |
a4aae4 |
#include "ResponseTooBigErr.h"
|
|
Packit |
a4aae4 |
#ifndef WIN32
|
|
Packit |
a4aae4 |
#include "SignalHandler.h"
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
#include "HTTPCacheInterruptHandler.h"
|
|
Packit |
a4aae4 |
#include "HTTPCacheTable.h"
|
|
Packit |
a4aae4 |
#include "HTTPCache.h"
|
|
Packit |
a4aae4 |
#include "HTTPCacheMacros.h"
|
|
Packit |
a4aae4 |
#include "SignalHandlerRegisteredErr.h"
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include "util_mit.h"
|
|
Packit |
a4aae4 |
#include "debug.h"
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
using namespace std;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
namespace libdap {
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
HTTPCache *HTTPCache::_instance = 0;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// instance_mutex is used to ensure that only one instance is created.
// That is, it protects the body of the HTTPCache::instance() method. This
// mutex is initialized from within the static function once_init_routine(),
// and that function is invoked through pthread_once() using the once-control
// variable once_block, so the initialization runs exactly one time. All of
// this ensures that no matter how many threads call the instance() method,
// only one instance is ever made.
|
|
Packit |
a4aae4 |
static pthread_mutex_t instance_mutex;
|
|
Packit |
a4aae4 |
static pthread_once_t once_block = PTHREAD_ONCE_INIT;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#define NO_LM_EXPIRATION 24*3600 // 24 hours
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#define DUMP_FREQUENCY 10 // Dump index every x loads
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#define MEGA 0x100000L
|
|
Packit |
a4aae4 |
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
|
|
Packit |
a4aae4 |
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
|
|
Packit |
a4aae4 |
#define CACHE_GC_PCT 10 // 10% of cache size free after GC
|
|
Packit |
a4aae4 |
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
|
|
Packit |
a4aae4 |
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
static void
|
|
Packit |
a4aae4 |
once_init_routine()
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
int status;
|
|
Packit |
a4aae4 |
status = INIT(&instance_mutex);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if (status != 0)
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Get a pointer to the HTTP 1.1 compliant cache. If not already
|
|
Packit |
a4aae4 |
instantiated, this creates an instance of the HTTP cache object and
|
|
Packit |
a4aae4 |
initializes it to use \c cache_root as the location of the persistent
|
|
Packit |
a4aae4 |
store. If there's an index (\c .index) file in that directory, it is read
|
|
Packit |
a4aae4 |
as part of the initialization. If the cache has already been initialized,
|
|
Packit |
a4aae4 |
this method returns a pointer to that instance. Note HTTPCache uses the
|
|
Packit |
a4aae4 |
singleton pattern; A process may have only one instance of this object.
|
|
Packit |
a4aae4 |
Also note that HTTPCache is MT-safe. However, if the \c force parameter
|
|
Packit |
a4aae4 |
is set to true, it may be possible for two or more processes to access
|
|
Packit |
a4aae4 |
the persistent store at the same time resulting in undefined behavior.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Default values: is_cache_enabled(): true, is_cache_protected(): false,
|
|
Packit |
a4aae4 |
is_expire_ignored(): false, the total size of the cache is 20M, 2M of that
|
|
Packit |
a4aae4 |
is reserved for response headers, during GC the cache is reduced to at
|
|
Packit |
a4aae4 |
least 18M (total size - 10% of the total size), and the max size for an
|
|
Packit |
a4aae4 |
individual entry is 3M. It is possible to change the size of the cache,
|
|
Packit |
a4aae4 |
but not to make it smaller than 5M. If expiration information is not sent
|
|
Packit |
a4aae4 |
with a response, it is assumed to expire in 24 hours.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param cache_root The fully qualified pathname of the directory which
|
|
Packit |
a4aae4 |
will hold the cache data (i.e., the persistent store).
|
|
Packit |
a4aae4 |
@param force Force access to the persistent store if true. By default
|
|
Packit |
a4aae4 |
false. Use this only if you're sure no one else is using the same cache
|
|
Packit |
a4aae4 |
root! This is included so that programs may use a cache that was
|
|
Packit |
a4aae4 |
left in an inconsistent state.
|
|
Packit |
a4aae4 |
@return A pointer to the HTTPCache object.
|
|
Packit |
a4aae4 |
@exception Error thrown if the cache root cannot set. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
HTTPCache *
|
|
Packit |
a4aae4 |
HTTPCache::instance(const string &cache_root, bool force)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
int status = pthread_once(&once_block, once_init_routine);
|
|
Packit |
a4aae4 |
if (status != 0)
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
LOCK(&instance_mutex);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" << "... ");
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
if (!_instance) {
|
|
Packit |
a4aae4 |
_instance = new HTTPCache(cache_root, force);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "New instance: " << _instance << ", cache root: "
|
|
Packit |
a4aae4 |
<< _instance->d_cache_root << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
atexit(delete_instance);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#ifndef WIN32
|
|
Packit |
a4aae4 |
// Register the interrupt handler. If we've already registered
|
|
Packit |
a4aae4 |
// one, barf. If this becomes a problem, hack SignalHandler so
|
|
Packit |
a4aae4 |
// that we can chain these handlers... 02/10/04 jhrg
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// Technically we're leaking memory here. However, since this
|
|
Packit |
a4aae4 |
// class is a singleton, we know that only three objects will
|
|
Packit |
a4aae4 |
// ever be created and they will all exist until the process
|
|
Packit |
a4aae4 |
// exits. We can let this slide... 02/12/04 jhrg
|
|
Packit |
a4aae4 |
EventHandler *old_eh = SignalHandler::instance()->register_handler(SIGINT, new HTTPCacheInterruptHandler, true);
|
|
Packit |
a4aae4 |
if (old_eh) {
|
|
Packit |
a4aae4 |
SignalHandler::instance()->register_handler(SIGINT, old_eh);
|
|
Packit |
a4aae4 |
throw SignalHandlerRegisteredErr(
|
|
Packit |
a4aae4 |
"Could not register event handler for SIGINT without superseding an existing one.");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
old_eh = SignalHandler::instance()->register_handler(SIGPIPE, new HTTPCacheInterruptHandler, true);
|
|
Packit |
a4aae4 |
if (old_eh) {
|
|
Packit |
a4aae4 |
SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
|
|
Packit |
a4aae4 |
throw SignalHandlerRegisteredErr(
|
|
Packit |
a4aae4 |
"Could not register event handler for SIGPIPE without superseding an existing one.");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
old_eh = SignalHandler::instance()->register_handler(SIGTERM, new HTTPCacheInterruptHandler, true);
|
|
Packit |
a4aae4 |
if (old_eh) {
|
|
Packit |
a4aae4 |
SignalHandler::instance()->register_handler(SIGTERM, old_eh);
|
|
Packit |
a4aae4 |
throw SignalHandlerRegisteredErr(
|
|
Packit |
a4aae4 |
"Could not register event handler for SIGTERM without superseding an existing one.");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (...) {
|
|
Packit |
a4aae4 |
DBG2(cerr << "The constructor threw an Error!" << endl);
|
|
Packit |
a4aae4 |
UNLOCK(&instance_mutex);
|
|
Packit |
a4aae4 |
throw;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
UNLOCK(&instance_mutex);
|
|
Packit |
a4aae4 |
DBGN(cerr << "returning " << hex << _instance << dec << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
return _instance;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** This static method is called using atexit(). It deletes the singleton;
|
|
Packit |
a4aae4 |
see ~HTTPCache for all that implies. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::delete_instance()
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
DBG(cerr << "Entering delete_instance()..." << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if (HTTPCache::_instance) {
|
|
Packit |
a4aae4 |
DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
|
|
Packit |
a4aae4 |
delete HTTPCache::_instance;
|
|
Packit |
a4aae4 |
HTTPCache::_instance = 0;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
//Now remove the signal handlers
|
|
Packit |
a4aae4 |
delete SignalHandler::instance()->remove_handler(SIGINT);
|
|
Packit |
a4aae4 |
delete SignalHandler::instance()->remove_handler(SIGPIPE);
|
|
Packit |
a4aae4 |
delete SignalHandler::instance()->remove_handler(SIGTERM);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "Exiting delete_instance()" << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Create an instance of the HTTP 1.1 compliant cache. This initializes the
|
|
Packit |
a4aae4 |
both the cache root and the path to the index file. It then reads the
|
|
Packit |
a4aae4 |
cache index file if one is present.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@note This assumes that the cache directory structure should be created!
|
|
Packit |
a4aae4 |
@param cache_root The fully qualified pathname of the directory which
|
|
Packit |
a4aae4 |
will hold the cache data.
|
|
Packit |
a4aae4 |
@param force Force access to the persistent store!
|
|
Packit |
a4aae4 |
@exception Error Thrown if the single user/process lock for the
|
|
Packit |
a4aae4 |
persistent store cannot be obtained.
|
|
Packit |
a4aae4 |
@see cache_index_read */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
HTTPCache::HTTPCache(string cache_root, bool force) :
|
|
Packit |
a4aae4 |
d_locked_open_file(0),
|
|
Packit |
a4aae4 |
d_cache_enabled(false),
|
|
Packit |
a4aae4 |
d_cache_protected(false),
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_cache_disconnected(DISCONNECT_NONE),
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_expire_ignored(false),
|
|
Packit |
a4aae4 |
d_always_validate(false),
|
|
Packit |
a4aae4 |
d_total_size(CACHE_TOTAL_SIZE * MEGA),
|
|
Packit |
a4aae4 |
d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
|
|
Packit |
a4aae4 |
d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
|
|
Packit |
a4aae4 |
d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
|
|
Packit |
a4aae4 |
d_default_expiration(NO_LM_EXPIRATION),
|
|
Packit |
a4aae4 |
d_max_age(-1),
|
|
Packit |
a4aae4 |
d_max_stale(-1),
|
|
Packit |
a4aae4 |
d_min_fresh(-1),
|
|
Packit |
a4aae4 |
d_http_cache_table(0)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
DBG(cerr << "Entering the constructor for " << this << "... ");
|
|
Packit |
a4aae4 |
#if 0
|
|
Packit |
a4aae4 |
int status = pthread_once(&once_block, once_init_routine);
|
|
Packit |
a4aae4 |
if (status != 0)
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
INIT(&d_cache_mutex);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// This used to throw an Error object if we could not get the
|
|
Packit |
a4aae4 |
// single user lock. However, that results in an invalid object. It's
|
|
Packit |
a4aae4 |
// better to have an instance that has default values. If we cannot get
|
|
Packit |
a4aae4 |
// the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// I fixed this block so that the cache root is set before we try to get
|
|
Packit |
a4aae4 |
// the single user lock. That was the fix for bug #661. To make that
|
|
Packit |
a4aae4 |
// work, I had to move the call to create_cache_root out of
|
|
Packit |
a4aae4 |
// set_cache_root(). 09/08/03 jhrg
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
set_cache_root(cache_root);
|
|
Packit |
a4aae4 |
int block_size;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if (!get_single_user_lock(force))
|
|
Packit |
a4aae4 |
throw Error(internal_error, "Could not get single user lock for the cache");
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#ifdef WIN32
|
|
Packit |
a4aae4 |
// Windows is unable to provide us this information. 4096 appears
|
|
Packit |
a4aae4 |
// a best guess. It is likely to be in the range [2048, 8192] on
|
|
Packit |
a4aae4 |
// windows, but will the level of truth of that statement vary over
|
|
Packit |
a4aae4 |
// time ?
|
|
Packit |
a4aae4 |
block_size = 4096;
|
|
Packit |
a4aae4 |
#else
|
|
Packit |
a4aae4 |
struct stat s;
|
|
Packit |
a4aae4 |
if (stat(cache_root.c_str(), &s) == 0)
|
|
Packit |
a4aae4 |
block_size = s.st_blksize;
|
|
Packit |
a4aae4 |
else
|
|
Packit |
a4aae4 |
throw Error(internal_error, "Could not set file system block size.");
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
|
|
Packit |
a4aae4 |
d_cache_enabled = true;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBGN(cerr << "exiting" << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Destroy an instance of HTTPCache. This writes the cache index and frees
|
|
Packit |
a4aae4 |
the in-memory cache table structure. The persistent cache (the response
|
|
Packit |
a4aae4 |
headers and bodies and the index file) are not removed. To remove those,
|
|
Packit |
a4aae4 |
either erase the directory that contains the cache using a file system
|
|
Packit |
a4aae4 |
command or use the purge_cache() method (which leaves the cache directory
|
|
Packit |
a4aae4 |
structure in place but removes all the cached information).
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This class uses the singleton pattern. Clients should \e never call this
|
|
Packit |
a4aae4 |
method. The HTTPCache::instance() method arranges to call the
|
|
Packit |
a4aae4 |
HTTPCache::delete_instance() using \c atexit(). If delete is called more
|
|
Packit |
a4aae4 |
than once, the result will likely be an index file that is corrupt. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
HTTPCache::~HTTPCache()
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
DBG(cerr << "Entering the destructor for " << this << "... ");
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
if (startGC())
|
|
Packit |
a4aae4 |
perform_garbage_collection();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_http_cache_table->cache_index_write();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (Error &e) {
|
|
Packit |
a4aae4 |
// If the cache index cannot be written, we've got problems. However,
|
|
Packit |
a4aae4 |
// unless we're debugging, still free up the cache table in memory.
|
|
Packit |
a4aae4 |
// How should we let users know they cache index is not being
|
|
Packit |
a4aae4 |
// written?? 10/03/02 jhrg
|
|
Packit |
a4aae4 |
DBG(cerr << e.get_error_message() << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
delete d_http_cache_table;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
release_single_user_lock();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBGN(cerr << "exiting destructor." << endl);
|
|
Packit |
a4aae4 |
DESTROY(&d_cache_mutex);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** @name Garbage collection
|
|
Packit |
a4aae4 |
These private methods manage the garbage collection tasks for the cache. */
|
|
Packit |
a4aae4 |
//@{
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Enough removed from cache? A private method.
|
|
Packit |
a4aae4 |
@return True if enough has been removed from the cache. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool
|
|
Packit |
a4aae4 |
HTTPCache::stopGC() const
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Is there too much in the cache. A private method.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@todo Modify this method so that it does not count locked entries. See
|
|
Packit |
a4aae4 |
the note for hits_gc().
|
|
Packit |
a4aae4 |
@return True if garbage collection should be performed. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool
|
|
Packit |
a4aae4 |
HTTPCache::startGC() const
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
|
|
Packit |
a4aae4 |
return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Perform garbage collection on the cache. First, all expired responses are
|
|
Packit |
a4aae4 |
removed. Then, if the size of the cache is still too large, the cache is
|
|
Packit |
a4aae4 |
scanned for responses larger than the max_entry_size property. At the
|
|
Packit |
a4aae4 |
same time, responses are removed based on the number of cache hits. This
|
|
Packit |
a4aae4 |
process continues until the size of the cache has been reduced to 90% of
|
|
Packit |
a4aae4 |
the max_size property value. Once the garbage collection is complete,
|
|
Packit |
a4aae4 |
update the index file. Note that locked entries are not removed!
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@see stopGC
|
|
Packit |
a4aae4 |
@see expired_gc
|
|
Packit |
a4aae4 |
@see hits_gc */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::perform_garbage_collection()
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
DBG(cerr << "Performing garbage collection" << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Remove all the expired responses.
|
|
Packit |
a4aae4 |
expired_gc();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Remove entries larger than max_entry_size.
|
|
Packit |
a4aae4 |
too_big_gc();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Remove entries starting with zero hits, 1, ..., until stopGC()
|
|
Packit |
a4aae4 |
// returns true.
|
|
Packit |
a4aae4 |
hits_gc();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Scan the current cache table and remove anything that has expired. Don't
|
|
Packit |
a4aae4 |
remove locked entries.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::expired_gc()
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
if (!d_expire_ignored) {
|
|
Packit |
a4aae4 |
d_http_cache_table->delete_expired_entries();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Scan the cache for entires that are larger than max_entry_size. Also
|
|
Packit |
a4aae4 |
start removing entires with low hit counts. Start looking for entries
|
|
Packit |
a4aae4 |
with zero hits, then one, and so on. Stop when the method stopGC returns
|
|
Packit |
a4aae4 |
true. Locked entries are never removed.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@note Potential infinite loop. What if more than 80% of the cache holds
|
|
Packit |
a4aae4 |
entries that are locked? One solution is to modify startGC() so that it
|
|
Packit |
a4aae4 |
does not count locked entries.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@todo Change this method to that it looks at the oldest entries first,
|
|
Packit |
a4aae4 |
using the CacheEntry::date to determine entry age. Using the current
|
|
Packit |
a4aae4 |
algorithm it's possible to remove the latest entry which is probably not
|
|
Packit |
a4aae4 |
what we want.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::hits_gc()
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
int hits = 0;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if (startGC()) {
|
|
Packit |
a4aae4 |
while (!stopGC()) {
|
|
Packit |
a4aae4 |
d_http_cache_table->delete_by_hits(hits);
|
|
Packit |
a4aae4 |
hits++;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Scan the current cache table and remove anything that has is too big.
|
|
Packit |
a4aae4 |
Don't remove locked entries.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method. */
|
|
Packit |
a4aae4 |
void HTTPCache::too_big_gc() {
|
|
Packit |
a4aae4 |
if (startGC())
|
|
Packit |
a4aae4 |
d_http_cache_table->delete_by_size(d_max_entry_size);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
//@} End of the garbage collection methods.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Lock the persistent store part of the cache. Return true if the cache lock
|
|
Packit |
a4aae4 |
was acquired, false otherwise. This is a single user cache, so it
|
|
Packit |
a4aae4 |
requires locking at the process level.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param force If True force access to the persistent store. False by
|
|
Packit |
a4aae4 |
default.
|
|
Packit |
a4aae4 |
@return True if the cache was locked for our use, False otherwise. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool HTTPCache::get_single_user_lock(bool force)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
if (!d_locked_open_file) {
|
|
Packit |
a4aae4 |
FILE * fp = NULL;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
// It's OK to call create_cache_root if the directory already
|
|
Packit |
a4aae4 |
// exists.
|
|
Packit |
a4aae4 |
create_cache_root(d_cache_root);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (Error &e) {
|
|
Packit |
a4aae4 |
// We need to catch and return false because this method is
|
|
Packit |
a4aae4 |
// called from a ctor and throwing at this point will result in a
|
|
Packit |
a4aae4 |
// partially constructed object. 01/22/04 jhrg
|
|
Packit |
a4aae4 |
DBG(cerr << "Failure to create the cache root" << endl);
|
|
Packit |
a4aae4 |
return false;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Try to read the lock file. If we can open for reading, it exists.
|
|
Packit |
a4aae4 |
string lock = d_cache_root + CACHE_LOCK;
|
|
Packit |
a4aae4 |
if ((fp = fopen(lock.c_str(), "r")) != NULL) {
|
|
Packit |
a4aae4 |
int res = fclose(fp);
|
|
Packit |
a4aae4 |
if (res) {
|
|
Packit |
a4aae4 |
DBG(cerr << "Failed to close " << (void *)fp << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
if (force)
|
|
Packit |
a4aae4 |
REMOVE(lock.c_str());
|
|
Packit |
a4aae4 |
else
|
|
Packit |
a4aae4 |
return false;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if ((fp = fopen(lock.c_str(), "w")) == NULL) {
|
|
Packit |
a4aae4 |
DBG(cerr << "Could not open for write access" << endl);
|
|
Packit |
a4aae4 |
return false;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_locked_open_file = fp;
|
|
Packit |
a4aae4 |
return true;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "locked_open_file is true" << endl);
|
|
Packit |
a4aae4 |
return false;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Release the single user (process) lock. A private method. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::release_single_user_lock()
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
if (d_locked_open_file) {
|
|
Packit |
a4aae4 |
int res = fclose(d_locked_open_file);
|
|
Packit |
a4aae4 |
if (res) {
|
|
Packit |
a4aae4 |
DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
d_locked_open_file = 0;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
string lock = d_cache_root + CACHE_LOCK;
|
|
Packit |
a4aae4 |
REMOVE(lock.c_str());
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** @name Accessors and Mutators for various properties. */
|
|
Packit |
a4aae4 |
//@{
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Get the current cache root directory.
|
|
Packit |
a4aae4 |
@return A string that contains the cache root directory. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
string
|
|
Packit |
a4aae4 |
HTTPCache::get_cache_root() const
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
return d_cache_root;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Create the cache's root directory. This is the persistent store used by
|
|
Packit |
a4aae4 |
the cache. Paths must always end in DIR_SEPARATOR_CHAR.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param cache_root The pathname to the desired cache root directory.
|
|
Packit |
a4aae4 |
@exception Error Thrown if the given pathname cannot be created. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::create_cache_root(const string &cache_root)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
#ifdef WIN32
|
|
Packit |
a4aae4 |
string::size_type cur = cache_root[1] == ':' ? 3 : 1;
|
|
Packit |
a4aae4 |
typedef int mode_t;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
|
|
Packit |
a4aae4 |
string dir = cache_root.substr(0, cur);
|
|
Packit |
a4aae4 |
struct stat stat_info;
|
|
Packit |
a4aae4 |
if (stat(dir.c_str(), &stat_info) == -1) {
|
|
Packit |
a4aae4 |
DBG2(cerr << "Cache....... Creating " << dir << endl);
|
|
Packit |
a4aae4 |
mode_t mask = UMASK(0);
|
|
Packit |
a4aae4 |
if (MKDIR(dir.c_str(), 0777) < 0) {
|
|
Packit |
a4aae4 |
DBG2(cerr << "Error: can't create." << endl);
|
|
Packit |
a4aae4 |
UMASK(mask);
|
|
Packit |
a4aae4 |
throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
UMASK(mask);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
else {
|
|
Packit |
a4aae4 |
DBG2(cerr << "Cache....... Found " << dir << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
cur++;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
#else
|
|
Packit |
a4aae4 |
// OSX and Linux
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Save the mask
|
|
Packit |
a4aae4 |
mode_t mask = umask(0);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Ignore the error if the directory exists
|
|
Packit |
a4aae4 |
errno = 0;
|
|
Packit |
a4aae4 |
if (mkdir(cache_root.c_str(), 0777) < 0 && errno != EEXIST) {
|
|
Packit |
a4aae4 |
umask(mask);
|
|
Packit |
a4aae4 |
throw Error("Could not create the directory for the cache at '" + cache_root + "' (" + strerror(errno) + ").");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Restore themask
|
|
Packit |
a4aae4 |
umask(mask);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Set the cache's root directory to the given path. If no path is given,
|
|
Packit |
a4aae4 |
look at the DODS_CACHE, TMP and TEMP environment variables (in that
|
|
Packit |
a4aae4 |
order) to guess at a good location. If those are all NULL, use \c /tmp.
|
|
Packit |
a4aae4 |
If the cache root directory cannot be created, throw an exception.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Note that in most cases callers should look for this path in the user's
|
|
Packit |
a4aae4 |
.dodsrc file.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@see RCReader
|
|
Packit |
a4aae4 |
@param root Set the cache root to this pathname. Defaults to "".
|
|
Packit |
a4aae4 |
@exception Error Thrown if the path can neither be deduced nor created. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::set_cache_root(const string &root)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
if (root != "") {
|
|
Packit |
a4aae4 |
d_cache_root = root;
|
|
Packit |
a4aae4 |
// cache root should end in /.
|
|
Packit |
a4aae4 |
if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
|
|
Packit |
a4aae4 |
d_cache_root += DIR_SEPARATOR_CHAR;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
else {
|
|
Packit |
a4aae4 |
// If no cache root has been indicated then look for a suitable
|
|
Packit |
a4aae4 |
// location.
|
|
Packit |
a4aae4 |
#ifdef USE_GETENV
|
|
Packit |
a4aae4 |
char * cr = (char *) getenv("DODS_CACHE");
|
|
Packit |
a4aae4 |
if (!cr) cr = (char *) getenv("TMP");
|
|
Packit |
a4aae4 |
if (!cr) cr = (char *) getenv("TEMP");
|
|
Packit |
a4aae4 |
if (!cr) cr = (char*)CACHE_LOCATION;
|
|
Packit |
a4aae4 |
d_cache_root = cr;
|
|
Packit |
a4aae4 |
#else
|
|
Packit |
a4aae4 |
d_cache_root = CACHE_LOCATION;
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
|
|
Packit |
a4aae4 |
d_cache_root += DIR_SEPARATOR_CHAR;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_cache_root += CACHE_ROOT;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Test d_hhtp_cache_table because this method can be called before that
|
|
Packit |
a4aae4 |
// instance is created and also can be called later to change the cache
|
|
Packit |
a4aae4 |
// root. jhrg 05.14.08
|
|
Packit |
a4aae4 |
if (d_http_cache_table)
|
|
Packit |
a4aae4 |
d_http_cache_table->set_cache_root(d_cache_root);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Enable or disable the cache. The cache can be temporarily suspended using
|
|
Packit |
a4aae4 |
the enable/disable property. This does not prevent the cache from being
|
|
Packit |
a4aae4 |
enabled/disable at a later point in time.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Default: yes
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param mode True if the cache should be enabled, False if it should be
|
|
Packit |
a4aae4 |
disabled. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::set_cache_enabled(bool mode)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_cache_enabled = mode;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Is the cache currently enabled? */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool
|
|
Packit |
a4aae4 |
HTTPCache::is_cache_enabled() const
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
|
|
Packit |
a4aae4 |
<< endl);
|
|
Packit |
a4aae4 |
return d_cache_enabled;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Set the cache's disconnected property. The cache can operate either
|
|
Packit |
a4aae4 |
disconnected from the network or using a proxy cache (but tell that proxy
|
|
Packit |
a4aae4 |
not to use the network).
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param mode One of DISCONNECT_NONE, DISCONNECT_NORMAL or
|
|
Packit |
a4aae4 |
DISCONNECT_EXTERNAL.
|
|
Packit |
a4aae4 |
@see CacheDIsconnectedMode */
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_cache_disconnected = mode;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Get the cache's disconnected mode property. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
CacheDisconnectedMode
|
|
Packit |
a4aae4 |
HTTPCache::get_cache_disconnected() const
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
return d_cache_disconnected;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** How should the cache handle the Expires header?
|
|
Packit |
a4aae4 |
Default: no
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param mode True if a responses Expires header should be ignored, False
|
|
Packit |
a4aae4 |
otherwise. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::set_expire_ignored(bool mode)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_expire_ignored = mode;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/* Is the cache ignoring Expires headers returned with responses that have
|
|
Packit |
a4aae4 |
been cached? */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool
|
|
Packit |
a4aae4 |
HTTPCache::is_expire_ignored() const
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
return d_expire_ignored;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Cache size management. The default cache size is 20M. The minimum size is
|
|
Packit |
a4aae4 |
5M in order not to get into weird problems while writing the cache. The
|
|
Packit |
a4aae4 |
size is indicated in Mega bytes. Note that reducing the size of the cache
|
|
Packit |
a4aae4 |
may trigger a garbage collection operation.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@note The maximum cache size is UINT_MAX bytes (usually 4294967295 for
|
|
Packit |
a4aae4 |
32-bit computers). If \e size is larger the value will be truncated to
|
|
Packit |
a4aae4 |
the value of that constant. It seems pretty unlikely that will happen
|
|
Packit |
a4aae4 |
given that the parameter is an unsigned long. This is a fix for bug 689
|
|
Packit |
a4aae4 |
which was reported when the parameter type was signed.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param size The maximum size of the cache in megabytes. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::set_max_size(unsigned long size)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
|
|
Packit |
a4aae4 |
MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA;
|
|
Packit |
a4aae4 |
unsigned long old_size = d_total_size;
|
|
Packit |
a4aae4 |
d_total_size = new_size;
|
|
Packit |
a4aae4 |
d_folder_size = d_total_size / CACHE_FOLDER_PCT;
|
|
Packit |
a4aae4 |
d_gc_buffer = d_total_size / CACHE_GC_PCT;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if (new_size < old_size && startGC()) {
|
|
Packit |
a4aae4 |
perform_garbage_collection();
|
|
Packit |
a4aae4 |
d_http_cache_table->cache_index_write();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (...) {
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
DBGN(cerr << "Unlocking interface." << endl);
|
|
Packit |
a4aae4 |
throw;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG2(cerr << "Cache....... Total cache size: " << d_total_size
|
|
Packit |
a4aae4 |
<< " with " << d_folder_size
|
|
Packit |
a4aae4 |
<< " bytes for meta information and folders and at least "
|
|
Packit |
a4aae4 |
<< d_gc_buffer << " bytes free after every gc" << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** How big is the cache? The value returned is the size in megabytes. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unsigned long
|
|
Packit |
a4aae4 |
HTTPCache::get_max_size() const
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
return d_total_size / MEGA;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Set the maximum size for a single entry in the cache.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Default: 3M
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param size The size in megabytes. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::set_max_entry_size(unsigned long size)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
unsigned long new_size = size * MEGA;
|
|
Packit |
a4aae4 |
if (new_size > 0 && new_size < d_total_size - d_folder_size) {
|
|
Packit |
a4aae4 |
unsigned long old_size = d_max_entry_size;
|
|
Packit |
a4aae4 |
d_max_entry_size = new_size;
|
|
Packit |
a4aae4 |
if (new_size < old_size && startGC()) {
|
|
Packit |
a4aae4 |
perform_garbage_collection();
|
|
Packit |
a4aae4 |
d_http_cache_table->cache_index_write();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (...) {
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
throw;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG2(cerr << "Cache...... Max entry cache size is "
|
|
Packit |
a4aae4 |
<< d_max_entry_size << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Get the maximum size of an individual entry in the cache.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@return The maximum size in megabytes. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unsigned long
|
|
Packit |
a4aae4 |
HTTPCache::get_max_entry_size() const
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
return d_max_entry_size / MEGA;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Set the default expiration time. Use the default expiration
|
|
Packit |
a4aae4 |
property to determine when a cached response becomes stale if the
|
|
Packit |
a4aae4 |
response lacks the information necessary to compute a specific value.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Default: 24 hours (86,400 seconds)
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param exp_time The time in seconds. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::set_default_expiration(const int exp_time)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_default_expiration = exp_time;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Get the default expiration time used by the cache. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
int
|
|
Packit |
a4aae4 |
HTTPCache::get_default_expiration() const
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
return d_default_expiration;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Should every cache entry be validated?
|
|
Packit |
a4aae4 |
@param validate True if every cache entry should be validated before
|
|
Packit |
a4aae4 |
being used. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::set_always_validate(bool validate)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
d_always_validate = validate;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Should every cache entry be validated before each use?
|
|
Packit |
a4aae4 |
@return True if all cache entries require validation. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool
|
|
Packit |
a4aae4 |
HTTPCache::get_always_validate() const
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
return d_always_validate;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Set the request Cache-Control headers. If a request must be satisfied
|
|
Packit |
a4aae4 |
using HTTP, these headers should be included in request since they might
|
|
Packit |
a4aae4 |
be pertinent to a proxy cache.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Ignored headers: no-transform, only-if-cached. These headers are not used
|
|
Packit |
a4aae4 |
by HTTPCache and are not recorded. However, if present in the vector
|
|
Packit |
a4aae4 |
passed to this method, they will be present in the vector returned by
|
|
Packit |
a4aae4 |
get_cache_control.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param cc A vector of strings, each string holds one Cache-Control
|
|
Packit |
a4aae4 |
header.
|
|
Packit |
a4aae4 |
@exception InternalErr Thrown if one of the strings in \c cc does not
|
|
Packit |
a4aae4 |
start with 'Cache-Control: '. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::set_cache_control(const vector<string> &cc)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
d_cache_control = cc;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
vector<string>::const_iterator i;
|
|
Packit |
a4aae4 |
for (i = cc.begin(); i != cc.end(); ++i) {
|
|
Packit |
a4aae4 |
string header = (*i).substr(0, (*i).find(':'));
|
|
Packit |
a4aae4 |
string value = (*i).substr((*i).find(": ") + 2);
|
|
Packit |
a4aae4 |
if (header != "Cache-Control") {
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
else {
|
|
Packit |
a4aae4 |
if (value == "no-cache" || value == "no-store")
|
|
Packit |
a4aae4 |
d_cache_enabled = false;
|
|
Packit |
a4aae4 |
else if (value.find("max-age") != string::npos) {
|
|
Packit |
a4aae4 |
string max_age = value.substr(value.find("=" + 1));
|
|
Packit |
a4aae4 |
d_max_age = parse_time(max_age.c_str());
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
else if (value == "max-stale")
|
|
Packit |
a4aae4 |
d_max_stale = 0; // indicates will take anything;
|
|
Packit |
a4aae4 |
else if (value.find("max-stale") != string::npos) {
|
|
Packit |
a4aae4 |
string max_stale = value.substr(value.find("=" + 1));
|
|
Packit |
a4aae4 |
d_max_stale = parse_time(max_stale.c_str());
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
else if (value.find("min-fresh") != string::npos) {
|
|
Packit |
a4aae4 |
string min_fresh = value.substr(value.find("=" + 1));
|
|
Packit |
a4aae4 |
d_min_fresh = parse_time(min_fresh.c_str());
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (...) {
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
throw;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Get the Cache-Control headers.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@return A vector of strings, one string for each header. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
vector<string>
|
|
Packit |
a4aae4 |
HTTPCache::get_cache_control()
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
return d_cache_control;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
//@}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Look in the cache for the given \c url. Is it in the cache table?
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@todo Remove this is broken.
|
|
Packit |
a4aae4 |
@param url The url to look for.
|
|
Packit |
a4aae4 |
@return True if \c url is found, otherwise False. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool
|
|
Packit |
a4aae4 |
HTTPCache::is_url_in_cache(const string &url)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
|
|
Packit |
a4aae4 |
bool status = entry != 0;
|
|
Packit |
a4aae4 |
if (entry) {
|
|
Packit |
a4aae4 |
entry->unlock_read_response();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
return status;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Is the header a hop by hop header? If so, we're not supposed to store it
    in the cache. See RFC 2616, Section 13.5.1.

    Only the field name (the text before the first ':') is examined, and it
    is compared without regard to case, because HTTP field names are
    case-insensitive (RFC 2616, Section 4.2). A header whose \e value merely
    mentions one of these names is therefore not treated as hop by hop.

    @param header A complete header line of the form 'Name: value'.
    @return True if the header is, otherwise False. */

bool
is_hop_by_hop_header(const string &header)
{
    // Lower-case field names. The first eight are the list in RFC 2616
    // Section 13.5.1. "trailer" is the actual field name behind that list's
    // "Trailers". "proxy-connection" is non-standard but was excluded by the
    // earlier substring test (it contains "Connection"), so it is kept.
    static const char *const hop_by_hop_names[] = {
        "connection", "keep-alive", "proxy-authenticate",
        "proxy-authorization", "te", "trailers", "transfer-encoding",
        "upgrade", "trailer", "proxy-connection"
    };
    static const size_t name_count =
        sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    // Isolate the field name and trim surrounding white space.
    string name = header.substr(0, header.find(':'));
    const string::size_type first = name.find_first_not_of(" \t");
    if (first == string::npos)
        return false;   // empty or blank name
    const string::size_type last = name.find_last_not_of(" \t");
    name = name.substr(first, last - first + 1);

    // Field names are ASCII tokens, so a plain ASCII fold is sufficient.
    for (string::size_type i = 0; i < name.size(); ++i) {
        if (name[i] >= 'A' && name[i] <= 'Z')
            name[i] = static_cast<char>(name[i] - 'A' + 'a');
    }

    for (size_t k = 0; k < name_count; ++k) {
        if (name == hop_by_hop_names[k])
            return true;
    }

    return false;
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Dump the headers out to the meta data file. The file is truncated if it
|
|
Packit |
a4aae4 |
already exists.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@todo This code could be replaced with STL/iostream stuff.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param cachename Base name of file for meta data.
|
|
Packit |
a4aae4 |
@param headers A vector of strings, one header per string.
|
|
Packit |
a4aae4 |
@exception InternalErr Thrown if the file cannot be opened. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
string fname = cachename + CACHE_META;
|
|
Packit |
a4aae4 |
d_open_files.push_back(fname);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
FILE *dest = fopen(fname.c_str(), "w");
|
|
Packit |
a4aae4 |
if (!dest) {
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__,
|
|
Packit |
a4aae4 |
"Could not open named cache entry file.");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
vector<string>::const_iterator i;
|
|
Packit |
a4aae4 |
for (i = headers.begin(); i != headers.end(); ++i) {
|
|
Packit |
a4aae4 |
if (!is_hop_by_hop_header(*i)) {
|
|
Packit |
a4aae4 |
int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
|
|
Packit |
a4aae4 |
if (s != 1) {
|
|
Packit |
a4aae4 |
fclose(dest);
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
s = fwrite("\n", 1, 1, dest);
|
|
Packit |
a4aae4 |
if (s != 1) {
|
|
Packit |
a4aae4 |
fclose(dest);
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
int res = fclose(dest);
|
|
Packit |
a4aae4 |
if (res) {
|
|
Packit |
a4aae4 |
DBG(cerr << "HTTPCache::write_metadata - Failed to close "
|
|
Packit |
a4aae4 |
<< dest << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_open_files.pop_back();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Read headers from a .meta.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@todo This code could be replaced with STL/iostream code.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param cachename The name of the file in the persistent store.
|
|
Packit |
a4aae4 |
@param headers The headers are returned using this parameter.
|
|
Packit |
a4aae4 |
@exception InternalErr Thrown if the file cannot be opened. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
|
|
Packit |
a4aae4 |
if (!md) {
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__,
|
|
Packit |
a4aae4 |
"Could not open named cache entry meta data file.");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
char line[1024];
|
|
Packit |
a4aae4 |
while (!feof(md) && fgets(line, 1024, md)) {
|
|
Packit |
a4aae4 |
line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
|
|
Packit |
a4aae4 |
headers.push_back(string(line));
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
int res = fclose(md);
|
|
Packit |
a4aae4 |
if (res) {
|
|
Packit |
a4aae4 |
DBG(cerr << "HTTPCache::read_metadata - Failed to close "
|
|
Packit |
a4aae4 |
<< md << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Write the body of the HTTP response to the cache.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method used to throw ResponseTooBig if any response was larger than
|
|
Packit |
a4aae4 |
max_entry_size. I've disabled that since perform_garbage_collection will
|
|
Packit |
a4aae4 |
remove any such entry if it's causing problems. Note that if
|
|
Packit |
a4aae4 |
parse_headers finds a Content-Length header that indicates a response is
|
|
Packit |
a4aae4 |
too big, the response won't be cached. The idea here is that once we've
|
|
Packit |
a4aae4 |
already written a bunch of bytes to the cache, we might as well continue.
|
|
Packit |
a4aae4 |
If it overflows the cache, perform_garbage_collection() will remove it.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param cachename Write data to this file.
|
|
Packit |
a4aae4 |
@param src Read data from this stream.
|
|
Packit |
a4aae4 |
@return The total number of bytes written.
|
|
Packit |
a4aae4 |
@exception InternalErr Thrown if the file cannot be opened or if an I/O
|
|
Packit |
a4aae4 |
error was detected.
|
|
Packit |
a4aae4 |
@exception ResponseTooBig Thrown if the response was found to be bigger
|
|
Packit |
a4aae4 |
than the max_entry_size property. This is not longer thrown. 10/11/02
|
|
Packit |
a4aae4 |
jhrg */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
int
|
|
Packit |
a4aae4 |
HTTPCache::write_body(const string &cachename, const FILE *src)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
d_open_files.push_back(cachename);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
FILE *dest = fopen(cachename.c_str(), "wb");
|
|
Packit |
a4aae4 |
if (!dest) {
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__,
|
|
Packit |
a4aae4 |
"Could not open named cache entry file.");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Read and write in 1k blocks; an attempt at doing this efficiently.
|
|
Packit |
a4aae4 |
// 09/30/02 jhrg
|
|
Packit |
a4aae4 |
char line[1024];
|
|
Packit |
a4aae4 |
size_t n;
|
|
Packit |
a4aae4 |
int total = 0;
|
|
Packit |
a4aae4 |
while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
|
|
Packit |
a4aae4 |
total += fwrite(line, 1, n, dest);
|
|
Packit |
a4aae4 |
DBG2(sleep(3));
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
|
|
Packit |
a4aae4 |
int res = fclose(dest);
|
|
Packit |
a4aae4 |
res = res & unlink(cachename.c_str());
|
|
Packit |
a4aae4 |
if (res) {
|
|
Packit |
a4aae4 |
DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
|
|
Packit |
a4aae4 |
<< dest << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__,
|
|
Packit |
a4aae4 |
"I/O error transferring data to the cache.");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
rewind(const_cast<FILE *>(src));
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
int res = fclose(dest);
|
|
Packit |
a4aae4 |
if (res) {
|
|
Packit |
a4aae4 |
DBG(cerr << "HTTPCache::write_body - Failed to close "
|
|
Packit |
a4aae4 |
<< dest << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_open_files.pop_back();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
return total;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Get a pointer to file that contains the body of a cached response. The
|
|
Packit |
a4aae4 |
returned FILE* can be used both for reading and for writing.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
A private method.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param cachename The name of the file that holds the response body.
|
|
Packit |
a4aae4 |
@exception InternalErr Thrown if the file cannot be opened. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
FILE *
|
|
Packit |
a4aae4 |
HTTPCache::open_body(const string &cachename)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
DBG(cerr << "cachename: " << cachename << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
FILE *src = fopen(cachename.c_str(), "rb"); // Read only
|
|
Packit |
a4aae4 |
if (!src)
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
return src;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Add a new response to the cache, or replace an existing cached response
|
|
Packit |
a4aae4 |
with new data. This method returns True if the information for \c url was
|
|
Packit |
a4aae4 |
added to the cache. A response might not be cache-able; in that case this
|
|
Packit |
a4aae4 |
method returns false. (For example, the response might contain the
|
|
Packit |
a4aae4 |
'Cache-Control: no-cache' header.)
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Note that the FILE *body is rewound so that the caller can re-read it
|
|
Packit |
a4aae4 |
without using fseek or rewind.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
If a response for \c url is already present in the cache, it will be
|
|
Packit |
a4aae4 |
replaced by the new headers and body. To update a response in the cache
|
|
Packit |
a4aae4 |
with new meta data, use update_response().
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param url A string which holds the request URL.
|
|
Packit |
a4aae4 |
@param request_time The time when the request was made, in seconds since
|
|
Packit |
a4aae4 |
1 Jan 1970.
|
|
Packit |
a4aae4 |
@param headers A vector of strings which hold the response headers.
|
|
Packit |
a4aae4 |
@param body A FILE * to a file which holds the response body.
|
|
Packit |
a4aae4 |
@return True if the response was cached, False if the response could not
|
|
Packit |
a4aae4 |
be cached.
|
|
Packit |
a4aae4 |
@exception InternalErr Thrown if there was a I/O error while writing to
|
|
Packit |
a4aae4 |
the persistent store. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool
|
|
Packit |
a4aae4 |
HTTPCache::cache_response(const string &url, time_t request_time,
|
|
Packit |
a4aae4 |
const vector<string> &headers, const FILE *body)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "Caching url: " << url << "." << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
// If this is not an http or https URL, don't cache.
|
|
Packit |
a4aae4 |
if (url.find("http:") == string::npos &&
|
|
Packit |
a4aae4 |
url.find("https:") == string::npos) {
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
return false;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// This does nothing if url is not already in the cache. It's
|
|
Packit |
a4aae4 |
// more efficient to do this than to first check and see if the entry
|
|
Packit |
a4aae4 |
// exists. 10/10/02 jhrg
|
|
Packit |
a4aae4 |
d_http_cache_table->remove_entry_from_cache_table(url);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url);
|
|
Packit |
a4aae4 |
entry->lock_write_response();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
|
|
Packit |
a4aae4 |
if (entry->is_no_cache()) {
|
|
Packit |
a4aae4 |
DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
|
|
Packit |
a4aae4 |
<< "(" << url << ")" << endl);
|
|
Packit |
a4aae4 |
entry->unlock_write_response();
|
|
Packit |
a4aae4 |
delete entry; entry = 0;
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
return false;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// corrected_initial_age, freshness_lifetime, response_time.
|
|
Packit |
a4aae4 |
d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_http_cache_table->create_location(entry); // cachename, cache_body_fd
|
|
Packit |
a4aae4 |
// move these write function to cache table
|
|
Packit |
a4aae4 |
entry->set_size(write_body(entry->get_cachename(), body));
|
|
Packit |
a4aae4 |
write_metadata(entry->get_cachename(), headers);
|
|
Packit |
a4aae4 |
d_http_cache_table->add_entry_to_cache_table(entry);
|
|
Packit |
a4aae4 |
entry->unlock_write_response();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (ResponseTooBigErr &e) {
|
|
Packit |
a4aae4 |
// Oops. Bummer. Clean up and exit.
|
|
Packit |
a4aae4 |
DBG(cerr << e.get_error_message() << endl);
|
|
Packit |
a4aae4 |
REMOVE(entry->get_cachename().c_str());
|
|
Packit |
a4aae4 |
REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
|
|
Packit |
a4aae4 |
DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
|
|
Packit |
a4aae4 |
<< ")" << endl);
|
|
Packit |
a4aae4 |
entry->unlock_write_response();
|
|
Packit |
a4aae4 |
delete entry; entry = 0;
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
return false;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
|
|
Packit |
a4aae4 |
if (startGC())
|
|
Packit |
a4aae4 |
perform_garbage_collection();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_http_cache_table->cache_index_write(); // resets new_entries
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (...) {
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
throw;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
return true;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Build the headers to send along with a GET request to make that request
|
|
Packit |
a4aae4 |
conditional. This method examines the headers for a given response in the
|
|
Packit |
a4aae4 |
cache and formulates the correct headers for a valid HTTP 1.1 conditional
|
|
Packit |
a4aae4 |
GET request. See RFC 2616, Section 13.3.4.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Rules: If an ETag is present, it must be used. Use If-None-Match. If a
|
|
Packit |
a4aae4 |
Last-Modified header is present, use it. Use If-Modified-Since. If both
|
|
Packit |
a4aae4 |
are present, use both (this means that HTTP 1.0 daemons are more likely
|
|
Packit |
a4aae4 |
to work). If a Last-Modified header is not present, use the value of the
|
|
Packit |
a4aae4 |
Cache-Control max-age or Expires header(s). Note that a 'Cache-Control:
|
|
Packit |
a4aae4 |
max-age' header overrides an Expires header (Sec 14.9.3).
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the cache interface and the cache entry.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param url Get the HTTPCacheTable::CacheEntry for this URL.
|
|
Packit |
a4aae4 |
@return A vector of strings, one request header per string.
|
|
Packit |
a4aae4 |
@exception Error Thrown if the \e url is not in the cache. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
vector<string>
|
|
Packit |
a4aae4 |
HTTPCache::get_conditional_request_headers(const string &url)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
HTTPCacheTable::CacheEntry *entry = 0;
|
|
Packit |
a4aae4 |
vector<string> headers;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "Getting conditional request headers for " << url << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
|
|
Packit |
a4aae4 |
if (!entry)
|
|
Packit |
a4aae4 |
throw Error(internal_error, "There is no cache entry for the URL: " + url);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if (entry->get_etag() != "")
|
|
Packit |
a4aae4 |
headers.push_back(string("If-None-Match: ") + entry->get_etag());
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if (entry->get_lm() > 0) {
|
|
Packit |
a4aae4 |
time_t lm = entry->get_lm();
|
|
Packit |
a4aae4 |
headers.push_back(string("If-Modified-Since: ")
|
|
Packit |
a4aae4 |
+ date_time_str(&lm);;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
else if (entry->get_max_age() > 0) {
|
|
Packit |
a4aae4 |
time_t max_age = entry->get_max_age();
|
|
Packit |
a4aae4 |
headers.push_back(string("If-Modified-Since: ")
|
|
Packit |
a4aae4 |
+ date_time_str(&max_age));
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
else if (entry->get_expires() > 0) {
|
|
Packit |
a4aae4 |
time_t expires = entry->get_expires();
|
|
Packit |
a4aae4 |
headers.push_back(string("If-Modified-Since: ")
|
|
Packit |
a4aae4 |
+ date_time_str(&expires));
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
entry->unlock_read_response();
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (...) {
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
if (entry) {
|
|
Packit |
a4aae4 |
entry->unlock_read_response();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
throw;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
return headers;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Functor/Predicate which orders two MIME headers based on the header name
    only (discounting the value). The name is the text before the first ':'
    in the header; a header without a ':' is compared as a whole.

    Two headers with the same name compare equivalent, so a
    set<string, HeaderLess> keeps only the first one inserted for each name.

    This used to derive from std::binary_function, which was deprecated in
    C++11 and removed in C++17. The member typedefs that base class supplied
    are declared here directly so code that names them keeps compiling. */

struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    bool operator()(const std::string &s1, const std::string &s2) const {
        // Compare the two name ranges in place. When find() returns npos the
        // count is clamped to the string length, i.e. the whole string is
        // used, which matches what substr(0, npos) would have produced.
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Update the meta data for a response already in the cache. This method
|
|
Packit |
a4aae4 |
provides a way to merge response headers returned from a conditional GET
|
|
Packit |
a4aae4 |
request, for the given URL, with those already present.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface and the cache entry.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param url Update the meta data for this cache entry.
|
|
Packit |
a4aae4 |
@param request_time The time (Unix time, seconds since 1 Jan 1970) that
|
|
Packit |
a4aae4 |
the conditional request was made.
|
|
Packit |
a4aae4 |
@param headers New headers, one header per string, returned in the
|
|
Packit |
a4aae4 |
response.
|
|
Packit |
a4aae4 |
@exception Error Thrown if the \c url is not in the cache. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::update_response(const string &url, time_t request_time,
|
|
Packit |
a4aae4 |
const vector<string> &headers)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
HTTPCacheTable::CacheEntry *entry = 0;
|
|
Packit |
a4aae4 |
DBG(cerr << "Updating the response headers for: " << url << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
|
|
Packit |
a4aae4 |
if (!entry)
|
|
Packit |
a4aae4 |
throw Error(internal_error, "There is no cache entry for the URL: " + url);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
|
|
Packit |
a4aae4 |
d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Update corrected_initial_age, freshness_lifetime, response_time.
|
|
Packit |
a4aae4 |
d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Merge the new headers with those in the persistent store. How:
|
|
Packit |
a4aae4 |
// Load the new headers into a set, then merge the old headers. Since
|
|
Packit |
a4aae4 |
// set<> ignores duplicates, old headers with the same name as a new
|
|
Packit |
a4aae4 |
// header will got into the bit bucket. Define a special compare
|
|
Packit |
a4aae4 |
// functor to make sure that headers are compared using only their
|
|
Packit |
a4aae4 |
// name and not their value too.
|
|
Packit |
a4aae4 |
set<string, HeaderLess> merged_headers;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Load in the new headers
|
|
Packit |
a4aae4 |
copy(headers.begin(), headers.end(),
|
|
Packit |
a4aae4 |
inserter(merged_headers, merged_headers.begin()));
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Get the old headers and load them in.
|
|
Packit |
a4aae4 |
vector<string> old_headers;
|
|
Packit |
a4aae4 |
read_metadata(entry->get_cachename(), old_headers);
|
|
Packit |
a4aae4 |
copy(old_headers.begin(), old_headers.end(),
|
|
Packit |
a4aae4 |
inserter(merged_headers, merged_headers.begin()));
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Read the values back out. Use reverse iterators with back_inserter
|
|
Packit |
a4aae4 |
// to preserve header order. NB: vector<> does not support push_front
|
|
Packit |
a4aae4 |
// so we can't use front_inserter(). 01/09/03 jhrg
|
|
Packit |
a4aae4 |
vector<string> result;
|
|
Packit |
a4aae4 |
copy(merged_headers.rbegin(), merged_headers.rend(),
|
|
Packit |
a4aae4 |
back_inserter(result));
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
write_metadata(entry->get_cachename(), result);
|
|
Packit |
a4aae4 |
entry->unlock_write_response();
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (...) {
|
|
Packit |
a4aae4 |
if (entry) {
|
|
Packit |
a4aae4 |
entry->unlock_read_response();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
throw;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Look in the cache and return the status (validity) of the cached
|
|
Packit |
a4aae4 |
response. This method should be used to determine if a cached response
|
|
Packit |
a4aae4 |
requires validation.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface and the cache entry.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param url Find the cached response associated with this URL.
|
|
Packit |
a4aae4 |
@return True indicates that the response can be used, False indicates
|
|
Packit |
a4aae4 |
that it must first be validated.
|
|
Packit |
a4aae4 |
@exception Error Thrown if the URL's response is not in the cache. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool
|
|
Packit |
a4aae4 |
HTTPCache::is_url_valid(const string &url)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool freshness;
|
|
Packit |
a4aae4 |
HTTPCacheTable::CacheEntry *entry = 0;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
if (d_always_validate) {
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
return false; // force re-validation.
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
|
|
Packit |
a4aae4 |
if (!entry)
|
|
Packit |
a4aae4 |
throw Error(internal_error, "There is no cache entry for the URL: " + url);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// If we supported range requests, we'd need code here to check if
|
|
Packit |
a4aae4 |
// there was only a partial response in the cache. 10/02/02 jhrg
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// In case this entry is of type "must-revalidate" then we consider it
|
|
Packit |
a4aae4 |
// invalid.
|
|
Packit |
a4aae4 |
if (entry->get_must_revalidate()) {
|
|
Packit |
a4aae4 |
entry->unlock_read_response();
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
return false;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
time_t resident_time = time(NULL) - entry->get_response_time();
|
|
Packit |
a4aae4 |
time_t current_age = entry->get_corrected_initial_age() + resident_time;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Check that the max-age, max-stale, and min-fresh directives
|
|
Packit |
a4aae4 |
// given in the request cache control header is followed.
|
|
Packit |
a4aae4 |
if (d_max_age >= 0 && current_age > d_max_age) {
|
|
Packit |
a4aae4 |
DBG(cerr << "Cache....... Max-age validation" << endl);
|
|
Packit |
a4aae4 |
entry->unlock_read_response();
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
return false;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
if (d_min_fresh >= 0
|
|
Packit |
a4aae4 |
&& entry->get_freshness_lifetime() < current_age + d_min_fresh) {
|
|
Packit |
a4aae4 |
DBG(cerr << "Cache....... Min-fresh validation" << endl);
|
|
Packit |
a4aae4 |
entry->unlock_read_response();
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
return false;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
freshness = (entry->get_freshness_lifetime()
|
|
Packit |
a4aae4 |
+ (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
|
|
Packit |
a4aae4 |
entry->unlock_read_response();
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (...) {
|
|
Packit |
a4aae4 |
if (entry) {
|
|
Packit |
a4aae4 |
entry->unlock_read_response();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
throw;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
return freshness;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Get information from the cache. For a given URL, get the headers, cache
|
|
Packit |
a4aae4 |
object name and body
|
|
Packit |
a4aae4 |
stored in the cache. Note that this method increments the hit counter for
|
|
Packit |
a4aae4 |
url 's entry and \e locks that entry. To release the lock,
|
|
Packit |
a4aae4 |
the method release_cached_response() \e must be called. Methods that
|
|
Packit |
a4aae4 |
block on a locked entry are: get_conditional_request_headers(),
|
|
Packit |
a4aae4 |
update_response() and is_url_valid(). In addition, purge_cache() throws
|
|
Packit |
a4aae4 |
Error if it's called and any entries are locked. The garbage collection
|
|
Packit |
a4aae4 |
system will not reclaim locked entries (but works fine when some entries
|
|
Packit |
a4aae4 |
are locked).
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method does \e not check to see that the response is valid, just
|
|
Packit |
a4aae4 |
that it is in the cache. To see if a cached response is valid, use
|
|
Packit |
a4aae4 |
is_url_valid(). The FILE* returned can be used for both reading and
|
|
Packit |
a4aae4 |
writing. The latter allows a client to update the body of a cached
|
|
Packit |
a4aae4 |
response without having to first dump it all to a separate file and then
|
|
Packit |
a4aae4 |
copy it into the cache (using cache_response()).
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param url Get response information for this URL.
|
|
Packit |
a4aae4 |
@param headers Return the response headers in this parameter
|
|
Packit |
a4aae4 |
@param cacheName A value-result parameter; the name of the cache file
|
|
Packit |
a4aae4 |
@return A FILE * to the response body.
|
|
Packit |
a4aae4 |
@exception Error Thrown if the URL's response is not in the cache.
|
|
Packit |
a4aae4 |
@exception InternalErr Thrown if the persistent store cannot be opened. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
FILE * HTTPCache::get_cached_response(const string &url,
|
|
Packit |
a4aae4 |
vector<string> &headers, string &cacheName) {
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
FILE *body = 0;
|
|
Packit |
a4aae4 |
HTTPCacheTable::CacheEntry *entry = 0;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "Getting the cached response for " << url << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
|
|
Packit |
a4aae4 |
if (!entry) {
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
return 0;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
cacheName = entry->get_cachename();
|
|
Packit |
a4aae4 |
read_metadata(entry->get_cachename(), headers);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "Headers just read from cache: " << endl);
|
|
Packit |
a4aae4 |
DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
body = open_body(entry->get_cachename());
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "Returning: " << url << " from the cache." << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_http_cache_table->bind_entry_to_data(entry, body);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (...) {
|
|
Packit |
a4aae4 |
// Why make this unlock operation conditional on entry?
|
|
Packit |
a4aae4 |
if (entry)
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
if (body != 0)
|
|
Packit |
a4aae4 |
fclose(body);
|
|
Packit |
a4aae4 |
throw;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
return body;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Get information from the cache. This is a convenience method that calls
|
|
Packit |
a4aae4 |
the three parameter version of get_cache_response().
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param url Get response information for this URL.
|
|
Packit |
a4aae4 |
@param headers Return the response headers in this parameter
|
|
Packit |
a4aae4 |
@return A FILE * to the response body.
|
|
Packit |
a4aae4 |
@exception Error Thrown if the URL's response is not in the cache.
|
|
Packit |
a4aae4 |
@exception InternalErr Thrown if the persistent store cannot be opened. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
FILE *
|
|
Packit |
a4aae4 |
HTTPCache::get_cached_response(const string &url, vector<string> &headers)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
string discard_name;
|
|
Packit |
a4aae4 |
return get_cached_response(url, headers, discard_name);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Get a pointer to a cached response body. This is a convenience method that
|
|
Packit |
a4aae4 |
calls the three parameter version of get_cache_response().
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param url Find the body associated with this URL.
|
|
Packit |
a4aae4 |
@return A FILE* that points to the response body.
|
|
Packit |
a4aae4 |
@exception Error Thrown if the URL is not in the cache.
|
|
Packit |
a4aae4 |
@exception InternalErr Thrown if an I/O error is detected. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
FILE *
|
|
Packit |
a4aae4 |
HTTPCache::get_cached_response(const string &url)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
string discard_name;
|
|
Packit |
a4aae4 |
vector<string> discard_headers;
|
|
Packit |
a4aae4 |
return get_cached_response(url, discard_headers, discard_name);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Call this method to inform the cache that a particular response is no
|
|
Packit |
a4aae4 |
longer in use. When a response is accessed using get_cached_response(), it
|
|
Packit |
a4aae4 |
is locked so that updates and removal (e.g., by the garbage collector)
|
|
Packit |
a4aae4 |
are not possible. Calling this method frees that lock.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param body Release the lock on the response information associated with
|
|
Packit |
a4aae4 |
this FILE *.
|
|
Packit |
a4aae4 |
@exception Error Thrown if \c body does not belong to an entry in the
|
|
Packit |
a4aae4 |
cache or if the entry was already released. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::release_cached_response(FILE *body)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
// fclose(body); This results in a seg fault on linux jhrg 8/27/13
|
|
Packit |
a4aae4 |
d_http_cache_table->uncouple_entry_from_data(body);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (...) {
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
throw;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Purge both the in-memory cache table and the contents of the cache on
|
|
Packit |
a4aae4 |
disk. This method deletes every entry in the persistent store but leaves
|
|
Packit |
a4aae4 |
the structure intact. The client of HTTPCache is responsible for making
|
|
Packit |
a4aae4 |
sure that all threads have released any responses they pulled from the
|
|
Packit |
a4aae4 |
cache. If this method is called when a response is still in use, it will
|
|
Packit |
a4aae4 |
throw an Error object and not purge the cache.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This method locks the class' interface.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@exception Error Thrown if an attempt is made to purge the cache when
|
|
Packit |
a4aae4 |
an entry is still in use. */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
HTTPCache::purge_cache()
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
lock_cache_interface();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
try {
|
|
Packit |
a4aae4 |
if (d_http_cache_table->is_locked_read_responses())
|
|
Packit |
a4aae4 |
throw Error(internal_error, "Attempt to purge the cache with entries in use.");
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
d_http_cache_table->delete_all_entries();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
catch (...) {
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
throw;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unlock_cache_interface();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
} // namespace libdap
|