Blame HTTPCacheTable.cc

Packit a4aae4
Packit a4aae4
// -*- mode: c++; c-basic-offset:4 -*-
Packit a4aae4
Packit a4aae4
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
Packit a4aae4
// Access Protocol.
Packit a4aae4
Packit a4aae4
// Copyright (c) 2002,2003 OPeNDAP, Inc.
Packit a4aae4
// Author: James Gallagher <jgallagher@opendap.org>
Packit a4aae4
//
Packit a4aae4
// This library is free software; you can redistribute it and/or
Packit a4aae4
// modify it under the terms of the GNU Lesser General Public
Packit a4aae4
// License as published by the Free Software Foundation; either
Packit a4aae4
// version 2.1 of the License, or (at your option) any later version.
Packit a4aae4
//
Packit a4aae4
// This library is distributed in the hope that it will be useful,
Packit a4aae4
// but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit a4aae4
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit a4aae4
// Lesser General Public License for more details.
Packit a4aae4
//
Packit a4aae4
// You should have received a copy of the GNU Lesser General Public
Packit a4aae4
// License along with this library; if not, write to the Free Software
Packit a4aae4
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
Packit a4aae4
//
Packit a4aae4
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
Packit a4aae4
Packit a4aae4
#include "config.h"
Packit a4aae4
Packit a4aae4
// #define DODS_DEBUG
Packit a4aae4
Packit a4aae4
// TODO: Remove unneeded includes.
Packit a4aae4
Packit a4aae4
#include <pthread.h>
Packit a4aae4
#include <limits.h>
Packit a4aae4
#include <unistd.h>   // for stat
Packit a4aae4
#include <sys/types.h>  // for stat and mkdir
Packit a4aae4
#include <sys/stat.h>
Packit a4aae4
Packit a4aae4
#include <cstring>
Packit a4aae4
#include <cerrno>
Packit a4aae4
Packit a4aae4
#include <iostream>
Packit a4aae4
#include <sstream>
Packit a4aae4
#include <algorithm>
Packit a4aae4
#include <iterator>
Packit a4aae4
#include <set>
Packit a4aae4
Packit a4aae4
#include "Error.h"
Packit a4aae4
#include "InternalErr.h"
Packit a4aae4
#include "ResponseTooBigErr.h"
Packit a4aae4
#ifndef WIN32
Packit a4aae4
#include "SignalHandler.h"
Packit a4aae4
#endif
Packit a4aae4
#include "HTTPCacheInterruptHandler.h"
Packit a4aae4
#include "HTTPCacheTable.h"
Packit a4aae4
#include "HTTPCacheMacros.h"
Packit a4aae4
Packit a4aae4
#include "util_mit.h"
Packit a4aae4
#include "debug.h"
Packit a4aae4
Packit a4aae4
#ifdef WIN32
Packit a4aae4
#include <direct.h>
Packit a4aae4
#include <time.h>
Packit a4aae4
#include <fcntl.h>
Packit a4aae4
#define MKDIR(a,b) _mkdir((a))
Packit a4aae4
#define REMOVE(a) do { \
Packit a4aae4
		int s = remove((a)); \
Packit a4aae4
		if (s != 0) \
Packit a4aae4
			throw InternalErr(__FILE__, __LINE__, "Cache error; could not remove file: " + long_to_string(s)); \
Packit a4aae4
	} while(0);
Packit a4aae4
#define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
Packit a4aae4
#define DIR_SEPARATOR_CHAR '\\'
Packit a4aae4
#define DIR_SEPARATOR_STR "\\"
Packit a4aae4
#else
Packit a4aae4
#define MKDIR(a,b) mkdir((a), (b))
Packit a4aae4
#define MKSTEMP(a) mkstemp((a))
Packit a4aae4
#define DIR_SEPARATOR_CHAR '/'
Packit a4aae4
#define DIR_SEPARATOR_STR "/"
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
#define CACHE_META ".meta"
Packit a4aae4
#define CACHE_INDEX ".index"
Packit a4aae4
#define CACHE_EMPTY_ETAG "@cache@"
Packit a4aae4
Packit a4aae4
#define NO_LM_EXPIRATION 24*3600 // 24 hours
Packit a4aae4
#define MAX_LM_EXPIRATION 48*3600 // Max expiration from LM
Packit a4aae4
Packit a4aae4
// If using LM to find the expiration then take 10% and no more than
Packit a4aae4
// MAX_LM_EXPIRATION.
Packit a4aae4
#ifndef LM_EXPIRATION
Packit a4aae4
#define LM_EXPIRATION(t) (min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
const int CACHE_TABLE_SIZE = 1499;
Packit a4aae4
Packit a4aae4
using namespace std;
Packit a4aae4
Packit a4aae4
namespace libdap {
Packit a4aae4
Packit a4aae4
/** Compute the hash value for a URL.
Packit a4aae4
    @param url
Packit a4aae4
    @return An integer hash code between 0 and CACHE_TABLE_SIZE. */
Packit a4aae4
int
Packit a4aae4
get_hash(const string &url)
Packit a4aae4
{
Packit a4aae4
    int hash = 0;
Packit a4aae4
Packit a4aae4
    for (const char *ptr = url.c_str(); *ptr; ptr++)
Packit a4aae4
        hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);
Packit a4aae4
Packit a4aae4
    return hash;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Build an empty cache table and load the saved index, if there is one.

    The index file name is \c cache_root with CACHE_INDEX appended; no
    directory separator is inserted between the two.

    @param cache_root Pathname prefix of the cache on disk.
    @param block_size Block size used when charging disk space to entries.
    @exception InternalErr Propagated from cache_index_read() when the index
    holds an entry that add_entry_to_cache_table() rejects. Everything
    allocated so far is released before the exception leaves the
    constructor. */
HTTPCacheTable::HTTPCacheTable(const string &cache_root, int block_size) :
    d_cache_root(cache_root), d_block_size(block_size), d_current_size(0), d_new_entries(0)
{
    d_cache_index = cache_root + CACHE_INDEX;

    d_cache_table = new CacheEntries*[CACHE_TABLE_SIZE];

    // Start with every slot empty.
    for (int i = 0; i < CACHE_TABLE_SIZE; ++i)
        d_cache_table[i] = 0;

    // The destructor does not run when a constructor throws, so whatever
    // was loaded before the failure has to be released here.
    try {
        cache_index_read();
    }
    catch (...) {
        for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
            CacheEntries *slot = d_cache_table[i];
            if (!slot)
                continue;

            for (CacheEntriesIter e = slot->begin(); e != slot->end(); ++e)
                delete *e;
            delete slot;
        }

        delete[] d_cache_table;
        d_cache_table = 0;
        throw;
    }
}
Packit a4aae4
Packit a4aae4
/** Called by for_each inside ~HTTPCache().
Packit a4aae4
    @param e The cache entry to delete. */
Packit a4aae4
Packit a4aae4
static inline void
Packit a4aae4
delete_cache_entry(HTTPCacheTable::CacheEntry *e)
Packit a4aae4
{
Packit a4aae4
    DBG2(cerr << "Deleting CacheEntry: " << e << endl);
Packit a4aae4
    delete e;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Release every CacheEntry, each per-slot vector and finally the table
    array itself. No file is removed from disk here. */
HTTPCacheTable::~HTTPCacheTable()
{
    for (int slot = 0; slot != CACHE_TABLE_SIZE; ++slot) {
        HTTPCacheTable::CacheEntries *entries = get_cache_table()[slot];
        if (!entries)
            continue;

        // Free the entries first ...
        for (CacheEntriesIter i = entries->begin(); i != entries->end(); ++i)
            delete_cache_entry(*i);

        // ... then the vector that held them, and mark the slot empty.
        delete entries;
        get_cache_table()[slot] = 0;
    }

    delete[] d_cache_table;
}
Packit a4aae4
Packit a4aae4
/** Functor which deletes and nulls a single CacheEntry if it has expired.
Packit a4aae4
    This functor is called by expired_gc which then uses the
Packit a4aae4
    erase(remove(...) ...) idiom to really remove all the vector entries that
Packit a4aae4
    belonged to the deleted CacheEntry objects.
Packit a4aae4
Packit a4aae4
    @see expired_gc. */
Packit a4aae4
Packit a4aae4
class DeleteExpired : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
Packit a4aae4
	time_t d_time;
Packit a4aae4
	HTTPCacheTable &d_table;
Packit a4aae4
Packit a4aae4
public:
Packit a4aae4
	DeleteExpired(HTTPCacheTable &table, time_t t) :
Packit a4aae4
		d_time(t), d_table(table) {
Packit a4aae4
		if (!t)
Packit a4aae4
			d_time = time(0); // 0 == now
Packit a4aae4
	} 
Packit a4aae4
Packit a4aae4
	void operator()(HTTPCacheTable::CacheEntry *&e) {
Packit a4aae4
		if (e && !e->readers && (e->freshness_lifetime
Packit a4aae4
				< (e->corrected_initial_age + (d_time - e->response_time)))) {
Packit a4aae4
			DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
Packit a4aae4
			d_table.remove_cache_entry(e);
Packit a4aae4
			delete e; e = 0;
Packit a4aae4
		}
Packit a4aae4
	}
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
// @param time base deletes againt this time, defaults to 0 (now)
Packit a4aae4
void HTTPCacheTable::delete_expired_entries(time_t time) {
Packit a4aae4
	// Walk through and delete all the expired entries.
Packit a4aae4
	for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
Packit a4aae4
		HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
Packit a4aae4
		if (slot) {
Packit a4aae4
			for_each(slot->begin(), slot->end(), DeleteExpired(*this, time));
Packit a4aae4
			slot->erase(remove(slot->begin(), slot->end(),
Packit a4aae4
					static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
Packit a4aae4
		}
Packit a4aae4
	}
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Functor which deletes and nulls a single CacheEntry which has less than
Packit a4aae4
    or equal to \c hits hits or if it is larger than the cache's
Packit a4aae4
    max_entry_size property.
Packit a4aae4
Packit a4aae4
    @see hits_gc. */
Packit a4aae4
Packit a4aae4
class DeleteByHits : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
Packit a4aae4
	HTTPCacheTable &d_table;
Packit a4aae4
	int d_hits;
Packit a4aae4
Packit a4aae4
public:
Packit a4aae4
	DeleteByHits(HTTPCacheTable &table, int hits) :
Packit a4aae4
		d_table(table), d_hits(hits) {
Packit a4aae4
	}
Packit a4aae4
Packit a4aae4
	void operator()(HTTPCacheTable::CacheEntry *&e) {
Packit a4aae4
		if (e && !e->readers && e->hits <= d_hits) {
Packit a4aae4
			DBG(cerr << "Deleting cache entry: " << e->url << endl);
Packit a4aae4
			d_table.remove_cache_entry(e);
Packit a4aae4
			delete e; e = 0;
Packit a4aae4
		}
Packit a4aae4
	}
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
void 
Packit a4aae4
HTTPCacheTable::delete_by_hits(int hits) {
Packit a4aae4
    for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
Packit a4aae4
        if (get_cache_table()[cnt]) {
Packit a4aae4
            HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
Packit a4aae4
            for_each(slot->begin(), slot->end(), DeleteByHits(*this, hits));
Packit a4aae4
            slot->erase(remove(slot->begin(), slot->end(),
Packit a4aae4
                               static_cast<HTTPCacheTable::CacheEntry*>(0)),
Packit a4aae4
                        slot->end());
Packit a4aae4
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Functor which deletes and nulls a single CacheEntry which is larger than 
Packit a4aae4
    a given size.
Packit a4aae4
    @see hits_gc. */
Packit a4aae4
Packit a4aae4
class DeleteBySize : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
Packit a4aae4
	HTTPCacheTable &d_table;
Packit a4aae4
	unsigned int d_size;
Packit a4aae4
Packit a4aae4
public:
Packit a4aae4
	DeleteBySize(HTTPCacheTable &table, unsigned int size) :
Packit a4aae4
		d_table(table), d_size(size) {
Packit a4aae4
	}
Packit a4aae4
Packit a4aae4
	void operator()(HTTPCacheTable::CacheEntry *&e) {
Packit a4aae4
		if (e && !e->readers && e->size > d_size) {
Packit a4aae4
			DBG(cerr << "Deleting cache entry: " << e->url << endl);
Packit a4aae4
			d_table.remove_cache_entry(e);
Packit a4aae4
			delete e; e = 0;
Packit a4aae4
		}
Packit a4aae4
	}
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
void HTTPCacheTable::delete_by_size(unsigned int size) {
Packit a4aae4
    for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
Packit a4aae4
        if (get_cache_table()[cnt]) {
Packit a4aae4
            HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
Packit a4aae4
            for_each(slot->begin(), slot->end(), DeleteBySize(*this, size));
Packit a4aae4
            slot->erase(remove(slot->begin(), slot->end(),
Packit a4aae4
                               static_cast<HTTPCacheTable::CacheEntry*>(0)),
Packit a4aae4
                        slot->end());
Packit a4aae4
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** @name Cache Index
Packit a4aae4
Packit a4aae4
    These methods manage the cache's index file. Each cache holds an index
Packit a4aae4
    file named \c .index which stores the cache's state information. */
Packit a4aae4
Packit a4aae4
//@{
Packit a4aae4
Packit a4aae4
/** Remove the cache index file.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @return True if the file was deleted, otherwise false. */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPCacheTable::cache_index_delete()
Packit a4aae4
{
Packit a4aae4
	d_new_entries = 0;
Packit a4aae4
	
Packit a4aae4
    return (REMOVE_BOOL(d_cache_index.c_str()) == 0);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Read the saved set of cached entries from disk. Consistency between the
Packit a4aae4
    in-memory cache and the index is maintained by only reading the index
Packit a4aae4
    file when the HTTPCache object is created!
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @return True when a cache index was found and read, false otherwise. */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPCacheTable::cache_index_read()
Packit a4aae4
{
Packit a4aae4
    FILE *fp = fopen(d_cache_index.c_str(), "r");
Packit a4aae4
    // If the cache index can't be opened that's OK; start with an empty
Packit a4aae4
    // cache. 09/05/02 jhrg
Packit a4aae4
    if (!fp) {
Packit a4aae4
        return false;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    char line[1024];
Packit a4aae4
    while (!feof(fp) && fgets(line, 1024, fp)) {
Packit a4aae4
    	add_entry_to_cache_table(cache_index_parse_line(line));
Packit a4aae4
        DBG2(cerr << line << endl);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    int res = fclose(fp) ;
Packit a4aae4
    if (res) {
Packit a4aae4
        DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    d_new_entries = 0;
Packit a4aae4
    
Packit a4aae4
    return true;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Parse one line of the index file.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param line A single line from the \c .index file.
Packit a4aae4
    @return A CacheEntry initialized with the information from \c line. */
Packit a4aae4
Packit a4aae4
HTTPCacheTable::CacheEntry *
Packit a4aae4
HTTPCacheTable::cache_index_parse_line(const char *line)
Packit a4aae4
{
Packit a4aae4
    // Read the line and create the cache object
Packit a4aae4
	HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry;
Packit a4aae4
    istringstream iss(line);
Packit a4aae4
    iss >> entry->url;
Packit a4aae4
    iss >> entry->cachename;
Packit a4aae4
Packit a4aae4
    iss >> entry->etag;
Packit a4aae4
    if (entry->etag == CACHE_EMPTY_ETAG)
Packit a4aae4
        entry->etag = "";
Packit a4aae4
Packit a4aae4
    iss >> entry->lm;
Packit a4aae4
    iss >> entry->expires;
Packit a4aae4
    iss >> entry->size;
Packit a4aae4
    iss >> entry->range; // range is not used. 10/02/02 jhrg
Packit a4aae4
Packit a4aae4
    iss >> entry->hash;
Packit a4aae4
    iss >> entry->hits;
Packit a4aae4
    iss >> entry->freshness_lifetime;
Packit a4aae4
    iss >> entry->response_time;
Packit a4aae4
    iss >> entry->corrected_initial_age;
Packit a4aae4
Packit a4aae4
    iss >> entry->must_revalidate;
Packit a4aae4
Packit a4aae4
    return entry;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Functor which writes a single CacheEntry to the \c .index file. */
Packit a4aae4
Packit a4aae4
class WriteOneCacheEntry :
Packit a4aae4
	public unary_function<HTTPCacheTable::CacheEntry *, void>
Packit a4aae4
{
Packit a4aae4
Packit a4aae4
    FILE *d_fp;
Packit a4aae4
Packit a4aae4
public:
Packit a4aae4
    WriteOneCacheEntry(FILE *fp) : d_fp(fp)
Packit a4aae4
    {}
Packit a4aae4
Packit a4aae4
    void operator()(HTTPCacheTable::CacheEntry *e)
Packit a4aae4
    {
Packit a4aae4
        if (e && fprintf(d_fp,
Packit a4aae4
                         "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n",
Packit a4aae4
                         e->url.c_str(),
Packit a4aae4
                         e->cachename.c_str(),
Packit a4aae4
                         e->etag == "" ? CACHE_EMPTY_ETAG : e->etag.c_str(),
Packit a4aae4
                         (long)(e->lm),
Packit a4aae4
                         (long)(e->expires),
Packit a4aae4
                         e->size,
Packit a4aae4
                         e->range ? '1' : '0', // not used. 10/02/02 jhrg
Packit a4aae4
                         e->hash,
Packit a4aae4
                         e->hits,
Packit a4aae4
                         (long)(e->freshness_lifetime),
Packit a4aae4
                         (long)(e->response_time),
Packit a4aae4
                         (long)(e->corrected_initial_age),
Packit a4aae4
                         e->must_revalidate ? '1' : '0') < 0)
Packit a4aae4
            throw Error(internal_error, "Cache Index. Error writing cache index\n");
Packit a4aae4
    }
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
/** Walk through the list of cached objects and write the cache index file to
Packit a4aae4
    disk. If the file does not exist, it is created. If the file does exist,
Packit a4aae4
    it is overwritten. As a side effect, zero the new_entries counter.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @exception Error Thrown if the index file cannot be opened for writing.
Packit a4aae4
    @note The HTTPCache destructor calls this method and silently ignores
Packit a4aae4
    this exception. */
Packit a4aae4
void
Packit a4aae4
HTTPCacheTable::cache_index_write()
Packit a4aae4
{
Packit a4aae4
    DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
Packit a4aae4
Packit a4aae4
    // Open the file for writing.
Packit a4aae4
    FILE * fp = NULL;
Packit a4aae4
    if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
Packit a4aae4
        throw Error(string("Cache Index. Can't open `") + d_cache_index
Packit a4aae4
                    + string("' for writing"));
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    // Walk through the list and write it out. The format is really
Packit a4aae4
    // simple as we keep it all in ASCII.
Packit a4aae4
Packit a4aae4
    for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
Packit a4aae4
        HTTPCacheTable::CacheEntries *cp = get_cache_table()[cnt];
Packit a4aae4
        if (cp)
Packit a4aae4
            for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    /* Done writing */
Packit a4aae4
    int res = fclose(fp);
Packit a4aae4
    if (res) {
Packit a4aae4
        DBG(cerr << "HTTPCache::cache_index_write - Failed to close "
Packit a4aae4
            << (void *)fp << endl);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    d_new_entries = 0;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
//@} End of the cache index methods.
Packit a4aae4
/** Create the directory path for cache file. The cache uses a set of
Packit a4aae4
    directories within d_cache_root to store individual responses. The name
Packit a4aae4
    of the directory that holds a given response is the value returned by the
Packit a4aae4
    get_hash() function (i.e., it's a number). If the directory exists, this
Packit a4aae4
    method does nothing.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param hash The hash value (i.e., directory name). An integer between 0
Packit a4aae4
    and CACHE_TABLE_SIZE (See HTTPCache.h).
Packit a4aae4
    @return The pathname to the directory (even if it already existed).
Packit a4aae4
    @exception Error Thrown if the directory cannot be created.*/
Packit a4aae4
Packit a4aae4
string
Packit a4aae4
HTTPCacheTable::create_hash_directory(int hash)
Packit a4aae4
{
Packit a4aae4
#if 0
Packit a4aae4
    struct stat stat_info;
Packit a4aae4
    ostringstream path;
Packit a4aae4
Packit a4aae4
    path << d_cache_root << hash;
Packit a4aae4
    string p = path.str();
Packit a4aae4
Packit a4aae4
    if (stat(p.c_str(), &stat_info) == -1) {
Packit a4aae4
        DBG2(cerr << "Cache....... Create dir " << p << endl);
Packit a4aae4
        if (MKDIR(p.c_str(), 0777) < 0) {
Packit a4aae4
            DBG2(cerr << "Cache....... Can't create..." << endl);
Packit a4aae4
            throw Error("Could not create cache slot to hold response! Check the write permissions on your disk cache directory. Cache root: " + d_cache_root + ".");
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
    else {
Packit a4aae4
        DBG2(cerr << "Cache....... Directory " << p << " already exists"
Packit a4aae4
             << endl);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    return p;
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
    ostringstream path;
Packit a4aae4
    path << d_cache_root << hash;
Packit a4aae4
Packit a4aae4
    // Save the mask
Packit a4aae4
    mode_t mask = umask(0);
Packit a4aae4
Packit a4aae4
    // Ignore the error if the directory exists
Packit a4aae4
    errno = 0;
Packit a4aae4
    if (mkdir(path.str().c_str(), 0777) < 0 && errno != EEXIST) {
Packit a4aae4
        umask(mask);
Packit a4aae4
        throw Error(internal_error, "Could not create the directory for the cache at '" + path.str() + "' (" + strerror(errno) + ").");
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    // Restore themask
Packit a4aae4
    umask(mask);
Packit a4aae4
Packit a4aae4
    return path.str();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Create the directory for this url (using the hash value from get_hash())
Packit a4aae4
    and a file within that directory to hold the response's information. The
Packit a4aae4
    cache name and cache_body_fd fields of \c entry are updated.
Packit a4aae4
Packit a4aae4
    mkstemp opens the file it creates, which is a good thing but it makes
Packit a4aae4
    tracking resources hard for the HTTPCache object (because an exception
Packit a4aae4
    might cause a file descriptor resource leak). So I close that file
Packit a4aae4
    descriptor here.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param entry The cache entry object to operate on.
Packit a4aae4
    @exception Error If the file for the response's body cannot be created. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCacheTable::create_location(HTTPCacheTable::CacheEntry *entry)
Packit a4aae4
{
Packit a4aae4
    string hash_dir = create_hash_directory(entry->hash);
Packit a4aae4
#ifdef WIN32
Packit a4aae4
    hash_dir += "\\dodsXXXXXX";
Packit a4aae4
#else
Packit a4aae4
    hash_dir += "/dodsXXXXXX"; // mkstemp uses six characters.
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
    // mkstemp uses the storage passed to it; must be writable and local.
Packit a4aae4
    // char *templat = new char[hash_dir.size() + 1];
Packit a4aae4
    vector<char> templat(hash_dir.size() + 1);
Packit a4aae4
    strncpy(&templat[0], hash_dir.c_str(), hash_dir.size() + 1);
Packit a4aae4
Packit a4aae4
    // Open truncated for update. NB: mkstemp() returns a file descriptor.
Packit a4aae4
    // man mkstemp says "... The file is opened with the O_EXCL flag,
Packit a4aae4
    // guaranteeing that when mkstemp returns successfully we are the only
Packit a4aae4
    // user." 09/19/02 jhrg
Packit a4aae4
#ifndef WIN32
Packit a4aae4
    // Make sure that temp files are accessible only by the owner.
Packit a4aae4
    umask(077);
Packit a4aae4
#endif
Packit a4aae4
    int fd = MKSTEMP(&templat[0]); // fd mode is 666 or 600 (Unix)
Packit a4aae4
    if (fd < 0) {
Packit a4aae4
        // delete[] templat; templat = 0;
Packit a4aae4
        // close(fd); Calling close() when fd is < 0 is a bad idea! jhrg 7/2/15
Packit a4aae4
        throw Error(internal_error, "The HTTP Cache could not create a file to hold the response; it will not be cached.");
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    entry->cachename = &templat[0];
Packit a4aae4
    // delete[] templat; templat = 0;
Packit a4aae4
    close(fd);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
Packit a4aae4
/** compute real disk space for an entry. */
Packit a4aae4
static inline int
Packit a4aae4
entry_disk_space(int size, unsigned int block_size)
Packit a4aae4
{
Packit a4aae4
    unsigned int num_of_blocks = (size + block_size) / block_size;
Packit a4aae4
    
Packit a4aae4
    DBG(cerr << "size: " << size << ", block_size: " << block_size
Packit a4aae4
        << ", num_of_blocks: " << num_of_blocks << endl);
Packit a4aae4
Packit a4aae4
    return num_of_blocks * block_size;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** @name Methods to manipulate instances of CacheEntry. */
Packit a4aae4
Packit a4aae4
//@{
Packit a4aae4
Packit a4aae4
/** Add a CacheEntry to the cache table. As each entry is read, load it into
Packit a4aae4
    the in-memory cache table and update the HTTPCache's current_size. The
Packit a4aae4
    later is used by the garbage collection method.
Packit a4aae4
Packit a4aae4
    @param entry The CacheEntry instance to add. */
Packit a4aae4
void
Packit a4aae4
HTTPCacheTable::add_entry_to_cache_table(CacheEntry *entry)
Packit a4aae4
{
Packit a4aae4
    int hash = entry->hash;
Packit a4aae4
    if (hash > CACHE_TABLE_SIZE-1 || hash < 0)
Packit a4aae4
        throw InternalErr(__FILE__, __LINE__, "Hash value too large!");
Packit a4aae4
Packit a4aae4
    if (!d_cache_table[hash])
Packit a4aae4
        d_cache_table[hash] = new CacheEntries;
Packit a4aae4
Packit a4aae4
    d_cache_table[hash]->push_back(entry);
Packit a4aae4
    
Packit a4aae4
    DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size
Packit a4aae4
        << ", entry->size: " << entry->size << ", block size: " << d_block_size 
Packit a4aae4
        << endl);
Packit a4aae4
    
Packit a4aae4
    d_current_size += entry_disk_space(entry->size, d_block_size);
Packit a4aae4
Packit a4aae4
    DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
Packit a4aae4
    
Packit a4aae4
    increment_new_entries();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Get a pointer to a CacheEntry from the cache table.
Packit a4aae4
Packit a4aae4
    @param url Look for this URL. */
Packit a4aae4
HTTPCacheTable::CacheEntry *
Packit a4aae4
HTTPCacheTable::get_locked_entry_from_cache_table(const string &url) /*const*/
Packit a4aae4
{
Packit a4aae4
    return get_locked_entry_from_cache_table(get_hash(url), url);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Get a pointer to a CacheEntry from the cache table. Providing a way to
Packit a4aae4
    pass the hash code into this method makes it easier to test for correct
Packit a4aae4
    behavior when two entries collide. 10/07/02 jhrg
Packit a4aae4
Packit a4aae4
    @param hash The hash code for \c url.
Packit a4aae4
    @param url Look for this URL.
Packit a4aae4
    @return The matching CacheEntry instance or NULL if none was found. */
Packit a4aae4
HTTPCacheTable::CacheEntry *
Packit a4aae4
HTTPCacheTable::get_locked_entry_from_cache_table(int hash, const string &url) /*const*/
Packit a4aae4
{
Packit a4aae4
    DBG(cerr << "url: " << url << "; hash: " << hash << endl);
Packit a4aae4
    DBG(cerr << "d_cache_table: " << hex << d_cache_table << dec << endl);
Packit a4aae4
    if (d_cache_table[hash]) {
Packit a4aae4
	CacheEntries *cp = d_cache_table[hash];
Packit a4aae4
	for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
Packit a4aae4
	    // Must test *i because perform_garbage_collection may have
Packit a4aae4
	    // removed this entry; the CacheEntry will then be null.
Packit a4aae4
	    if ((*i) && (*i)->url == url) {
Packit a4aae4
		(*i)->lock_read_response(); // Lock the response
Packit a4aae4
		return *i;
Packit a4aae4
	    }
Packit a4aae4
	}
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    return 0;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Get a pointer to a CacheEntry from the cache table. Providing a way to
Packit a4aae4
    pass the hash code into this method makes it easier to test for correct
Packit a4aae4
    behavior when two entries collide. 10/07/02 jhrg
Packit a4aae4
Packit a4aae4
    @param url Look for this URL.
Packit a4aae4
    @return The matching CacheEntry instance or NULL if none was found. */
Packit a4aae4
HTTPCacheTable::CacheEntry *
Packit a4aae4
HTTPCacheTable::get_write_locked_entry_from_cache_table(const string &url)
Packit a4aae4
{
Packit a4aae4
	int hash = get_hash(url);
Packit a4aae4
    if (d_cache_table[hash]) {
Packit a4aae4
        CacheEntries *cp = d_cache_table[hash];
Packit a4aae4
        for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
Packit a4aae4
            // Must test *i because perform_garbage_collection may have
Packit a4aae4
            // removed this entry; the CacheEntry will then be null.
Packit a4aae4
            if ((*i) && (*i)->url == url) {
Packit a4aae4
            	(*i)->lock_write_response();	// Lock the response
Packit a4aae4
            	return *i;
Packit a4aae4
            }
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    return 0;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Remove a CacheEntry. This means delete the entry's files on disk and free
Packit a4aae4
    the CacheEntry object. The caller should null the entry's pointer in the
Packit a4aae4
    cache_table. The total size of the cache is decremented once the entry is
Packit a4aae4
    deleted.
Packit a4aae4
Packit a4aae4
    @param entry The CacheEntry to delete.
Packit a4aae4
    @exception InternalErr Thrown if \c entry is in use. */
Packit a4aae4
void
Packit a4aae4
HTTPCacheTable::remove_cache_entry(HTTPCacheTable::CacheEntry *entry)
Packit a4aae4
{
Packit a4aae4
    // This should never happen; all calls to this method are protected by
Packit a4aae4
    // the caller, hence the InternalErr.
Packit a4aae4
    if (entry->readers)
Packit a4aae4
        throw InternalErr(__FILE__, __LINE__, "Tried to delete a cache entry that is in use.");
Packit a4aae4
Packit a4aae4
    REMOVE(entry->cachename.c_str());
Packit a4aae4
    REMOVE(string(entry->cachename + CACHE_META).c_str());
Packit a4aae4
Packit a4aae4
    DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
Packit a4aae4
Packit a4aae4
    unsigned int eds = entry_disk_space(entry->size, get_block_size());
Packit a4aae4
    set_current_size((eds > get_current_size()) ? 0 : get_current_size() - eds);
Packit a4aae4
    
Packit a4aae4
    DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Functor which deletes and nulls a CacheEntry if the given entry matches
Packit a4aae4
    the url. */
Packit a4aae4
class DeleteCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void>
Packit a4aae4
{
Packit a4aae4
    string d_url;
Packit a4aae4
    HTTPCacheTable *d_cache_table;
Packit a4aae4
Packit a4aae4
public:
Packit a4aae4
    DeleteCacheEntry(HTTPCacheTable *c, const string &url)
Packit a4aae4
            : d_url(url), d_cache_table(c)
Packit a4aae4
    {}
Packit a4aae4
Packit a4aae4
    void operator()(HTTPCacheTable::CacheEntry *&e)
Packit a4aae4
    {
Packit a4aae4
        if (e && e->url == d_url) {
Packit a4aae4
        	e->lock_write_response();
Packit a4aae4
            d_cache_table->remove_cache_entry(e);
Packit a4aae4
        	e->unlock_write_response();
Packit a4aae4
            delete e; e = 0;
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
/** Find the CacheEntry for the given url and remove both its information in
Packit a4aae4
    the persistent store and the entry in d_cache_table. If \c url is not in
Packit a4aae4
    the cache, this method does nothing.
Packit a4aae4
Packit a4aae4
    @param url Remove this URL's entry.
Packit a4aae4
    @exception InternalErr Thrown if the CacheEntry for \c url is locked. */
Packit a4aae4
void
Packit a4aae4
HTTPCacheTable::remove_entry_from_cache_table(const string &url)
Packit a4aae4
{
Packit a4aae4
    int hash = get_hash(url);
Packit a4aae4
    if (d_cache_table[hash]) {
Packit a4aae4
        CacheEntries *cp = d_cache_table[hash];
Packit a4aae4
        for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
Packit a4aae4
        cp->erase(remove(cp->begin(), cp->end(), static_cast<HTTPCacheTable::CacheEntry*>(0)),
Packit a4aae4
                  cp->end());
Packit a4aae4
    }
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Functor to delete and null all unlocked HTTPCacheTable::CacheEntry objects. */
Packit a4aae4
Packit a4aae4
class DeleteUnlockedCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void> {
Packit a4aae4
    HTTPCacheTable &d_table;
Packit a4aae4
Packit a4aae4
public:
Packit a4aae4
    DeleteUnlockedCacheEntry(HTTPCacheTable &t) :
Packit a4aae4
	d_table(t)
Packit a4aae4
    {
Packit a4aae4
    }
Packit a4aae4
    void operator()(HTTPCacheTable::CacheEntry *&e)
Packit a4aae4
    {
Packit a4aae4
	if (e) {
Packit a4aae4
	    d_table.remove_cache_entry(e);
Packit a4aae4
	    delete e;
Packit a4aae4
	    e = 0;
Packit a4aae4
	}
Packit a4aae4
    }
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
void HTTPCacheTable::delete_all_entries()
Packit a4aae4
{
Packit a4aae4
    // Walk through the cache table and, for every entry in the cache, delete
Packit a4aae4
    // it on disk and in the cache table.
Packit a4aae4
    for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
Packit a4aae4
	HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
Packit a4aae4
	if (slot) {
Packit a4aae4
	    for_each(slot->begin(), slot->end(), DeleteUnlockedCacheEntry(*this));
Packit a4aae4
	    slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *> (0)), slot->end());
Packit a4aae4
	}
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    cache_index_delete();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Calculate the corrected_initial_age of the object. We use the time when
Packit a4aae4
    this function is called as the response_time as this is when we have
Packit a4aae4
    received the complete response. This may cause a delay if the response
Packit a4aae4
    header is very big but should not cause any incorrect behavior.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param entry The CacheEntry object.
Packit a4aae4
    @param default_expiration The default value of the cached object's
Packit a4aae4
    expiration time.
Packit a4aae4
    @param request_time When was the request made? I think this value must be
Packit a4aae4
    passed into the method that calls this method... */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPCacheTable::calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
Packit a4aae4
{
Packit a4aae4
    entry->response_time = time(NULL);
Packit a4aae4
    time_t apparent_age = max(0, static_cast<int>(entry->response_time - entry->date));
Packit a4aae4
    time_t corrected_received_age = max(apparent_age, entry->age);
Packit a4aae4
    time_t response_delay = entry->response_time - request_time;
Packit a4aae4
    entry->corrected_initial_age = corrected_received_age + response_delay;
Packit a4aae4
Packit a4aae4
    // Estimate an expires time using the max-age and expires time. If we
Packit a4aae4
    // don't have an explicit expires time then set it to 10% of the LM date
Packit a4aae4
    // (although max 24 h). If no LM date is available then use 24 hours.
Packit a4aae4
    time_t freshness_lifetime = entry->max_age;
Packit a4aae4
    if (freshness_lifetime < 0) {
Packit a4aae4
        if (entry->expires < 0) {
Packit a4aae4
            if (entry->lm < 0) {
Packit a4aae4
                freshness_lifetime = default_expiration;
Packit a4aae4
            }
Packit a4aae4
            else {
Packit a4aae4
                freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
Packit a4aae4
            }
Packit a4aae4
        }
Packit a4aae4
        else
Packit a4aae4
            freshness_lifetime = entry->expires - entry->date;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
Packit a4aae4
Packit a4aae4
    DBG2(cerr << "Cache....... Received Age " << entry->age
Packit a4aae4
         << ", corrected " << entry->corrected_initial_age
Packit a4aae4
         << ", freshness lifetime " << entry->freshness_lifetime << endl);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Parse various headers from the vector (which can be retrieved from
Packit a4aae4
    libcurl once a response is received) and load the CacheEntry object with
Packit a4aae4
    values. This method should only be called with headers from a response
Packit a4aae4
    (it should not be used to parse request headers).
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param entry Store values from the headers here.
Packit a4aae4
    @param max_entry_size DO not cache entries larger than this.
Packit a4aae4
    @param headers A vector of header lines. */
Packit a4aae4
Packit a4aae4
void HTTPCacheTable::parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size,
Packit a4aae4
	const vector<string> &headers)
Packit a4aae4
{
Packit a4aae4
    vector<string>::const_iterator i;
Packit a4aae4
    for (i = headers.begin(); i != headers.end(); ++i) {
Packit a4aae4
	// skip a blank header.
Packit a4aae4
	if ((*i).empty())
Packit a4aae4
	    continue;
Packit a4aae4
Packit a4aae4
	string::size_type colon = (*i).find(':');
Packit a4aae4
Packit a4aae4
	// skip a header with no colon in it.
Packit a4aae4
	if (colon == string::npos)
Packit a4aae4
	    continue;
Packit a4aae4
Packit a4aae4
	string header = (*i).substr(0, (*i).find(':'));
Packit a4aae4
	string value = (*i).substr((*i).find(": ") + 2);
Packit a4aae4
	DBG2(cerr << "Header: " << header << endl);DBG2(cerr << "Value: " << value << endl);
Packit a4aae4
Packit a4aae4
	if (header == "ETag") {
Packit a4aae4
	    entry->etag = value;
Packit a4aae4
	}
Packit a4aae4
	else if (header == "Last-Modified") {
Packit a4aae4
	    entry->lm = parse_time(value.c_str());
Packit a4aae4
	}
Packit a4aae4
	else if (header == "Expires") {
Packit a4aae4
	    entry->expires = parse_time(value.c_str());
Packit a4aae4
	}
Packit a4aae4
	else if (header == "Date") {
Packit a4aae4
	    entry->date = parse_time(value.c_str());
Packit a4aae4
	}
Packit a4aae4
	else if (header == "Age") {
Packit a4aae4
	    entry->age = parse_time(value.c_str());
Packit a4aae4
	}
Packit a4aae4
	else if (header == "Content-Length") {
Packit a4aae4
	    unsigned long clength = strtoul(value.c_str(), 0, 0);
Packit a4aae4
	    if (clength > max_entry_size)
Packit a4aae4
		entry->set_no_cache(true);
Packit a4aae4
	}
Packit a4aae4
	else if (header == "Cache-Control") {
Packit a4aae4
	    // Ignored Cache-Control values: public, private, no-transform,
Packit a4aae4
	    // proxy-revalidate, s-max-age. These are used by shared caches.
Packit a4aae4
	    // See section 14.9 of RFC 2612. 10/02/02 jhrg
Packit a4aae4
	    if (value == "no-cache" || value == "no-store")
Packit a4aae4
		// Note that we *can* store a 'no-store' response in volatile
Packit a4aae4
		// memory according to RFC 2616 (section 14.9.2) but those
Packit a4aae4
		// will be rare coming from DAP servers. 10/02/02 jhrg
Packit a4aae4
		entry->set_no_cache(true);
Packit a4aae4
	    else if (value == "must-revalidate")
Packit a4aae4
		entry->must_revalidate = true;
Packit a4aae4
	    else if (value.find("max-age") != string::npos) {
Packit a4aae4
		string max_age = value.substr(value.find("=" + 1));
Packit a4aae4
		entry->max_age = parse_time(max_age.c_str());
Packit a4aae4
	    }
Packit a4aae4
	}
Packit a4aae4
    }
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
//@} End of the CacheEntry methods.
Packit a4aae4
Packit a4aae4
// @TODO Change name to record locked response
Packit a4aae4
void HTTPCacheTable::bind_entry_to_data(HTTPCacheTable::CacheEntry *entry, FILE *body) {
Packit a4aae4
	entry->hits++;  // Mark hit
Packit a4aae4
    d_locked_entries[body] = entry; // record lock, see release_cached_r...
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
void HTTPCacheTable::uncouple_entry_from_data(FILE *body) {
Packit a4aae4
Packit a4aae4
    HTTPCacheTable::CacheEntry *entry = d_locked_entries[body];
Packit a4aae4
    if (!entry)
Packit a4aae4
        throw InternalErr("There is no cache entry for the response given.");
Packit a4aae4
Packit a4aae4
    d_locked_entries.erase(body);
Packit a4aae4
    entry->unlock_read_response();
Packit a4aae4
Packit a4aae4
    if (entry->readers < 0)
Packit a4aae4
        throw InternalErr("An unlocked entry was released");
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
bool HTTPCacheTable::is_locked_read_responses() {
Packit a4aae4
	return !d_locked_entries.empty();
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
} // namespace libdap