|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// -*- mode: c++; c-basic-offset:4 -*-
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
|
|
Packit |
a4aae4 |
// Access Protocol.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Copyright (c) 2002,2008 OPeNDAP, Inc.
|
|
Packit |
a4aae4 |
// Author: James Gallagher <jgallagher@opendap.org>
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// This library is free software; you can redistribute it and/or
|
|
Packit |
a4aae4 |
// modify it under the terms of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
// License as published by the Free Software Foundation; either
|
|
Packit |
a4aae4 |
// version 2.1 of the License, or (at your option) any later version.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// This library is distributed in the hope that it will be useful,
|
|
Packit |
a4aae4 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
a4aae4 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit |
a4aae4 |
// Lesser General Public License for more details.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// You should have received a copy of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
// License along with this library; if not, write to the Free Software
|
|
Packit |
a4aae4 |
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#ifndef _http_cache_h
|
|
Packit |
a4aae4 |
#define _http_cache_h
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <pthread.h>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#ifdef WIN32
|
|
Packit |
a4aae4 |
#include <io.h> // stat for win32? 09/05/02 jhrg
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <string>
|
|
Packit |
a4aae4 |
#include <vector>
|
|
Packit |
a4aae4 |
#include <map>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include "HTTPCacheTable.h" // included for macros
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include "HTTPCacheDisconnectedMode.h"
|
|
Packit |
a4aae4 |
//using namespace std;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
namespace libdap
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Forward declaration of the table type that HTTPCache refers to through
// its d_http_cache_table pointer member.
class HTTPCacheTable;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// This function is exported so the test code can use it too.
// NOTE(review): presumably reports whether `header` is an HTTP hop-by-hop
// header; confirm against the definition, which is not in this file.
bool is_hop_by_hop_header(const std::string &header);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Implements a multi-process MT-safe HTTP 1.1 compliant (mostly) cache.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Clients that run as users lacking a writable HOME directory MUST
|
|
Packit |
a4aae4 |
disable this cache. Use Connect::set_cache_enable(false).
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
The original design of this class was taken from the W3C libwww software,
|
|
Packit |
a4aae4 |
written by Henrik Frystyk Nielsen, Copyright MIT
|
|
Packit |
a4aae4 |
1995. See the file MIT_COPYRIGHT. This software is a complete rewrite in
|
|
Packit |
a4aae4 |
C++ with additional features useful to the DODS and OPeNDAP projects.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This cache does not implement range checking. Partial responses should
|
|
Packit |
a4aae4 |
not be cached (HFN's version did, but it doesn't mesh well with the DAP
|
|
Packit |
a4aae4 |
for which this is being written).
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
The cache uses the local file system to store responses. If it is being
|
|
Packit |
a4aae4 |
used in a MT application, care should be taken to ensure that the number
|
|
Packit |
a4aae4 |
of available file descriptors is not exceeded.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
In addition, when used in a MT program only one thread should use the
|
|
Packit |
a4aae4 |
mutators to set property values. Even though the methods are robust WRT
|
|
Packit |
a4aae4 |
MT software, having several threads change values of cache's properties
|
|
Packit |
a4aae4 |
will lead to odd behavior on the part of the cache. Many of the public
|
|
Packit |
a4aae4 |
methods lock access to the class' interface. This is noted in the
|
|
Packit |
a4aae4 |
documentation for those methods.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Even though the public interface to the cache is typically locked when
|
|
Packit |
a4aae4 |
accessed, an extra locking mechanism is in place for `entries' which are
|
|
Packit |
a4aae4 |
accessed. If a thread accesses an entry, that response must be locked to
|
|
Packit |
a4aae4 |
prevent it from being updated until the thread tells the cache that it's
|
|
Packit |
a4aae4 |
no longer using it. The methods get_cached_response() and
|
|
Packit |
a4aae4 |
get_cache_response_body() both lock an entry; use
|
|
Packit |
a4aae4 |
release_cached_response() to release the lock. Entries are locked using a
|
|
Packit |
a4aae4 |
combination of a counter and a mutex. The following methods block when
|
|
Packit |
a4aae4 |
called on a locked entry: is_url_valid(),
|
|
Packit |
a4aae4 |
get_conditional_request_headers(), update_response(). (The locking scheme
|
|
Packit |
a4aae4 |
could be modified so that a distinction is made between reading from and
|
|
Packit |
a4aae4 |
writing to an entry. In this case is_url_valid() and
|
|
Packit |
a4aae4 |
get_conditional_request_headers() would only lock when an entry is in use
|
|
Packit |
a4aae4 |
for writing. But I haven't done that.)
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@todo Update documentation: get_cached_response() now also serves as
|
|
Packit |
a4aae4 |
is_url_in_cache() and is_url_valid() should only be called after a locked
|
|
Packit |
a4aae4 |
cached response is accessed using get_cached_response(). These lock the
|
|
Packit |
a4aae4 |
cache for reading. The methods cache_response() and update_response()
|
|
Packit |
a4aae4 |
lock an entry for writing.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@todo Check that the lock-for-write and lock-for-read work together since
|
|
Packit |
a4aae4 |
it's possible that an entry in use might have a stream of readers and never
|
|
Packit |
a4aae4 |
free the 'read-lock' thus blocking a writer.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@author James Gallagher <jgallagher@opendap.org> */
|
|
Packit |
a4aae4 |
class HTTPCache
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
private:
|
|
Packit |
a4aae4 |
string d_cache_root;
|
|
Packit |
a4aae4 |
FILE *d_locked_open_file; // Lock for single process use.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool d_cache_enabled;
|
|
Packit |
a4aae4 |
bool d_cache_protected;
|
|
Packit |
a4aae4 |
CacheDisconnectedMode d_cache_disconnected;
|
|
Packit |
a4aae4 |
bool d_expire_ignored;
|
|
Packit |
a4aae4 |
bool d_always_validate;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
unsigned long d_total_size; // How much can we store?
|
|
Packit |
a4aae4 |
unsigned long d_folder_size; // How much of that is meta data?
|
|
Packit |
a4aae4 |
unsigned long d_gc_buffer; // How much memory needed as buffer?
|
|
Packit |
a4aae4 |
unsigned long d_max_entry_size; // Max individual entry size.
|
|
Packit |
a4aae4 |
int d_default_expiration;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
vector<string> d_cache_control;
|
|
Packit |
a4aae4 |
// these are values read from a request-directive Cache-Control header.
|
|
Packit |
a4aae4 |
// Not to be confused with values read from the response or a cached
|
|
Packit |
a4aae4 |
// response (e.g., CacheEntry has a max_age field, too). These fields are
|
|
Packit |
a4aae4 |
// set when the set_cache_control method is called.
|
|
Packit |
a4aae4 |
time_t d_max_age;
|
|
Packit |
a4aae4 |
time_t d_max_stale; // -1: not set, 0:any response, >0 max time.
|
|
Packit |
a4aae4 |
time_t d_min_fresh;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Lock non-const methods (also ones that use the STL).
|
|
Packit |
a4aae4 |
pthread_mutex_t d_cache_mutex;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
HTTPCacheTable *d_http_cache_table;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// d_open_files is used by the interrupt handler to clean up
|
|
Packit |
a4aae4 |
vector<string> d_open_files;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
static HTTPCache *_instance;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
friend class HTTPCacheTest; // Unit tests
|
|
Packit |
a4aae4 |
friend class HTTPConnectTest;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
friend class HTTPCacheInterruptHandler;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Private methods
|
|
Packit |
a4aae4 |
HTTPCache(const HTTPCache &);
|
|
Packit |
a4aae4 |
HTTPCache();
|
|
Packit |
a4aae4 |
HTTPCache &operator=(const HTTPCache &);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
HTTPCache(string cache_root, bool force);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
static void delete_instance(); // Run by atexit (hence static)
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void set_cache_root(const string &root = "");
|
|
Packit |
a4aae4 |
void create_cache_root(const string &cache_root);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// These will go away when the cache can be used by multiple processes.
|
|
Packit |
a4aae4 |
bool get_single_user_lock(bool force = false);
|
|
Packit |
a4aae4 |
void release_single_user_lock();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool is_url_in_cache(const string &url;;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// I made these four methods so they could be tested by HTTPCacheTest.
|
|
Packit |
a4aae4 |
// Otherwise they would be static functions. jhrg 10/01/02
|
|
Packit |
a4aae4 |
void write_metadata(const string &cachename, const vector<string> &headers);
|
|
Packit |
a4aae4 |
void read_metadata(const string &cachename, vector<string> &headers);
|
|
Packit |
a4aae4 |
int write_body(const string &cachename, const FILE *src);
|
|
Packit |
a4aae4 |
FILE *open_body(const string &cachename);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool stopGC() const;
|
|
Packit |
a4aae4 |
bool startGC() const;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void perform_garbage_collection();
|
|
Packit |
a4aae4 |
void too_big_gc();
|
|
Packit |
a4aae4 |
void expired_gc();
|
|
Packit |
a4aae4 |
void hits_gc();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
public:
|
|
Packit |
a4aae4 |
static HTTPCache *instance(const string &cache_root, bool force = false);
|
|
Packit |
a4aae4 |
virtual ~HTTPCache();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
string get_cache_root() const;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void set_cache_enabled(bool mode);
|
|
Packit |
a4aae4 |
bool is_cache_enabled() const;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void set_cache_disconnected(CacheDisconnectedMode mode);
|
|
Packit |
a4aae4 |
CacheDisconnectedMode get_cache_disconnected() const;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void set_expire_ignored(bool mode);
|
|
Packit |
a4aae4 |
bool is_expire_ignored() const;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void set_max_size(unsigned long size);
|
|
Packit |
a4aae4 |
unsigned long get_max_size() const;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void set_max_entry_size(unsigned long size);
|
|
Packit |
a4aae4 |
unsigned long get_max_entry_size() const;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void set_default_expiration(int exp_time);
|
|
Packit |
a4aae4 |
int get_default_expiration() const;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void set_always_validate(bool validate);
|
|
Packit |
a4aae4 |
bool get_always_validate() const;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void set_cache_control(const vector<string> &cc);
|
|
Packit |
a4aae4 |
vector<string> get_cache_control();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void lock_cache_interface() {
|
|
Packit |
a4aae4 |
DBG(cerr << "Locking interface... ");
|
|
Packit |
a4aae4 |
LOCK(&d_cache_mutex);
|
|
Packit |
a4aae4 |
DBGN(cerr << "Done" << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
void unlock_cache_interface() {
|
|
Packit |
a4aae4 |
DBG(cerr << "Unlocking interface... " );
|
|
Packit |
a4aae4 |
UNLOCK(&d_cache_mutex);
|
|
Packit |
a4aae4 |
DBGN(cerr << "Done" << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// This must lock for writing
|
|
Packit |
a4aae4 |
bool cache_response(const string &url, time_t request_time,
|
|
Packit |
a4aae4 |
const vector<string> &headers, const FILE *body);
|
|
Packit |
a4aae4 |
void update_response(const string &url, time_t request_time,
|
|
Packit |
a4aae4 |
const vector<string> &headers);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// This is separate from get_cached_response() because often an invalid
|
|
Packit |
a4aae4 |
// cache entry just needs a header update. That is best left to the HTTP
|
|
Packit |
a4aae4 |
// Connection code.
|
|
Packit |
a4aae4 |
bool is_url_valid(const string &url;;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Lock these for reading
|
|
Packit |
a4aae4 |
vector<string> get_conditional_request_headers(const string &url;;
|
|
Packit |
a4aae4 |
FILE *get_cached_response(const string &url, vector<string> &headers,
|
|
Packit |
a4aae4 |
string &cacheName);
|
|
Packit |
a4aae4 |
FILE *get_cached_response(const string &url, vector<string> &headers);
|
|
Packit |
a4aae4 |
FILE *get_cached_response(const string &url;;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void release_cached_response(FILE *response);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void purge_cache();
|
|
Packit |
a4aae4 |
};
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
} // namespace libdap
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#endif // _http_cache_h
|