Blame HTTPCache.h

Packit a4aae4
Packit a4aae4
// -*- mode: c++; c-basic-offset:4 -*-
Packit a4aae4
Packit a4aae4
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
Packit a4aae4
// Access Protocol.
Packit a4aae4
Packit a4aae4
// Copyright (c) 2002,2008 OPeNDAP, Inc.
Packit a4aae4
// Author: James Gallagher <jgallagher@opendap.org>
Packit a4aae4
//
Packit a4aae4
// This library is free software; you can redistribute it and/or
Packit a4aae4
// modify it under the terms of the GNU Lesser General Public
Packit a4aae4
// License as published by the Free Software Foundation; either
Packit a4aae4
// version 2.1 of the License, or (at your option) any later version.
Packit a4aae4
//
Packit a4aae4
// This library is distributed in the hope that it will be useful,
Packit a4aae4
// but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit a4aae4
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit a4aae4
// Lesser General Public License for more details.
Packit a4aae4
//
Packit a4aae4
// You should have received a copy of the GNU Lesser General Public
Packit a4aae4
// License along with this library; if not, write to the Free Software
Packit a4aae4
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
Packit a4aae4
//
Packit a4aae4
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
Packit a4aae4
Packit a4aae4
#ifndef _http_cache_h
Packit a4aae4
#define _http_cache_h
Packit a4aae4
Packit a4aae4
#include <pthread.h>
Packit a4aae4
Packit a4aae4
#ifdef WIN32
Packit a4aae4
#include <io.h>   // stat for win32? 09/05/02 jhrg
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
#include <string>
Packit a4aae4
#include <vector>
Packit a4aae4
#include <map>
Packit a4aae4
Packit a4aae4
#include "HTTPCacheTable.h" // included for macros
Packit a4aae4
Packit a4aae4
#include "HTTPCacheDisconnectedMode.h"
Packit a4aae4
//using namespace std;
Packit a4aae4
Packit a4aae4
namespace libdap
Packit a4aae4
{
Packit a4aae4
Packit a4aae4
// Forward declaration of the table type that HTTPCache stores a pointer to.
class HTTPCacheTable;
Packit a4aae4
Packit a4aae4
// This function is exported so the test code can use it too.
Packit a4aae4
// Spelled std::string so the declaration does not depend on a
// using-directive leaking in from another header.
bool is_hop_by_hop_header(const std::string &header);
Packit a4aae4
Packit a4aae4
/** Implements a multi-process MT-safe HTTP 1.1 compliant (mostly) cache.
Packit a4aae4
Packit a4aae4
    Clients that run as users lacking a writable HOME directory MUST
Packit a4aae4
    disable this cache. Use Connect::set_cache_enable(false).
Packit a4aae4
Packit a4aae4
    The original design of this class was taken from the W3C libwww software, 
Packit a4aae4
    written by Henrik Frystyk Nielsen, Copyright MIT
Packit a4aae4
    1995. See the file MIT_COPYRIGHT. This software is a complete rewrite in
Packit a4aae4
    C++ with additional features useful to the DODS and OPeNDAP projects.
Packit a4aae4
Packit a4aae4
    This cache does not implement range checking. Partial responses should
Packit a4aae4
    not be cached (HFN's version did, but it doesn't mesh well with the DAP
Packit a4aae4
    for which this is being written).
Packit a4aae4
Packit a4aae4
    The cache uses the local file system to store responses. If it is being
Packit a4aae4
    used in a MT application, care should be taken to ensure that the number
Packit a4aae4
    of available file descriptors is not exceeded.
Packit a4aae4
Packit a4aae4
    In addition, when used in a MT program only one thread should use the
Packit a4aae4
    mutators to set property values. Even though the methods are robust WRT
Packit a4aae4
    MT software, having several threads change values of cache's properties
Packit a4aae4
    will lead to odd behavior on the part of the cache. Many of the public
Packit a4aae4
    methods lock access to the class' interface. This is noted in the
Packit a4aae4
    documentation for those methods.
Packit a4aae4
Packit a4aae4
    Even though the public interface to the cache is typically locked when
Packit a4aae4
    accessed, an extra locking mechanism is in place for `entries' which are
Packit a4aae4
    accessed. If a thread accesses a entry, that response must be locked to
Packit a4aae4
    prevent it from being updated until the thread tells the cache that it's
Packit a4aae4
    no longer using it. The method get_cache_response() and
Packit a4aae4
    get_cache_response_body() both lock an entry; use
Packit a4aae4
    release_cache_response() to release the lock. Entries are locked using a
Packit a4aae4
    combination of a counter and a mutex. The following methods block when
Packit a4aae4
    called on a locked entry: is_url_valid(),
Packit a4aae4
    get_conditional_request_headers(), update_response(). (The locking scheme
Packit a4aae4
    could be modified so that a distinction is made between reading from and
Packit a4aae4
    writing to an entry. In this case is_url_valid() and
Packit a4aae4
    get_conditional_request_headers() would only lock when an entry is in use
Packit a4aae4
    for writing. But I haven't done that.)
Packit a4aae4
Packit a4aae4
	@todo Update documentation: get_cache_response() now also serves as 
Packit a4aae4
	is_url_in_cache() and is_url_valid() should only be called after a locked
Packit a4aae4
	cached response is accessed using get_cahced_response(). These lock the
Packit a4aae4
	cache for reading. The methods cache_response() and update_response()
Packit a4aae4
	lock an entry for writing.
Packit a4aae4
	
Packit a4aae4
	@todo Check that the lock-for-write and lock-for-read work together since
Packit a4aae4
	it's possible that an entry in use might have a stream of readers and never
Packit a4aae4
	free the 'read-lock' thus blocking a writer.
Packit a4aae4
	
Packit a4aae4
    @author James Gallagher <jgallagher@opendap.org> */
Packit a4aae4
class HTTPCache
Packit a4aae4
{
Packit a4aae4
private:
Packit a4aae4
    string d_cache_root;
Packit a4aae4
    FILE *d_locked_open_file; // Lock for single process use.
Packit a4aae4
Packit a4aae4
    bool d_cache_enabled;
Packit a4aae4
    bool d_cache_protected;
Packit a4aae4
    CacheDisconnectedMode d_cache_disconnected;
Packit a4aae4
    bool d_expire_ignored;
Packit a4aae4
    bool d_always_validate;
Packit a4aae4
Packit a4aae4
    unsigned long d_total_size; // How much can we store?
Packit a4aae4
    unsigned long d_folder_size; // How much of that is meta data?
Packit a4aae4
    unsigned long d_gc_buffer; // How much memory needed as buffer?
Packit a4aae4
    unsigned long d_max_entry_size; // Max individual entry size.
Packit a4aae4
    int d_default_expiration;
Packit a4aae4
Packit a4aae4
    vector<string> d_cache_control;
Packit a4aae4
    // these are values read from a request-directive Cache-Control header.
Packit a4aae4
    // Not to be confused with values read from the response or a cached
Packit a4aae4
    // response (e.g., CacheEntry has a max_age field, too). These fields are
Packit a4aae4
    // set when the set_cache_control method is called.
Packit a4aae4
    time_t d_max_age;
Packit a4aae4
    time_t d_max_stale;  // -1: not set, 0:any response, >0 max time.
Packit a4aae4
    time_t d_min_fresh;
Packit a4aae4
Packit a4aae4
    // Lock non-const methods (also ones that use the STL).
Packit a4aae4
    pthread_mutex_t d_cache_mutex;
Packit a4aae4
    
Packit a4aae4
    HTTPCacheTable *d_http_cache_table;
Packit a4aae4
Packit a4aae4
    // d_open_files is used by the interrupt handler to clean up
Packit a4aae4
    vector<string> d_open_files;
Packit a4aae4
Packit a4aae4
    static HTTPCache *_instance;
Packit a4aae4
Packit a4aae4
    friend class HTTPCacheTest; // Unit tests
Packit a4aae4
    friend class HTTPConnectTest;
Packit a4aae4
Packit a4aae4
    friend class HTTPCacheInterruptHandler;
Packit a4aae4
Packit a4aae4
    // Private methods
Packit a4aae4
    HTTPCache(const HTTPCache &);
Packit a4aae4
    HTTPCache();
Packit a4aae4
    HTTPCache &operator=(const HTTPCache &);
Packit a4aae4
Packit a4aae4
    HTTPCache(string cache_root, bool force);
Packit a4aae4
Packit a4aae4
    static void delete_instance(); // Run by atexit (hence static)
Packit a4aae4
    
Packit a4aae4
    void set_cache_root(const string &root = "");
Packit a4aae4
    void create_cache_root(const string &cache_root);
Packit a4aae4
    
Packit a4aae4
    // These will go away when the cache can be used by multiple processes.
Packit a4aae4
    bool get_single_user_lock(bool force = false);
Packit a4aae4
    void release_single_user_lock();
Packit a4aae4
    
Packit a4aae4
    bool is_url_in_cache(const string &url;;
Packit a4aae4
Packit a4aae4
    // I made these four methods so they could be tested by HTTPCacheTest.
Packit a4aae4
    // Otherwise they would be static functions. jhrg 10/01/02
Packit a4aae4
    void write_metadata(const string &cachename, const vector<string> &headers);
Packit a4aae4
    void read_metadata(const string &cachename, vector<string> &headers);
Packit a4aae4
    int write_body(const string &cachename, const FILE *src);
Packit a4aae4
    FILE *open_body(const string &cachename);
Packit a4aae4
Packit a4aae4
    bool stopGC() const;
Packit a4aae4
    bool startGC() const;
Packit a4aae4
Packit a4aae4
    void perform_garbage_collection();
Packit a4aae4
    void too_big_gc();
Packit a4aae4
    void expired_gc();
Packit a4aae4
    void hits_gc();
Packit a4aae4
Packit a4aae4
public:
Packit a4aae4
    static HTTPCache *instance(const string &cache_root, bool force = false);
Packit a4aae4
    virtual ~HTTPCache();
Packit a4aae4
Packit a4aae4
    string get_cache_root() const;
Packit a4aae4
Packit a4aae4
    void set_cache_enabled(bool mode);
Packit a4aae4
    bool is_cache_enabled() const;
Packit a4aae4
Packit a4aae4
    void set_cache_disconnected(CacheDisconnectedMode mode);
Packit a4aae4
    CacheDisconnectedMode get_cache_disconnected() const;
Packit a4aae4
Packit a4aae4
    void set_expire_ignored(bool mode);
Packit a4aae4
    bool is_expire_ignored() const;
Packit a4aae4
Packit a4aae4
    void set_max_size(unsigned long size);
Packit a4aae4
    unsigned long get_max_size() const;
Packit a4aae4
Packit a4aae4
    void set_max_entry_size(unsigned long size);
Packit a4aae4
    unsigned long get_max_entry_size() const;
Packit a4aae4
Packit a4aae4
    void set_default_expiration(int exp_time);
Packit a4aae4
    int get_default_expiration() const;
Packit a4aae4
Packit a4aae4
    void set_always_validate(bool validate);
Packit a4aae4
    bool get_always_validate() const;
Packit a4aae4
Packit a4aae4
    void set_cache_control(const vector<string> &cc);
Packit a4aae4
    vector<string> get_cache_control();
Packit a4aae4
Packit a4aae4
    void lock_cache_interface() {
Packit a4aae4
    	DBG(cerr << "Locking interface... ");
Packit a4aae4
    	LOCK(&d_cache_mutex);
Packit a4aae4
    	DBGN(cerr << "Done" << endl);
Packit a4aae4
    }    	
Packit a4aae4
    void unlock_cache_interface() {
Packit a4aae4
    	DBG(cerr << "Unlocking interface... " );
Packit a4aae4
    	UNLOCK(&d_cache_mutex);
Packit a4aae4
    	DBGN(cerr << "Done" << endl);
Packit a4aae4
    }
Packit a4aae4
    
Packit a4aae4
    // This must lock for writing
Packit a4aae4
    bool cache_response(const string &url, time_t request_time,
Packit a4aae4
                        const vector<string> &headers, const FILE *body);
Packit a4aae4
    void update_response(const string &url, time_t request_time,
Packit a4aae4
                         const vector<string> &headers);
Packit a4aae4
Packit a4aae4
    // This is separate from get_cached_response() because often an invalid
Packit a4aae4
    // cache entry just needs a header update. That is best left to the HTTP
Packit a4aae4
    // Connection code.
Packit a4aae4
    bool is_url_valid(const string &url;;
Packit a4aae4
    
Packit a4aae4
    // Lock these for reading
Packit a4aae4
    vector<string> get_conditional_request_headers(const string &url;;
Packit a4aae4
    FILE *get_cached_response(const string &url, vector<string> &headers,
Packit a4aae4
			      			  string &cacheName);
Packit a4aae4
    FILE *get_cached_response(const string &url, vector<string> &headers);
Packit a4aae4
    FILE *get_cached_response(const string &url;;
Packit a4aae4
Packit a4aae4
    void release_cached_response(FILE *response);
Packit a4aae4
Packit a4aae4
    void purge_cache();
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
} // namespace libdap
Packit a4aae4
Packit a4aae4
#endif // _http_cache_h