Blame HTTPConnect.cc

Packit a4aae4
Packit a4aae4
// -*- mode: c++; c-basic-offset:4 -*-
Packit a4aae4
Packit a4aae4
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
Packit a4aae4
// Access Protocol.
Packit a4aae4
Packit a4aae4
// Copyright (c) 2002,2003 OPeNDAP, Inc.
Packit a4aae4
// Author: James Gallagher <jgallagher@opendap.org>
Packit a4aae4
//
Packit a4aae4
// This library is free software; you can redistribute it and/or
Packit a4aae4
// modify it under the terms of the GNU Lesser General Public
Packit a4aae4
// License as published by the Free Software Foundation; either
Packit a4aae4
// version 2.1 of the License, or (at your option) any later version.
Packit a4aae4
//
Packit a4aae4
// This library is distributed in the hope that it will be useful,
Packit a4aae4
// but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit a4aae4
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit a4aae4
// Lesser General Public License for more details.
Packit a4aae4
//
Packit a4aae4
// You should have received a copy of the GNU Lesser General Public
Packit a4aae4
// License along with this library; if not, write to the Free Software
Packit a4aae4
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
Packit a4aae4
//
Packit a4aae4
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
Packit a4aae4
Packit a4aae4
Packit a4aae4
#include "config.h"
Packit a4aae4
Packit a4aae4
#ifdef HAVE_UNISTD_H
Packit a4aae4
#include <unistd.h>
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
#include <sys/stat.h>
Packit a4aae4
Packit a4aae4
#ifdef WIN32
Packit a4aae4
#include <io.h>
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
#include <string>
Packit a4aae4
#include <vector>
Packit a4aae4
#include <functional>
Packit a4aae4
#include <algorithm>
Packit a4aae4
#include <sstream>
Packit a4aae4
#include <fstream>
Packit a4aae4
#include <iterator>
Packit a4aae4
#include <cstdlib>
Packit a4aae4
#include <cstring>
Packit a4aae4
#include <cerrno>
Packit a4aae4
Packit a4aae4
//#define DODS_DEBUG2
Packit a4aae4
//#define HTTP_TRACE
Packit a4aae4
//#define DODS_DEBUG
Packit a4aae4
Packit a4aae4
#undef USE_GETENV
Packit a4aae4
Packit a4aae4
Packit a4aae4
#include "debug.h"
Packit a4aae4
#include "mime_util.h"
Packit a4aae4
#include "media_types.h"
Packit a4aae4
#include "GNURegex.h"
Packit a4aae4
#include "HTTPCache.h"
Packit a4aae4
#include "HTTPConnect.h"
Packit a4aae4
#include "RCReader.h"
Packit a4aae4
#include "HTTPResponse.h"
Packit a4aae4
#include "HTTPCacheResponse.h"
Packit a4aae4
Packit a4aae4
using namespace std;
Packit a4aae4
Packit a4aae4
namespace libdap {
Packit a4aae4
Packit a4aae4
// These global variables are not MT-Safe, but I'm leaving them as is because
Packit a4aae4
// they are used only for debugging (set them in a debugger like gdb or ddd).
Packit a4aae4
// They are not static because I think that many debuggers cannot access
Packit a4aae4
// static variables. 08/07/02 jhrg
Packit a4aae4
Packit a4aae4
// Set this to a non-zero value to turn on libcurl's verbose mode (for
// debugging). The flag is read by HTTPConnect::www_lib_init(), which the
// HTTPConnect constructor calls, so set it before creating the object.
Packit a4aae4
int www_trace = 0;
Packit a4aae4
Packit a4aae4
// Keep the temporary files; useful for debugging.
Packit a4aae4
int dods_keep_temps = 0;
Packit a4aae4
Packit a4aae4
// Inclusive range of HTTP client-error status codes that have an entry in
// http_client_errors. The table is indexed by (status - CLIENT_ERR_MIN).
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        // Adjacent string literals are concatenated by the compiler. A
        // backslash line continuation inside a single literal would embed
        // the indentation of the following source line in the message.
        "Not Found: The data source or server could not be found.\n"
        "Often this means that the OPeNDAP server is missing or needs attention;\n"
        "Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone:.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };

// Inclusive range of HTTP server-error status codes that have an entry in
// http_server_errors. The table is indexed by (status - SERVER_ERR_MIN).
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };

/** Translate an HTTP status code into an error message.

    @param status The HTTP status code.
    @return The message from http_client_errors for codes in the range
    CLIENT_ERR_MIN..CLIENT_ERR_MAX, the message from http_server_errors for
    codes in the range SERVER_ERR_MIN..SERVER_ERR_MAX, and a generic
    "Unknown Error" message for every other value. */
static std::string
http_status_to_string(int status)
{
    if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);
    else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);
    else
        return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
Packit a4aae4
Packit a4aae4
/** Classify the value of a Content-Type header.

    The value is split at the first '+' into a base type and a suffix
    (for example "application/vnd.opendap.dap4.dataset-metadata" and "xml").

    @param header_value The value of the Content-Type header.
    @return dap4_dmr for a DMR media type with an "xml" suffix, dap4_data
    for a DAP4 data media type, web_error when the value mentions
    "text/html", and unknown_type otherwise (including a DMR media type
    whose suffix is not exactly "xml"). */
static ObjectType
determine_object_type(const string &header_value)
{
    // DAP4 Data: application/vnd.opendap.dap4.data
    // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml

    const string::size_type plus_pos = header_value.find('+');
    const bool has_suffix = (plus_pos != string::npos);

    const string base_type = has_suffix ? header_value.substr(0, plus_pos) : header_value;
    const string type_extension = has_suffix ? header_value.substr(plus_pos + 1) : string();

    const bool is_application = base_type.find("application/") != string::npos;

    // The DMR test must come first: "dap4.data" is a prefix of
    // "dap4.dataset-metadata", so the data test would match it too.
    if (base_type == DMR_Content_Type
        || (is_application && base_type.find("dap4.dataset-metadata") != string::npos))
        return (type_extension == "xml") ? dap4_dmr : unknown_type;

    if (base_type == DAP4_DATA_Content_Type
        || (is_application && base_type.find("dap4.data") != string::npos))
        return dap4_data;

    if (header_value.find("text/html") != string::npos)
        return web_error;

    return unknown_type;
}
Packit a4aae4
Packit a4aae4
/** Functor to parse the headers in the d_headers field. After the headers
Packit a4aae4
    have been read off the wire and written into the d_headers field, scan
Packit a4aae4
    them and set special fields for certain headers special to the DAP. */
Packit a4aae4
Packit a4aae4
class ParseHeader : public unary_function<const string &, void>
Packit a4aae4
{
Packit a4aae4
    ObjectType type;  // What type of object is in the stream?
Packit a4aae4
    string server;  // Server's version string.
Packit a4aae4
    string protocol;            // Server's protocol version.
Packit a4aae4
    string location;            // Url returned by server
Packit a4aae4
Packit a4aae4
public:
Packit a4aae4
    ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
Packit a4aae4
    { }
Packit a4aae4
Packit a4aae4
    void operator()(const string &line)
Packit a4aae4
    {
Packit a4aae4
        string name, value;
Packit a4aae4
        parse_mime_header(line, name, value);
Packit a4aae4
Packit a4aae4
        DBG2(cerr << name << ": " << value << endl);
Packit a4aae4
Packit a4aae4
        // Content-Type is used to determine the content of DAP4 responses, but allow the
Packit a4aae4
        // Content-Description header to override CT o preserve operation with DAP2 servers.
Packit a4aae4
        // jhrg 11/12/13
Packit a4aae4
        if (type == unknown_type && name == "content-type") {
Packit a4aae4
            type = determine_object_type(value); // see above
Packit a4aae4
        }
Packit a4aae4
        if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
Packit a4aae4
            type = get_description_type(value); // defined in mime_util.cc
Packit a4aae4
        }
Packit a4aae4
        // The second test (== "dods/0.0") tests if xopendap-server has already
Packit a4aae4
        // been seen. If so, use that header in preference to the old
Packit a4aae4
        // XDODS-Server header. jhrg 2/7/06
Packit a4aae4
        else if (name == "xdods-server" && server == "dods/0.0") {
Packit a4aae4
            server = value;
Packit a4aae4
        }
Packit a4aae4
        else if (name == "xopendap-server") {
Packit a4aae4
            server = value;
Packit a4aae4
        }
Packit a4aae4
        else if (name == "xdap") {
Packit a4aae4
            protocol = value;
Packit a4aae4
        }
Packit a4aae4
        else if (server == "dods/0.0" && name == "server") {
Packit a4aae4
            server = value;
Packit a4aae4
        }
Packit a4aae4
       	else if (name == "location") {
Packit a4aae4
       	    location = value;
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    ObjectType get_object_type()
Packit a4aae4
    {
Packit a4aae4
        return type;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    string get_server()
Packit a4aae4
    {
Packit a4aae4
        return server;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    string get_protocol()
Packit a4aae4
    {
Packit a4aae4
        return protocol;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    string get_location() {
Packit a4aae4
	   return location;
Packit a4aae4
    }
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
/** A libcurl callback function used to read response headers. Read headers,
Packit a4aae4
    line by line, from ptr. The fourth param is really supposed to be a FILE
Packit a4aae4
    *, but libcurl just holds the pointer and passes it to this function
Packit a4aae4
    without using it itself. I use that to pass in a pointer to a vector
Packit a4aae4
    of strings so that there's some place to
Packit a4aae4
    dump the headers. Later on the code (see fetch_url()) parses the headers
Packit a4aae4
    special to the DAP.
Packit a4aae4
Packit a4aae4
    @param ptr A pointer to one line of character data; one header.
Packit a4aae4
    @param size Size of each character (nominally one byte).
Packit a4aae4
    @param nmemb Number of bytes.
Packit a4aae4
    @param resp_hdrs A pointer to a vector<string>. Set in read_url.
Packit a4aae4
    @return The number of bytes processed. Must be equal to size * nmemb or
Packit a4aae4
    libcurl will report an error. */
Packit a4aae4
Packit a4aae4
static size_t
Packit a4aae4
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
Packit a4aae4
{
Packit a4aae4
    DBG2(cerr << "Inside the header parser." << endl);
Packit a4aae4
    vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);
Packit a4aae4
Packit a4aae4
    // Grab the header, minus the trailing newline. Or \r\n pair.
Packit a4aae4
    string complete_line;
Packit a4aae4
    if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r')
Packit a4aae4
        complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2));
Packit a4aae4
    else
Packit a4aae4
        complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1));
Packit a4aae4
Packit a4aae4
    // Store all non-empty headers that are not HTTP status codes
Packit a4aae4
    if (complete_line != "" && complete_line.find("HTTP") == string::npos) {
Packit a4aae4
        DBG(cerr << "Header line: " << complete_line << endl);
Packit a4aae4
        hdrs->push_back(complete_line);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    return size * nmemb;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** A libcurl callback for debugging protocol issues. */
Packit a4aae4
static int
Packit a4aae4
curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void  *)
Packit a4aae4
{
Packit a4aae4
    string message(msg, size);
Packit a4aae4
Packit a4aae4
    switch (info) {
Packit a4aae4
    case CURLINFO_TEXT:
Packit a4aae4
        cerr << "Text: " << message; break;
Packit a4aae4
    case CURLINFO_HEADER_IN:
Packit a4aae4
        cerr << "Header in: " << message; break;
Packit a4aae4
    case CURLINFO_HEADER_OUT:
Packit a4aae4
        cerr << "Header out: " << message; break;
Packit a4aae4
    case CURLINFO_DATA_IN:
Packit a4aae4
        cerr << "Data in: " << message; break;
Packit a4aae4
    case CURLINFO_DATA_OUT:
Packit a4aae4
        cerr << "Data out: " << message; break;
Packit a4aae4
    case CURLINFO_END:
Packit a4aae4
        cerr << "End: " << message; break;
Packit a4aae4
#ifdef CURLINFO_SSL_DATA_IN
Packit a4aae4
    case CURLINFO_SSL_DATA_IN:
Packit a4aae4
        cerr << "SSL Data in: " << message; break;
Packit a4aae4
#endif
Packit a4aae4
#ifdef CURLINFO_SSL_DATA_OUT
Packit a4aae4
    case CURLINFO_SSL_DATA_OUT:
Packit a4aae4
        cerr << "SSL Data out: " << message; break;
Packit a4aae4
#endif
Packit a4aae4
    default:
Packit a4aae4
        cerr << "Curl info: " << message; break;
Packit a4aae4
    }
Packit a4aae4
    return 0;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Initialize libcurl. Create a libcurl handle that can be used for all of
Packit a4aae4
    the HTTP requests made through this instance. */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPConnect::www_lib_init()
Packit a4aae4
{
Packit a4aae4
    d_curl = curl_easy_init();
Packit a4aae4
    if (!d_curl)
Packit a4aae4
        throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
Packit a4aae4
Packit a4aae4
    // Now set options that will remain constant for the duration of this
Packit a4aae4
    // CURL object.
Packit a4aae4
Packit a4aae4
    // Set the proxy host.
Packit a4aae4
    if (!d_rcr->get_proxy_server_host().empty()) {
Packit a4aae4
        DBG(cerr << "Setting up a proxy server." << endl);
Packit a4aae4
        DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
Packit a4aae4
            << endl);
Packit a4aae4
        DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
Packit a4aae4
            << endl);
Packit a4aae4
        DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
Packit a4aae4
            << endl);
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_PROXY,
Packit a4aae4
                         d_rcr->get_proxy_server_host().c_str());
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
Packit a4aae4
                         d_rcr->get_proxy_server_port());
Packit a4aae4
Packit a4aae4
	// As of 4/21/08 only NTLM, Digest and Basic work.
Packit a4aae4
#ifdef CURLOPT_PROXYAUTH
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
        // Password might not be required. 06/21/04 jhrg
Packit a4aae4
        if (!d_rcr->get_proxy_server_userpw().empty())
Packit a4aae4
            curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
Packit a4aae4
                             d_rcr->get_proxy_server_userpw().c_str());
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
Packit a4aae4
    // We have to set FailOnError to false for any of the non-Basic
Packit a4aae4
    // authentication schemes to work. 07/28/03 jhrg
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
Packit a4aae4
Packit a4aae4
    // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
Packit a4aae4
    // choosing the the 'safest' one supported by the server.
Packit a4aae4
    // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
Packit a4aae4
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
Packit a4aae4
    // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
Packit a4aae4
    // param of save_raw_http_headers to a vector<string> object.
Packit a4aae4
Packit a4aae4
    // Follow 302 (redirect) responses
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
Packit a4aae4
Packit a4aae4
    // If the user turns off SSL validation...
Packit a4aae4
    if (d_rcr->get_validate_ssl() == 0) {
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    // Look to see if cookies are turned on in the .dodsrc file. If so,
Packit a4aae4
    // activate here. We honor 'session cookies' (cookies without an
Packit a4aae4
    // expiration date) here so that session-based SSO systems will work as
Packit a4aae4
    // expected.
Packit a4aae4
    if (!d_cookie_jar.empty()) {
Packit a4aae4
	DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    if (www_trace) {
Packit a4aae4
        cerr << "Curl version: " << curl_version() << endl;
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
Packit a4aae4
    }
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Functor to add a single string to a curl_slist. This is used to transfer
Packit a4aae4
    a list of headers from a vector<string> object to a curl_slist. */
Packit a4aae4
Packit a4aae4
class BuildHeaders : public unary_function<const string &, void>
Packit a4aae4
{
Packit a4aae4
    struct curl_slist *d_cl;
Packit a4aae4
Packit a4aae4
public:
Packit a4aae4
    BuildHeaders() : d_cl(0)
Packit a4aae4
    {}
Packit a4aae4
Packit a4aae4
    void operator()(const string &header)
Packit a4aae4
    {
Packit a4aae4
        DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
Packit a4aae4
            << endl);
Packit a4aae4
        d_cl = curl_slist_append(d_cl, header.c_str());
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    struct curl_slist *get_headers()
Packit a4aae4
    {
Packit a4aae4
        return d_cl;
Packit a4aae4
    }
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
/** Use libcurl to dereference a URL. Read the information referenced by \c
Packit a4aae4
    url into the file pointed to by \c stream.
Packit a4aae4
Packit a4aae4
    @param url The URL to dereference.
Packit a4aae4
    @param stream The destination for the data; the caller can assume that
Packit a4aae4
    the body of the response can be found by reading from this pointer. A
Packit a4aae4
    value/result parameter
Packit a4aae4
    @param resp_hdrs Value/result parameter for the HTTP Response Headers.
Packit a4aae4
    @param headers A pointer to a vector of HTTP request headers. Default is
Packit a4aae4
    null. These headers will be appended to the list of default headers.
Packit a4aae4
    @return The HTTP status code.
Packit a4aae4
    @exception Error Thrown if libcurl encounters a problem; the libcurl
Packit a4aae4
    error message is stuffed into the Error object. */
Packit a4aae4
Packit a4aae4
long
Packit a4aae4
HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers)
Packit a4aae4
{
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
Packit a4aae4
Packit a4aae4
#ifdef WIN32
Packit a4aae4
    //  See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
Packit a4aae4
    //  and the CURLOPT_WRITEFUNCTION option.  Quote: "If you are using libcurl as
Packit a4aae4
    //  a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
Packit a4aae4
    //  CURLOPT_WRITEDATA option or you will experience crashes".  At the root of
Packit a4aae4
    //  this issue is that one should not pass a FILE * to a windows DLL.  Close
Packit a4aae4
    //  inspection of libcurl yields that their default write function when using
Packit a4aae4
    //  the CURLOPT_WRITEDATA is just "fwrite".
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
Packit a4aae4
#else
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
    DBG(copy(d_request_headers.begin(), d_request_headers.end(),
Packit a4aae4
             ostream_iterator<string>(cerr, "\n")));
Packit a4aae4
Packit a4aae4
    BuildHeaders req_hdrs;
Packit a4aae4
    req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
Packit a4aae4
                        req_hdrs);
Packit a4aae4
    if (headers)
Packit a4aae4
        req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
Packit a4aae4
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
Packit a4aae4
Packit a4aae4
    // Turn off the proxy for this URL?
Packit a4aae4
    bool temporary_proxy = false;
Packit a4aae4
    if ((temporary_proxy = url_uses_no_proxy_for(url))) {
Packit a4aae4
        DBG(cerr << "Suppress proxy for url: " << url << endl);
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    string::size_type at_sign = url.find('@');
Packit a4aae4
    // Assume username:password present *and* assume it's an HTTP URL; it *is*
Packit a4aae4
    // HTTPConnect, after all. 7 is position after "http://"; the second arg
Packit a4aae4
    // to substr() is the sub string length.
Packit a4aae4
    if (at_sign != url.npos)
Packit a4aae4
        d_upstring = url.substr(7, at_sign - 7);
Packit a4aae4
Packit a4aae4
    if (!d_upstring.empty())
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
Packit a4aae4
Packit a4aae4
    // Pass save_raw_http_headers() a pointer to the vector<string> where the
Packit a4aae4
    // response headers may be stored. Callers can use the resp_hdrs
Packit a4aae4
    // value/result parameter to get the raw response header information .
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
Packit a4aae4
Packit a4aae4
    // This is the call that causes curl to go and get the remote resource and "write it down"
Packit a4aae4
    // utilizing the configuration state that has been previously conditioned by various perturbations
Packit a4aae4
    // of calls to curl_easy_setopt().
Packit a4aae4
    CURLcode res = curl_easy_perform(d_curl);
Packit a4aae4
Packit a4aae4
    // Free the header list and null the value in d_curl.
Packit a4aae4
    curl_slist_free_all(req_hdrs.get_headers());
Packit a4aae4
    curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
Packit a4aae4
Packit a4aae4
    // Reset the proxy?
Packit a4aae4
    if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
Packit a4aae4
        curl_easy_setopt(d_curl, CURLOPT_PROXY,
Packit a4aae4
                         d_rcr->get_proxy_server_host().c_str());
Packit a4aae4
Packit a4aae4
    if (res != 0)
Packit a4aae4
        throw Error(d_error_buffer);
Packit a4aae4
Packit a4aae4
    long status;
Packit a4aae4
    res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
Packit a4aae4
    if (res != 0)
Packit a4aae4
        throw Error(d_error_buffer);
Packit a4aae4
Packit a4aae4
    char *ct_ptr = 0;
Packit a4aae4
    res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
Packit a4aae4
    if (res == CURLE_OK && ct_ptr)
Packit a4aae4
        d_content_type = ct_ptr;
Packit a4aae4
    else
Packit a4aae4
        d_content_type = "";
Packit a4aae4
Packit a4aae4
    return status;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** If the .dodsrc file gives a value for PROXY_FOR, return true if the
Packit a4aae4
    current URL matches the regular expression. */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPConnect::url_uses_proxy_for(const string &url)
Packit a4aae4
{
Packit a4aae4
    if (d_rcr->is_proxy_for_used()) {
Packit a4aae4
        Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
Packit a4aae4
        int index = 0, matchlen;
Packit a4aae4
        return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    return false;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** If the NO_PROXY option is used in the dodsrc file, does this URL match
Packit a4aae4
    the no proxy URL regex? */
Packit a4aae4
Packit a4aae4
bool
Packit a4aae4
HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
Packit a4aae4
{
Packit a4aae4
    return d_rcr->is_no_proxy_for_used()
Packit a4aae4
           && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
// Public methods. Mostly...
Packit a4aae4
Packit a4aae4
/** Build a virtual connection to a remote data source that will be
Packit a4aae4
    accessed using HTTP.
Packit a4aae4
Packit a4aae4
    @param rcr A pointer to the RCReader object which holds configuration
Packit a4aae4
    file information to be used by this virtual connection. */
Packit a4aae4
Packit a4aae4
HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""),
Packit a4aae4
		d_dap_client_protocol_major(2),	d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
Packit a4aae4
Packit a4aae4
{
Packit a4aae4
    d_accept_deflate = rcr->get_deflate();
Packit a4aae4
    d_rcr = rcr;
Packit a4aae4
Packit a4aae4
    // Load in the default headers to send with a request. The empty Pragma
Packit a4aae4
    // headers overrides libcurl's default Pragma: no-cache header (which
Packit a4aae4
    // will disable caching by Squid, et c.). The User-Agent header helps
Packit a4aae4
    // make server logs more readable. 05/05/03 jhrg
Packit a4aae4
    d_request_headers.push_back(string("Pragma:"));
Packit a4aae4
    string user_agent = string("User-Agent: ") + string(CNAME)
Packit a4aae4
                        + string("/") + string(CVER);
Packit a4aae4
    d_request_headers.push_back(user_agent);
Packit a4aae4
    if (d_accept_deflate)
Packit a4aae4
        d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
Packit a4aae4
Packit a4aae4
    // HTTPCache::instance returns a valid ptr or 0.
Packit a4aae4
    if (d_rcr->get_use_cache())
Packit a4aae4
        d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
Packit a4aae4
    else
Packit a4aae4
        d_http_cache = 0;
Packit a4aae4
Packit a4aae4
    DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
Packit a4aae4
         << ")" << endl);
Packit a4aae4
Packit a4aae4
    if (d_http_cache) {
Packit a4aae4
        d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
Packit a4aae4
        d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
Packit a4aae4
        d_http_cache->set_max_size(d_rcr->get_max_cache_size());
Packit a4aae4
        d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
Packit a4aae4
        d_http_cache->set_default_expiration(d_rcr->get_default_expires());
Packit a4aae4
        d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    d_cookie_jar = rcr->get_cookie_jar();
Packit a4aae4
Packit a4aae4
    www_lib_init();  // This may throw either Error or InternalErr
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Release the libcurl handle held in d_curl (created in www_lib_init()).
    Nothing else is released here; in particular d_http_cache is not
    deleted. NOTE(review): presumably the cache object returned by
    HTTPCache::instance() is owned elsewhere -- confirm. */
HTTPConnect::~HTTPConnect()
Packit a4aae4
{
Packit a4aae4
    DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
Packit a4aae4
Packit a4aae4
    curl_easy_cleanup(d_curl);
Packit a4aae4
Packit a4aae4
    DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Predicate that looks for a certain header: true when a header line
    starts with the given text (e.g. "Content-Type:").

    The comparison ignores ASCII letter case because HTTP header field names
    are case-insensitive. The text is copied, so the predicate stays valid
    after the string used to build it has gone away.

    The class no longer derives from std::unary_function, which was
    deprecated in C++11 and removed in C++17; the typedefs it supplied are
    declared directly so that existing uses keep working. */
class HeaderMatch {
    std::string d_header;   // Own copy of the text to look for.

    // Fold one ASCII upper-case letter to lower case; other characters are
    // returned unchanged. Independent of the current locale.
    static char ascii_lower(char c)
    {
        return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
    }

    public:
        typedef const std::string &argument_type;
        typedef bool result_type;

        /** @param header The text a matching line must start with. */
        HeaderMatch(const std::string &header) : d_header(header) {}

        /** @param arg One header line.
            @return True if \c arg starts with the header text, ignoring
            ASCII case. An empty header text matches every line. */
        bool operator()(const std::string &arg) const
        {
            if (arg.size() < d_header.size())
                return false;

            for (std::string::size_type i = 0; i < d_header.size(); ++i) {
                if (ascii_lower(arg[i]) != ascii_lower(d_header[i]))
                    return false;
            }

            return true;
        }
};
Packit a4aae4
Packit a4aae4
/** Dereference a URL. This method dereferences a URL and stores the result
Packit a4aae4
    (i.e., it formulates an HTTP request and processes the HTTP server's
Packit a4aae4
    response). After this method is successfully called, the value of
Packit a4aae4
    is_response_present() will be true and the methods
Packit a4aae4
    get_response_headers(), type() and server_version() may be called.
Packit a4aae4
Packit a4aae4
    @param url The URL to dereference.
Packit a4aae4
    @return A pointer to the stream.
Packit a4aae4
    @exception Error Thrown if the URL could not be dereferenced.
Packit a4aae4
    @exception InternalErr Thrown if a temporary file to hold the response
Packit a4aae4
    could not be opened. */
Packit a4aae4
Packit a4aae4
HTTPResponse *
HTTPConnect::fetch_url(const string &url)
{
#ifdef HTTP_TRACE
    cout << "GET " << url << " HTTP/1.0" << endl;
#endif

    // Go through the HTTP cache when it is enabled, otherwise read directly.
    HTTPResponse *stream = is_cache_enabled() ? caching_fetch_url(url) : plain_fetch_url(url);

#ifdef HTTP_TRACE
    stringstream ss;
    ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
    for (size_t i = 0; i < stream->get_headers()->size(); i++) {
        ss << stream->get_headers()->at(i) << endl;
    }
    cout << ss.str();
#endif

    // An apparent quirk of libcurl is that it does not pass the Content-type
    // header to the callback used to save them, but check and add it from the
    // saved state variable only if it's not there (without this a test failed
    // in HTTPCacheTest). jhrg 11/12/13
    if (!d_content_type.empty()) {
        const bool has_content_type =
            find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
                    HeaderMatch("Content-Type:")) != stream->get_headers()->end();
        if (!has_content_type)
            stream->get_headers()->push_back("Content-Type: " + d_content_type);
    }

    // Run every response header through ParseHeader; the functor returned by
    // for_each() carries the values it extracted.
    ParseHeader parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());

#ifdef HTTP_TRACE
    cout << endl << endl;
#endif

    // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
    // The URL, with any query string removed, is compared with the same
    // number of leading characters of the Location value; only when they
    // differ is the redirect followed.
    // NOTE(review): the cut-off position comes from 'url', not from the
    // Location value - confirm that this prefix comparison is intended.
    const string location = parser.get_location();
    if (!location.empty()) {
        const string::size_type query_pos = url.find("?", 0);
        if (url.substr(0, query_pos).compare(location.substr(0, query_pos)) != 0) {
            delete stream;
            return fetch_url(location);
        }
    }

    stream->set_type(parser.get_object_type()); // uses the value of content-description
    stream->set_version(parser.get_server());
    stream->set_protocol(parser.get_protocol());

    if (d_use_cpp_streams)
        stream->transform_to_cpp();

    return stream;
}
Packit a4aae4
Packit a4aae4
// Look around for a reasonable place to put a temporary file. Check first
Packit a4aae4
// the value of the TMPDIR env var. If that does not yeild a path that's
Packit a4aae4
// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
Packit a4aae4
// defined in stdio.h. If both come up empty, then use `./'.
Packit a4aae4
Packit a4aae4
// Change this to a version that either returns a string or an open file
Packit a4aae4
// descriptor. Use information from https://buildsecurityin.us-cert.gov/
Packit a4aae4
// (see open()) to make it more secure. Ideal solution: get deserialize()
Packit a4aae4
// methods to read from a stream returned by libcurl, not from a temporary
Packit a4aae4
// file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
Packit a4aae4
static string
Packit a4aae4
get_tempfile_template(const string &file_template)
Packit a4aae4
{
Packit a4aae4
    string c;
Packit a4aae4
Packit a4aae4
    // Windows has one idea of the standard name(s) for a temporary files dir
Packit a4aae4
#ifdef WIN32
Packit a4aae4
    // white list for a WIN32 directory
Packit a4aae4
    Regex directory("[-a-zA-Z0-9_:\\]*");
Packit a4aae4
Packit a4aae4
    // If we're OK to use getenv(), try it.
Packit a4aae4
#ifdef USE_GETENV
Packit a4aae4
    c = getenv("TEMP");
Packit a4aae4
    if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
Packit a4aae4
	goto valid_temp_directory;
Packit a4aae4
Packit a4aae4
    c= getenv("TMP");
Packit a4aae4
    if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
Packit a4aae4
	goto valid_temp_directory;
Packit a4aae4
#endif // USE_GETENV
Packit a4aae4
Packit a4aae4
    // The windows default
Packit a4aae4
    c = "c:\tmp";
Packit a4aae4
    if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
Packit a4aae4
	goto valid_temp_directory;
Packit a4aae4
Packit a4aae4
#else	// Unix/Linux/OSX has another...
Packit a4aae4
    // white list for a directory
Packit a4aae4
    Regex directory("[-a-zA-Z0-9_/]*");
Packit a4aae4
#ifdef USE_GETENV
Packit a4aae4
    c = getenv("TMPDIR");
Packit a4aae4
    if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
Packit a4aae4
	goto valid_temp_directory;
Packit a4aae4
#endif // USE_GETENV
Packit a4aae4
Packit a4aae4
    // Unix defines this sometimes - if present, use it.
Packit a4aae4
#ifdef P_tmpdir
Packit a4aae4
    if (access(P_tmpdir, W_OK | R_OK) == 0) {
Packit a4aae4
	c = P_tmpdir;
Packit a4aae4
	goto valid_temp_directory;
Packit a4aae4
    }
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
    // The Unix default
Packit a4aae4
    c = "/tmp";
Packit a4aae4
    if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
Packit a4aae4
	goto valid_temp_directory;
Packit a4aae4
Packit a4aae4
#endif  // WIN32
Packit a4aae4
Packit a4aae4
    // If we found nothing useful, use the current directory
Packit a4aae4
    c = ".";
Packit a4aae4
Packit a4aae4
valid_temp_directory:
Packit a4aae4
Packit a4aae4
#ifdef WIN32
Packit a4aae4
    c += "\\" + file_template;
Packit a4aae4
#else
Packit a4aae4
    c += "/" + file_template;
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
    return c;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Open a temporary file and return its name. This method opens a temporary
Packit a4aae4
    file using get_tempfile_template(). The FILE* \c stream is opened for
Packit a4aae4
    both reads and writes; if it already exists (highly unlikely), it is
Packit a4aae4
    truncated. If used on Unix, it's the callers responsibility to unlink the
Packit a4aae4
    named file so that when all descriptors to it are closed, it will be
Packit a4aae4
    deleted. On Win32 platforms, this method pushes the name of the temporary
Packit a4aae4
    file onto a vector which is used during object destruction to delete all
Packit a4aae4
    the temporary files.
Packit a4aae4
Packit a4aae4
    @note Delete the returned char* using delete[].
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param stream A value-result parameter; the open file descriptor is
Packit a4aae4
    returned via this parameter.
Packit a4aae4
    @return The name of the temporary file.
Packit a4aae4
    @exception InternalErr thrown if the FILE* could not be opened. */
Packit a4aae4
Packit a4aae4
string
Packit a4aae4
get_temp_file(FILE *&stream) throw(Error)
Packit a4aae4
{
Packit a4aae4
    string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
Packit a4aae4
Packit a4aae4
    vector<char> pathname(dods_temp.length() + 1);
Packit a4aae4
Packit a4aae4
    strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
Packit a4aae4
Packit a4aae4
    DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
Packit a4aae4
Packit a4aae4
    // Open truncated for update. NB: mkstemp() returns a file descriptor.
Packit a4aae4
#if defined(WIN32) || defined(TEST_WIN32_TEMPS)
Packit a4aae4
    stream = fopen(_mktemp(&pathname[0]), "w+b");
Packit a4aae4
#else
Packit a4aae4
    // Make sure that temp files are accessible only by the owner.
Packit a4aae4
    int mask = umask(077);
Packit a4aae4
    if (mask < 0)
Packit a4aae4
        throw Error("Could not set the file creation mask: " + string(strerror(errno)));
Packit a4aae4
    int fd = mkstemp(&pathname[0]);
Packit a4aae4
    if (fd < 0)
Packit a4aae4
        throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
Packit a4aae4
Packit a4aae4
    stream = fdopen(fd, "w+");
Packit a4aae4
    umask(mask);
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
    if (!stream)
Packit a4aae4
    	throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
Packit a4aae4
Packit a4aae4
    dods_temp = &pathname[0];
Packit a4aae4
    return dods_temp;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
Packit a4aae4
/**
Packit a4aae4
 * close temporary files - used here and in ~HTTPResponse
Packit a4aae4
 * @param s
Packit a4aae4
 * @param name
Packit a4aae4
 */
Packit a4aae4
void
Packit a4aae4
close_temp(FILE *s, const string &name)
Packit a4aae4
{
Packit a4aae4
    int res = fclose(s);
Packit a4aae4
    if (res)
Packit a4aae4
	throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
Packit a4aae4
Packit a4aae4
    res = unlink(name.c_str());
Packit a4aae4
    if (res != 0)
Packit a4aae4
    	throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Dereference a URL. This method looks first in the HTTP cache to see if a
Packit a4aae4
    cached response may be used. It may get the response from the cache, it
Packit a4aae4
    may validate a response in the cache and/or update the response from the
Packit a4aae4
    cache or it may get a new response from the network. In any of those
Packit a4aae4
    cases, the information returned by dereferencing the URL will be stored
Packit a4aae4
    in the cache.
Packit a4aae4
Packit a4aae4
    Return a Response pointer to fetch_url() which, in turn, uses
Packit a4aae4
    ParseHeaders to read stuff from d_headers and fills in the Response
Packit a4aae4
    version and type fields. Thus this method and plain_fetch_url() only have
Packit a4aae4
    to get the stream pointer set, the resources to release and d_headers.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @note This method assumes that d_http_cache is not null!
Packit a4aae4
    @param url The URL to dereference.
Packit a4aae4
    @return A pointer to the open stream.
Packit a4aae4
    @exception Error Thrown if the URL could not be dereferenced.
Packit a4aae4
    @exception InternalErr Thrown if a temporary file to hold the response
Packit a4aae4
    could not be opened. */
Packit a4aae4
Packit a4aae4
HTTPResponse *
Packit a4aae4
HTTPConnect::caching_fetch_url(const string &url)
Packit a4aae4
{
Packit a4aae4
    DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
Packit a4aae4
Packit a4aae4
    vector<string> *headers = new vector<string>;
Packit a4aae4
    string file_name;
Packit a4aae4
    FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
Packit a4aae4
    if (!s) {
Packit a4aae4
        // url not in cache; get it and cache it
Packit a4aae4
        DBGN(cerr << "no; getting response and caching." << endl);
Packit a4aae4
        delete headers; headers = 0;
Packit a4aae4
        time_t now = time(0);
Packit a4aae4
        HTTPResponse *rs = plain_fetch_url(url);
Packit a4aae4
        d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
Packit a4aae4
Packit a4aae4
        return rs;
Packit a4aae4
    }
Packit a4aae4
    else { // url in cache
Packit a4aae4
        DBGN(cerr << "yes... ");
Packit a4aae4
Packit a4aae4
        if (d_http_cache->is_url_valid(url)) { // url in cache and valid
Packit a4aae4
            DBGN(cerr << "and it's valid; using cached response." << endl);
Packit a4aae4
            HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
Packit a4aae4
            return crs;
Packit a4aae4
        }
Packit a4aae4
        else { // url in cache but not valid; validate
Packit a4aae4
            DBGN(cerr << "but it's not valid; validating... ");
Packit a4aae4
Packit a4aae4
            d_http_cache->release_cached_response(s); // This closes 's'
Packit a4aae4
            headers->clear();
Packit a4aae4
            vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
Packit a4aae4
            FILE *body = 0;
Packit a4aae4
            string dods_temp = get_temp_file(body);
Packit a4aae4
            time_t now = time(0); // When was the request made (now).
Packit a4aae4
            long http_status;
Packit a4aae4
Packit a4aae4
            try {
Packit a4aae4
                http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
Packit a4aae4
                rewind(body);
Packit a4aae4
            }
Packit a4aae4
            catch (Error &e) {
Packit a4aae4
                close_temp(body, dods_temp);
Packit a4aae4
                delete headers;
Packit a4aae4
                throw ;
Packit a4aae4
            }
Packit a4aae4
Packit a4aae4
            switch (http_status) {
Packit a4aae4
                case 200: { // New headers and new body
Packit a4aae4
                    DBGN(cerr << "read a new response; caching." << endl);
Packit a4aae4
Packit a4aae4
                    d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
Packit a4aae4
                    HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
Packit a4aae4
Packit a4aae4
                    return rs;
Packit a4aae4
                }
Packit a4aae4
Packit a4aae4
                case 304: { // Just new headers, use cached body
Packit a4aae4
                    DBGN(cerr << "cached response valid; updating." << endl);
Packit a4aae4
Packit a4aae4
                    close_temp(body, dods_temp);
Packit a4aae4
                    d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
Packit a4aae4
                    string file_name;
Packit a4aae4
                    FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
Packit a4aae4
                    HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
Packit a4aae4
                    return crs;
Packit a4aae4
                }
Packit a4aae4
Packit a4aae4
                default: { // Oops.
Packit a4aae4
                    close_temp(body, dods_temp);
Packit a4aae4
                    if (http_status >= 400) {
Packit a4aae4
                	delete headers; headers = 0;
Packit a4aae4
                        string msg = "Error while reading the URL: ";
Packit a4aae4
                        msg += url;
Packit a4aae4
                        msg
Packit a4aae4
                        += ".\nThe OPeNDAP server returned the following message:\n";
Packit a4aae4
                        msg += http_status_to_string(http_status);
Packit a4aae4
                        throw Error(msg);
Packit a4aae4
                    }
Packit a4aae4
                    else {
Packit a4aae4
                	delete headers; headers = 0;
Packit a4aae4
                        throw InternalErr(__FILE__, __LINE__,
Packit a4aae4
                                "Bad response from the HTTP server: " + long_to_string(http_status));
Packit a4aae4
                    }
Packit a4aae4
                }
Packit a4aae4
            }
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    throw InternalErr(__FILE__, __LINE__, "Should never get here");
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Dereference a URL and load its body into a temporary file. This
Packit a4aae4
    method ignores the HTTP cache.
Packit a4aae4
Packit a4aae4
    A private method.
Packit a4aae4
Packit a4aae4
    @param url The URL to dereference.
Packit a4aae4
    @return A pointer to the open stream.
Packit a4aae4
    @exception Error Thrown if the URL could not be dereferenced.
Packit a4aae4
    @exception InternalErr Thrown if a temporary file to hold the response
Packit a4aae4
    could not be opened. */
Packit a4aae4
Packit a4aae4
HTTPResponse *
Packit a4aae4
HTTPConnect::plain_fetch_url(const string &url)
Packit a4aae4
{
Packit a4aae4
	DBG(cerr << "Getting URL: " << url << endl);
Packit a4aae4
	FILE *stream = 0;
Packit a4aae4
	string dods_temp = get_temp_file(stream);
Packit a4aae4
	vector<string> *resp_hdrs = new vector<string>;
Packit a4aae4
Packit a4aae4
	int status = -1;
Packit a4aae4
	try {
Packit a4aae4
		status = read_url(url, stream, resp_hdrs); // Throws Error.
Packit a4aae4
		if (status >= 400) {
Packit a4aae4
			// delete resp_hdrs; resp_hdrs = 0;
Packit a4aae4
			string msg = "Error while reading the URL: ";
Packit a4aae4
			msg += url;
Packit a4aae4
			msg += ".\nThe OPeNDAP server returned the following message:\n";
Packit a4aae4
			msg += http_status_to_string(status);
Packit a4aae4
			throw Error(msg);
Packit a4aae4
		}
Packit a4aae4
	}
Packit a4aae4
Packit a4aae4
	catch (Error &e) {
Packit a4aae4
		delete resp_hdrs;
Packit a4aae4
		close_temp(stream, dods_temp);
Packit a4aae4
		throw;
Packit a4aae4
	}
Packit a4aae4
Packit a4aae4
#if 0
Packit a4aae4
	if (d_use_cpp_streams) {
Packit a4aae4
		fclose(stream);
Packit a4aae4
		fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
Packit a4aae4
		return new HTTPResponse(in, status, resp_hdrs, dods_temp);
Packit a4aae4
	}
Packit a4aae4
	else {
Packit a4aae4
#endif
Packit a4aae4
	rewind(stream);
Packit a4aae4
	return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
Packit a4aae4
#if 0
Packit a4aae4
}
Packit a4aae4
#endif
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Set the accept deflate property. If true, the DAP client
Packit a4aae4
    announces to a server that it can accept responses compressed using the
Packit a4aae4
    \c deflate algorithm. This property is automatically set using a value
Packit a4aae4
    from the .dodsrc configuration file. This method provides a
Packit a4aae4
    way to override that behavior.
Packit a4aae4
Packit a4aae4
    @note If the configuration file is not present or does not include a
Packit a4aae4
    value for this property, it is set to \c false.
Packit a4aae4
Packit a4aae4
    @param deflate True sets the accept deflate property, False clears
Packit a4aae4
    it. */
Packit a4aae4
void
Packit a4aae4
HTTPConnect::set_accept_deflate(bool deflate)
Packit a4aae4
{
Packit a4aae4
    d_accept_deflate = deflate;
Packit a4aae4
Packit a4aae4
    if (d_accept_deflate) {
Packit a4aae4
        if (find(d_request_headers.begin(), d_request_headers.end(),
Packit a4aae4
                 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
Packit a4aae4
            d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
Packit a4aae4
        DBG(copy(d_request_headers.begin(), d_request_headers.end(),
Packit a4aae4
                 ostream_iterator<string>(cerr, "\n")));
Packit a4aae4
    }
Packit a4aae4
    else {
Packit a4aae4
        vector<string>::iterator i;
Packit a4aae4
        i = remove_if(d_request_headers.begin(), d_request_headers.end(),
Packit a4aae4
                      bind2nd(equal_to<string>(),
Packit a4aae4
                              string("Accept-Encoding: deflate, gzip, compress")));
Packit a4aae4
        d_request_headers.erase(i, d_request_headers.end());
Packit a4aae4
    }
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Set the xdap_accept property/HTTP-header. This sets the value
Packit a4aae4
    of the DAP which the client advertises to servers that it understands.
Packit a4aae4
    The information (client protocol major and minor versions) are recorded
Packit a4aae4
    in the instance and the information is sent to servers using the
Packit a4aae4
    XDAP-Accept HTTP request header.
Packit a4aae4
Packit a4aae4
    @param major The dap client major protocol version
Packit a4aae4
    @param minor The dap client minor protocol version */
Packit a4aae4
void
Packit a4aae4
HTTPConnect::set_xdap_protocol(int major, int minor)
Packit a4aae4
{
Packit a4aae4
    // Look for, and remove if one exists, an XDAP-Accept header
Packit a4aae4
    vector<string>::iterator i;
Packit a4aae4
    i = find_if(d_request_headers.begin(), d_request_headers.end(),
Packit a4aae4
                HeaderMatch("XDAP-Accept:"));
Packit a4aae4
    if (i != d_request_headers.end())
Packit a4aae4
        d_request_headers.erase(i);
Packit a4aae4
Packit a4aae4
    // Record and add the new header value
Packit a4aae4
    d_dap_client_protocol_major = major;
Packit a4aae4
    d_dap_client_protocol_minor = minor;
Packit a4aae4
    ostringstream xdap_accept;
Packit a4aae4
    xdap_accept << "XDAP-Accept: " << major << "." << minor;
Packit a4aae4
Packit a4aae4
    d_request_headers.push_back(xdap_accept.str());
Packit a4aae4
Packit a4aae4
    DBG(copy(d_request_headers.begin(), d_request_headers.end(),
Packit a4aae4
             ostream_iterator<string>(cerr, "\n")));
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/** Set the credentials for responding to challenges while dereferencing
Packit a4aae4
    URLs. Alternatively, these can be embedded in the URL. This method
Packit a4aae4
    provides a way for clients of HTTPConnect to get credentials from users
Packit a4aae4
    (say using a pop up dialog) and to not hack the URL to pass that
Packit a4aae4
    information to libcurl. Note that the 'credentials in the URL' scheme \e
Packit a4aae4
    is part of the URL standard.
Packit a4aae4
Packit a4aae4
    This method does nothing if \c u, the username, is empty.
Packit a4aae4
Packit a4aae4
    @param u The username.
Packit a4aae4
    @param p The password.
Packit a4aae4
    @exception InternalErr The credentials could not be registered with
Packit a4aae4
    libcurl.
Packit a4aae4
    @see extract_auth_info() */
Packit a4aae4
Packit a4aae4
void
Packit a4aae4
HTTPConnect::set_credentials(const string &u, const string &p)
Packit a4aae4
{
Packit a4aae4
    if (u.empty())
Packit a4aae4
        return;
Packit a4aae4
Packit a4aae4
    // Store the credentials locally.
Packit a4aae4
    d_username = u;
Packit a4aae4
    d_password = p;
Packit a4aae4
Packit a4aae4
    d_upstring = u + ":" + p;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
} // namespace libdap