/* 
   Handling of compressed HTTP responses
   Copyright (C) 2001-2006, Joe Orton <joe@manyfish.co.uk>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.
   
   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
   MA 02111-1307, USA

*/

#include "config.h"

#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

#include "ne_request.h"
#include "ne_compress.h"
#include "ne_utils.h"
#include "ne_internal.h"

#ifdef NE_HAVE_ZLIB

#include <zlib.h>

/* Adds support for the 'gzip' Content-Encoding in HTTP.  gzip is a
 * file format which wraps the DEFLATE compression algorithm.  zlib
 * implements DEFLATE: we have to unwrap the gzip format (specified in
 * RFC1952) as it comes off the wire, and hand off chunks of data to
 * be inflated. */

/* Per-request decompression context.  One of these sits between the
 * HTTP response body and the caller's block reader: raw body blocks
 * go in, and either the same bytes (uncompressed response) or the
 * inflated bytes (gzip response) are handed to 'reader'. */
struct ne_decompress_s {
    ne_request *request; /* request whose response is being decoded. */
    ne_session *session; /* session owning the request; errors are
                          * reported on this. */
    /* scratch buffer which inflate() writes into; its contents are
     * passed to 'reader' after every inflate() call. */
    char outbuf[NE_BUFSIZ];
    z_stream zstr;
    int zstrinit; /* non-zero if zstr has been initialized and so must
                   * be released with inflateEnd(). */

    /* caller-supplied callbacks: 'reader' receives the decoded body
     * blocks, 'acceptor' decides whether a response is read at all;
     * 'userdata' is passed to both. */
    ne_block_reader reader;
    ne_accept_response acceptor;
    void *userdata;

    /* the fixed ten-byte gzip header, collected across as many body
     * blocks as it takes to arrive. */
    unsigned char header[10];
    size_t hdrcount;    /* number of bytes of 'header' filled so far. */

    /* the eight bytes following the DEFLATE data; the first four are
     * compared against 'checksum'. */
    unsigned char footer[8];
    size_t footcount; /* number of bytes of 'footer' filled so far. */

    /* Running CRC32 of the inflated data: odd that zlib uses uLong
     * for this since it is a 64-bit integer on LP64 platforms. */
    uLong checksum;

    /* position within the response body. */
    enum state {
	NE_Z_BEFORE_DATA, /* no response body blocks received yet. */
	NE_Z_PASSTHROUGH, /* response is not gzipped: blocks are handed
			   * to the reader untouched. */
	NE_Z_IN_HEADER, /* some body bytes received, but the ten-byte
			 * gzip header is not complete yet. */
	NE_Z_POST_HEADER, /* skipping the NUL-terminated string which
			   * follows the header. */
	NE_Z_INFLATING, /* body bytes are DEFLATE data: inflating. */
	NE_Z_AFTER_DATA, /* DEFLATE stream ended; collecting the footer
			  * (CRC32 & ISIZE). */
	NE_Z_FINISHED /* footer complete and checksum verified. */
    } state;
};

/* Convert the four bytes at 'buf', least-significant byte first, to
 * an unsigned int; 'buf' must be 'unsigned char *'.  Each byte is
 * converted to unsigned int before it is shifted: an unsigned char
 * otherwise promotes to (signed) int, and shifting a value >= 0x80
 * left by 24 bits overflows int, which is undefined behaviour and
 * yields a negative number that sign-extends when widened. */
#define BUF2UINT(buf) (((unsigned int)(buf)[3] << 24) \
                       + ((unsigned int)(buf)[2] << 16) \
                       + ((unsigned int)(buf)[1] << 8) \
                       + (unsigned int)(buf)[0])

/* The two magic bytes which begin every gzip stream. */
#define ID1 0x1f
#define ID2 0x8b

/* Return values of parse_header(). */
#define HDR_DONE 0
#define HDR_EXTENDED 1
#define HDR_ERROR 2

/* Accessors for the fields of the ten-byte gzip header buffered in
 * 'ctx->header'; 'ctx' is a pointer to the decompression context. */
#define HDR_ID1(ctx) ((ctx)->header[0])
#define HDR_ID2(ctx) ((ctx)->header[1])
#define HDR_CMETH(ctx) ((ctx)->header[2])
#define HDR_FLAGS(ctx) ((ctx)->header[3])
#define HDR_MTIME(ctx) (BUF2UINT(&(ctx)->header[4]))
#define HDR_XFLAGS(ctx) ((ctx)->header[8])
#define HDR_OS(ctx) ((ctx)->header[9])

/* parse_header examines the ten buffered gzip header bytes in
 * 'ctx->header', sets the next state on success, and returns
 *   HDR_DONE: header accepted, the bytes which follow are raw
 *             DEFLATE data (state becomes NE_Z_INFLATING).
 *   HDR_EXTENDED: header accepted, expect one NUL-terminated string
 *                 before the DEFLATE data (state becomes
 *                 NE_Z_POST_HEADER).
 *   HDR_ERROR: invalid or unsupported header, give up (session error
 *              is set; state is left unchanged).
 */
static int parse_header(ne_decompress *ctx)
{
    int flags = HDR_FLAGS(ctx);

    NE_DEBUG(NE_DBG_HTTP, "ID1: %d  ID2: %d, cmeth %d, flags %d\n", 
             HDR_ID1(ctx), HDR_ID2(ctx), HDR_CMETH(ctx), flags);

    /* Both magic bytes must match, and compression method 8 is the
     * only one accepted. */
    if (!(HDR_ID1(ctx) == ID1 && HDR_ID2(ctx) == ID2
          && HDR_CMETH(ctx) == 8)) {
        ne_set_error(ctx->session, "Compressed stream invalid");
        return HDR_ERROR;
    }

    /* mtime is printed with %d, so hand it over as an int. */
    NE_DEBUG(NE_DBG_HTTP, "mtime: %d, xflags: %d, os: %d\n",
             (int)HDR_MTIME(ctx), HDR_XFLAGS(ctx), HDR_OS(ctx));

    if (flags == 0) {
        /* No optional fields: DEFLATE data follows immediately. */
        NE_DEBUG(NE_DBG_HTTP, "compress: Good stream.\n");
        ctx->state = NE_Z_INFLATING;
        return HDR_DONE;
    }

    /* TODO: only a single NUL-terminated extension field can be
     * handled currently.  Really, the number of flag bits set should
     * be counted and that many fields skipped (bailing out if any
     * reserved bit is set). */
    if (flags == 8) {
        ctx->state = NE_Z_POST_HEADER;
        return HDR_EXTENDED;
    }

    /* Any other combination of flag bits. */
    ne_set_error(ctx->session, "Compressed stream not supported");
    return HDR_ERROR;
}

/* Collect 'len' bytes of 'buf', received after the end of the DEFLATE
 * data, into the eight-byte footer buffer.  Once all eight bytes are
 * present, the first four (a little-endian CRC32) are compared with
 * the checksum computed over the inflated data; on a match the state
 * becomes NE_Z_FINISHED.  Returns zero if the footer is still
 * incomplete or was verified, or -1 (with the session error set) if
 * more than eight bytes were supplied in total or the checksum
 * differs. */
static int process_footer(ne_decompress *ctx, 
                          const unsigned char *buf, size_t len)
{
    uLong given;

    if (len + ctx->footcount > sizeof ctx->footer) {
        ne_set_error(ctx->session, 
                     "Too many bytes (%" NE_FMT_SIZE_T ") in gzip footer",
                     len);
        return -1;
    }

    memcpy(ctx->footer + ctx->footcount, buf, len);
    ctx->footcount += len;

    if (ctx->footcount != sizeof ctx->footer) {
        /* more footer bytes still to come. */
        return 0;
    }

    given = BUF2UINT(ctx->footer) & 0xFFFFFFFF;
    if (given != ctx->checksum) {
        NE_DEBUG(NE_DBG_HTTP, "compress: End of response; checksum mismatch: "
                 "given %lu vs computed %lu\n", given, ctx->checksum);
        ne_set_error(ctx->session, 
                     "Checksum invalid for compressed stream");
        return -1;
    }

    ctx->state = NE_Z_FINISHED;
    NE_DEBUG(NE_DBG_HTTP, "compress: End of response; checksum match.\n");
    return 0;
}

/* Record a zlib failure as the session error.  'msg' describes the
 * operation which failed and 'code' is the value zlib returned.  If
 * zlib left its own message in the stream that is used; otherwise a
 * fixed description of 'code' is reported along with the number. */
static void set_zlib_error(ne_decompress *ctx, const char *msg, int code)
{
    const char *reason;

    if (ctx->zstr.msg) {
        ne_set_error(ctx->session, "%s: %s", msg, ctx->zstr.msg);
        return;
    }

    if (code == Z_STREAM_ERROR)
        reason = "stream error";
    else if (code == Z_DATA_ERROR)
        reason = "data corrupt";
    else if (code == Z_MEM_ERROR)
        reason = "out of memory";
    else if (code == Z_BUF_ERROR)
        reason = "buffer error";
    else if (code == Z_VERSION_ERROR)
        reason = "library version mismatch";
    else
        reason = "unknown error";

    ne_set_error(ctx->session, _("%s: %s (code %d)"), msg, reason, code);
}

/* Inflate the 'len' bytes of DEFLATE data at 'buf'.  inflate() is
 * called repeatedly, each time with the whole of 'ctx->outbuf' as
 * output space, until the input is used up or zlib returns something
 * other than Z_OK.  After every call the output produced is folded
 * into the running CRC32 and, if non-empty, handed to the caller's
 * reader.
 *
 * Returns zero on success; the reader's return value if the reader
 * returned non-zero; NE_ERROR (session error set) if zlib failed; or,
 * when the end of the DEFLATE stream was reached, the result of
 * process_footer() on whatever input was left over. */
static int do_inflate(ne_decompress *ctx, const char *buf, size_t len)
{
    int zret;

    ctx->zstr.next_in = (unsigned char *)buf;
    ctx->zstr.avail_in = len;
    ctx->zstr.total_in = 0;

    for (;;) {
        /* total_out is zeroed before each call so that afterwards it
         * is exactly the number of bytes written to outbuf. */
        ctx->zstr.next_out = (unsigned char *)ctx->outbuf;
        ctx->zstr.avail_out = sizeof ctx->outbuf;
        ctx->zstr.total_out = 0;

        zret = inflate(&ctx->zstr, Z_NO_FLUSH);

        NE_DEBUG(NE_DBG_HTTP, 
                 "compress: inflate %d, %ld bytes out, %d remaining\n",
                 zret, ctx->zstr.total_out, ctx->zstr.avail_in);

        /* update checksum. */
        ctx->checksum = crc32(ctx->checksum, (unsigned char *)ctx->outbuf, 
                              ctx->zstr.total_out);

        /* pass on the inflated data, if any */
        if (ctx->zstr.total_out > 0) {
            int rret = ctx->reader(ctx->userdata, ctx->outbuf,
                                   ctx->zstr.total_out);
            if (rret)
                return rret;
        }

        if (zret != Z_OK || ctx->zstr.avail_in == 0)
            break;
    }

    if (zret == Z_OK) {
        /* input exhausted; the stream continues in a later block. */
        return 0;
    }

    if (zret != Z_STREAM_END) {
        set_zlib_error(ctx, _("Could not inflate data"), zret);
        return NE_ERROR;
    }

    NE_DEBUG(NE_DBG_HTTP, "compress: end of data stream, %d bytes remain.\n",
             ctx->zstr.avail_in);
    /* anything left in this block belongs to the footer. */
    ctx->state = NE_Z_AFTER_DATA;
    return process_footer(ctx, ctx->zstr.next_in, ctx->zstr.avail_in);
}

/* Callback which is passed blocks of the response body. */
static int gz_reader(void *ud, const char *buf, size_t len)
{
    ne_decompress *ctx = ud;
    const char *zbuf;
    size_t count;
    const char *hdr;

    if (len == 0) {
        /* End of response: */
        switch (ctx->state) {
        case NE_Z_BEFORE_DATA:
            hdr = ne_get_response_header(ctx->request, "Content-Encoding");
            if (hdr && ne_strcasecmp(hdr, "gzip") == 0) {
                /* response was truncated: return error. */
                break;
            }
            /* else, fall through */
        case NE_Z_FINISHED: /* complete gzip response */
        case NE_Z_PASSTHROUGH: /* complete uncompressed response */
            return ctx->reader(ctx->userdata, buf, 0);
        default:
            /* invalid state: truncated response. */
            break;
        }
	/* else: truncated response, fail. */
	ne_set_error(ctx->session, "Compressed response was truncated");
	return NE_ERROR;
    }        

    switch (ctx->state) {
    case NE_Z_PASSTHROUGH:
	/* move along there. */
	return ctx->reader(ctx->userdata, buf, len);

    case NE_Z_FINISHED:
	/* Could argue for tolerance, and ignoring trailing content;
	 * but it could mean something more serious. */
	if (len > 0) {
	    ne_set_error(ctx->session,
			 "Unexpected content received after compressed stream");
            return NE_ERROR;
	}
        break;

    case NE_Z_BEFORE_DATA:
	/* work out whether this is a compressed response or not. */
        hdr = ne_get_response_header(ctx->request, "Content-Encoding");
        if (hdr && ne_strcasecmp(hdr, "gzip") == 0) {
            int ret;
	    NE_DEBUG(NE_DBG_HTTP, "compress: got gzipped stream.\n");

            /* inflateInit2() works here where inflateInit() doesn't. */
            ret = inflateInit2(&ctx->zstr, -MAX_WBITS);
            if (ret != Z_OK) {
                set_zlib_error(ctx, _("Could not initialize zlib"), ret);
                return -1;
            }
	    ctx->zstrinit = 1;

	} else {
	    /* No Content-Encoding header: pass it on.  TODO: we could
	     * hack it and register the real callback now. But that
	     * would require add_resp_body_rdr to have defined
	     * ordering semantics etc etc */
	    ctx->state = NE_Z_PASSTHROUGH;
	    return ctx->reader(ctx->userdata, buf, len);
	}

	ctx->state = NE_Z_IN_HEADER;
	/* FALLTHROUGH */

    case NE_Z_IN_HEADER:
	/* copy as many bytes as possible into the buffer. */
	if (len + ctx->hdrcount > 10) {
	    count = 10 - ctx->hdrcount;
	} else {
	    count = len;
	}
	memcpy(ctx->header + ctx->hdrcount, buf, count);
	ctx->hdrcount += count;
	/* have we got the full header yet? */
	if (ctx->hdrcount != 10) {
	    return 0;
	}

	buf += count;
	len -= count;

	switch (parse_header(ctx)) {
	case HDR_EXTENDED:
	    if (len == 0)
		return 0;
	    break;
        case HDR_ERROR:
            return NE_ERROR;
	case HDR_DONE:
	    if (len > 0) {
		return do_inflate(ctx, buf, len);
	    }
            break;
	}

	/* FALLTHROUGH */

    case NE_Z_POST_HEADER:
	/* eating the filename string. */
	zbuf = memchr(buf, '\0', len);
	if (zbuf == NULL) {
	    /* not found it yet. */
	    return 0;
	}

	NE_DEBUG(NE_DBG_HTTP,
		 "compresss: skipped %" NE_FMT_SIZE_T " header bytes.\n", 
		 zbuf - buf);
	/* found end of string. */
	len -= (1 + zbuf - buf);
	buf = zbuf + 1;
	ctx->state = NE_Z_INFLATING;
	if (len == 0) {
	    /* end of string was at end of buffer. */
	    return 0;
	}

	/* FALLTHROUGH */

    case NE_Z_INFLATING:
	return do_inflate(ctx, buf, len);

    case NE_Z_AFTER_DATA:
	return process_footer(ctx, (unsigned char *)buf, len);
    }

    return 0;
}

/* Pre-send hook: puts the context back into its initial state ready
 * for a (new) response.  May run many times per request, for auth
 * retries etc.  Does nothing when invoked for a request other than
 * the one the context was created for. */
static void gz_pre_send(ne_request *r, void *ud, ne_buffer *req)
{
    ne_decompress *dc = ud;

    if (dc->request != r)
        return;

    NE_DEBUG(NE_DBG_HTTP, "compress: Initialization.\n");

    /* (Re-)Initialize the context: drop any zlib state left over from
     * a previous attempt, then rewind the state machine, the
     * header/footer buffers and the checksum. */
    if (dc->zstrinit) {
        inflateEnd(&dc->zstr);
        dc->zstrinit = 0;
    }
    dc->state = NE_Z_BEFORE_DATA;
    dc->hdrcount = 0;
    dc->footcount = 0;
    dc->checksum = crc32(0L, Z_NULL, 0);
}

/* Acceptor registered with the request: forwards the decision to the
 * acceptor the caller supplied, substituting the caller's own
 * userdata for the decompression context. */
static int gz_acceptor(void *userdata, ne_request *req, const ne_status *st)
{
    ne_decompress *dc = userdata;
    int verdict = dc->acceptor(dc->userdata, req, st);

    return verdict;
}

/* Create a decompression context for 'req': the request is made to
 * advertise "Accept-Encoding: gzip", and its response body is routed
 * through gz_reader, which hands decoded blocks to 'rdr'.  'acpt' and
 * 'userdata' are the caller's acceptor and callback data.
 *
 * A slightly ugly hack: the pre_send hook is scoped per-session, so
 * it must check that the invoking request is this one before doing
 * anything, and must be unregistered when the context is
 * destroyed. */
ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
                                    ne_block_reader rdr, void *userdata)
{
    ne_decompress *ctx = ne_calloc(sizeof *ctx);

    /* Fill in the context before handing it to any callback
     * registry. */
    ctx->request = req;
    ctx->session = ne_get_session(req);
    ctx->acceptor = acpt;
    ctx->reader = rdr;
    ctx->userdata = userdata;

    ne_add_request_header(req, "Accept-Encoding", "gzip");
    ne_add_response_body_reader(req, gz_acceptor, gz_reader, ctx);
    ne_hook_pre_send(ctx->session, gz_pre_send, ctx);

    return ctx;
}

/* Destroy a decompression context: releases any zlib state still
 * held, removes the per-session pre-send hook which refers to the
 * context, and frees the context itself.  A NULL 'ctx' is ignored, as
 * it is by the variant of this function built without zlib. */
void ne_decompress_destroy(ne_decompress *ctx)
{
    if (!ctx) return;

    if (ctx->zstrinit) inflateEnd(&ctx->zstr);

    /* the hook holds a pointer to ctx, so it must go before the
     * memory does. */
    ne_unhook_pre_send(ctx->session, gz_pre_send, ctx);

    ne_free(ctx);
}

#else /* !NE_HAVE_ZLIB */

/* Pass-through interface present to provide ABI compatibility. */

/* Variant built without zlib: the caller's acceptor and reader are
 * registered directly on the request, so response bodies reach the
 * reader undecoded.  No context is allocated. */
ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
                                    ne_block_reader rdr, void *userdata)
{
    /* an arbitrary non-NULL handle: don't confuse callers by
     * returning NULL. */
    ne_decompress *handle = (ne_decompress *)req;

    ne_add_response_body_reader(req, acpt, rdr, userdata);

    return handle;
}

/* Variant built without zlib: ne_decompress_reader allocated nothing,
 * so there is nothing to release. */
void ne_decompress_destroy(ne_decompress *dc)
{
    (void)dc;
}

#endif /* NE_HAVE_ZLIB */