Tree - source-git/bogofilter - CentOS Git server

source-git / bogofilter

Files

Blob Blame History Raw
/**
 * \file mime.c - lexer MIME processing
 *
 * NOTES:
 *
 * RFC2045:
 * Header fields occur in at least two contexts:
 *-# As part of a regular RFC 822 message header.
 *-# In a MIME body part header within a multipart construct.
 *
 * \author Matthias Andree <matthias.andree@gmx.de>
 * \author David Relson <relson@osagesoftware.com>
 * \author Gyepi Sam <gyepi@praxis-sw.com>
 */

#include "common.h"

#include <assert.h>
#include <ctype.h>
#include <stdlib.h>

#include "base64.h"
#include "lexer.h"
#include "mime.h"
#include "qp.h"
#include "uudecode.h"
#include "xstrdup.h"
#include "xmalloc.h"

/* Global Variables */

/** State of the MIME entity currently being processed; every mime_push()
 *  points it at the new entry and mime_cleanup() resets it to NULL. */
mime_t *msg_state = NULL;
/** Outermost stack entry: set by mime_push() when called without a parent. */
static mime_t *mime_stack_top = NULL;
/** Most recently pushed stack entry; get_boundary_props() starts its
 *  parent-ward search here. */
static mime_t *mime_stack_bot = NULL;

/**
 * MIME media types (or prefixes thereof) that we detect.
 *
 * mime_type() compares each \c name case-insensitively against the start
 * of the Content-Type value and stops at the first hit, so the more
 * specific "text/html" and "text/plain" entries must stay ahead of the
 * bare "text" prefix.
 */
static const struct type_s {
    enum mimetype type;	/**< internal representation of MIME type */
    const char *name;	/**< prefix of MIME type to match */
} mime_type_table[] = {
    { MIME_TEXT_HTML,	"text/html"	},
    { MIME_TEXT_PLAIN,	"text/plain"	},
    { MIME_TEXT,	"text"		}, /* NON-COMPLIANT; should be "text/" */
    { MIME_APPLICATION,	"application/"	},
    { MIME_MESSAGE,	"message/"	},
    { MIME_MULTIPART,	"multipart/"	},
    { MIME_IMAGE,	"image/"	},
    { MIME_AUDIO,	"audio/"	},
    { MIME_VIDEO,	"video/"	},
};

/**
 * MIME encodings that we detect.
 *
 * mime_encoding() compares the whole word following the header name
 * against \c name with strcasecmp(), so the upper-case spelling used
 * here also matches lower- and mixed-case input.
 */
static const struct encoding_s {
    enum mimeencoding encoding;	/**< internal representation of encoding */
    const char *name;		/**< encoding name to match */
} mime_encoding_table[] = {
    { MIME_7BIT,	"7BIT" },
    { MIME_8BIT,	"8BIT" },
    { MIME_BINARY,	"BINARY" },
    { MIME_QP,		"QUOTED-PRINTABLE" },
    { MIME_BASE64,	"BASE64" },
    { MIME_UUENCODE,	"X-UUENCODE" },
};

/**
 * MIME content dispositions that we detect.
 *
 * mime_disposition() compares the whole word following the header name
 * against \c name with strcasecmp() (case-insensitive, exact length).
 */
static const struct disposition_s {
    enum mimedisposition disposition;	/**< internal representation of disposition */
    const char *name;			/**< disposition name to match */
} mime_disposition_table[] = {
    { MIME_INLINE,	"inline" },
    { MIME_ATTACHMENT,	"attachment" },
};

/**
 * Properties of a MIME boundary, as filled in by get_boundary_props().
 * Note that \c is_final and \c depth are only written when a matching
 * boundary was found, i.e. when \c is_valid is true.
 */
typedef struct {
    bool is_valid;	/**< valid boundary of an enclosing MIME container */
    bool is_final;	/**< boundary is a closing one (two trailing dashes) */
    int depth;		/**< stack level the boundary was found at */
} boundary_t;

/* Function Prototypes */

/* Content-* header handlers, dispatched from mime_content() */
static void mime_disposition(word_t * text);
static void mime_encoding(word_t * text);
static void mime_type(word_t * text);

/* MIME stack primitives: add a new entry / drop the current one */
static void mime_push(mime_t * parent);
static void mime_pop(void);

/* Function Definitions */

#if	0			/* Unused */
/**
 * Look up the match prefix stored in mime_type_table for \a type.
 * \return the \c name of the first table entry whose type equals
 * \a type, or "unknown" when the table has no such entry.
 * Currently compiled out.
 */
const char *mime_type_name(enum mimetype type)
{
    size_t i;
    for (i = 0; i < COUNTOF(mime_type_table); i += 1) {
	/* the table is const, so the cursor must keep the qualifier */
	const struct type_s *typ = mime_type_table + i;
	if (typ->type == type)
	    return typ->name;
    }
    return "unknown";
}
#endif

/**
 * Translate MIME type \a m into a printable label (used by the stack dump).
 * \return a string constant; "INTERNAL_ERROR" if \a m is none of the
 * enumerators listed below.
 */
static const char *str_mime_type(enum mimetype m)
{
    const char *label = "INTERNAL_ERROR";

    switch (m) {
    case MIME_TYPE_UNKNOWN:
	label = "unknown";
	break;
    case MIME_MULTIPART:
	label = "multipart/*";
	break;
    case MIME_MESSAGE:
	label = "message/*";
	break;
    case MIME_TEXT:
	label = "text/*";
	break;
    case MIME_TEXT_PLAIN:
	label = "text/plain";
	break;
    case MIME_TEXT_HTML:
	label = "text/html";
	break;
    case MIME_APPLICATION:
	label = "application/*";
	break;
    case MIME_IMAGE:
	label = "image/*";
	break;
    case MIME_AUDIO:
	label = "audio/*";
	break;
    case MIME_VIDEO:
	label = "video/*";
	break;
    }

    return label;
}

/**
 * Translate MIME transfer encoding \a e into a printable label (used by
 * the stack dump).
 * \return a string constant; "INTERNAL_ERROR" if \a e is none of the
 * enumerators listed below.
 */
static const char *str_mime_enc(enum mimeencoding e)
{
    const char *label = "INTERNAL_ERROR";

    switch (e) {
    case MIME_ENCODING_UNKNOWN:
	label = "unknown";
	break;
    case MIME_7BIT:
	label = "7bit";
	break;
    case MIME_8BIT:
	label = "8bit";
	break;
    case MIME_BINARY:
	label = "binary";
	break;
    case MIME_QP:
	label = "quoted-printable";
	break;
    case MIME_BASE64:
	label = "base64";
	break;
    case MIME_UUENCODE:
	label = "x-uuencode";
	break;
    }

    return label;
}

/**
 * Dump the current MIME boundary stack to dbgout. For debugging.
 * Entries are listed from mime_stack_top downwards along the child
 * links; an entry without a boundary is shown as "NIL".
 */
#ifndef	NODEBUG
static void mime_stack_dump(void)
{
    mime_t *entry = mime_stack_top;

    fprintf(dbgout, "**** MIME stack is:\n");

    while (entry != NULL) {
	const char *bnd = "NIL";

	if (entry->boundary)
	    bnd = entry->boundary;

	fprintf(dbgout, "**** %3d type: %-16s enc: %-16s chr: %-8.8s bnd: %s\n",
		entry->depth,
		str_mime_type(entry->mime_type),
		str_mime_enc(entry->mime_encoding),
		entry->charset,
		bnd);

	entry = entry->child;
    }
}
#endif

/**
 * Initialise the freshly allocated entry that msg_state points to and
 * hook it below \a parent.
 *
 * Defaults: type text, 7bit encoding, charset "US-ASCII" (heap copy),
 * no boundary, decoding allowed, disposition unknown.  The depth is 0
 * for a root entry (\a parent == NULL) and parent depth + 1 otherwise.
 */
static void mime_init(mime_t * parent)
{
    mime_t *self = msg_state;

    /* position in the stack */
    self->parent = parent;
    self->child = NULL;
    if (parent != NULL) {
	self->depth = parent->depth + 1;
	parent->child = self;
    } else {
	self->depth = 0;
    }

    /* content defaults */
    self->mime_type = MIME_TEXT;
    self->mime_encoding = MIME_7BIT;
    self->mime_disposition = MIME_DISPOSITION_UNKNOWN;
    self->mime_dont_decode = false;
    self->charset = xstrdup("US-ASCII");

    /* no boundary known yet */
    self->boundary = NULL;
    self->boundary_len = 0;
}

/**
 * Release stack entry \a t together with its boundary and charset
 * strings.  If \a t is the current top or bottom of the stack, the
 * corresponding marker is moved on to its child or parent first.
 * A NULL \a t is ignored.
 */
static void mime_free(mime_t * t)
{
    if (!t)
	return;

    /* keep the stack end markers from dangling */
    if (t == mime_stack_top)
	mime_stack_top = t->child;
    if (t == mime_stack_bot)
	mime_stack_bot = t->parent;

    /* owned strings */
    if (t->charset != NULL) {
	xfree(t->charset);
	t->charset = NULL;
    }
    if (t->boundary != NULL) {
	xfree(t->boundary);
	t->boundary = NULL;
    }

    t->parent = NULL;
    xfree(t);
}

/**
 * Discard the complete MIME stack and reset msg_state, mime_stack_top
 * and mime_stack_bot to NULL.  Does nothing (beyond the level-0 debug
 * message) when msg_state is already NULL.
 */
void mime_cleanup()
{
    bool more;

    if (DEBUG_MIME(0))
	fprintf(dbgout, "*** mime_cleanup\n");

    if (!msg_state)
	return;

    if (DEBUG_MIME(2))
	mime_stack_dump();

    /* pop repeatedly; the pop issued while the top entry has no child
     * left is the final one */
    do {
	more = (mime_stack_top->child != NULL);
	mime_pop();
    } while (more);

    if (DEBUG_MIME(2))
	mime_stack_dump();

    msg_state = NULL;
    mime_stack_top = NULL;
    mime_stack_bot = NULL;
}

/**
 * Allocate a new stack entry, make it the current one (msg_state) and
 * the stack bottom, and initialise it as a child of \a parent.
 * With \a parent == NULL the new entry also becomes the stack top.
 */
static void mime_push(mime_t * parent)
{
    mime_t *fresh = (mime_t *) xmalloc(sizeof(*fresh));

    /* mime_init() works on msg_state, so publish the entry first */
    msg_state = fresh;
    mime_stack_bot = fresh;
    if (!parent)
	mime_stack_top = fresh;

    mime_init(parent);

    if (DEBUG_MIME(1))
	fprintf(dbgout, "*** mime_push. stackp: %d\n", fresh->depth);

    if (DEBUG_MIME(2))
	mime_stack_dump();
}

/**
 * Drop the current stack entry (msg_state) and make its parent current.
 * The new current entry, if any, is left without a child.  Calling this
 * with an empty stack (msg_state == NULL) only reports the underflow on
 * stderr.
 */
static void mime_pop(void)
{
    if (msg_state != NULL)
    {
	mime_t *parent = msg_state->parent;

	/* the debug message reads msg_state->depth, so it must stay
	 * inside the NULL check to keep the underflow path safe */
	if (DEBUG_MIME(1))
	    fprintf(dbgout, "*** mime_pop. stackp: %d\n", msg_state->depth);

	mime_free(msg_state);

	msg_state = parent;
	if (msg_state)
	    msg_state->child = NULL;
    } else {
	fprintf(stderr, "Attempt to underflow mime stack\n");
    }

    if (DEBUG_MIME(2))
	mime_stack_dump();
}

/**
 * Tell whether MIME entity \a m has a container media type, i.e.
 * message/anything or multipart/anything.
 * \return false for a NULL \a m or any other media type.
 */
static bool is_mime_container(mime_t * m)
{
    if (m == NULL)
	return false;

    switch (m->mime_type) {
    case MIME_MESSAGE:
    case MIME_MULTIPART:
	return true;
    default:
	return false;
    }
}

/**
 * Start over with a fresh MIME stack: discard whatever is on the stack
 * via mime_cleanup(), then push a single root entry (no parent).
 */
void mime_reset(void)
{
    if (DEBUG_MIME(0)) {
	fprintf(dbgout, "*** mime_reset\n");
    }

    if (DEBUG_MIME(2)) {
	mime_stack_dump();
    }

    mime_cleanup();	/* drop the old stack, if any */
    mime_push(NULL);	/* new root entry */
}

/**
 * Push a new MIME stack entry below \a parent and make it the current
 * one.  Public wrapper around the file-local mime_push().
 */
void mime_add_child(mime_t * parent)
{
    mime_push(parent);
}

/**
 * Check if the boundary line in \a ins of length \a inlen is a final
 * (instead of initial or intermediate) boundary of a MIME multipart,
 * when the boundary we are currently looking at has length \a blen.
 *
 * A final boundary line reads "--" boundary "--", so the line must hold
 * at least \a blen + 4 bytes; shorter lines are rejected before any byte
 * behind the boundary is examined.
 *
 * \return true if it is a final boundary, false otherwise.
 */
static bool is_final_boundary(
	const byte	*ins,
	size_t		 inlen,
	size_t		 blen
)
{
    /* written as a subtraction so that a huge blen cannot wrap around */
    if (inlen < 5 || blen > inlen - 4)
	return false;

    return ins[0] == '-'
	&& ins[1] == '-'
	&& ins[blen + 2] == '-'
	&& ins[blen + 3] == '-';
}


/**
 * Check if the line given in \a boundary is a boundary line of one of
 * the MIME containers currently on the stack, and store the result
 * in \a b.
 *
 * The line has to start with "--"; trailing CR/LF characters are
 * ignored.  The stack is searched from mime_stack_bot towards the
 * parents, and the first container whose boundary string follows the
 * two dashes, with a remaining line length of boundary + 2 or
 * boundary + 4, wins.  b->depth and b->is_final are only written on a
 * match.
 *
 * \return a copy of b->is_valid
 */
static bool get_boundary_props(const word_t * boundary, /**< input line */
	boundary_t * b /*@out@*/ /**< output properties, must be pre-allocated by caller */)
{
    const byte *line = boundary->u.text;
    size_t len = boundary->leng;
    mime_t *node;

    b->is_valid = false;

    /* a boundary line must begin with two dashes */
    if (len <= 2 || line[0] != '-' || line[1] != '-')
	return false;

    /* strip EOL characters */
    while (len > 2) {
	const byte last = line[len - 1];

	if (last != '\r' && last != '\n')
	    break;
	len--;
    }

    /* search stack for matching boundary, in reverse order */
    for (node = mime_stack_bot; node != NULL; node = node->parent) {
	if (!is_mime_container(node) || node->boundary == NULL)
	    continue;

	/* plain ("--b") or closing ("--b--") line length only */
	if (node->boundary_len + 2 != len && node->boundary_len + 4 != len)
	    continue;

	if (memcmp(node->boundary, line + 2, node->boundary_len) != 0)
	    continue;

	b->depth = node->depth;
	b->is_valid = true;
	b->is_final = is_final_boundary(line, len, node->boundary_len);
	return true;
    }

    return false;
}

/**
 * Tell whether the line in \a boundary is a boundary line of a MIME
 * container on the stack.  The stack itself is not modified.
 * \return the result of get_boundary_props()
 */
bool mime_is_boundary(word_t * boundary)
{
    boundary_t props;	/* details are discarded, only validity matters */
    const bool matched = get_boundary_props(boundary, &props);

    return matched;
}

/**
 * Process a candidate boundary line and adjust the MIME stack for it.
 *
 * If \a boundary is not a boundary of a container on the stack, nothing
 * happens.  Otherwise the stack is popped back to the level the boundary
 * belongs to; for a closing boundary that is all, for any other boundary
 * a new entry is pushed for the part that follows.
 *
 * \return true if the line was a valid boundary, false otherwise
 */
bool got_mime_boundary(word_t * boundary)
{
    boundary_t props;
    mime_t *container = NULL;

    if (!get_boundary_props(boundary, &props))
	return false;

    if (DEBUG_MIME(0))
	fprintf(dbgout,
		"*** got_mime_boundary:  stackp: %d, boundary: '%s'\n",
		mime_stack_top->depth, boundary->u.text);

    if (msg_state != NULL) {
	/* This handles explicit and implicit boundaries - pop stack
	 * until we reach the boundary level on the stack */
	while (msg_state->depth > props.depth)
	    mime_pop();

	/* explicit end boundary: no further part to prepare */
	if (props.is_final)
	    return true;

	if (is_mime_container(msg_state))
	    container = msg_state;
	else
	    container = msg_state->parent;
    }

    /* push for the next part; without a container fall back to msg_state */
    mime_push(container != NULL ? container : msg_state);

    return true;
}

/** Skip leading blanks and tabs in the byte range [\a t, \a e).
 * \return - pointer to the first byte that is neither ' ' nor '\\t',
 *         - NULL if the range is empty or consists of blanks/tabs only */
static const byte *skipws(
	const byte * t, /**< string to find non-whitespace in */
	const byte * e  /**< pointer to the byte after the last byte in \a t */)
{
    for (; t < e; t++) {
	if (*t != ' ' && *t != '\t')
	    return t;
    }

    return NULL;
}

/**
 * Get the next MIME word from the byte range [\a t, \a e).
 *
 * Leading blanks and tabs are skipped.  A word that starts with a double
 * quote runs up to (not including) the next double quote; any other word
 * runs up to the next blank or tab.  In both cases the end of the range
 * also ends the word.
 *
 * \return malloc'd NUL-terminated string containing a copy of the word
 * (possibly empty), or NULL when the range holds no non-whitespace byte.
 * It is the caller's responsibility to xfree() the returned string!
 */
static byte *getword(
	const byte * t, /**< string to extract word from */
	const byte * e  /**< pointer to byte after last byte in \a t */)
{
    const byte *start;
    const byte *stop;
    byte *copy;
    int len;

    start = skipws(t, e);
    if (start == NULL)
	return NULL;

    if (*start == '"') {
	/* quoted word: step over the opening quote, stop at the closing one */
	start++;
	for (stop = start; stop < e && *stop != '"'; stop++)
	    ;
    } else {
	/* bare word: stop at the first blank or tab */
	for (stop = start; stop < e && *stop != ' ' && *stop != '\t'; stop++)
	    ;
    }

    len = stop - start;
    copy = (byte *) xmalloc(len + 1);
    memcpy(copy, start, len);
    copy[len] = (byte) '\0';

    return copy;
}

/**
 * Dispatch a "Content-..." header line to its handler.
 *
 * The three supported headers are told apart by the character at
 * offset 9 of the line, compared case-insensitively: 'r' selects
 * Content-Transfer-Encoding, 'y' Content-Type and 'i'
 * Content-Disposition.  Other lines are ignored.
 *
 * NOTE(review): the line length is not checked here, so this assumes
 * \a text holds at least 10 bytes - confirm against the caller.
 */
void mime_content(word_t * text)
{
    const char *key = (const char *) text->u.text;
    const int selector = tolower((unsigned char) key[9]);

    if (selector == 'r') {
	mime_encoding(text);		/*  Content-Transfer-Encoding: */
    } else if (selector == 'y') {
	mime_type(text);		/*  Content-Type: */
    } else if (selector == 'i') {
	mime_disposition(text);		/*  Content-Disposition: */
    }
}

/**
 * Handle a "Content-Disposition:" header line: take the first word
 * after the header name and record the matching disposition in
 * msg_state.  A word that matches no table entry leaves the disposition
 * as MIME_DISPOSITION_UNKNOWN; a line without any word changes nothing.
 */
static void mime_disposition(word_t * text)
{
    const size_t skip = sizeof("Content-Disposition:") - 1;
    const struct disposition_s *entry = mime_disposition_table;
    const struct disposition_s *const end =
	mime_disposition_table + COUNTOF(mime_disposition_table);
    byte *word = getword(text->u.text + skip, text->u.text + text->leng);

    if (word == NULL)
	return;

    msg_state->mime_disposition = MIME_DISPOSITION_UNKNOWN;

    for (; entry != end; entry++) {
	if (strcasecmp((const char *)word, entry->name) != 0)
	    continue;

	msg_state->mime_disposition = entry->disposition;
	if (DEBUG_MIME(1))
	    fprintf(dbgout, "*** mime_disposition: %s\n", text->u.text);
	break;
    }

    if (DEBUG_MIME(0)
	&& msg_state->mime_disposition == MIME_DISPOSITION_UNKNOWN)
	fprintf(stderr, "Unknown mime disposition - '%s'\n", word);

    xfree(word);
}

/*********
**
** RFC2045, Section 6.1.  Content-Transfer-Encoding Syntax
**
**     encoding := "Content-Transfer-Encoding" ":" mechanism
**
**     mechanism := "7bit" / "8bit" / "binary" /
**                  "quoted-printable" / "base64" /
**                  ietf-token / x-token
**
*********/

/**
 * Handle a "Content-Transfer-Encoding:" header line: take the first word
 * after the header name and record the matching encoding in msg_state.
 * A word that matches no table entry leaves the encoding as
 * MIME_ENCODING_UNKNOWN; a line without any word changes nothing.
 */
static void mime_encoding(word_t * text)
{
    const size_t skip = sizeof("Content-Transfer-Encoding:") - 1;
    const struct encoding_s *entry = mime_encoding_table;
    const struct encoding_s *const end =
	mime_encoding_table + COUNTOF(mime_encoding_table);
    byte *word = getword(text->u.text + skip, text->u.text + text->leng);

    if (word == NULL)
	return;

    msg_state->mime_encoding = MIME_ENCODING_UNKNOWN;

    for (; entry != end; entry++) {
	if (strcasecmp((const char *)word, entry->name) != 0)
	    continue;

	msg_state->mime_encoding = entry->encoding;
	if (DEBUG_MIME(1))
	    fprintf(dbgout, "*** mime_encoding: %s\n", text->u.text);
	break;
    }

    if (DEBUG_MIME(0)
	&& msg_state->mime_encoding == MIME_ENCODING_UNKNOWN)
	fprintf(stderr, "Unknown mime encoding - '%s'\n", word);

    xfree(word);
}

/**
 * Handle a "Content-Type:" header line: take the first word after the
 * header name and record in msg_state the type of the first table entry
 * whose name is a case-insensitive prefix of that word
 * (MIME_TYPE_UNKNOWN if there is none).  For application, image, audio
 * and video types, msg_state->mime_dont_decode is set as well.
 * A line without any word changes nothing.
 */
static void mime_type(word_t * text)
{
    size_t idx;
    const size_t skip = sizeof("Content-Type:") - 1;
    byte *word = getword(text->u.text + skip, text->u.text + text->leng);

    if (word == NULL)
	return;

    msg_state->mime_type = MIME_TYPE_UNKNOWN;

    for (idx = 0; idx < COUNTOF(mime_type_table); idx++) {
	const char *prefix = mime_type_table[idx].name;

	if (strncasecmp((const char *)word, prefix, strlen(prefix)) != 0)
	    continue;

	msg_state->mime_type = mime_type_table[idx].type;
	if (DEBUG_MIME(1) || DEBUG_LEXER(1))
	    fprintf(dbgout, "*** mime_type: %s\n", text->u.text);
	break;
    }

    if (DEBUG_MIME(0) && msg_state->mime_type == MIME_TYPE_UNKNOWN)
	fprintf(stderr, "Unknown mime type - '%s'\n", word);

    xfree(word);

    switch (msg_state->mime_type) {
    case MIME_APPLICATION:
    case MIME_IMAGE:
    case MIME_AUDIO:
    case MIME_VIDEO:
	msg_state->mime_dont_decode = true;
	break;
    default:
	/* text types (XXX: read charset), multipart (XXX: read
	 * boundary), message and unknown: nothing more to do */
	break;
    }
}

/**
 * Record the boundary string of the current MIME entity.
 *
 * \a text is expected to start with "boundary="; the word following it
 * (see getword(), optionally double-quoted) replaces any boundary stored
 * in msg_state so far.  When no word follows, getword() yields NULL; the
 * entity is then left without a boundary (NULL, length 0) instead of
 * handing the NULL pointer to strlen().
 */
void mime_boundary_set(word_t * text)
{
    byte *boundary = text->u.text;
    size_t blen = text->leng;

    if (DEBUG_MIME(1)) {
	/* the precision argument of "%.*s" is an int: clamp the length */
	int len = (blen > INT_MAX) ? INT_MAX : (int) blen;
	fprintf(dbgout, "*** --> mime_boundary_set: %d '%-.*s'\n",
		msg_state->depth, len, boundary);
    }

    boundary = getword(boundary + strlen("boundary="), boundary + blen);

    xfree(msg_state->boundary);
    msg_state->boundary = (char *) boundary;
    msg_state->boundary_len =
	(boundary != NULL) ? strlen((char *) boundary) : 0;

    if (DEBUG_MIME(1))
	fprintf(dbgout, "*** <-- mime_boundary_set: %d '%s'\n",
		msg_state->depth,
		(boundary != NULL) ? (const char *) boundary : "NIL");

    return;
}

/**
 * Decode the line in \a text according to the transfer encoding of the
 * current MIME entity.
 *
 * Nothing is decoded for 7bit, 8bit, binary and unknown encodings, nor
 * for lines that are a boundary of a container on the stack.  Base64
 * lines of 4 bytes or less are left alone as well.
 *
 * \return the value reported by the decoder that was run, or the
 * original text->leng when no decoder was run
 */
uint mime_decode(word_t * text)
{
    const enum mimeencoding enc = msg_state->mime_encoding;
    uint count = text->leng;

    /* early out for the identity codings */
    switch (enc) {
    case MIME_7BIT:
    case MIME_8BIT:
    case MIME_BINARY:
    case MIME_ENCODING_UNKNOWN:
	return count;
    default:
	break;
    }

    if (DEBUG_MIME(3))
	fprintf(dbgout, "*** mime_decode %lu \"%-.*s\"\n",
		(unsigned long) count,
		count > INT_MAX ? INT_MAX : (int) (count - 1), text->u.text);

    /* Do not decode "real" boundary lines */
    if (mime_is_boundary(text))
	return count;

    if (enc == MIME_QP) {
	count = qp_decode(text, RFC2045);
    } else if (enc == MIME_BASE64) {
	if (count > 4)
	    count = base64_decode(text);
    } else if (enc == MIME_UUENCODE) {
	count = uudecode(text);
    }

    return count;
}

/**
 * \return the media type recorded for the current MIME entity
 * (msg_state->mime_type).  msg_state is dereferenced without a NULL
 * check.
 */
enum mimetype get_content_type(void)
{
    return msg_state->mime_type;
}
source-git / bogofilter

Source Code

Files