Blob Blame History Raw
/*****************************************************************************

NAME:
datastore.c -- contains database independent components of data storage.

AUTHORS:
Gyepi Sam <gyepi@praxis-sw.com>   2002 - 2003
Matthias Andree <matthias.andree@gmx.de> 2003
David Relson <relson@osagesoftware.com>  2003

******************************************************************************/

#include "common.h"

#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>

#include "datastore.h"
#include "datastore_db.h"
#include "datastore_db_private.h"

#include "error.h"
#include "maint.h"
#include "rand_sleep.h"
#include "swap.h"
#include "word.h"
#include "xmalloc.h"

/* Zero every byte of object `s`.  `s` must be an lvalue, since its address
** is taken; the argument is parenthesized so that any lvalue expression
** expands safely. */
#define struct_init(s) memset(&(s), 0, sizeof(s))

YYYYMMDD today;			/* date as YYYYMMDD; assigned by set_date() and set_today(), stamped into records on write */

/* OO function list */

/* Fallback method table in which every entry is NULL.  ds_init() installs
** it when no table has been set in `dsm`; the ds_*() wrappers in this file
** test each entry for NULL and skip the call when it is absent.
** The initializer is positional, so the order must follow the member
** order of dsm_t. */
static dsm_t dsm_dummies = {
    /* public -- used in datastore.c */
    NULL,	/* dsm_begin           */
    NULL,	/* dsm_abort           */
    NULL,	/* dsm_commit          */
    /* private -- used in datastore_db_*.c */
    NULL,	/* dsm_env_init         */
    NULL,	/* dsm_cleanup          */
    NULL,	/* dsm_cleanup_lite     */
    NULL,	/* dsm_get_env_dbe      */
    NULL,	/* dsm_database_name    */
    NULL,	/* dsm_recover_open     */
    NULL,	/* dsm_auto_commit_flags*/
    NULL,	/* dsm_get_rmw_flag     */
    NULL,	/* dsm_lock             */
    NULL,	/* dsm_common_close     */
    NULL,	/* dsm_sync             */
    NULL,	/* dsm_log_flush        */
    NULL,	/* dsm_pagesize         */
    NULL,	/* dsm_checkpoint       */
    NULL,	/* dsm_purgelogs        */
    NULL,	/* dsm_recover          */
    NULL,	/* dsm_remove           */
    NULL,	/* dsm_verify           */
    NULL,	/* dsm_list_logfiles    */
    NULL	/* dsm_leafpages        */
};

/* Function definitions */

/* Return the local calendar date lying `days` days before the current time,
** packed as the decimal number YYYYMMDD (days == 0 yields today's date).
** Returns 0 when localtime() cannot convert the resulting time. */
static
YYYYMMDD time_to_date(YYYYMMDD days)
{
    /* Widen to time_t before multiplying so the product is not computed
    ** (and possibly wrapped) in YYYYMMDD's own width. */
    time_t t = time(NULL) - (time_t)days * 86400;
    struct tm *tm = localtime( &t );
    YYYYMMDD date;

    /* localtime() returns NULL when the time cannot be broken down */
    if (tm == NULL)
	return 0;

    date = ((((tm->tm_year + (YYYYMMDD)1900) * 100 + tm->tm_mon + 1) * 100) + tm->tm_mday);
    return date;
}

YYYYMMDD string_to_date(const char *s)
{
    YYYYMMDD date = (YYYYMMDD) atol(s);
    if (date < 20020801 && date != 0) {
	date = time_to_date(date);
    }
    return date;
}

/* Store `date` in the file-level `today` variable. */
void set_date(YYYYMMDD date)
{
    today = date;
    return;
}

void set_today(void)
{
    today = time_to_date(0);
}

/* Decode a stored record into a dsv_t.
**
** The stored form is a sequence of up to three 32-bit words: spam count,
** good count, date.  Each word is byte-swapped when dsh->is_swapped is set.
** Only words that are completely present (according to ex_data->leng) are
** read; fields whose word is missing or truncated are set to 0, so a short
** or empty record is never read past its end.
*/
static void convert_external_to_internal(dsh_t *dsh, dbv_const_t *ex_data, dsv_t *in_data)
{
    const uint32_t *cv = (const uint32_t *)ex_data->data;
    const size_t nwords = ex_data->leng / sizeof(uint32_t);	/* whole words only */
    uint32_t field[3] = { 0, 0, 0 };
    size_t i;

    for (i = 0; i < nwords && i < 3; i++)
	field[i] = !dsh->is_swapped ? cv[i] : swap_32bit(cv[i]);

    in_data->spamcount = field[0];
    in_data->goodcount = field[1];
    in_data->date = field[2];

    return;
}

/* Encode a dsv_t into the buffer that ex_data->data points at.
**
** Writes the spam count and good count as 32-bit words, followed by the
** date word only when timestamp_tokens is set and the date is non-zero.
** Every word is byte-swapped when dsh->is_swapped is set.  On return
** ex_data->leng holds the number of bytes written (8 or 12).
** The caller must supply a buffer with room for three 32-bit words.
*/
static void convert_internal_to_external(dsh_t *dsh, dsv_t *in_data, dbv_t *ex_data)
{
    uint32_t *out = (uint32_t *)ex_data->data;
    size_t n = 0;

    if (dsh->is_swapped) {
	out[n++] = swap_32bit(in_data->spamcount);
	out[n++] = swap_32bit(in_data->goodcount);
    } else {
	out[n++] = in_data->spamcount;
	out[n++] = in_data->goodcount;
    }

    if (timestamp_tokens && in_data->date != 0) {
	if (dsh->is_swapped)
	    out[n] = swap_32bit(in_data->date);
	else
	    out[n] = in_data->date;
	n++;
    }

    ex_data->leng = n * sizeof(out[0]);
}

/* Allocate a datastore handle wrapping `dbh` and record whether the
** underlying database reports itself as byte-swapped. */
dsh_t *dsh_init(void *dbh)		/* database handle from db_open() */
{
    dsh_t *handle = (dsh_t *)xmalloc(sizeof(dsh_t));

    handle->dbh = dbh;
    handle->is_swapped = db_is_swapped(dbh);

    return handle;
}

void dsh_free(void *vhandle)
{
    dsh_t *dsh = (dsh_t *)vhandle;
    xfree(dsh);
    return;
}

/* Open a database and wrap it in a datastore handle.
**
** Returns NULL when db_open() fails, otherwise the new handle.  When the
** database was just created and is opened for writing without DS_LOAD,
** the wordlist version and encoding records are written inside one
** transaction; the process exits with EX_ERROR if that transaction cannot
** be begun or committed.  A global `encoding` of E_UNKNOWN is replaced by
** E_DEFAULT before it is stored.
*/
void *ds_open(void *dbe, bfpath *bfp, dbmode_t open_mode)
{
    void *dbh = db_open(dbe, bfp, open_mode); /* FIXME */
    dsh_t *handle;

    if (dbh == NULL)
	return NULL;

    handle = dsh_init(dbh);

    /* nothing more to do unless this is a freshly created, writable,
    ** non-load database */
    if (!db_created(dbh) || (open_mode & DS_LOAD) || !(open_mode & DS_WRITE))
	return handle;

    if (ds_txn_begin(handle) != DST_OK)
	exit(EX_ERROR);

    ds_set_wordlist_version(handle, NULL);

    /* enforce use of default value */
    if (encoding == E_UNKNOWN)
	encoding = E_DEFAULT;

    ds_set_wordlist_encoding(handle, (int) encoding);

    if (ds_txn_commit(handle) != DST_OK)
	exit(EX_ERROR);

    return handle;
}

void ds_close(/*@only@*/ void *vhandle)
{
    dsh_t *dsh = (dsh_t *)vhandle;
    db_close(dsh->dbh);
    xfree(dsh);
}

void ds_flush(void *vhandle)
{
    dsh_t *dsh = (dsh_t *)vhandle;
    db_flush(dsh->dbh);
}

/* Look up `word` and decode its record into `val`.
**
** `val` is zeroed before the lookup.  Returns 0 when the word was found,
** 1 when it was not found, and the DS_ABORT_RETRY code when the lookup
** returned that code.  Any other result is reported via print_error()
** and the process exits with EX_ERROR.
*/
int ds_read(void *vhandle, const word_t *word, /*@out@*/ dsv_t *val)
{
    dsh_t *handle = (dsh_t *)vhandle;
    uint32_t raw[3];		/* receives the stored record */
    dbv_t key;
    dbv_t data;
    int rc;

    memset(&key, 0, sizeof(key));
    memset(&data, 0, sizeof(data));

    key.data = word->u.text;
    key.leng = word->leng;

    memset(val, 0, sizeof(*val));

    data.data = raw;
    data.leng = sizeof(raw);

    rc = db_get_dbvalue(handle->dbh, &key, &data);

    if (rc == 0) {
	convert_external_to_internal(handle, (dbv_const_t *)&data, val);

	if (DEBUG_DATABASE(3)) {
	    fprintf(dbgout, "ds_read: [%.*s] -- %lu,%lu\n",
		    CLAMP_INT_MAX(word->leng), (const char *)word->u.text,
		    (unsigned long)val->spamcount,
		    (unsigned long)val->goodcount);
	}
	return 0;
    }

    if (rc == DS_NOTFOUND) {
	if (DEBUG_DATABASE(3)) {
	    fprintf(dbgout, "ds_read: [%.*s] not found\n", 
		    CLAMP_INT_MAX(word->leng), (char *) word->u.text);
	}
	return 1;
    }

    if (rc == DS_ABORT_RETRY) {
	if (DEBUG_DATABASE(1)) {
	    print_error(__FILE__, __LINE__, "ds_read('%.*s') was aborted to recover from a deadlock.",
		    CLAMP_INT_MAX(word->leng), (char *) word->u.text);
	}
	return rc;
    }

    /* unexpected result: report and give up */
    fprintf(dbgout, "ret=%d, DS_NOTFOUND=%d\n", rc, DS_NOTFOUND);
    print_error(__FILE__, __LINE__, "ds_read( '%.*s' ), err: %d, %s", 
		CLAMP_INT_MAX(word->leng), (char *) word->u.text, rc, db_str_err(rc));
    exit(EX_ERROR);

    return rc;
}

/* Store `val` under `word`.
**
** When timestamp_tokens is set and `today` is non-zero, val->date is
** overwritten with `today` before the record is encoded, so the caller's
** structure is modified.  Returns the result of db_set_dbvalue()
** (0 if ok).
*/
int ds_write(void *vhandle, const word_t *word, dsv_t *val)
{
    dsh_t *handle = (dsh_t *)vhandle;
    uint32_t raw[3];		/* encoded record is built here */
    dbv_t key;
    dbv_t data;
    int rc;

    memset(&key, 0, sizeof(key));
    key.data = word->u.text;
    key.leng = word->leng;

    memset(&data, 0, sizeof(data));
    data.data = raw;
    data.leng = sizeof(raw);

    if (timestamp_tokens && today != 0)
	val->date = today;

    convert_internal_to_external(handle, val, &data);

    rc = db_set_dbvalue(handle->dbh, &key, &data);

    if (DEBUG_DATABASE(3)) {
	fprintf(dbgout, "ds_write: [%.*s] -- %lu,%lu,%lu\n",
		CLAMP_INT_MAX(word->leng), (const char *)word->u.text,
		(unsigned long)val->spamcount,
		(unsigned long)val->goodcount,
		(unsigned long)val->date);
    }

    return rc;		/* 0 if ok */
}

int ds_delete(void *vhandle, const word_t *word)
{
    dsh_t *dsh = (dsh_t *)vhandle;
    int ret;
    dbv_t ex_key;

    struct_init(ex_key);
    ex_key.data = word->u.text;
    ex_key.leng = word->leng;

    ret = db_delete(dsh->dbh, &ex_key);

    return ret;		/* 0 if ok */
}

int ds_txn_begin(void *vhandle) {
    dsh_t *dsh = (dsh_t *)vhandle;
    if (dsm->dsm_begin == NULL)
	return 0;
    else
	return dsm->dsm_begin(dsh->dbh);
}

int ds_txn_abort(void *vhandle) {
    dsh_t *dsh = (dsh_t *)vhandle;
    if (dsm->dsm_abort == NULL)
	return 0;
    else
	return dsm->dsm_abort(dsh->dbh);
}

int ds_txn_commit(void *vhandle) {
    dsh_t *dsh = (dsh_t *)vhandle;
    if (dsm->dsm_commit == NULL)
	return 0;
    else
	return dsm->dsm_commit(dsh->dbh);
}

/* Context that ds_foreach() passes through db_foreach() to ds_hook(). */
typedef struct {
    ds_foreach_t *hook;		/* caller's per-record callback */
    dsh_t	 *dsh;		/* handle; its is_swapped flag drives record decoding */
    void         *data;		/* caller's opaque pointer, handed back to hook */
} ds_userdata_t;

/* db_foreach() callback: wrap the raw key in a word_t, decode the raw
** record into a dsv_t, and pass both to the caller's hook stored in the
** ds_userdata_t that `userdata` points at.  Returns the hook's result
** (EX_OK if ok). */
static ex_t ds_hook(dbv_t *ex_key,
		    dbv_const_t *ex_data,
		    void *userdata)
{
    ds_userdata_t *ctx = (ds_userdata_t *)userdata;
    word_t token;
    dsv_t counts;

    token.u.text = (byte *)ex_key->data;
    token.leng = ex_key->leng;

    memset(&counts, 0, sizeof(counts));
    convert_external_to_internal(ctx->dsh, ex_data, &counts);

    return ctx->hook(&token, &counts, ctx->data);
}

/* Run `hook` over every record via db_foreach(), with keys and values
** decoded by ds_hook(); `userdata` is passed through to `hook` unchanged.
** Returns db_foreach()'s result. */
ex_t ds_foreach(void *vhandle, ds_foreach_t *hook, void *userdata)
{
    dsh_t *handle = (dsh_t *)vhandle;
    ds_userdata_t ctx;

    ctx.data = userdata;
    ctx.dsh  = handle;
    ctx.hook = hook;

    return db_foreach(handle->dbh, ds_hook, &ctx);
}

/* Wrapper for ds_foreach that opens and closes the file.
**
** Opens `bfp`, runs `hook` over all records inside one transaction, and
** closes the datastore again.  The process exits with EX_ERROR when the
** file cannot be opened.
**
** Returns the hook/iteration result; the transaction is aborted when that
** result is not EX_OK and committed otherwise.  EX_ERROR is returned when
** the transaction cannot be begun (the hook is then never run) or cannot
** be committed.
*/
ex_t ds_oper(void *env, bfpath *bfp, dbmode_t open_mode, 
	     ds_foreach_t *hook, void *userdata)
{
    ex_t ret;
    void *dsh;

    dsh = ds_open(env, bfp, open_mode);

    if (dsh == NULL) {
	fprintf(stderr, "Can't open file '%s'\n", bfp->filepath);
	exit(EX_ERROR);
    }

    if (ds_txn_begin(dsh) != DST_OK) {
	/* nothing was processed, so do not report success */
	ret = EX_ERROR;
    } else {
	ret = ds_foreach(dsh, hook, userdata);

	if (ret != EX_OK) {
	    ds_txn_abort(dsh);
	} else if (ds_txn_commit(dsh) != DST_OK) {
	    ret = EX_ERROR;
	}
    }

    ds_close(dsh);

    return ret;
}

/* Keys of the bookkeeping records; each is created on first use by
** ds_init() from the named constant. */
static word_t  *msg_count_tok;		/* built from MSG_COUNT */
static word_t  *wordlist_version_tok;	/* built from WORDLIST_VERSION */
static word_t  *wordlist_encoding_tok;	/* built from WORDLIST_ENCODING */

/* Initialize the datastore layer.
**
** Calls dbe_init() for `bfp`, installs the all-NULL method table when no
** backend has set `dsm`, and creates the bookkeeping key tokens that do
** not exist yet.  Returns whatever dbe_init() returned.
*/
void *ds_init(bfpath *bfp)
{
    void *dbe = dbe_init(bfp);

    if (!dsm)
	dsm = &dsm_dummies;

    if (!msg_count_tok)
	msg_count_tok = word_news(MSG_COUNT);

    if (!wordlist_version_tok)
	wordlist_version_tok = word_news(WORDLIST_VERSION);

    if (!wordlist_encoding_tok)
	wordlist_encoding_tok = word_news(WORDLIST_ENCODING);

    return dbe;
}

/* Cleanup storage allocation */
void ds_cleanup(void *dbe)
{
    if (dsm->dsm_cleanup != NULL)
	dsm->dsm_cleanup((dbe_t *)dbe);
    xfree(msg_count_tok);
    xfree(wordlist_version_tok);
    msg_count_tok = NULL;
    wordlist_version_tok = NULL;
}

/*
  Read the message-count record of the database into `val`.
  Returns ds_read()'s result (0 found, 1 not found).
*/
int ds_get_msgcounts(void *vhandle, dsv_t *val)
{
    return ds_read((dsh_t *)vhandle, msg_count_tok, val);
}

/*
 Write `val` as the message-count record of the database.
 val->date is set to `today` first; returns ds_write()'s result.
*/
int  ds_set_msgcounts(void *vhandle, dsv_t *val)
{
    dsh_t *handle = (dsh_t *)vhandle;

    val->date = today;
    return ds_write(handle, msg_count_tok, val);
}

void *ds_get_dbenv(void *vhandle)
{
    dsh_t *dsh = (dsh_t *)vhandle;
    return db_get_env(dsh->dbh);
}

/*
  Read the wordlist-encoding record of the database into `val`.
  Returns ds_read()'s result (0 found, 1 not found).
*/
int ds_get_wordlist_encoding(void *vhandle, dsv_t *val)
{
    return ds_read((dsh_t *)vhandle, wordlist_encoding_tok, val);
}

/*
 Set the wordlist encoding associated with database.
*/
int ds_set_wordlist_encoding(void *vhandle, int enc)
{
    dsh_t *dsh = (dsh_t *)vhandle;
    dsv_t  val;

    val.count[0] = enc;
    val.count[1] = 0;
    val.date = today;

    return ds_write(dsh, wordlist_encoding_tok, &val);
}

/*
  Read the wordlist-version record of the database into `val`.
  Returns ds_read()'s result (0 found, 1 not found).
*/
int ds_get_wordlist_version(void *vhandle, dsv_t *val)
{
    return ds_read((dsh_t *)vhandle, wordlist_version_tok, val);
}

/*
 Write the wordlist-version record of the database.
 With val == NULL a record holding CURRENT_VERSION in count[0] and 0 in
 count[1] is written; otherwise the caller's `val` is used.  In both
 cases the record's date is set to `today` before writing.
 Returns ds_write()'s result.
*/
int ds_set_wordlist_version(void *vhandle, dsv_t *val)
{
    dsh_t *handle = (dsh_t *)vhandle;
    dsv_t  fallback;
    dsv_t *record = val;

    if (record == NULL) {
	fallback.count[0] = CURRENT_VERSION;
	fallback.count[1] = 0;
	record = &fallback;
    }

    record->date = today;

    return ds_write(handle, wordlist_version_tok, record);
}

/* Return the version string reported by the database layer. */
const char *ds_version_str(void)
{
    const char *version = db_version_str();

    return version;
}

/* Run the backend's dsm_recover method for `bfp`, passing `catastrophic`
** through and `true` as its third argument; returns EX_OK when the
** backend provides no such method. */
ex_t ds_recover(bfpath *bfp, bool catastrophic)
{
    if (dsm->dsm_recover != NULL)
	return dsm->dsm_recover(bfp, catastrophic, true);

    return EX_OK;
}

/* Run the backend's dsm_remove method for `bfp`; returns EX_OK when the
** backend provides none. */
ex_t ds_remove(bfpath *bfp)
{
    if (dsm->dsm_remove != NULL)
	return dsm->dsm_remove(bfp);

    return EX_OK;
}

/* Run the backend's dsm_checkpoint method for `bfp`; returns EX_OK when
** the backend provides none. */
ex_t ds_checkpoint(bfpath *bfp)
{
    if (dsm->dsm_checkpoint != NULL)
	return dsm->dsm_checkpoint(bfp);

    return EX_OK;
}

/* Run the backend's dsm_purgelogs method for `bfp`; returns EX_OK when
** the backend provides none. */
ex_t ds_purgelogs(bfpath *bfp)
{
    if (dsm->dsm_purgelogs != NULL)
	return dsm->dsm_purgelogs(bfp);

    return EX_OK;
}

/* Run the backend's dsm_verify method for `bfp`; returns EX_OK when the
** backend provides none. */
ex_t ds_verify(bfpath *bfp)
{
    if (dsm->dsm_verify != NULL)
	return dsm->dsm_verify(bfp);

    return EX_OK;
}

/* Return the result of the backend's dsm_leafpages method for `bfp`, or 0
** when the backend provides none. */
u_int32_t ds_leafpages(bfpath *bfp)
{
    if (dsm->dsm_leafpages != NULL)
	return dsm->dsm_leafpages(bfp);

    return 0;
}

/* Return the result of the backend's dsm_pagesize method for `bfp`, or 0
** when the backend provides none. */
u_int32_t ds_pagesize(bfpath *bfp)
{
    if (dsm->dsm_pagesize != NULL)
	return dsm->dsm_pagesize(bfp);

    return 0;
}

/* Run the backend's dsm_list_logfiles method for `bfp`, passing `argc`
** and `argv` through unchanged; returns EX_OK when the backend provides
** none. */
ex_t ds_list_logfiles(bfpath *bfp, int argc, char **argv)
{
    if (dsm->dsm_list_logfiles != NULL)
	return dsm->dsm_list_logfiles(bfp, argc, argv);

    return EX_OK;
}