#include "common.h"
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <assert.h>
#include "bogofilter.h"
#include "datastore.h"
#include "msgcounts.h"
#include "mxcat.h"
#include "paths.h"
#include "rand_sleep.h"
#include "wordlists.h"
#include "xmalloc.h"
#include "xstrdup.h"
#include "bsdqueue.h"
/* Lower and upper bounds passed to rand_sleep() by open_wordlist() when the
 * database lock cannot be obtained.  Going by the unit notes on each line the
 * values are presumably microseconds -- TODO confirm against rand_sleep(). */
#define MIN_SLEEP 0.5e+3 /* 0.5 milliseconds */
#define MAX_SLEEP 2.0e+6 /* 2.0 seconds */
/* Function Definitions */
/* NOTE(review): MAX_ENVS is not referenced in the visible part of this file;
 * the environments are kept in a linked list with no size limit. */
#define MAX_ENVS 10
/* Idea: - build dirname
 *       - search the list of known environments for that dirname;
 *         if it is found, return it
 *       - otherwise create and insert a new environment
 *
 * for destruction: - when an environment is closed, NULL it in the
 *                    word_lists entries past the current one that
 *                    match our address
 */
/* Declares the list-head type `struct envlist` together with a file-local
 * head named `envs`.
 * NOTE(review): in the visible code `envs` is only touched by LIST_INIT() in
 * open_wordlists(); every lookup, insertion and removal uses `envlisthead`. */
static LIST_HEAD(envlist, envnode) envs;
/* Head of the list of known database environments (external linkage).
 * It is never passed to LIST_INIT() in the visible code, so it starts out
 * empty only through static zero-initialisation. */
struct envlist envlisthead;
/* One cached database environment, keyed by its directory name. */
struct envnode {
LIST_ENTRY(envnode) entries; /* linkage within envlisthead */
void *dbe; /* environment handle as returned by ds_init() */
char directory[1]; /* NUL-terminated directory name; nodes are allocated with extra room for the full string */
};
/**
 * Return the database environment for bfp->dirname, creating it on first use.
 *
 * The list headed by envlisthead is searched for a node whose directory
 * equals bfp->dirname; if one exists its handle is returned.  Otherwise
 * ds_init() is called and, when it yields a handle, a new node carrying a
 * copy of the directory name is inserted at the head of the list.
 *
 * @param bfp  path description; only bfp->dirname is read here, the whole
 *             structure is handed to ds_init().
 * @return the environment handle, or NULL when ds_init() returned NULL.
 *         A failed initialisation is deliberately NOT cached: close_wordlists()
 *         passes the dbe of every node to ds_cleanup(), so a node holding a
 *         NULL handle must never enter the list.
 */
static void *list_searchinsert(bfpath *bfp)
{
    size_t len;
    struct envnode *node;
    void *dbe;

    for (node = envlisthead.lh_first; node != NULL; node = node->entries.le_next) {
        if (strcmp(bfp->dirname, node->directory) == 0)
            return node->dbe;
    }

    /* Initialise first, so that nothing has to be undone on failure. */
    dbe = ds_init(bfp);
    if (dbe == NULL)
        return NULL;

    /* The directory name (including its NUL) is stored in the node's tail. */
    len = strlen(bfp->dirname) + 1;
    node = (struct envnode *)xmalloc(sizeof(struct envnode) + len);
    node->dbe = dbe;
    memcpy(node->directory, bfp->dirname, len);
    LIST_INSERT_HEAD(&envlisthead, node, entries);

    return dbe;
}
/**
 * Begin a transaction on an already opened wordlist and load its header data.
 *
 * The whole sequence is repeated until a transaction could be started and
 * neither read reported DS_ABORT_RETRY.  Along the way it fills
 * list->msgcount[], picks up the global wordlist_version while that is
 * still 0, and sets list->encoding when the database has one recorded.
 */
void begin_wordlist(wordlist_t *list)
{
    dsv_t val;

    for (;;) {
        /* No transaction yet: back off for a random interval, then retry. */
        if (ds_txn_begin(list->dsh)) {
            rand_sleep(1000, 1000000);
            continue;
        }

        switch (ds_get_msgcounts(list->dsh, &val)) {
        case DS_ABORT_RETRY:
            continue;           /* start over with a new transaction */
        case 0:
        case 1:
            list->msgcount[IX_GOOD] = val.goodcount;
            list->msgcount[IX_SPAM] = val.spamcount;
            if (wordlist_version == 0 &&
                ds_get_wordlist_version(list->dsh, &val) == 0)
                wordlist_version = val.count[0];
            break;
        default:
            break;
        }

        switch (ds_get_wordlist_encoding(list->dsh, &val)) {
        case DS_ABORT_RETRY:
            continue;           /* start over with a new transaction */
        case 0:                 /* found */
            list->encoding = (e_enc)val.spamcount; /* FIXME: is the cast correct? */
            break;
        case 1:                 /* not found */
        default:
            break;
        }

        return;
    }
}
/**
 * Open one wordlist (if it is not open yet) and begin a transaction on it.
 *
 * Lists of type WL_IGNORE are always opened with DS_READ.  If no environment
 * can be obtained the process exits.  If ds_open() fails, all wordlists are
 * closed again; EAGAIN (plus EACCES under __EMX__) leads to a random sleep
 * and a retry request, any other error is fatal unless `query` is set.
 * Finally the list's encoding is reconciled with the global `encoding`;
 * a mismatch terminates the process.
 *
 * @return true when the caller should try again, false otherwise.
 */
static bool open_wordlist(wordlist_t *list, dbmode_t mode)
{
    bfpath *path = list->bfp;
    bool again = false;
    void *env;

    /* Only need to open the list once. */
    if (list->dsh != NULL)
        return false;

    /* open ignore list in read-only mode */
    if (list->type == WL_IGNORE)
        mode = DS_READ;

    /* FIXME: create or reuse environment from filepath */
    env = list_searchinsert(path);
    if (env == NULL)
        exit(EX_ERROR);

    list->dsh = (dsh_t *)ds_open(env, path, mode);

    if (list->dsh != NULL) {
        begin_wordlist(list);
    } else {
        /* Capture errno before the cleanup below can overwrite it. */
        const int saved_errno = errno;

        close_wordlists(false);         /* unlock and close */

        switch (saved_errno) {
        case EAGAIN:                    /* F_SETLK can't obtain lock */
#ifdef __EMX__
        case EACCES:
#endif
            rand_sleep(MIN_SLEEP, MAX_SLEEP);
            again = true;
            break;
        default:
            if (query)                  /* If "-Q", failure is OK */
                return false;
            fprintf(stderr,
                    "Can't open file '%s' in directory '%s'.\n",
                    path->filename, path->dirname);
            if (saved_errno != 0)
                fprintf(stderr,
                        "error #%d - %s.\n", saved_errno, strerror(saved_errno));
            /* print error and exit */
            wordlist_error(saved_errno);
        }
    }

    /* Nothing to reconcile when this list carries no encoding. */
    if (list->encoding == E_UNKNOWN)
        return again;

    if (encoding == E_UNKNOWN)
        encoding = list->encoding;

    if (encoding != list->encoding) {
        fprintf(stderr, "Can't mix database encodings, i.e. utf-8 and any other.\n");
        exit(EX_ERROR);
    }

    return again;
}
/* Note: seems like some cleanup around here should be possible....
** It seems wrong to call set_bogohome() from outside of paths.c.
** (DR 04/03/05)
*/
/**
 * Check the list's path and add bogohome to it.
 *
 * When run_type has UNREG_SPAM or UNREG_GOOD set the path is checked with
 * BFP_MUST_EXIST, otherwise with BFP_MAY_CREATE.  Afterwards
 * bfpath_set_bogohome() is applied to the same path.
 */
static void check_wordlist_path(wordlist_t *list)
{
    bfpath *path = list->bfp;
    bfpath_mode mode;

    mode = ((run_type & (UNREG_SPAM | UNREG_GOOD)) != 0)
        ? BFP_MUST_EXIST
        : BFP_MAY_CREATE;

    bfpath_check_mode(path, mode);
    bfpath_set_bogohome(path);
}
/**
 * Derive bogohome from the directory of the first wordlist.
 *
 * Does nothing when bogohome is already set, when there is no wordlist, or
 * when get_directory_from_path() yields NULL for the first list's filepath.
 */
void set_wordlist_directory(void)
{
    wordlist_t *first;
    char *dir;

    /* an existing bogohome always wins */
    if (bogohome != NULL)
        return;

    first = word_lists;
    if (first == NULL)
        return;

    dir = get_directory_from_path(first->bfp->filepath);
    if (dir == NULL)
        return;

    check_wordlist_path(first);
    set_bogohome(dir);
    free(dir);
}
/**
 * Open every configured wordlist, repeating the pass until none asks for a retry.
 *
 * A default list ("word", WORDLIST) is registered when none was configured.
 * Lists of type WL_IGNORE are opened with DS_READ, all others with `mode`.
 * The first list determines the global `encoding` if that is still unknown;
 * any list with a different, known encoding terminates the process.
 */
void open_wordlists(dbmode_t mode)
{
    wordlist_t *wl;
    bool retry;

    /* set default wordlist if none specified */
    if (word_lists == NULL)
        init_wordlist("word", WORDLIST, 0, WL_REGULAR);

    /* set bogohome using first wordlist's directory */
    set_wordlist_directory();
    chk_bogohome();

    /* add bogohome value to path structs */
    for (wl = word_lists; wl != NULL; wl = wl->next)
        bfpath_set_bogohome(wl->bfp);

    /* NOTE(review): this initialises `envs`, whereas list_searchinsert() and
     * close_wordlists() operate on `envlisthead`. */
    LIST_INIT(&envs);

    do {
        retry = false;

        for (wl = word_lists; wl != NULL; wl = wl->next) {
            dbmode_t list_mode;

            check_wordlist_path(wl);

            list_mode = (wl->type == WL_IGNORE) ? DS_READ : mode;
            if (open_wordlist(wl, list_mode))
                retry = true;

            if (wl == word_lists && encoding == E_UNKNOWN)
                encoding = wl->encoding;

            if (wl->encoding != E_UNKNOWN && wl->encoding != encoding) {
                fprintf(stderr, "Can't mix database encodings, i.e. utf-8 and any other.\n");
                exit(EX_ERROR);
            }
        }
    } while (retry);
}
/**
 * Close all open word lists and release every cached environment.
 *
 * Each list's handle is cleared first; an open handle then has its
 * transaction committed (commit == true) or aborted (commit == false)
 * before being closed.  Afterwards every node of envlisthead is cleaned up,
 * unlinked and freed.
 *
 * FIXME: we really need to look at the list's environments
 *
 * @return true if at least one commit reported failure, false otherwise.
 */
bool close_wordlists(bool commit /** if unset, abort */)
{
    bool failed = false;
    wordlist_t *wl;
    struct envnode *node;

    for (wl = word_lists; wl != NULL; wl = wl->next) {
        void *handle = wl->dsh;

        wl->dsh = NULL;
        if (handle == NULL)
            continue;           /* this list was not open */

        if (!commit)
            (void)ds_txn_abort(handle);
        else if (ds_txn_commit(handle))
            failed = true;

        ds_close(handle);
    }

    /* Drain the environment list from the front. */
    while ((node = envlisthead.lh_first) != NULL) {
        ds_cleanup(node->dbe);
        LIST_REMOVE(node, entries);
        xfree(node);
    }

    return failed;
}
#ifdef COMPILE_DEAD_CODE
/* some sanity checking of lists is needed because we may
allow users to specify lists eventually and there are
opportunities to generate divide-by-zero exceptions or
follow bogus pointers. */
/* Walks word_lists and exits with EX_ERROR if a list has no name, if the
   message-count test below fires, or if there are no lists at all.
   Only compiled when COMPILE_DEAD_CODE is defined. */
static void sanitycheck_lists(void)
{
wordlist_t* list=word_lists;
int listcount=0;
while (1) {
if (list == NULL) break;
if (list->name == NULL) {
fprintf(stderr, "A list has no name.\n");
exit(EX_ERROR);
}
/* NOTE(review): elsewhere in this file msgcount is indexed
   (msgcount[IX_GOOD], msgcount[IX_SPAM]).  If it is an array member, this
   comparison tests the array's address and can never be true -- confirm
   which count(s) were meant to be checked. */
if (list->msgcount==0) {
fprintf(stderr, "list %s has zero message count.\n", list->name);
exit(EX_ERROR);
}
listcount++;
list=list->next;
}
/* an empty word_lists chain is an error as well */
if (0==listcount) {
fprintf(stderr, "No wordlists available!\n");
exit(EX_ERROR);
}
if (DEBUG_WORDLIST(1))
fprintf(dbgout, "%d lists look OK.\n", listcount);
}
#endif
/** Skips to the end of a word (comma, blank, tab separated), replaces
 * the delimiter by 0, skips over any blanks and tabs that follow, and
 * returns a pointer to the next character.  If the word runs up to the
 * end of the string, nothing is modified and a pointer to the
 * terminating 0 is returned. */
static char *spanword(char *p)
{
    char *cursor = p + strcspn(p, ", \t");  /* first delimiter or end of string */

    if (*cursor == '\0')
        return cursor;

    /* terminate the word, then step over trailing whitespace */
    *cursor = '\0';
    ++cursor;
    while (*cursor == ' ' || *cursor == '\t')
        ++cursor;

    return cursor;
}
/**
 * Parse one wordlist specification and register it with init_wordlist().
 *
 * The specification consists of four fields separated by a comma, blank
 * or tab:
 *   type       - 'r' or 'i' in either case (regular or ignore); only the
 *                first character of the field is examined
 *   name       - 'user', 'system', 'ignore', etc
 *   path       - 'wordlist.db', 'ignorelist.db', etc
 *   precedence - 1,2,...
 *
 * Missing trailing fields are passed on as empty strings, and a missing or
 * non-numeric precedence as 0.
 *
 * @param val  the specification; it is copied, never modified.
 * @return true for success, false if the type character is not recognised
 *         (a message is printed to stderr in that case).
 */
bool configure_wordlist(const char *val)
{
    char ch;
    WL_TYPE type;
    char *listname;
    char *filename;
    int precedence;
    char *tmp = xstrdup(val);   /* private copy; spanword() writes into it */
    char *ftmp = tmp;           /* start of the copy, kept for xfree() */

    ch = tmp[0];                /* save wordlist type (regular/ignore) */
    tmp = spanword(tmp);

    switch (toupper((unsigned char)ch))
    {
    case 'R':
        type = WL_REGULAR;
        break;
    case 'I':
        type = WL_IGNORE;
        break;
    default:
        fprintf( stderr, "Unknown wordlist type - '%c'\n", ch);
        xfree(ftmp);
        return (false);
    }

    listname = tmp;             /* name of wordlist */
    tmp = spanword(tmp);

    filename = tmp;             /* path to wordlist */
    tmp = spanword(tmp);

    /* strtol() yields the same value as atoi() for representable input but,
     * unlike atoi(), has defined behaviour when the number is out of range. */
    precedence = (int)strtol(tmp, NULL, 10);

    init_wordlist(listname, filename, precedence, type);

    xfree(ftmp);
    return true;
}
/**
 * Print a hint for selected error codes to stderr, then exit with EX_ERROR.
 *
 * ENOENT and EINVAL each produce an explanatory message; any other value
 * produces no output.  This function does not return.
 */
void wordlist_error(int err)
{
    switch (err) {
    case ENOENT:
        fputs("\n"
              "Remember to register some spam and ham messages before you\n"
              "use bogofilter to evaluate mail for its probable spam status!\n",
              stderr);
        break;
    case EINVAL:
        fputs("\n"
              "Make sure that the database version this program is linked against\n"
              "can handle the format of the data base file (after updates in particular).\n",
              stderr);
        break;
    default:
        break;
    }

    exit(EX_ERROR);
}