/*
Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "glusterfs/common-utils.h"
#include "glusterfs/trie.h"
#define DISTANCE_EDIT 1
#define DISTANCE_INS 1
#define DISTANCE_DEL 1
struct trienode {
char id;
char eow;
int depth;
void *data;
struct trie *trie;
struct trienode *parent;
struct trienode *subnodes[255];
};
struct trie {
struct trienode root;
int nodecnt;
size_t len;
};
trie_t *
trie_new()
{
trie_t *trie = NULL;
trie = GF_CALLOC(1, sizeof(*trie), gf_common_mt_trie_trie);
if (!trie)
return NULL;
trie->root.trie = trie;
return trie;
}
static trienode_t *
trie_subnode(trienode_t *node, int id)
{
trienode_t *subnode = NULL;
subnode = node->subnodes[id];
if (!subnode) {
subnode = GF_CALLOC(1, sizeof(*subnode), gf_common_mt_trie_node);
if (!subnode)
return NULL;
subnode->id = id;
subnode->depth = node->depth + 1;
node->subnodes[id] = subnode;
subnode->parent = node;
subnode->trie = node->trie;
node->trie->nodecnt++;
}
return subnode;
}
int
trie_add(trie_t *trie, const char *dword)
{
trienode_t *node = NULL;
int i = 0;
char id = 0;
trienode_t *subnode = NULL;
node = &trie->root;
for (i = 0; i < strlen(dword); i++) {
id = dword[i];
subnode = trie_subnode(node, id);
if (!subnode)
return -1;
node = subnode;
}
node->eow = 1;
return 0;
}
static void
trienode_free(trienode_t *node)
{
trienode_t *trav = NULL;
int i = 0;
for (i = 0; i < 255; i++) {
trav = node->subnodes[i];
if (trav)
trienode_free(trav);
}
GF_FREE(node->data);
GF_FREE(node);
}
void
trie_destroy(trie_t *trie)
{
trienode_free((trienode_t *)trie);
}
void
trie_destroy_bynode(trienode_t *node)
{
trie_destroy(node->trie);
}
static int
trienode_walk(trienode_t *node, int (*fn)(trienode_t *node, void *data),
void *data, int eowonly)
{
trienode_t *trav = NULL;
int i = 0;
int cret = 0;
int ret = 0;
if (!eowonly || node->eow)
ret = fn(node, data);
if (ret)
goto out;
for (i = 0; i < 255; i++) {
trav = node->subnodes[i];
if (!trav)
continue;
cret = trienode_walk(trav, fn, data, eowonly);
if (cret < 0) {
ret = cret;
goto out;
}
ret += cret;
}
out:
return ret;
}
static int
trie_walk(trie_t *trie, int (*fn)(trienode_t *node, void *data), void *data,
int eowonly)
{
return trienode_walk(&trie->root, fn, data, eowonly);
}
static void
print_node(trienode_t *node, char **buf)
{
if (!node->parent)
return;
if (node->parent) {
print_node(node->parent, buf);
*(*buf)++ = node->id;
}
}
int
trienode_get_word(trienode_t *node, char **bufp)
{
char *buf = NULL;
buf = GF_CALLOC(1, node->depth + 1, gf_common_mt_trie_buf);
if (!buf)
return -1;
*bufp = buf;
print_node(node, &buf);
return 0;
}
static int
calc_dist(trienode_t *node, void *data)
{
const char *word = NULL;
int i = 0;
int *row = NULL;
int *uprow = NULL;
int distu = 0;
int distl = 0;
int distul = 0;
word = data;
node->data = GF_CALLOC(node->trie->len, sizeof(int),
gf_common_mt_trie_data);
if (!node->data)
return -1;
row = node->data;
if (!node->parent) {
for (i = 0; i < node->trie->len; i++)
row[i] = i + 1;
return 0;
}
uprow = node->parent->data;
distu = node->depth; /* up node */
distul = node->parent->depth; /* up-left node */
for (i = 0; i < node->trie->len; i++) {
distl = uprow[i]; /* left node */
if (word[i] == node->id)
row[i] = distul;
else
row[i] = min((distul + DISTANCE_EDIT),
min((distu + DISTANCE_DEL), (distl + DISTANCE_INS)));
distu = row[i];
distul = distl;
}
return 0;
}
int
trienode_get_dist(trienode_t *node)
{
int *row = NULL;
row = node->data;
return row[node->trie->len - 1];
}
struct trienodevec_w {
struct trienodevec *vec;
const char *word;
};
static void
trienodevec_clear(struct trienodevec *nodevec)
{
memset(nodevec->nodes, 0, sizeof(*nodevec->nodes) * nodevec->cnt);
}
static int
collect_closest(trienode_t *node, void *data)
{
struct trienodevec_w *nodevec_w = NULL;
struct trienodevec *nodevec = NULL;
int dist = 0;
int i = 0;
nodevec_w = data;
nodevec = nodevec_w->vec;
if (calc_dist(node, (void *)nodevec_w->word))
return -1;
if (!node->eow || !nodevec->cnt)
return 0;
dist = trienode_get_dist(node);
/*
* I thought that when descending further after some dictionary word dw,
* if we see that child's distance is bigger than it was for dw, then we
* can prune this branch, as it can contain only worse nodes.
*
* This conjecture fails, see eg:
*
* d("AB", "B") = 1;
* d("AB", "BA") = 2;
* d("AB", "BAB") = 1;
*
* -- if both "B" and "BAB" are in dict., then pruning at "BA" * would
* miss "BAB".
*
* (example courtesy of Richard Bann <richardbann at gmail.com>)
if (node->parent->eow && dist > trienode_get_dist (node->parent))
return 1;
*/
if (nodevec->nodes[0] && dist < trienode_get_dist(nodevec->nodes[0])) {
/* improving over the findings so far */
trienodevec_clear(nodevec);
nodevec->nodes[0] = node;
} else if (!nodevec->nodes[0] ||
dist == trienode_get_dist(nodevec->nodes[0])) {
/* as good as the best so far, add if there is free space */
for (i = 0; i < nodevec->cnt; i++) {
if (!nodevec->nodes[i]) {
nodevec->nodes[i] = node;
break;
}
}
}
return 0;
}
int
trie_measure(trie_t *trie, const char *word, trienode_t **nodes, int nodecnt)
{
struct trienodevec nodevec = {
0,
};
nodevec.nodes = nodes;
nodevec.cnt = nodecnt;
return trie_measure_vec(trie, word, &nodevec);
}
int
trie_measure_vec(trie_t *trie, const char *word, struct trienodevec *nodevec)
{
struct trienodevec_w nodevec_w = {
0,
};
int ret = 0;
trie->len = strlen(word);
trienodevec_clear(nodevec);
nodevec_w.vec = nodevec;
nodevec_w.word = word;
ret = trie_walk(trie, collect_closest, &nodevec_w, 0);
if (ret > 0)
ret = 0;
return ret;
}
static int
trienode_reset(trienode_t *node, void *data)
{
GF_FREE(node->data);
return 0;
}
void
trie_reset_search(trie_t *trie)
{
trie->len = 0;
trie_walk(trie, trienode_reset, NULL, 0);
}