/* dzl-trie.c
*
* Copyright (C) 2012 Christian Hergert <christian@hergert.me>
*
* This file is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This file is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include <string.h>
#include <glib/gi18n.h>
#include "search/dzl-trie.h"
#include "util/dzl-macros.h"
/**
* SECTION:trie
* @title: DzlTrie
* @short_description: A generic prefix tree.
*
* The #DzlTrie struct and its associated functions provide a DzlTrie data structure,
* where nodes in the tree can contain arbitrary data.
*
* To create a new #DzlTrie use dzl_trie_new(). You can free it with dzl_trie_free().
* To insert a key and value pair into the #DzlTrie use dzl_trie_insert().
* To remove a key from the #DzlTrie use dzl_trie_remove().
* To traverse all children of the #DzlTrie from a given key use dzl_trie_traverse().
*/
typedef struct _DzlTrieNode DzlTrieNode;
typedef struct _DzlTrieNodeChunk DzlTrieNodeChunk;
G_DEFINE_BOXED_TYPE (DzlTrie, dzl_trie, dzl_trie_ref, dzl_trie_unref)
/*
* Try to optimize for 64 bit vs 32 bit pointers. We are assuming that the
* 32 bit also has a 32 byte cacheline. This is very likely not the case
* everwhere, such as x86_64 compiled with -m32. However, accomidating that
* will require some changes to the layout of each chunk.
*/
#if GLIB_SIZEOF_VOID_P == 8
# define FIRST_CHUNK_KEYS 4
# define TRIE_NODE_SIZE 64
# define TRIE_NODE_CHUNK_SIZE 64
# define TRIE_NODE_CHUNK_KEYS(c) (((c)->is_inline) ? 4 : 6)
#else
# define FIRST_CHUNK_KEYS 3
# define TRIE_NODE_SIZE 32
# define TRIE_NODE_CHUNK_SIZE 32
# define TRIE_NODE_CHUNK_KEYS(c) (((c)->is_inline) ? 3 : 5)
#endif
/**
* DzlTrieNodeChunk:
* @flags: Flags describing behaviors of the DzlTrieNodeChunk.
* @count: The number of items added to this chunk.
* @keys: The keys for @children.
* @next: The next #DzlTrieNodeChunk if there is one.
* @children: The children #DzlTrieNodeChunk or %NULL. If the chunk is
* inline the DzlTrieNode, then there will be fewer items.
*/
DZL_ALIGNED_BEGIN(1)
struct _DzlTrieNodeChunk
{
DzlTrieNodeChunk *next;
gboolean is_inline : 1;
guint flags : 7;
guint count : 8;
guint8 keys[6];
DzlTrieNode *children[0];
} DZL_ALIGNED_END(1);
/**
* DzlTrieNode:
* @parent: The parent #DzlTrieNode. When a node is destroyed, it may need to
* walk up to the parent node and unlink itself.
* @value: A pointer to the user provided value, or %NULL.
* @chunk: The first chunk in the chain. Inline chunks have fewer children
* elements than extra allocated chunks so that they are cache aligned.
*/
DZL_ALIGNED_BEGIN(1)
struct _DzlTrieNode
{
DzlTrieNode *parent;
gpointer value;
DzlTrieNodeChunk chunk;
} DZL_ALIGNED_END(1);
/**
* DzlTrie:
* @value_destroy: A #GDestroyNotify to free data pointers.
* @root: The root DzlTrieNode.
*/
struct _DzlTrie
{
volatile gint ref_count;
GDestroyNotify value_destroy;
DzlTrieNode *root;
};
#if GLIB_SIZEOF_VOID_P == 8
G_STATIC_ASSERT (sizeof(gpointer) == 8);
G_STATIC_ASSERT ((FIRST_CHUNK_KEYS-1) == 3);
G_STATIC_ASSERT (((FIRST_CHUNK_KEYS-1) * sizeof(gpointer)) == 24);
G_STATIC_ASSERT(sizeof(DzlTrieNode) == 32);
G_STATIC_ASSERT(sizeof(DzlTrieNodeChunk) == 16);
#else
G_STATIC_ASSERT (sizeof(gpointer) == 4);
G_STATIC_ASSERT ((FIRST_CHUNK_KEYS-1) == 2);
G_STATIC_ASSERT (((FIRST_CHUNK_KEYS-1) * sizeof(gpointer)) == 8);
G_STATIC_ASSERT(sizeof(DzlTrieNode) == 20);
G_STATIC_ASSERT(sizeof(DzlTrieNodeChunk) == 12);
#endif
/**
* dzl_trie_malloc0:
* @trie: A #DzlTrie
* @size: Number of bytes to allocate.
*
* Wrapper function to allocate a memory chunk. This gives us somewhere to
* perform the abstraction if we want to use mmap()'d I/O at some point.
* The memory will be zero'd before being returned.
*
* Returns: A pointer to the allocation.
*/
static gpointer
dzl_trie_malloc0 (DzlTrie *trie,
gsize size)
{
return g_malloc0(size);
}
/**
* dzl_trie_free:
* @trie: A #DzlTrie.
* @data: The data to free.
*
* Frees a portion of memory allocated by @trie.
*/
static void
dzl_trie_free (DzlTrie *trie,
gpointer data)
{
g_free(data);
}
/**
* dzl_trie_node_new:
* @trie: A #DzlTrie.
* @parent: The nodes parent or %NULL.
*
* Create a new node that can be placed in a DzlTrie. The node contains a chunk
* embedded in it that may contain only 4 pointers instead of the full 6 do
* to the overhead of the DzlTrieNode itself.
*
* Returns: A newly allocated DzlTrieNode that should be freed with g_free().
*/
DzlTrieNode *
dzl_trie_node_new (DzlTrie *trie,
DzlTrieNode *parent)
{
DzlTrieNode *node;
node = dzl_trie_malloc0(trie, TRIE_NODE_SIZE);
node->chunk.is_inline = TRUE;
node->parent = parent;
return node;
}
/**
* dzl_trie_node_chunk_is_full:
* @chunk: A #DzlTrieNodeChunk.
*
* Checks to see if all children slots are full in @chunk.
*
* Returns: %TRUE if there are no free slots in @chunk.
*/
G_INLINE_FUNC gboolean
dzl_trie_node_chunk_is_full (DzlTrieNodeChunk *chunk)
{
g_assert(chunk);
return (chunk->count == TRIE_NODE_CHUNK_KEYS(chunk));
}
/**
* dzl_trie_node_chunk_new:
* @trie: The DzlTrie that the chunk belongs to.
*
* Creates a new #DzlTrieNodeChunk with empty state.
*
* Returns: (transfer full): A #DzlTrieNodeChunk.
*/
DzlTrieNodeChunk *
dzl_trie_node_chunk_new (DzlTrie *trie)
{
return dzl_trie_malloc0(trie, TRIE_NODE_CHUNK_SIZE);
}
/**
* dzl_trie_append_to_node:
* @chunk: A #DzlTrieNodeChunk.
* @key: The key to append.
* @child: A #DzlTrieNode to append.
*
* Appends @child to the chunk. If there is not room in the chunk,
* then a new chunk will be added and append to @chunk.
*
* @chunk MUST be the last chunk in the chain (therefore chunk->next
* is %NULL).
*/
static void
dzl_trie_append_to_node (DzlTrie *trie,
DzlTrieNode *node,
DzlTrieNodeChunk *chunk,
guint8 key,
DzlTrieNode *child)
{
g_assert(trie);
g_assert(node);
g_assert(chunk);
g_assert(child);
if (dzl_trie_node_chunk_is_full(chunk)) {
chunk->next = dzl_trie_node_chunk_new(trie);
chunk = chunk->next;
}
chunk->keys[chunk->count] = key;
chunk->children[chunk->count] = child;
chunk->count++;
}
/**
* dzl_trie_node_move_to_front:
* @node: A #DzlTrieNode.
* @chunk: A #DzlTrieNodeChunk.
* @idx: The index of the item to move.
*
* Moves the key and child found at @idx to the beginning of the first chunk
* to achieve better cache locality.
*/
static void
dzl_trie_node_move_to_front (DzlTrieNode *node,
DzlTrieNodeChunk *chunk,
guint idx)
{
DzlTrieNodeChunk *first;
DzlTrieNode *child;
guint8 offset;
guint8 key;
g_assert(node);
g_assert(chunk);
first = &node->chunk;
key = chunk->keys[idx];
child = chunk->children[idx];
offset = ((first == chunk) ? first->count : FIRST_CHUNK_KEYS) - 1;
chunk->keys[idx] = first->keys[offset];
chunk->children[idx] = first->children[offset];
memmove(&first->keys[1], &first->keys[0], (FIRST_CHUNK_KEYS-1));
memmove(&first->children[1], &first->children[0], (FIRST_CHUNK_KEYS-1) * sizeof(DzlTrieNode *));
first->keys[0] = key;
first->children[0] = child;
}
/**
* dzl_trie_find_node:
* @trie: The #DzlTrie we are searching.
* @node: A #DzlTrieNode.
* @key: The key to find in this node.
*
* Searches the chunk chain of the current node for the key provided. If
* found, the child node for that key is returned.
*
* Returns: (transfer none): A #DzlTrieNode or %NULL.
*/
static DzlTrieNode *
dzl_trie_find_node (DzlTrie *trie,
DzlTrieNode *node,
guint8 key)
{
DzlTrieNodeChunk *iter;
guint i;
g_assert(node);
for (iter = &node->chunk; iter; iter = iter->next) {
for (i = 0; i < iter->count; i++) {
if (iter->keys[i] == key) {
if (iter != &node->chunk) {
dzl_trie_node_move_to_front(node, iter, i);
__builtin_prefetch(node->chunk.children[0]);
return node->chunk.children[0];
}
__builtin_prefetch(iter->children[i]);
return iter->children[i];
}
}
}
return NULL;
}
/**
* dzl_trie_find_or_create_node:
* @trie: A #DzlTrie.
* @node: A #DzlTrieNode.
* @key: The key to insert.
*
* Attempts to find key within @node. If @key is not found, it is added to the
* node. The child for the key is returned.
*
* Returns: (transfer none): The child #DzlTrieNode for @key.
*/
static DzlTrieNode *
dzl_trie_find_or_create_node (DzlTrie *trie,
DzlTrieNode *node,
guint8 key)
{
DzlTrieNodeChunk *iter;
DzlTrieNodeChunk *last = NULL;
guint i;
g_assert(node);
for (iter = &node->chunk; iter; iter = iter->next) {
for (i = 0; i < iter->count; i++) {
if (iter->keys[i] == key) {
if (iter != &node->chunk) {
dzl_trie_node_move_to_front(node, iter, i);
__builtin_prefetch(node->chunk.children[0]);
return node->chunk.children[0];
}
__builtin_prefetch(iter->children[i]);
return iter->children[i];
}
}
last = iter;
}
g_assert(last);
node = dzl_trie_node_new(trie, node);
dzl_trie_append_to_node(trie, node->parent, last, key, node);
return node;
}
/**
* dzl_trie_node_remove_fast:
* @node: A #DzlTrieNode.
* @chunk: A #DzlTrieNodeChunk.
* @idx: The child within the chunk.
*
* Removes child at index @idx from the chunk. The last item in the
* chain of chunks will be moved to the slot indicated by @idx.
*/
G_INLINE_FUNC void
dzl_trie_node_remove_fast (DzlTrieNode *node,
DzlTrieNodeChunk *chunk,
guint idx)
{
DzlTrieNodeChunk *iter;
g_assert(node);
g_assert(chunk);
for (iter = chunk; iter->next && iter->next->count; iter = iter->next) { }
g_assert(iter->count);
chunk->keys[idx] = iter->keys[iter->count-1];
chunk->children[idx] = iter->children[iter->count-1];
iter->count--;
iter->keys[iter->count] = '\0';
iter->children[iter->count] = NULL;
}
/**
* dzl_trie_node_unlink:
* @node: A #DzlTrieNode.
*
* Unlinks @node from the DzlTrie. The parent node has its pointer to @node
* removed.
*/
static void
dzl_trie_node_unlink (DzlTrieNode *node)
{
DzlTrieNodeChunk *iter;
DzlTrieNode *parent;
guint i;
g_assert(node);
if ((parent = node->parent)) {
node->parent = NULL;
for (iter = &parent->chunk; iter; iter = iter->next) {
for (i = 0; i < iter->count; i++) {
if (iter->children[i] == node) {
dzl_trie_node_remove_fast(node, iter, i);
g_assert(iter->children[i] != node);
return;
}
}
}
}
}
/**
* dzl_trie_destroy_node:
* @trie: A #DzlTrie.
* @node: A #DzlTrieNode.
* @value_destroy: A #GDestroyNotify or %NULL.
*
* Removes @node from the #DzlTrie and releases all memory associated with it.
* If the nodes value is set, @value_destroy will be called to release it.
*
* The reclaimation happens as such:
*
* 1) the node is unlinked from its parent.
* 2) each of the children are destroyed, leaving us an empty chain.
* 3) each of the allocated chain links are freed.
* 4) the value pointer is freed.
* 5) the structure itself is freed.
*/
static void
dzl_trie_destroy_node (DzlTrie *trie,
DzlTrieNode *node,
GDestroyNotify value_destroy)
{
DzlTrieNodeChunk *iter;
DzlTrieNodeChunk *tmp;
g_assert(node);
dzl_trie_node_unlink(node);
while (node->chunk.count) {
dzl_trie_destroy_node(trie, node->chunk.children[0], value_destroy);
}
for (iter = node->chunk.next; iter;) {
tmp = iter;
iter = iter->next;
dzl_trie_free(trie, tmp);
}
if (node->value && value_destroy) {
value_destroy(node->value);
}
dzl_trie_free(trie, node);
}
/**
* dzl_trie_new:
* @value_destroy: A #GDestroyNotify, or %NULL.
*
* Creates a new #DzlTrie. When a value is removed from the trie, @value_destroy
* will be called to allow you to release any resources.
*
* Returns: (transfer full): A newly allocated #DzlTrie that should be freed
* with dzl_trie_unref().
*/
DzlTrie *
dzl_trie_new (GDestroyNotify value_destroy)
{
DzlTrie *trie;
trie = g_new0(DzlTrie, 1);
trie->ref_count = 1;
trie->root = dzl_trie_node_new(trie, NULL);
trie->value_destroy = value_destroy;
return trie;
}
/**
* dzl_trie_insert:
* @trie: A #DzlTrie.
* @key: The key to insert.
* @value: The value to insert.
*
* Inserts @value into @trie located with @key.
*/
void
dzl_trie_insert (DzlTrie *trie,
const gchar *key,
gpointer value)
{
DzlTrieNode *node;
g_return_if_fail(trie);
g_return_if_fail(key);
g_return_if_fail(value);
node = trie->root;
while (*key) {
node = dzl_trie_find_or_create_node(trie, node, *key);
key++;
}
if (node->value && trie->value_destroy) {
trie->value_destroy(node->value);
}
node->value = value;
}
/**
* dzl_trie_lookup:
* @trie: A #DzlTrie.
* @key: The key to lookup.
*
* Looks up @key in @trie and returns the value associated.
*
* Returns: (transfer none): The value inserted or %NULL.
*/
gpointer
dzl_trie_lookup (DzlTrie *trie,
const gchar *key)
{
DzlTrieNode *node;
__builtin_prefetch(trie);
__builtin_prefetch(key);
g_return_val_if_fail(trie, NULL);
g_return_val_if_fail(key, NULL);
node = trie->root;
while (*key && node) {
node = dzl_trie_find_node(trie, node, *key);
key++;
}
return node ? node->value : NULL;
}
/**
* dzl_trie_remove:
* @trie: A #DzlTrie.
* @key: The key to remove.
*
* Removes @key from @trie, possibly destroying the value associated with
* the key.
*
* Returns: %TRUE if @key was found, otherwise %FALSE.
*/
gboolean
dzl_trie_remove (DzlTrie *trie,
const gchar *key)
{
DzlTrieNode *node;
g_return_val_if_fail(trie, FALSE);
g_return_val_if_fail(key, FALSE);
node = trie->root;
while (*key && node) {
node = dzl_trie_find_node(trie, node, *key);
key++;
}
if (node && node->value) {
if (trie->value_destroy) {
trie->value_destroy(node->value);
}
node->value = NULL;
if (!node->chunk.count) {
while (node->parent &&
node->parent->parent &&
!node->parent->value &&
(node->parent->chunk.count == 1)) {
node = node->parent;
}
dzl_trie_destroy_node(trie, node, trie->value_destroy);
}
return TRUE;
}
return FALSE;
}
/**
* dzl_trie_traverse_node_pre_order:
* @trie: A #DzlTrie.
* @node: A #DzlTrieNode.
* @str: The prefix for this node.
* @flags: The flags for which nodes to callback.
* @max_depth: the maximum depth to process.
* @func: (scope call) (closure user_data): The func to execute for each matching node.
* @user_data: User data for @func.
*
* Traverses node and all of its children according to the parameters
* provided. @func is called for each matching node.
*
* This assumes that the order is %G_POST_ORDER, and therefore does not
* have the conditionals to check pre-vs-pre ordering.
*
* Returns: %TRUE if traversal was cancelled; otherwise %FALSE.
*/
static gboolean
dzl_trie_traverse_node_pre_order (DzlTrie *trie,
DzlTrieNode *node,
GString *str,
GTraverseFlags flags,
gint max_depth,
DzlTrieTraverseFunc func,
gpointer user_data)
{
DzlTrieNodeChunk *iter;
gboolean ret = FALSE;
guint i;
g_assert(trie);
g_assert(node);
g_assert(str);
if (max_depth) {
if ((!node->value && (flags & G_TRAVERSE_NON_LEAVES)) ||
(node->value && (flags & G_TRAVERSE_LEAVES))) {
if (func(trie, str->str, node->value, user_data)) {
return TRUE;
}
}
for (iter = &node->chunk; iter; iter = iter->next) {
for (i = 0; i < iter->count; i++) {
g_string_append_c(str, iter->keys[i]);
if (dzl_trie_traverse_node_pre_order(trie,
iter->children[i],
str,
flags,
max_depth - 1,
func,
user_data)) {
return TRUE;
}
g_string_truncate(str, str->len - 1);
}
}
}
return ret;
}
/**
* dzl_trie_traverse_node_post_order:
* @trie: A #DzlTrie.
* @node: A #DzlTrieNode.
* @str: The prefix for this node.
* @flags: The flags for which nodes to callback.
* @max_depth: the maximum depth to process.
* @func: (scope call) (closure user_data): The func to execute for each matching node.
* @user_data: User data for @func.
*
* Traverses node and all of its children according to the parameters
* provided. @func is called for each matching node.
*
* This assumes that the order is %G_POST_ORDER, and therefore does not
* have the conditionals to check pre-vs-post ordering.
*
* Returns: %TRUE if traversal was cancelled; otherwise %FALSE.
*/
static gboolean
dzl_trie_traverse_node_post_order (DzlTrie *trie,
DzlTrieNode *node,
GString *str,
GTraverseFlags flags,
gint max_depth,
DzlTrieTraverseFunc func,
gpointer user_data)
{
DzlTrieNodeChunk *iter;
gboolean ret = FALSE;
guint i;
g_assert(trie);
g_assert(node);
g_assert(str);
if (max_depth) {
for (iter = &node->chunk; iter; iter = iter->next) {
for (i = 0; i < iter->count; i++) {
g_string_append_c(str, iter->keys[i]);
if (dzl_trie_traverse_node_post_order(trie,
iter->children[i],
str,
flags,
max_depth - 1,
func,
user_data)) {
return TRUE;
}
g_string_truncate(str, str->len - 1);
}
}
if ((!node->value && (flags & G_TRAVERSE_NON_LEAVES)) ||
(node->value && (flags & G_TRAVERSE_LEAVES))) {
ret = func(trie, str->str, node->value, user_data);
}
}
return ret;
}
/**
* dzl_trie_traverse:
* @trie: A #DzlTrie.
* @key: The key to start traversal from.
* @order: The order to traverse.
* @flags: The flags for which nodes to callback.
* @max_depth: the maximum depth to process.
* @func: (scope call) (closure user_data): The func to execute for each matching node.
* @user_data: User data for @func.
*
* Traverses all nodes of @trie according to the parameters. For each node
* matching the traversal parameters, @func will be executed.
*
* Only %G_PRE_ORDER and %G_POST_ORDER are supported for @order.
*
* If @max_depth is less than zero, the entire tree will be traversed.
* If max_depth is 1, then only the root will be traversed.
*/
void
dzl_trie_traverse (DzlTrie *trie,
const gchar *key,
GTraverseType order,
GTraverseFlags flags,
gint max_depth,
DzlTrieTraverseFunc func,
gpointer user_data)
{
DzlTrieNode *node;
GString *str;
g_return_if_fail(trie);
g_return_if_fail(func);
node = trie->root;
key = key ? key : "";
str = g_string_new(key);
while (*key && node) {
node = dzl_trie_find_node(trie, node, *key);
key++;
}
if (node) {
if (order == G_PRE_ORDER) {
dzl_trie_traverse_node_pre_order(trie, node, str, flags, max_depth, func, user_data);
} else if (order == G_POST_ORDER) {
dzl_trie_traverse_node_post_order(trie, node, str, flags, max_depth, func, user_data);
} else {
g_warning(_("Traversal order %u is not supported on DzlTrie."), order);
}
}
g_string_free(str, TRUE);
}
/**
* dzl_trie_unref:
* @trie: A #DzlTrie or %NULL.
*
* Drops the reference count by one on @trie. When it reaches zero, the
* structure is freed.
*/
void
dzl_trie_unref (DzlTrie *trie)
{
g_return_if_fail(trie != NULL);
g_return_if_fail(trie->ref_count > 0);
if (g_atomic_int_dec_and_test(&trie->ref_count)) {
dzl_trie_destroy_node(trie, trie->root, trie->value_destroy);
trie->root = NULL;
trie->value_destroy = NULL;
g_free(trie);
}
}
DzlTrie *
dzl_trie_ref (DzlTrie *trie)
{
g_return_val_if_fail(trie != NULL, NULL);
g_return_val_if_fail(trie->ref_count > 0, NULL);
g_atomic_int_inc(&trie->ref_count);
return trie;
}
/**
* dzl_trie_destroy:
* @trie: A #DzlTrie or %NULL.
*
* This is an alias for dzl_trie_unref().
*/
void
dzl_trie_destroy (DzlTrie *trie)
{
if (trie != NULL)
dzl_trie_unref (trie);
}