Blame tries.c

Packit d394d9
/* Copyright (C) 1995 Bjoern Beutel. */
Packit d394d9
Packit d394d9
/* Description. =============================================================*/
Packit d394d9
Packit d394d9
/* This module implements a static trie structure */
Packit d394d9
Packit d394d9
/* Includes. ================================================================*/
Packit d394d9
Packit d394d9
#include <stdlib.h>
Packit d394d9
#include <stdio.h>
Packit d394d9
#include <string.h>
Packit d394d9
#include <setjmp.h>
Packit d394d9
#include <glib.h>
Packit d394d9
#include "basic.h"
Packit d394d9
#include "pools.h"
Packit d394d9
#include "tries.h"
Packit d394d9
Packit d394d9
/* Macros. ==================================================================*/
Packit d394d9
Packit d394d9
#define ALIGN(addr, n) (((ptr_t) (addr) + (ptr_t) (n - 1)) & ~ (ptr_t) (n - 1))
Packit d394d9
/* Align ADDR to next multiple of N. */
Packit d394d9
Packit d394d9
#define INTS(n) (((n) + (ptr_t ) 3) >> 2) 
Packit d394d9
/* Number of ints needed to store N bytes. */ 
Packit d394d9
Packit d394d9
/* Types. ===================================================================*/
Packit d394d9
Packit d394d9
typedef struct /* A node in a list of keys. */
Packit d394d9
{ 
Packit d394d9
  list_node_t *next;
Packit d394d9
  u_short_t key; /* Key for this node. */
Packit d394d9
  int_t subnode; /* Index of node for KEY in trie. */
Packit d394d9
  int_t content; /* Content associated with KEY. */
Packit d394d9
} key_node_t;
Packit d394d9
Packit d394d9
typedef struct /* A dynamic trie node. */
Packit d394d9
{ 
Packit d394d9
  string_t prefix; /* The prefix of this node. */
Packit d394d9
  int_t prefix_len; /* The length of PREFIX in bytes. */
Packit d394d9
  list_t key_list; /* List of keys. */
Packit d394d9
} trie_node_t;
Packit d394d9
Packit d394d9
/* The trie is a vector of int_t that contains compact trie nodes.
Packit d394d9
 * A compact trie node is int_t-aligned and looks as follows:
Packit d394d9
 *   u_byte_t prefix_len;
Packit d394d9
 *   char_t prefix[ prefix_len ];
Packit d394d9
 *   0 or 1 pad bytes of value 0;
Packit d394d9
 *   u_short_t subnode_count;
Packit d394d9
 *   u_short_t content_count;
Packit d394d9
 *   u_short_t subnode_keys[ subnode_count ];
Packit d394d9
 *   u_short_t content_keys[ content_count ];
Packit d394d9
 *   0 or 2 pad bytes of value 0;
Packit d394d9
 *   int_t subnodes[ subnode_count ];
Packit d394d9
 *   int_t contents[ content_count ]; */
Packit d394d9
Packit d394d9
typedef struct /* Pointers to the items of a compact trie node. */
Packit d394d9
{ 
Packit d394d9
  u_byte_t *prefix_len; /* Number of chars that precede the keys. */
Packit d394d9
  char_t *prefix; /* The chars that precede the keys. */ 
Packit d394d9
  u_short_t *subnode_count; /* The number of subnodes in this node. */
Packit d394d9
  u_short_t *content_count; /* The number of contents in this node. */
Packit d394d9
  u_short_t *subnode_keys; /* The keys for the subnodes in this node. */
Packit d394d9
  u_short_t *content_keys; /* The keys for the contents in this node. */
Packit d394d9
  int_t *subnodes; /* Indexes of the subnodes. */
Packit d394d9
  int_t *contents; /* Contents. */
Packit d394d9
} compact_node_t;
Packit d394d9
Packit d394d9
/*---------------------------------------------------------------------------*/
Packit d394d9
Packit d394d9
static int_t 
Packit d394d9
store_trie_node( pool_t trie_pool, trie_node_t *node )
Packit d394d9
/* Store NODE in TRIE_POOL and return its index. */
Packit d394d9
{
Packit d394d9
  int_t node_size; /* Size of the trie node in TRIE_POOL. */
Packit d394d9
  int_t node_index; /* Index of the trie node in TRIE_POOL. */
Packit d394d9
  int_t i, j, subnode_count, content_count;
Packit d394d9
  key_node_t *key_node;
Packit d394d9
  int_t *compact_node;
Packit d394d9
  compact_node_t n; /* Pointers to the items of the compactified NODE. */
Packit d394d9
  string_t s;
Packit d394d9
  char_t *d;
Packit d394d9
Packit d394d9
  /* Count the number of entries in NODE->KEY_LIST. */
Packit d394d9
  subnode_count = content_count = 0;
Packit d394d9
  FOREACH( key_node, node->key_list )
Packit d394d9
  {
Packit d394d9
    if (key_node->subnode != -1) 
Packit d394d9
      subnode_count++;
Packit d394d9
    if (key_node->content != -1) 
Packit d394d9
      content_count++;
Packit d394d9
  }
Packit d394d9
Packit d394d9
  /* Get pool space for the new node. */
Packit d394d9
  node_size = (INTS( sizeof( u_byte_t ) + sizeof( char_t ) * node->prefix_len 
Packit d394d9
                     + sizeof( u_short_t ) 
Packit d394d9
		     + sizeof( u_short_t ) * subnode_count
Packit d394d9
                     + sizeof( u_short_t ) 
Packit d394d9
		     + sizeof( u_short_t ) * content_count )
Packit d394d9
               + subnode_count + content_count);
Packit d394d9
  compact_node = get_pool_space( trie_pool, node_size, &node_index );
Packit d394d9
Packit d394d9
  /* Set up the pointers to the items of the node. */
Packit d394d9
  n.prefix_len = (u_byte_t *) compact_node;
Packit d394d9
  n.prefix = (char_t *) (n.prefix_len + 1);
Packit d394d9
  n.subnode_count = (u_short_t *) ALIGN(n.prefix + node->prefix_len, 2 );
Packit d394d9
  n.content_count = (u_short_t *) (n.subnode_count + 1);
Packit d394d9
  n.subnode_keys = (u_short_t *) (n.content_count + 1);
Packit d394d9
  n.content_keys = (u_short_t *) (n.subnode_keys + subnode_count);
Packit d394d9
  n.subnodes = (int_t *) ALIGN( n.content_keys + content_count, 4 );
Packit d394d9
  n.contents = (int_t *) n.subnodes + subnode_count;
Packit d394d9
Packit d394d9
  /* Copy the items. */
Packit d394d9
  *n.prefix_len = node->prefix_len;
Packit d394d9
  d = n.prefix;
Packit d394d9
  for (s = node->prefix; 
Packit d394d9
       s < node->prefix + node->prefix_len; 
Packit d394d9
       s = g_utf8_next_char( s ))
Packit d394d9
  {
Packit d394d9
    d += g_unichar_to_utf8( g_unichar_tolower( g_utf8_get_char( s ) ), d );
Packit d394d9
  }
Packit d394d9
  *n.subnode_count = subnode_count;
Packit d394d9
  *n.content_count = content_count;
Packit d394d9
  i = j = 0;
Packit d394d9
  FOREACH( key_node, node->key_list ) 
Packit d394d9
  { 
Packit d394d9
    if (key_node->subnode != -1)
Packit d394d9
    {
Packit d394d9
      n.subnode_keys[i] = key_node->key;
Packit d394d9
      n.subnodes[i] = key_node->subnode;
Packit d394d9
      i++;
Packit d394d9
    }
Packit d394d9
    if (key_node->content != -1)
Packit d394d9
    {
Packit d394d9
      n.content_keys[j] = key_node->key;
Packit d394d9
      n.contents[j] = key_node->content;
Packit d394d9
      j++;
Packit d394d9
    }
Packit d394d9
  }
Packit d394d9
Packit d394d9
  return node_index;
Packit d394d9
}
Packit d394d9
Packit d394d9
/*---------------------------------------------------------------------------*/
Packit d394d9
Packit d394d9
static int_t 
Packit d394d9
new_subtrie( pool_t trie_pool, trie_entry_t entries[], 
Packit d394d9
             int_t start, int_t end, int_t index )
Packit d394d9
/* Create a subtrie for ENTRIES[ START ]..ENTRIES[ END - 1 ], 
Packit d394d9
 * store it in TRIE_POOL and return the index of its root node.
Packit d394d9
 * The first INDEX bytes in the keys will be ignored, so they
Packit d394d9
 * must match for the keys in ENTRIES[ START ]..ENTRIES[ END - 1 ]. */
Packit d394d9
{
Packit d394d9
  trie_node_t node;
Packit d394d9
  int_t key_idx; /* The key index. */
Packit d394d9
  int_t node_index;
Packit d394d9
  int_t sub_start, sub_end; /* Start and end of a subtrie. */
Packit d394d9
  string_t first_key, last_key; /* Used as abbreviations. */
Packit d394d9
  string_t f, l, e;
Packit d394d9
  key_node_t *key_node;
Packit d394d9
Packit d394d9
  if (start >= end) 
Packit d394d9
    return -1;
Packit d394d9
Packit d394d9
  /* Look how many chars from INDEX onward are equal in the first and 
Packit d394d9
   * the last entry. This will be the prefix of the new node.
Packit d394d9
   * If the key of the first entry is as long as the prefix, shorten the prefix
Packit d394d9
   * by one character. */
Packit d394d9
  first_key = entries[ start ].key;
Packit d394d9
  last_key = entries[ end - 1 ].key;
Packit d394d9
  for (f = first_key + index, l = last_key + index; 
Packit d394d9
       *g_utf8_next_char( f ) != EOS; 
Packit d394d9
       f = g_utf8_next_char( f ), l = g_utf8_next_char( l ))
Packit d394d9
  {
Packit d394d9
    if (g_unichar_tolower( g_utf8_get_char( f ) ) 
Packit d394d9
	!= g_unichar_tolower( g_utf8_get_char( l ) ))
Packit d394d9
    {
Packit d394d9
      break;
Packit d394d9
    }
Packit d394d9
  }
Packit d394d9
  key_idx = f - first_key;
Packit d394d9
  node.prefix = first_key + index;
Packit d394d9
  node.prefix_len = key_idx - index;
Packit d394d9
Packit d394d9
  /* Scan list of entries for the first char behind the prefix. For each key,
Packit d394d9
   * find the corresponding lower and upper index in the list of entries. */
Packit d394d9
  clear_list( &node.key_list );
Packit d394d9
  for (sub_start = start; sub_start < end; sub_start = sub_end) 
Packit d394d9
  { 
Packit d394d9
    /* Create a new node for the current key. */
Packit d394d9
    key_node = new_node( &node.key_list, sizeof( key_node_t ), LIST_END );
Packit d394d9
    f = entries[ sub_start ].key + key_idx;
Packit d394d9
    key_node->key = g_unichar_tolower( g_utf8_get_char(f) );
Packit d394d9
    e = g_utf8_next_char(f);
Packit d394d9
    if (*e == EOS) 
Packit d394d9
    { 
Packit d394d9
      key_node->content = entries[ sub_start ].content;
Packit d394d9
      sub_start++;
Packit d394d9
    } 
Packit d394d9
    else 
Packit d394d9
      key_node->content = -1;
Packit d394d9
Packit d394d9
    /* Find last entry with same key. */
Packit d394d9
    for (sub_end = sub_start; sub_end < end; sub_end++) 
Packit d394d9
    {
Packit d394d9
      l = entries[ sub_end ].key + key_idx;
Packit d394d9
      if (g_unichar_tolower( g_utf8_get_char(l) ) != key_node->key)
Packit d394d9
	break;
Packit d394d9
    }
Packit d394d9
    key_node->subnode = new_subtrie( trie_pool, entries, sub_start, sub_end, 
Packit d394d9
                                     key_idx + e - f );
Packit d394d9
  }
Packit d394d9
Packit d394d9
  /* Store compact node and clear the key list. */
Packit d394d9
  node_index = store_trie_node( trie_pool, &node );
Packit d394d9
  while (node.key_list.first != NULL) 
Packit d394d9
    free_first_node( &node.key_list );
Packit d394d9
Packit d394d9
  return node_index;
Packit d394d9
}
Packit d394d9
Packit d394d9
/*---------------------------------------------------------------------------*/
Packit d394d9
Packit d394d9
void 
Packit d394d9
new_trie( int_t n, trie_entry_t *entries, 
Packit d394d9
          pool_t *trie_pool, int_t *root_node_index )
Packit d394d9
/* Take N ENTRIES and build a trie of them.
Packit d394d9
 * Return the trie in *TRIE_POOL 
Packit d394d9
 * and the index of its root node in *ROOT_NODE_INDEX.
Packit d394d9
 * The keys in *ENTRIES must be non-empty, unique, and sorted. */
Packit d394d9
{
Packit d394d9
  *trie_pool = new_pool( sizeof( int_t ) );
Packit d394d9
  *root_node_index = new_subtrie( *trie_pool, entries, 0, n, 0 );
Packit d394d9
}
Packit d394d9
Packit d394d9
/*---------------------------------------------------------------------------*/
Packit d394d9
Packit d394d9
bool_t
Packit d394d9
lookup_trie( int_t *trie, int_t *node_index, string_t *input, int_t *content )
Packit d394d9
/* Test if a prefix of *INPUT matches the node at *NODE_INDEX in TRIE.
Packit d394d9
 * If it does, return TRUE (else return FALSE) and:
Packit d394d9
 *   *CONTENT contains the associated content,
Packit d394d9
 *   *NODE contains the subnode for the matched input, and
Packit d394d9
 *   *INPUT points to the first char behind the prefix. */
Packit d394d9
{
Packit d394d9
  int_t lower, upper, middle;
Packit d394d9
  compact_node_t r; /* Pointers to the items of the root node; */
Packit d394d9
  gunichar c;
Packit d394d9
Packit d394d9
  while (*node_index != -1) 
Packit d394d9
  { 
Packit d394d9
    /* Test if node's prefix matches the given key. */
Packit d394d9
    r.prefix_len = (u_byte_t *) (trie + *node_index);
Packit d394d9
    r.prefix = (char_t *) (r.prefix_len + 1);
Packit d394d9
    if (strncmp_no_case( *input, r.prefix, *r.prefix_len ) != 0) 
Packit d394d9
      return FALSE;
Packit d394d9
    (*input) += *r.prefix_len;
Packit d394d9
Packit d394d9
    /* Get the rest of the node. */
Packit d394d9
    r.subnode_count = (u_short_t *) ALIGN(r.prefix + *r.prefix_len, 2);
Packit d394d9
    r.content_count = (u_short_t *) (r.subnode_count + 1);
Packit d394d9
    r.subnode_keys = (u_short_t *) (r.content_count + 1);
Packit d394d9
    r.content_keys = (u_short_t *) (r.subnode_keys + *r.subnode_count);
Packit d394d9
    r.subnodes = (int_t *) ALIGN( r.content_keys + *r.content_count, 4 );
Packit d394d9
    r.contents = (int_t *) (r.subnodes + *r.subnode_count);
Packit d394d9
Packit d394d9
    /* Perform binary search for subnode with given key. */
Packit d394d9
    c = g_unichar_tolower( g_utf8_get_char( *input ) );
Packit d394d9
    *node_index = -1;
Packit d394d9
    lower = 0;
Packit d394d9
    upper = *r.subnode_count - 1;
Packit d394d9
    while (lower <= upper) 
Packit d394d9
    { 
Packit d394d9
      middle = (lower + upper) / 2;
Packit d394d9
      if (c < r.subnode_keys[ middle ]) 
Packit d394d9
	upper = middle - 1;
Packit d394d9
      else if (c > r.subnode_keys[ middle ]) 
Packit d394d9
	lower = middle + 1;
Packit d394d9
      else /* This entry matches. */
Packit d394d9
      { 
Packit d394d9
	*node_index = r.subnodes[ middle ];
Packit d394d9
	break;
Packit d394d9
      }
Packit d394d9
    }
Packit d394d9
Packit d394d9
    /* Perform binary search for content with given key. */
Packit d394d9
    *content = -1;
Packit d394d9
    lower = 0;
Packit d394d9
    upper = *r.content_count - 1;
Packit d394d9
    while (lower <= upper) 
Packit d394d9
    { 
Packit d394d9
      middle = (lower + upper) / 2;
Packit d394d9
      if (c < r.content_keys[ middle ]) 
Packit d394d9
	upper = middle - 1;
Packit d394d9
      else if (c > r.content_keys[ middle ]) 
Packit d394d9
	lower = middle + 1;
Packit d394d9
      else /* This entry matches. */
Packit d394d9
      { 
Packit d394d9
        *content = r.contents[ middle ];
Packit d394d9
        break;
Packit d394d9
      }
Packit d394d9
    }
Packit d394d9
Packit d394d9
    *input = g_utf8_next_char( *input );
Packit d394d9
    if (*content != -1) 
Packit d394d9
      return TRUE;
Packit d394d9
  }
Packit d394d9
  return FALSE;
Packit d394d9
}
Packit d394d9
Packit d394d9
/* End of file. =============================================================*/