/* Copyright (C) 1995 Bjoern Beutel. */

/* Description. =============================================================*/

/* This file contains basic types, macros and functions used everywhere. */

/* Includes. ================================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <setjmp.h>
#include <glib.h>
#include "basic.h"

/* Global variables. ========================================================*/

/* Version string; printed by "program_message". */
char_t malaga_version[] = "7.12";
/* Name of the running program; set by "init_basic" and used as a prefix in
 * messages printed by "throw" and "program_message". */
string_t program_name;
/* Set by "init_basic": TRUE iff the environment variable "MALAGA_MODE" is
 * defined. */
bool_t in_emacs_malaga_mode;
/* Not used in this file. NOTE(review): presumably controls whether Hangul
 * syllables are decomposed elsewhere -- confirm against the users. */
bool_t split_hangul_syllables;
/* Text of the most recent error; allocated by "init_basic", filled by
 * "complain", printed by "throw" and freed by "terminate_basic". */
text_t *error_text;
/* Jump buffer "throw" jumps to; if NULL, "throw" prints the error and exits. */
jmp_buf *current_error_handler;
/* Not used in this file. NOTE(review): presumably set when the user asks to
 * interrupt a running operation -- confirm against the users. */
bool_t user_break_requested;

/* List functions. ==========================================================*/

void 
clear_list( list_t *list )
/* Initialise LIST to be an empty list, i.e. one without first or last node.
 * Nodes that were linked in LIST before are not freed. */
{ 
  list->first = NULL;
  list->last = NULL;
}

/*---------------------------------------------------------------------------*/

void add_node( list_t *list, list_node_t *node, position_t position )
/* Link NODE into LIST.
 * POSITION = LIST_START puts NODE in front of all other nodes;
 * any other value (normally LIST_END) puts it behind all other nodes. */
{ 
  if (list->first != NULL && position == LIST_START) 
  { 
    /* Non-empty list, prepend: NODE becomes the new head. */
    node->next = list->first;
    list->first = node;
    return;
  }

  /* NODE becomes the tail (and also the head if LIST was empty). */
  node->next = NULL;
  if (list->first == NULL) 
    list->first = node;
  else 
    list->last->next = node;
  list->last = node;
}

/*---------------------------------------------------------------------------*/

void insert_node( list_t *list, list_node_t *node, list_node_t *prev )
/* Link NODE into LIST directly behind PREV.
 * PREV == NULL means that NODE becomes the first node of LIST. */
{ 
  list_node_t *successor;

  /* The node that will follow NODE after insertion. */
  successor = (prev == NULL ? list->first : prev->next);
  node->next = successor;

  if (prev == NULL) 
    list->first = node;
  else 
    prev->next = node;

  /* Nothing behind NODE: it is the new tail. */
  if (successor == NULL) 
    list->last = node;
}

/*---------------------------------------------------------------------------*/

void *
remove_first_node( list_t *list )
/* Unlink the first node of LIST and return it (it is not freed).
 * Return NULL if LIST is empty. */
{
  list_node_t *head;

  head = list->first;
  if (head != NULL) 
  { 
    list->first = head->next;
    /* HEAD was the only node, so LIST has no tail any more. */
    if (list->last == head) 
      list->last = NULL;
  }
  return head;
}

/*---------------------------------------------------------------------------*/

void
remove_node( list_t *list, list_node_t *node )
/* Unlink NODE from LIST (NODE is not freed).
 * NODE must be linked in LIST; the search for its predecessor does not
 * check for the end of the list. */
{ 
  list_node_t *pred = NULL;

  if (list->first == node) 
    list->first = node->next;
  else 
  { 
    /* Walk to the node whose successor is NODE and bypass NODE. */
    for (pred = list->first; pred->next != node; pred = pred->next) 
      ;
    pred->next = node->next;
  }

  /* NODE was the tail: its predecessor (or NULL) becomes the tail. */
  if (node->next == NULL) 
    list->last = pred;
}

/*---------------------------------------------------------------------------*/

void
combine_lists( list_t *list1, list_t *list2 )
/* Move all nodes of LIST2 behind the nodes of LIST1.
 * Afterwards LIST1 holds the concatenation and LIST2 is empty. */
{ 
  /* Hook the head of LIST2 behind the tail of LIST1 (or make it the head). */
  if (list1->first != NULL) 
    list1->last->next = list2->first;
  else 
    list1->first = list2->first;

  /* An empty LIST2 contributes no new tail and needs no clearing. */
  if (list2->first == NULL) 
    return;

  list1->last = list2->last;
  list2->first = NULL;
  list2->last = NULL;
}

/*---------------------------------------------------------------------------*/

void *
new_node( list_t *list, int_t size, position_t position )
/* Allocate a cleared node of SIZE bytes and link it into LIST.
 * POSITION = LIST_START puts it at the start of the list,
 * POSITION = LIST_END puts it at the end.
 * Return the new node. */
{ 
  list_node_t *fresh = new_mem( size );

  add_node( list, fresh, position );
  return fresh;
}

/*---------------------------------------------------------------------------*/

void 
free_first_node( list_t *list )
/* Unlink the first node of LIST and free it.
 * Do nothing if LIST is empty. */
{ 
  list_node_t *head = remove_first_node( list );

  if (head == NULL) 
    return;
  free_mem( &head );
}

/*---------------------------------------------------------------------------*/

void 
free_node( list_t *list, list_node_t *node )
/* Unlink NODE from LIST, then free NODE. NODE must be linked in LIST. */
{ 
  list_node_t *victim = node;

  remove_node( list, victim );
  free_mem( &victim );
}

/* Memory functions. ========================================================*/

void *
new_mem( int_t item_size )
/* Return a zero-filled memory block of ITEM_SIZE bytes.
 * Return NULL if ITEM_SIZE is 0.
 * Call "complain" if the allocation fails. */
{ 
  void *block = NULL;

  if (item_size != 0) 
  { 
    block = calloc( 1, item_size );
    if (block == NULL) 
      complain( "Out of memory." );
  }
  return block;
}

/*---------------------------------------------------------------------------*/

void *
new_vector( int_t item_size, int_t item_count )
/* Return a zero-filled memory block for ITEM_COUNT items of ITEM_SIZE bytes
 * each. Return NULL if ITEM_SIZE or ITEM_COUNT is 0.
 * Call "complain" if the allocation fails. */
{ 
  void *block = NULL;

  if (item_size != 0 && item_count != 0) 
  { 
    block = calloc( item_count, item_size );
    if (block == NULL) 
      complain( "Out of memory." );
  }
  return block;
}

/*---------------------------------------------------------------------------*/

int_t 
renew_vector( void *block_p, int_t item_size, int_t item_count )
/* Resize the block *BLOCK_P so it can hold ITEM_COUNT items of ITEM_SIZE
 * bytes each. BLOCK_P must be the address of a pointer variable; that
 * variable is updated to the (possibly moved) block.
 * If ITEM_SIZE or ITEM_COUNT is 0, the block is freed and *BLOCK_P is set
 * to NULL (like "new_vector" returns NULL for an empty vector).
 * Return ITEM_COUNT.
 * Call "complain" if memory is out or the total size is not representable. */
{ 
  void **slot = (void **) block_p;
  size_t size = (size_t) item_size;
  size_t count = (size_t) item_count;
  void *block;

  if (size == 0 || count == 0) 
  { 
    /* A zero-sized realloc may free the block and return NULL, which must
     * not be mistaken for an allocation failure. */
    free( *slot );
    *slot = NULL;
    return item_count;
  }

  /* Refuse requests whose byte count would wrap around in size_t
   * (negative arguments end up here as well). */
  if (count > ((size_t) -1) / size) 
    complain( "Out of memory." );

  block = realloc( *slot, count * size );
  if (block == NULL) 
    complain( "Out of memory." );  /* *BLOCK_P still owns the old block. */
  *slot = block;
  return item_count;
}

/*---------------------------------------------------------------------------*/

void 
free_mem( void *pointer )
/* POINTER must be the address of a pointer variable.
 * Free the memory that variable points to and set the variable to NULL. */
{ 
  void **slot = (void **) pointer;

  free( *slot );
  *slot = NULL;
}

/* Functions for text (indefinitely growing strings). =======================*/

text_t *
new_text( void )
/* Return a new text structure. */
{ 
  text_t *text;
  
  text = new_mem( sizeof( text_t ) );
  text->buffer_size = 100;
  text->buffer = new_vector( sizeof( char_t ), text->buffer_size + 1 );
  /* text->buffer[0] is set to EOS. */
  text->string_size = 0;

  return text;
}

/*---------------------------------------------------------------------------*/

void 
clear_text( text_t *text )
/* Make TEXT hold the empty string; its buffer is kept. */
{ 
  text->string_size = 0;
  text->buffer[ text->string_size ] = EOS;
}

/*---------------------------------------------------------------------------*/

void 
free_text( text_t **text_p )
/* Free the text *TEXT_P together with its buffer and set *TEXT_P to NULL.
 * Do nothing if *TEXT_P is already NULL. */
{ 
  text_t *text = *text_p;

  if (text == NULL) 
    return;
  free_mem( &text->buffer );
  free_mem( text_p );
}

/*---------------------------------------------------------------------------*/

void 
add_to_text( text_t *text, string_t string )
/* Append STRING to the end of TEXT, enlarging the buffer if needed. */
{ 
  int_t addition;
  int_t new_size;

  addition = strlen( string );
  new_size = text->string_size + addition;

  /* The buffer must hold the combined string plus the terminating EOS. */
  if (new_size + 1 > text->buffer_size) 
  { 
    text->buffer_size = renew_vector( &text->buffer, sizeof( char_t ), 
                                      2 * new_size );
  }
  strcpy( text->buffer + text->string_size, string );
  text->string_size = new_size;
}

/*---------------------------------------------------------------------------*/

void 
add_char_to_text( text_t *text, char_t character )
/* Append CHARACTER to the end of TEXT, enlarging the buffer if needed. */
{ 
  int_t end = text->string_size;

  /* Room is needed for CHARACTER and the terminating EOS. */
  if (end + 2 > text->buffer_size)
  {
    text->buffer_size = renew_vector( &text->buffer, sizeof( char_t ), 
                                      2 * (end + 1) );
  }
  text->buffer[ end ] = character;
  text->buffer[ end + 1 ] = EOS;
  text->string_size = end + 1;
}

/*---------------------------------------------------------------------------*/

void 
add_unichar_to_text( text_t *text, gunichar c )
/* Append the UTF-8 encoding of C to the end of TEXT. */
{ 
  char utf8[7];
  int_t length;

  /* Encode C and terminate the encoded bytes so they form a string. */
  length = g_unichar_to_utf8( c, utf8 );
  utf8[ length ] = EOS;

  if (text->string_size + length + 1 > text->buffer_size) 
  { 
    text->buffer_size = renew_vector( &text->buffer, sizeof( char_t ), 
                                      2 * (text->string_size + length) );
  }
  strcpy( text->buffer + text->string_size, utf8 );
  text->string_size += length;
}

/*---------------------------------------------------------------------------*/

void 
insert_in_text( text_t *text, string_t string, int_t position )
/* Insert STRING into TEXT so that it starts at byte offset POSITION
 * (counted from 0). POSITION is clamped to the range 0..length of TEXT. */
{ 
  int_t insert_len;
  int_t old_len;
  char_t *gap;

  insert_len = strlen( string );
  if (text->string_size + insert_len + 1 > text->buffer_size) 
  { 
    text->buffer_size = renew_vector( &text->buffer, sizeof( char_t ), 
                                      2 * (text->string_size + insert_len) );
  }
  old_len = text->string_size;
  if (position < 0) 
    position = 0;
  if (position > old_len) 
    position = old_len;

  /* Shift the tail (including its EOS) to the right, then fill the gap. */
  gap = text->buffer + position;
  memmove( gap + insert_len, gap, 
           sizeof( char_t ) * (old_len + 1 - position) );
  memcpy( gap, string, sizeof( char_t ) * insert_len );
  text->string_size = old_len + insert_len;
}

/*---------------------------------------------------------------------------*/

void 
insert_char_in_text( text_t *text, char_t character, int_t position )
/* Insert CHARACTER into TEXT at byte offset POSITION (counted from 0).
 * POSITION is clamped to the range 0..length of TEXT. */
{ 
  int_t old_len;
  char_t *gap;

  if (text->string_size + 2 > text->buffer_size)
  {
    text->buffer_size = renew_vector( &text->buffer, sizeof( char_t ),
                                      2 * (text->string_size + 1) );
  }
  old_len = text->string_size;
  if (position < 0) 
    position = 0;
  if (position > old_len) 
    position = old_len;

  /* Shift the tail (including its EOS) one place to the right. */
  gap = text->buffer + position;
  memmove( gap + 1, gap, sizeof( char_t ) * (old_len + 1 - position) );
  *gap = character;
  text->string_size = old_len + 1;
}

/*---------------------------------------------------------------------------*/

char_t * 
text_to_string( text_t **text_p )
/* Return a newly allocated copy of the string held in *TEXT_P, then free
 * *TEXT_P (it is set to NULL). The result must be freed after use. */
{
  char_t *copy = new_string( (*text_p)->buffer, NULL );

  free_text( text_p );
  return copy;
}

/*---------------------------------------------------------------------------*/

/* Add string_t FORMAT to text_t TEXT, replace any "%s" with the next
 * "string_t" argument in va_list ARGS, any "%c" with the next "char" argument
 * (fetched as "int" because of argument promotion), any "%u" with the UTF-8
 * encoding of the next "gunichar" argument, and any "%d" with the decimal
 * representation of the next "int_t" argument.
 * Any other character behind a "%" is copied literally (so "%%" gives "%").
 * A lone "%" at the very end of FORMAT is copied literally, too.
 * TEXT is evaluated more than once. */
#define VA_PRINT_TEXT( text, format, args ) \
do { \
  string_t format_p; \
  \
  for (format_p = (format); *format_p != EOS; format_p++) \
  { \
    if (*format_p == '%') \
    { \
      format_p++; \
      if (*format_p == EOS) \
      { \
        /* FORMAT ends in a lone '%': copy it and leave the loop, so the */ \
        /* loop increment cannot step past the terminating EOS. */ \
        add_char_to_text( (text), '%' ); \
        break; \
      } \
      switch (*format_p) \
      { \
      case 'c': \
        add_char_to_text( (text), va_arg( args, int ) ); \
        break; \
      case 'u': \
      { \
        char buf[7]; \
        int n; \
        \
        n = g_unichar_to_utf8( va_arg( args, gunichar ), buf ); \
        buf[n] = EOS; \
        add_to_text( (text), buf ); \
        break; \
      } \
      case 's': \
        add_to_text( (text), va_arg( args, string_t ) ); \
        break; \
      case 'd': \
      { \
        int_t pos, num, digit; \
        \
        num = va_arg( args, int_t ); \
        if (num < 0) \
          add_char_to_text( (text), '-' ); \
        /* Digits are produced least significant first; inserting each */ \
        /* one at the same position POS puts them into reading order. */ \
        pos = (text)->string_size; \
        do \
        { \
          /* NUM is never negated, so the most negative value cannot */ \
          /* overflow; division truncates toward zero, hence the */ \
          /* remainder of a negative NUM lies in -9..0. */ \
          digit = num % 10; \
          if (digit < 0) \
            digit = -digit; \
          insert_char_in_text( (text), '0' + digit, pos ); \
          num /= 10; \
        } while (num != 0); \
        break; \
      } \
      default: \
        add_char_to_text( (text), *format_p ); \
        break; \
      } \
    } \
    else \
      ADD_CHAR_TO_TEXT( text, *format_p ); \
  } \
} while (FALSE)

/*---------------------------------------------------------------------------*/

void
print_text( text_t *text, string_t format, ... )
/* Append FORMAT to TEXT. In FORMAT, "%s" stands for a "string_t" argument,
 * "%c" for a "char" argument, "%u" for a "gunichar" argument and "%d" for an
 * "int_t" argument; each is replaced by the next additional argument. */
{
  va_list arg_list;

  va_start( arg_list, format );
  VA_PRINT_TEXT( text, format, arg_list );
  va_end( arg_list );
}

/* String functions. ========================================================*/

char_t * 
new_string( string_t string, string_t end )
/* Return a newly allocated, EOS-terminated copy of STRING.
 * If END != NULL, only the chars in front of END are copied;
 * otherwise STRING is copied up to its terminating EOS.
 * The result string must be freed after use. */
{
  int_t length;
  char_t *copy;

  if (end != NULL) 
    length = end - string;
  else 
    length = strlen( string );

  copy = new_vector( sizeof( char_t ), length + 1 );
  memcpy( copy, string, sizeof( char_t ) * length );
  copy[ length ] = EOS;
  return copy;
}

/*---------------------------------------------------------------------------*/

char_t * 
new_string_readable( string_t from, string_t from_end )
/* Like "new_string", but enclose the result in double quotes, put a "\" in
 * front of each double quote and backslash, and write each control char
 * (codes 0..31 and 127) as a "\" followed by three octal digits.
 * All other chars are copied as UTF-8 characters.
 * If FROM_END != NULL, it marks the end of the string. 
 * The result string must be freed after use. */
{
  text_t *quoted;
  string_t stop;
  int_t code;

  quoted = new_text();
  stop = (from_end != NULL ? from_end : from + strlen( from ));

  add_char_to_text( quoted, '\"' );
  while (from < stop)
  { 
    if (*from == '\\' || *from == '\"')
    { 
      /* Escape the char by a backslash. */
      add_char_to_text( quoted, '\\' );
      add_char_to_text( quoted, *from );
      from++;
    } 
    else if (*from == 127 || (*from >= 0 && *from < 32))
    { 
      /* Control char: emit its code as three octal digits,
       * most significant digit first. */
      code = *from;
      from++;
      add_char_to_text( quoted, '\\' );
      add_char_to_text( quoted, (code / 64) % 8 + '0' );
      add_char_to_text( quoted, (code / 8) % 8 + '0' );
      add_char_to_text( quoted, code % 8 + '0' );
    } 
    else
    {
      /* Copy one complete UTF-8 character. */
      add_unichar_to_text( quoted, g_utf8_get_char( from ) );
      from = g_utf8_next_char( from );
    }
  }
  add_char_to_text( quoted, '\"' );

  return text_to_string( &quoted );
}

/*---------------------------------------------------------------------------*/

char_t *
concat_strings( string_t first_string, ... )
/* Return a newly allocated string that is the concatenation of FIRST_STRING
 * and all following "string_t" arguments.
 * The argument list must be terminated by NULL.
 * The result string must be freed after use. */
{
  va_list args;
  size_t total, part_len;
  string_t part;
  char_t *result;
  char_t *write_p;

  /* First pass: add up the lengths of all parts. */
  total = strlen( first_string );
  va_start( args, first_string );
  while ((part = va_arg( args, string_t )) != NULL) 
    total += strlen( part );
  va_end( args );

  /* Second pass: copy the parts one behind the other. */
  result = new_vector( sizeof( char_t ), total + 1 );
  strcpy( result, first_string );
  write_p = result + strlen( first_string );
  va_start( args, first_string );
  while ((part = va_arg( args, string_t )) != NULL) 
  {
    part_len = strlen( part );
    memcpy( write_p, part, part_len + 1 );
    write_p += part_len;
  }
  va_end( args );

  return result;
}

/*---------------------------------------------------------------------------*/

string_t 
next_non_space( string_t string )
/* Return a pointer to the first UTF-8 character in STRING that is not a
 * space according to "g_unichar_isspace". */
{ 
  string_t cursor;

  for (cursor = string; 
       g_unichar_isspace( g_utf8_get_char( cursor ) ); 
       cursor = g_utf8_next_char( cursor ))
  {
    /* Skip this space. */
  }
  return cursor;
}

/*---------------------------------------------------------------------------*/

int_t 
strcmp_no_case( string_t str1, string_t str2 )
/* Compare STR1 and STR2 character by character, ignoring case
 * (both sides are lowercased with "g_unichar_tolower").
 * Result is -1 if STR1 < STR2,
 *            0 if STR1 = STR2,
 *            1 if STR1 > STR2. */
{ 
  gunichar lower1, lower2;

  for (;; str1 = g_utf8_next_char( str1 ), str2 = g_utf8_next_char( str2 ))
  {
    /* A string that ends first is the smaller one. */
    if (*str1 == EOS)
      return (*str2 == EOS ? 0 : -1);
    if (*str2 == EOS)
      return 1;

    lower1 = g_unichar_tolower( g_utf8_get_char( str1 ) );
    lower2 = g_unichar_tolower( g_utf8_get_char( str2 ) );
    if (lower1 < lower2)
      return -1;
    if (lower1 > lower2)
      return 1;
  } 
}

/*---------------------------------------------------------------------------*/

int_t 
strncmp_no_case( string_t str1, string_t str2, int_t n )
/* Return (case insensitive) lexical order of STR1 and STR2,
 * but stop comparing once N bytes of STR1 have been consumed.
 * A character of STR1 that starts within the first N bytes is compared as
 * a whole, even if its UTF-8 encoding extends beyond byte N.
 * If N <= 0, nothing is compared and the result is 0.
 * Result is -1 if STR1 < STR2,
 *            0 if STR1 = STR2,
 *            1 if STR1 > STR2. */ 
{ 
  string_t str1_end = str1 + n;
  gunichar c1, c2;

  /* Find first char where STR1 and STR2 differ. */
  while (TRUE)
  {
    /* Use ">=" instead of "==": STR1 advances by whole UTF-8 characters
     * and may step over STR1_END when a multi-byte character straddles
     * the limit; an exact match would then never stop the comparison. */
    if (str1 >= str1_end)
      return 0;
    else if (*str1 == EOS)
      return (*str2 == EOS ? 0 : -1);
    else if (*str2 == EOS)
      return 1;

    c1 = g_unichar_tolower( g_utf8_get_char( str1 ));
    c2 = g_unichar_tolower( g_utf8_get_char( str2 ));

    if (c1 != c2)
      return (c1 < c2 ? -1 : 1);

    str1 = g_utf8_next_char( str1 );
    str2 = g_utf8_next_char( str2 );
  }
}

/*---------------------------------------------------------------------------*/

char_t *
replace_arguments( string_t format, string_t chars, ... )
/* Create a new string with a copy of FORMAT.
 * Replace each sequence "%C" in FORMAT, where C is the N-th
 * char in CHARS, by the N-th additional string argument. 
 * There must be one additional "string_t" argument per char in CHARS.
 * If C does not occur in CHARS, "%C" is replaced by C alone.
 * A lone "%" at the very end of FORMAT is copied literally.
 * Return the result string. It must be freed after use. */
{ 
  va_list args;
  int_t argument_count, i;
  string_t *arguments;
  text_t *text;
  string_t from;

  /* Allocate before "va_start", so that an out-of-memory error cannot
   * leave the function between "va_start" and "va_end". */
  argument_count = strlen( chars );
  arguments = new_vector( sizeof( string_t ), argument_count );

  /* Copy the arguments into ARGUMENTS. */
  va_start( args, chars );
  for (i = 0; i < argument_count; i++) 
    arguments[i] = va_arg( args, string_t );
  va_end( args );

  /* Copy FORMAT into TEXT, expanding the "%C" sequences. */
  text = new_text();
  for (from = format; *from != EOS; from++) 
  { 
    if (*from == '%') 
    { 
      from++;
      if (*from == EOS) 
      { 
        /* FORMAT ends in a lone '%': copy it and stop, so the loop
         * increment cannot step past the terminating EOS. */
        add_char_to_text( text, '%' );
        break;
      }

      /* Find character *FROM in CHARS. */
      for (i = 0; i < argument_count; i++) 
      { 
        if (*from == chars[i]) 
          break;
      }

      if (i < argument_count) 
        add_to_text( text, arguments[i] );
      else 
        add_char_to_text( text, *from );
    } 
    else 
      ADD_CHAR_TO_TEXT( text, *from );
  }
  free_mem( &arguments );
  return text_to_string( &text );
}

/*---------------------------------------------------------------------------*/

char_t *
double_to_string( double number )
/* Return NUMBER as a newly allocated string, formatted with "%.11G"
 * (at most 11 significant digits). The result must be freed after use. */
{ 
  char_t digits[30];

  sprintf( digits, "%.11G", number );
  return new_string( digits, NULL );
}

/*---------------------------------------------------------------------------*/

char_t * 
int_to_string( int_t number )
/* Return the decimal representation of NUMBER as a newly allocated string.
 * The result must be freed after use. */
{
  /* NOTE(review): 12 chars hold sign, 10 digits and EOS, which assumes
   * that "int_t" has at most 32 bits -- confirm against its definition. */
  char_t digits[12];
  
  sprintf( digits, "%d", number );
  return new_string( digits, NULL );
}

/* Error handling. ==========================================================*/

void 
throw( void )
/* Jump to the current error handler with value 1.
 * If no error handler is installed, print the program name and the text in
 * ERROR_TEXT to stderr and exit with status 1. This function never returns. */
{ 
  if (current_error_handler == NULL) 
  { 
    fprintf( stderr, "%s: %s\n", program_name, error_text->buffer );
    exit( 1 );
  }
  longjmp( *current_error_handler, 1 );
}

/*---------------------------------------------------------------------------*/

void 
complain( string_t message, ... )
/* Replace the content of ERROR_TEXT by MESSAGE, then call "throw".
 * In MESSAGE, "%s" (string_t), "%c" (char_t), "%u" (gunichar) and
 * "%d" (int_t) are replaced by the next additional argument. */
{ 
  va_list arg_list;
  
  clear_text( error_text );

  va_start( arg_list, message ); 
  VA_PRINT_TEXT( error_text, message, arg_list );
  va_end( arg_list );

  throw();
}

/*---------------------------------------------------------------------------*/

void 
program_message( void )
/* Print program name, version, copyright and licence note to stdout. */
{ 
  printf( "This is %s, version %s.\n", program_name, malaga_version );

  /* The remaining lines are constant text. */
  fputs( "Copyright (C) 1995 Bjoern Beutel.\n", stdout );
  fputs( "This program is part of Malaga, "
         "a system for Natural Language Analysis.\n", stdout );
  fputs( "You can distribute it under the terms "
         "of the GNU General Public License.\n", stdout );
}

/* Module initialisation. ===================================================*/

void 
init_basic( string_t prog_name )
/* Initialise this module: allocate ERROR_TEXT, remember PROG_NAME as the
 * name of the program and look up whether "MALAGA_MODE" is set in the
 * environment. */
{
  string_t malaga_mode;

  error_text = new_text();
  program_name = prog_name;

  malaga_mode = getenv( "MALAGA_MODE" );
  in_emacs_malaga_mode = (malaga_mode != NULL);

#ifdef WIN32
  /* We must switch off buffering by hand if we are connected to Emacs. */
  if (getenv( "EMACS" ) != NULL) 
  {
    setbuf( stdout, NULL );
    setbuf( stderr, NULL );
  }
#endif
}

/*---------------------------------------------------------------------------*/

void 
terminate_basic( void )
/* Terminate this module: free ERROR_TEXT and its buffer, and set
 * ERROR_TEXT to NULL. Do nothing if ERROR_TEXT is already NULL. */
{ 
  if (error_text == NULL) 
    return;
  free_mem( &error_text->buffer );
  free_mem( &error_text );
}

/* End of file. =============================================================*/