/* Copyright (C) 1995 Bjoern Beutel. */

/* Description. =============================================================*/

/* This program takes a lexicon file and compiles it to binary format. 
 * It also includes an interactive allomorph rules debugger. */

/* Includes. ================================================================*/

#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include <setjmp.h>
#include <time.h>
#include <glib.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "symbols.h"
#include "scanner.h"
#include "rule_type.h"
#include "rules.h"
#include "files.h"
#include "lex_compiler.h"
#include "input.h"
#include "commands.h"
#include "commands_interactive.h"
#include "options.h"
#include "breakpoints.h"
#include "debugger.h"
#include "display.h"
#include "transmit.h"
#include "patterns.h"
#include "hangul.h"

/* Variables. ===============================================================*/

/* Format string handed to print_lex_buffer() when allomorphs are printed. */
static string_t allo_format;

/* Set to TRUE after an allomorph generation has finished; the "result"
 * command refuses to print the lexicon buffer while this is FALSE. */
static bool_t lex_tree_to_output = FALSE;

/* File names taken from the command line and/or the project file. */
static string_t lexicon_file;
static string_t rule_file;
static string_t symbol_file;
static string_t project_file;
static string_t prelex_file;

/* Copy of the feature structure text last passed to "ga" or "debug-ga". */
static string_t base_feat_string;

/* Functions. ===============================================================*/

/* Print the source position (file, line, rule name) of the instruction at PC
 * in EXECUTED_RULE_SYS. If LEX_ENTRY_FILE_NAME is set, the position of the
 * lexicon entry is appended in parentheses. In Emacs Malaga mode an extra
 * "SHOW" line is printed. main() passes this function to init_debugger(). */
static void
display_where(void)
{
  string_t src_file, rule_name;
  int_t src_line;

  source_of_instr(executed_rule_sys, pc, &src_line, &src_file, &rule_name);
  printf("At \"%s\", line %d, rule \"%s\".",
         name_in_path(src_file), src_line, rule_name);
  if (lex_entry_file_name != NULL) {
    printf(" (\"%s\", line %d)",
           name_in_path(lex_entry_file_name), lex_entry_line_number);
  }
  putchar('\n');
  if (in_emacs_malaga_mode) {
    /* Unlike above, the file name is not passed through name_in_path(). */
    printf("SHOW \"%s\":%d:0\n", src_file, src_line);
  }
}

/*---------------------------------------------------------------------------*/

/* If *ARGUMENTS is not empty, parse a path from it and store it in
 * LEXICON_FILE, replacing the previous name. Otherwise keep the previous
 * name. Complain if there is no lexicon file name afterwards. */
static void
read_lexicon_file_name(string_t *arguments)
{
  bool_t name_given = (**arguments != EOS);

  if (name_given) {
    free_mem(&lexicon_file);
    lexicon_file = parse_absolute_path(arguments, NULL);
  }
  if (lexicon_file != NULL)
    return;
  complain("Missing lexicon file name.");
}

/*---------------------------------------------------------------------------*/

/* Output the lexicon buffer. If USE_DISPLAY is off, it is printed to stdout.
 * Otherwise the display process is started and the buffer is written to
 * DISPLAY_STREAM, framed by an "allomorph" line and an "end" line. */
static void
display_result(void)
{
  if (! use_display) {
    print_lex_buffer(stdout, NULL);
    return;
  }

  start_display_process();
  fputs("allomorph\n", display_stream);
  print_lex_buffer(display_stream, "%n %s {%f}");
  fputs("end\n", display_stream);
  fflush(display_stream);
}

/*---------------------------------------------------------------------------*/

/* Handler of the "result" command: takes no arguments and shows the
 * allomorphs of the last generation again. Complains if
 * LEX_TREE_TO_OUTPUT is not set. */
static void
do_result(string_t arguments)
{
  parse_end(&arguments);
  if (! lex_tree_to_output) {
    complain("No previous allomorph generation.");
  }
  display_result();
}

/* Descriptor of the "result" command: name list, handler, help text. */
static command_t result_command = {
  "result res",
  do_result,
  "Show result of last allomorph generation.\n"
  "Usage: result\n"
};

/*---------------------------------------------------------------------------*/

/* Handler of the "read-constants" command: ARGUMENTS may contain a lexicon
 * file name; if it is empty, the previous name is used. The constants of
 * that lexicon file are read. Not allowed in debug mode. */
static void
do_read_constants(string_t arguments)
{
  assert_not_in_debug_mode();

  /* Determine the file to read, then insist that nothing follows. */
  read_lexicon_file_name(&arguments);
  parse_end(&arguments);

  read_lex_constants(lexicon_file);
}

/* Descriptor of the "read-constants" command: name, handler, help text. */
static command_t read_constants_command = {
  "read-constants",
  do_read_constants,
  "Read the constants from the definitions in a lexicon file.\n"
  "Usage: read-constants [LEXICON_FILE]\n"
  "If LEXICON_FILE is omitted, the previous lexicon file name is used.\n"
  "\"read-constants\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

/* Handler of the "ga-file" command.
 * ARGUMENTS: [LEXICON_FILE [ALLO_FILE]].
 * Generate allomorphs for all entries of the lexicon file in run mode.
 * If ALLO_FILE is given, write the allomorphs to it using ALLO_FORMAT and
 * print the lexicon statistics to stdout. Not allowed in debug mode. */
static void
do_ga_file(string_t arguments)
{
  string_t allo_path = NULL;
  FILE *allo_stream = NULL;

  assert_not_in_debug_mode();

  TRY
  {
    read_lexicon_file_name(&arguments);
    if (*arguments != EOS) {
      allo_path = parse_absolute_path(&arguments, NULL);
    }
    parse_end(&arguments);

    /* The result flag is cleared while generation is running. */
    set_debug_mode(RUN_MODE, NULL);
    lex_tree_to_output = FALSE;
    generate_allos_for_file(lexicon_file, NULL, TRUE);
    lex_tree_to_output = TRUE;

    if (allo_path != NULL) {
      allo_stream = open_stream(allo_path, "w");
      print_lex_buffer(allo_stream, allo_format);
      print_lex_statistics(stdout);
      close_stream(&allo_stream, allo_path);
    }
  }
  FINALLY
  {
    /* Release the stream and the path name on every exit path. */
    close_stream(&allo_stream, NULL);
    free_mem(&allo_path);
  }
  END_TRY;
}

/* Descriptor of the "ga-file" command: name list, handler, help text. */
static command_t ga_file_command = {
  "ga-file gaf",
  do_ga_file,
  "Generate allomorphs from the entries in a lexicon file.\n"
  "Usage: ga-file [LEXICON_FILE [ALLO_FILE]]\n"
  "If LEXICON_FILE is omitted, the previous lexicon file name is used.\n"
  "If ALLO_FILE is given, the results are written to \"ALLO_FILE\".\n"
  "The results can also be displayed by the command \"result\".\n"
  "\"ga-file\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

/* Handler of the "debug-ga-file" command.
 * ARGUMENTS: [LEXICON_FILE].
 * Generate allomorphs for all entries of the lexicon file while the
 * allomorph rule system is executed in walk mode.
 * Not allowed in debug mode. */
static void
do_debug_ga_file(string_t arguments)
{
  assert_not_in_debug_mode();

  read_lexicon_file_name(&arguments);
  parse_end(&arguments);

  set_debug_mode(WALK_MODE, allo_rule_sys);

  /* The result flag is cleared while generation is running. */
  lex_tree_to_output = FALSE;
  generate_allos_for_file(lexicon_file, NULL, TRUE);
  lex_tree_to_output = TRUE;
}

/* Descriptor of the "debug-ga-file" command: name list, handler, help. */
static command_t debug_ga_file_command = {
  "debug-ga-file dgaf",
  do_debug_ga_file,
  "Generate allomorphs from the entries in a lexicon file.\n"
  "Execute the rules in debug mode.\n"
  "Usage: debug-ga-file [LEXICON_FILE]\n"
  "If LEXICON_FILE is omitted, the previous lexicon file name is used.\n"
  "\"debug-ga-file\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

/* Generate allomorphs for a single lexicon entry.
 * ARGUMENTS must start with a line number, optionally followed by a
 * lexicon file name; without a file name the previous one is used. */
static void
generate_allomorphs_for_line(string_t arguments)
{
  int_t entry_line = parse_cardinal(&arguments);

  read_lexicon_file_name(&arguments);
  parse_end(&arguments);

  /* The result flag is cleared while generation is running. */
  lex_tree_to_output = FALSE;
  generate_allos_for_line(lexicon_file, entry_line);
  lex_tree_to_output = TRUE;
}

/*---------------------------------------------------------------------------*/

/* Handler of the "ga-line" command.
 * ARGUMENTS: LINE [FILE].
 * Generate allomorphs for one lexicon entry in run mode and display them.
 * Not allowed in debug mode. */
static void
do_ga_line(string_t arguments)
{
  assert_not_in_debug_mode();

  set_debug_mode(RUN_MODE, NULL);
  generate_allomorphs_for_line(arguments);

  display_result();
}

/* Descriptor of the "ga-line" command: name list, handler, help text. */
static command_t ga_line_command = {
  "ga-line gal",
  do_ga_line,
  "Generate allomorphs from a single entry in a file.\n"
  "Usage: ga-line LINE [FILE]\n"
  "The first lexicon entry at or behind LINE in FILE is read in.\n"
  "If FILE is omitted, the previous file name is used.\n"
  "\"ga-line\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

/* Handler of the "debug-ga-line" command.
 * ARGUMENTS: LINE [FILE].
 * Generate allomorphs for one lexicon entry while the allomorph rule
 * system is executed in walk mode. The result is not displayed here.
 * Not allowed in debug mode. */
static void
do_debug_ga_line(string_t arguments)
{
  assert_not_in_debug_mode();

  set_debug_mode(WALK_MODE, allo_rule_sys);
  generate_allomorphs_for_line(arguments);
}

/* Descriptor of the "debug-ga-line" command: name list, handler, help text.
 * The last help line names the command as it is actually registered
 * ("debug-ga-line"). */
static command_t debug_ga_line_command = 
{ 
  "debug-ga-line dgal", do_debug_ga_line,
  "Generate allomorphs from a single entry in a file.\n"
  "Execute allomorph rules in debug mode.\n"
  "Usage: debug-ga-line LINE [FILE]\n"
  "The first lexicon entry at or behind LINE in FILE is read in.\n"
  "Allomorph rule execution stops at the first statement.\n"
  "If FILE is omitted, the previous file name is used.\n"
  "\"debug-ga-line\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

/* Generate allomorphs for the feature structure text in ARGUMENTS.
 * A non-empty ARGUMENTS is copied to BASE_FEAT_STRING; an empty one
 * reuses the stored text, and it is an error if none is stored yet. */
static void
generate_allomorphs(string_t arguments)
{
  if (*arguments != EOS) {
    /* Remember the new argument for later repetition. */
    free_mem(&base_feat_string);
    base_feat_string = new_string(arguments, NULL);
  } else if (base_feat_string == NULL) {
    complain("No previous base feature structure.");
  }

  /* The result flag is cleared while generation is running. */
  lex_tree_to_output = FALSE;
  generate_allos_for_string(base_feat_string);
  lex_tree_to_output = TRUE;
}

/*---------------------------------------------------------------------------*/

/* Handler of the "ga" command.
 * ARGUMENTS: [FEAT].
 * Generate allomorphs for a feature structure in run mode and display
 * them. Not allowed in debug mode. */
static void
do_ga(string_t arguments)
{
  assert_not_in_debug_mode();

  set_debug_mode(RUN_MODE, NULL);
  generate_allomorphs(arguments);

  display_result();
}

/* Descriptor of the "ga" command: name, handler, help text. */
static command_t ga_command = {
  "ga",
  do_ga,
  "Generate allomorphs from a feature structure argument.\n"
  "Usage:\n"
  "  ga FEAT -- Generate allomorphs for feature structure FEAT.\n"
  "  ga -- Re-generate allomorphs for the last argument.\n"
  "The allomorphs are shown on screen.\n"
  "\"ga\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

/* Handler of the "debug-ga" command.
 * ARGUMENTS: [FEAT].
 * Generate allomorphs for a feature structure while the allomorph rule
 * system is executed in walk mode. The result is not displayed here.
 * Not allowed in debug mode. */
static void
do_debug_ga(string_t arguments)
{
  assert_not_in_debug_mode();

  set_debug_mode(WALK_MODE, allo_rule_sys);
  generate_allomorphs(arguments);
}

/* Descriptor of the "debug-ga" command: name list, handler, help text. */
static command_t debug_ga_command = {
  "debug-ga dga ga-debug gad",
  do_debug_ga,
  "Generate allomorphs from the feature structure argument. "
  "Execute allomorph rules in debug mode.\n"
  "Usage:\n"
  "  debug-ga FEAT -- Generate allomorphs for feature structure FEAT.\n"
  "  debug-ga -- Re-generate allomorphs for the last argument.\n"
  "Rule execution stops at the first statement.\n"
  "The allomorphs are shown on screen.\n"
  "\"debug-ga\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

static void 
do_allo_format_option( string_t arguments ) 
/* Handler of the "allo-format" option.
 * If ARGUMENTS is empty, print the current allomorph format in readable form.
 * Otherwise parse one word from ARGUMENTS and make it the new allomorph
 * format. Anything following that word is rejected by parse_end(), as in
 * the other commands of this file, instead of being silently dropped. */
{ 
  string_t format;

  if (*arguments == EOS) 
  { 
    format = new_string_readable( allo_format, NULL );
    printf( "allo-format: %s\n", format );
    free_mem( &format );
  } 
  else 
  { 
    format = parse_word( &arguments );

    /* Do not leak the parsed word if trailing text is rejected.
     * NOTE(review): relies on END_TRY re-raising the error when RESUME is
     * not used, as the other TRY/IF_ERROR uses in this file suggest. */
    TRY
      parse_end( &arguments );
    IF_ERROR
      free_mem( &format );
    END_TRY;

    /* Replace the old format only after the arguments were accepted. */
    free_mem( &allo_format );
    allo_format = format;
  }
}

/* Descriptor of the "allo-format" option: name, handler, help text. */
static command_t allo_format_option = {
  "allo-format",
  do_allo_format_option,
  "Describe the format in which generated allomorphs will be printed.\n"
  "Usage: allo-format STRING\n"
  "STRING may contain the following special sequences:\n"
  "  %f -- Allomorph feature structure.\n"
  "  %n -- Allomorph number.\n"
  "  %s -- Allomorph surface.\n"
};

/* Commands. ================================================================*/

/* NULL-terminated table of the options available in mallex.
 * main() assigns it to OPTIONS. */
static command_t *mallex_options[] = {
  &alias_option,
  &allo_format_option,
  &auto_variables_option,
  &display_cmd_option,
  &hidden_option,
  &roman_hangul_option,
  &sort_records_option,
  &switch_option,
  &transmit_cmd_option,
  &use_display_option,
  NULL
};

/* NULL-terminated table of the commands available in interactive mallex.
 * main() passes it to init_debugger() and command_loop(). */
static command_t *mallex_commands[] = {
  &backtrace_command,
  &break_command,
  &continue_command,
  &debug_ga_command,
  &debug_ga_file_command,
  &debug_ga_line_command,
  &delete_command,
  &down_command,
  &finish_command,
  &frame_command,
  &ga_command,
  &ga_file_command,
  &ga_line_command,
  &get_command,
  &help_command,
  &list_command,
  &next_command,
  &print_command,
  &quit_command,
  &read_constants_command,
  &result_command,
  &run_command,
  &set_command,
  &step_command,
  &transmit_command,
  &up_command,
  &variables_command,
  &walk_command,
  &where_command,
  NULL
};

/*---------------------------------------------------------------------------*/

static void 
read_project_file( string_t file_name )
/* Read the project file FILE_NAME.
 * The file is read line by line. After comments have been cut off, the
 * first word of each non-empty line is compared (via strcmp_no_case) with
 * the keywords "sym:", "lex:", "all:", "prelex:" and "include:".
 * - "sym:", "lex:", "all:", "prelex:" select one of the file name variables
 *   SYMBOL_FILE, LEXICON_FILE, RULE_FILE, PRELEX_FILE. The variable is only
 *   set if it is still NULL and a path follows on the line; the path must
 *   have the matching extension.
 * - "include:" reads another project file recursively.
 * - Lines starting with any other word are ignored here.
 * When an error occurs, the position in the project file is appended to
 * ERROR_TEXT. */
{ 
  FILE *project_stream;
  string_t include_file;
  string_t project_line_p, argument, extension;
  char_t *project_line;
  string_t *name_p;
  /* NOTE(review): BINARY and LINE_COUNT are presumably volatile because the
   * TRY macros are built on setjmp -- confirm against their definition. */
  volatile bool_t binary = FALSE;
  volatile int_t line_count;
  /* Static, so the flag is shared by all recursion levels: an enclosing call
   * can see that a nested call has already appended an error position. */
  static bool_t err_pos_printed;

  err_pos_printed = FALSE;
  project_stream = open_stream( file_name, "r" );
  line_count = 0;
  while (TRUE) 
  { 
    TRY
      project_line = read_line( project_stream );
    IF_ERROR
    {
      /* LINE_COUNT has not been incremented yet for the line being read. */
      print_text( error_text, " (\"%s\", line %d)",
		  name_in_path( file_name ), line_count + 1 );
      err_pos_printed = TRUE;
    }
    END_TRY;
    if (project_line == NULL) 
      break;
    line_count++;
    cut_comment( project_line );
    project_line_p = project_line;

    if (*project_line_p != EOS) 
    { 
      argument = NULL;
      TRY
      {
	/* Determine which file name variable this line refers to, if any. */
	argument = parse_word( &project_line_p );
	extension = NULL;
	name_p = NULL;
	if (strcmp_no_case( argument, "sym:" ) == 0) 
	{ 
	  name_p = &symbol_file;
	  extension = "sym";
	  binary = TRUE;
	} 
	else if (strcmp_no_case( argument, "lex:" ) == 0) 
	{
	  name_p = &lexicon_file;
	  extension = "lex";
	  binary = FALSE;
	}
	else if (strcmp_no_case( argument, "all:" ) == 0) 
	{ 
	  name_p = &rule_file;
	  extension = "all";
	  binary = TRUE;
	} 
	else if (strcmp_no_case( argument, "prelex:" ) == 0)
	{
	  /* Unlike the other file kinds, a second definition is an error. */
	  if (prelex_file != NULL) 
	    complain( "Prelex file already defined." );
	  name_p = &prelex_file;
	  extension = "prelex";
	  binary = TRUE;
	}
	else if (strcmp_no_case( argument, "include:" ) == 0) 
	{ 
	  /* FILE_NAME is passed along when the included path is parsed. */
	  include_file = parse_absolute_path( &project_line_p, file_name );
	  parse_end( &project_line_p );
	  read_project_file( include_file );
	  free_mem( &include_file );
	}
	free_mem( &argument );

	/* Set the selected file name only if it is not set yet, so names
	 * that were set earlier are not overwritten. */
	if (name_p != NULL && *name_p == NULL && *project_line_p != EOS) 
	{ 
	  argument = parse_absolute_path( &project_line_p, file_name );
	  if (! has_extension( argument, extension ))
	  {
	    complain( "\"%s\" should have extension \"%s\".", 
		      name_in_path( argument ), extension );
	  }
	  if (binary) 
	    set_binary_file_name( name_p, argument );
	  else 
	    set_file_name( name_p, argument );
	  free_mem( &argument );
	}
      }
      IF_ERROR
      {
	/* Append the position only once per error. */
	if (! err_pos_printed)
	{
	  print_text( error_text, " (\"%s\", line %d)",
		      name_in_path( file_name ), line_count );
	  err_pos_printed = TRUE;
	}
      }
      END_TRY;
    }
    free_mem( &project_line );
  }
  close_stream( &project_stream, file_name );
}

/*---------------------------------------------------------------------------*/

int 
main( int argc, char *argv[] )
/* The main function of "mallex".
 * ARGV may contain a project file (".pro") and/or individual symbol (".sym"),
 * allomorph rule (".all"), lexicon (".lex") and prelex (".prelex") files,
 * plus one of the mode switches "-b[inary]", "-r[eadable]", "-p[relex]".
 * Without a mode switch, the interactive command loop is started.
 * A single "-v"/"-h" style argument prints version or help and exits.
 * Returns 0 on normal termination. */
{ 
  volatile enum {INTERACTIVE_MODE, BINARY_MODE, TEXT_MODE, 
		 PRELEX_MODE} mallex_mode;
  int_t i;
  string_t malagarc_path, s;
  rule_sys_name_t rule_systems[1]; /* Rule system for debugger. */
  string_t object_file = NULL; /* Object file for binary and prelex mode. */
    
  mallex_mode = INTERACTIVE_MODE;
  init_basic( "mallex" );
  init_input();

  /* Parse arguments. */
  if (argc == 2) 
  { 
    if (strcmp_no_case( argv[1], "--version" ) == 0
	|| strcmp_no_case( argv[1], "-version" ) == 0
	|| strcmp_no_case( argv[1], "-v" ) == 0)  
    { 
      program_message();
      exit( 0 );
    } 
    else if (strcmp_no_case( argv[1], "--help" ) == 0
	     || strcmp_no_case( argv[1], "-help" ) == 0
	     || strcmp_no_case( argv[1], "-h" ) == 0) 
    { 
      printf( "Apply the allomorph rules on the entries of a Malaga lexicon.\n"
	      "\n"
	      "Usage:\n"
	      "mallex GRAMMAR             "
	      "-- Start interactive mallex.\n"
	      "mallex GRAMMAR -b[inary]   "
	      "-- Create binary allomorph lexicon.\n"
	      "mallex GRAMMAR -r[eadable] "
	      "-- Output readable allomorph lexicon.\n"
	      "mallex GRAMMAR -p[relex]   "
	      "-- Output precompiled lexicon.\n"
	      "mallex -v[ersion]          "
	      "-- Print version information.\n"
	      "mallex -h[elp]             "
	      "-- Print this help.\n\n"
	      "GRAMMAR may be \"PROJECT_FILE\" "
	      "or \"SYM_FILE ALLO_FILE LEX_FILE [PRELEX_FILE]\".\n"
	      "PROJECT_FILE must end on \".pro\".\n"
	      "SYM_FILE must end on \".sym\".\n"
	      "ALLO_FILE must end on \".all\".\n"
	      "LEX_FILE must end on \".lex\".\n"
	      "PRELEX_FILE must end on \".prelex\".\n" );
      exit( 0 );
    }
  }

  /* Classify each argument by its file extension or as a mode switch. */
  for (i = 1; i < argc; i++) 
  { 
    if (has_extension( argv[i], "pro" )) 
      set_file_name( &project_file, argv[i] ); 
    else if (has_extension( argv[i], "lex" )) 
      set_file_name( &lexicon_file, argv[i] );
    else if (has_extension( argv[i], "all" )) 
      set_binary_file_name( &rule_file, argv[i] );
    else if (has_extension( argv[i], "sym" )) 
      set_binary_file_name( &symbol_file, argv[i] );
    else if (has_extension( argv[i], "prelex") )
      set_binary_file_name( &prelex_file, argv[i] );
    else if (strcmp_no_case( argv[i], "-binary" ) == 0
	     || strcmp_no_case( argv[i], "-b" ) == 0) 
    { 
      mallex_mode = BINARY_MODE; 
    } 
    else if (strcmp_no_case( argv[i], "-readable" ) == 0
	     || strcmp_no_case( argv[i], "-r" ) == 0) 
    { 
      mallex_mode = TEXT_MODE; 
    } 
    else if (strcmp_no_case( argv[i], "-prelex" ) == 0
	     || strcmp_no_case( argv[i], "-p" ) == 0) 
    { 
      mallex_mode = PRELEX_MODE; 
    } 
    else 
      complain( "Illegal argument \"%s\".", argv[i] );
  }

  /* The project file only fills in file names that are still unset. */
  if (project_file != NULL) 
    read_project_file( project_file );
  if (rule_file == NULL) 
    complain( "Missing allomorph rule file name." );
  if (symbol_file == NULL) 
    complain( "Missing symbol file name." );

  /* Init modules. */
  init_values();
  init_symbols( symbol_file );
  init_hangul();
  init_transmit();
  init_lex_compiler( rule_file );
  init_scanner();

  /* Set mallex options to default values. */
  options = mallex_options;
  allo_format = new_string( "%s: %f", NULL );
  use_display = FALSE;

  /* Set mallex options by user scripts. */
  if (project_file != NULL) 
    execute_set_commands( project_file, "mallex:" );
  malagarc_path = NULL;
  /* A failure to build the path of the user's settings file is ignored. */
#ifdef POSIX
  TRY 
    malagarc_path = absolute_path( "~/.malagarc", NULL );
  IF_ERROR 
    RESUME;
  END_TRY;
#endif
#ifdef WIN32
  TRY 
    malagarc_path = absolute_path( "~\\malaga.ini", NULL );
  IF_ERROR 
    RESUME;
  END_TRY;
#endif
  if (malagarc_path != NULL && file_exists( malagarc_path ))
    execute_set_commands( malagarc_path, "mallex:" );
  free_mem( &malagarc_path );

  if (mallex_mode == INTERACTIVE_MODE) 
  { 
    init_debugger( display_where, mallex_commands );
    rule_systems[0].rule_sys = allo_rule_sys;
    rule_systems[0].name = "all";
    init_breakpoints( 1, rule_systems );
    program_message();
    command_loop( program_name, mallex_commands );
    terminate_breakpoints();
    terminate_debugger();
  } 
  else 
  { 
    /* All batch modes need a lexicon file. */
    if (lexicon_file == NULL) 
      complain( "missing lexicon file name" );
    switch (mallex_mode)
    {
    case TEXT_MODE:
      generate_allos_for_file( lexicon_file, NULL, TRUE );
      print_lex_buffer( stdout, allo_format );
      break;
    case BINARY_MODE:
      generate_allos_for_file( lexicon_file, prelex_file, TRUE );
      set_binary_file_name( &object_file, lexicon_file );
      write_lex_buffer( object_file );
      free_mem( &object_file );
      break;
    case PRELEX_MODE:
      generate_allos_for_file( lexicon_file, prelex_file, FALSE );
      /* The output name is the lexicon name with extension "prelex". */
      s = replace_extension( lexicon_file, "prelex" );
      set_binary_file_name( &object_file, s );
      free_mem( &s );
      write_prelex_file( object_file );
      free_mem( &object_file );
      break;
    default:
      complain( "Internal error." );
    }
    print_lex_statistics( stderr );
  }

  /* Shut down the modules and release all strings owned by this file. */
  free_aliases();
  free_mem( &base_feat_string );
  free_mem( &allo_format );
  stop_display_process();
  terminate_lex_compiler();
  terminate_hangul();
  terminate_symbols();
  terminate_transmit();
  terminate_values();
  terminate_scanner();
  terminate_patterns();
  free_switches();
  free_mem( &rule_file );
  free_mem( &symbol_file );
  free_mem( &lexicon_file );
  free_mem( &prelex_file ); /* Was never released before. */
  free_mem( &project_file );
  terminate_input();
  terminate_basic();
  return 0;
}

/* End of file. =============================================================*/