/* Copyright (C) 1995 Bjoern Beutel. */ /* Description. =============================================================*/ /* This program takes a lexicon file and compiles it to binary format. * It also includes an interactive allomorph rules debugger. */ /* Includes. ================================================================*/ #include #include #include #include #include #include #include #include "basic.h" #include "pools.h" #include "values.h" #include "symbols.h" #include "scanner.h" #include "rule_type.h" #include "rules.h" #include "files.h" #include "lex_compiler.h" #include "input.h" #include "commands.h" #include "commands_interactive.h" #include "options.h" #include "breakpoints.h" #include "debugger.h" #include "display.h" #include "transmit.h" #include "patterns.h" #include "hangul.h" /* Variables. ===============================================================*/ static string_t allo_format; /* Format of allomorph output. */ static bool_t lex_tree_to_output = FALSE; /* Indicates whether the lexicon tree can be printed. */ static string_t lexicon_file, rule_file, symbol_file, project_file; static string_t prelex_file; static string_t base_feat_string; /* Last base feature structure. */ /* Functions. ===============================================================*/ static void display_where( void ) /* Print the name of the current rule. */ { string_t file, rule; int_t line; source_of_instr( executed_rule_sys, pc, &line, &file, &rule ); printf( "At \"%s\", line %d, rule \"%s\".", name_in_path( file ), line, rule ); if (lex_entry_file_name != NULL) { printf( " (\"%s\", line %d)", name_in_path( lex_entry_file_name ), lex_entry_line_number ); } printf( "\n" ); if (in_emacs_malaga_mode) printf( "SHOW \"%s\":%d:0\n", file, line ); } /*---------------------------------------------------------------------------*/ static void read_lexicon_file_name( string_t *arguments ) { if (**arguments != EOS) { free_mem( &lexicon_file ); lexicon_file = parse_absolute_path( arguments, NULL ); } if (lexicon_file == NULL) complain( "Missing lexicon file name." ); } /*---------------------------------------------------------------------------*/ static void display_result( void ) /* Display result in the modes that have been switched on after analysis. */ { if (use_display) { start_display_process(); fprintf( display_stream, "allomorph\n" ); print_lex_buffer( display_stream, "%n %s {%f}" ); fprintf( display_stream, "end\n" ); fflush( display_stream ); } else print_lex_buffer( stdout, NULL ); } /*---------------------------------------------------------------------------*/ static void do_result( string_t arguments ) /* Show result of last allomorph generation. */ { parse_end( &arguments ); if (! lex_tree_to_output) complain( "No previous allomorph generation." ); display_result(); } static command_t result_command = { "result res", do_result, "Show result of last allomorph generation.\n" "Usage: result\n" }; /*---------------------------------------------------------------------------*/ static void do_read_constants( string_t arguments ) /* Read the constants in the lexicon with name on line ARGUMENTS. */ { assert_not_in_debug_mode(); read_lexicon_file_name( &arguments ); parse_end( &arguments ); read_lex_constants( lexicon_file ); } static command_t read_constants_command = { "read-constants", do_read_constants, "Read the constants from the definitions in a lexicon file.\n" "Usage: read-constants [LEXICON_FILE]\n" "If LEXICON_FILE is omitted, the previous lexicon file name is used.\n" "\"read-constants\" can't be used in debug mode.\n" }; /*---------------------------------------------------------------------------*/ static void do_ga_file( string_t arguments ) /* Generate allomorphs of a base lexicon. * Write the allomorphs readably into file. */ { string_t output_name; FILE *output_stream; assert_not_in_debug_mode(); output_stream = NULL; output_name = NULL; TRY { read_lexicon_file_name( &arguments ); if (*arguments != EOS) output_name = parse_absolute_path( &arguments, NULL ); parse_end( &arguments ); set_debug_mode( RUN_MODE, NULL ); lex_tree_to_output = FALSE; generate_allos_for_file( lexicon_file, NULL, TRUE ); lex_tree_to_output = TRUE; if (output_name != NULL) { output_stream = open_stream( output_name, "w" ); print_lex_buffer( output_stream, allo_format ); print_lex_statistics( stdout ); close_stream( &output_stream, output_name ); } } FINALLY { close_stream( &output_stream, NULL ); free_mem( &output_name ); } END_TRY; } static command_t ga_file_command = { "ga-file gaf", do_ga_file, "Generate allomorphs from the entries in a lexicon file.\n" "Usage: ga-file [LEXICON_FILE [ALLO_FILE]]\n" "If LEXICON_FILE is omitted, the previous lexicon file name is used.\n" "If ALLO_FILE is given, the results are written to \"ALLO_FILE\".\n" "The results can also be displayed by the command \"result\".\n" "\"ga-file\" can't be used in debug mode.\n" }; /*---------------------------------------------------------------------------*/ static void do_debug_ga_file( string_t arguments ) /* Generate allomorphs of the base lexicon with name in ARGUMENTS. * Execute rules in debug mode. */ { assert_not_in_debug_mode(); read_lexicon_file_name( &arguments ); parse_end( &arguments ); set_debug_mode( WALK_MODE, allo_rule_sys ); lex_tree_to_output = FALSE; generate_allos_for_file( lexicon_file, NULL, TRUE ); lex_tree_to_output = TRUE; } static command_t debug_ga_file_command = { "debug-ga-file dgaf", do_debug_ga_file, "Generate allomorphs from the entries in a lexicon file.\n" "Execute the rules in debug mode.\n" "Usage: debug-ga-file [LEXICON_FILE]\n" "If LEXICON_FILE is omitted, the previous lexicon file name is used.\n" "\"debug-ga-file\" can't be used in debug mode.\n" }; /*---------------------------------------------------------------------------*/ static void generate_allomorphs_for_line( string_t arguments ) /* Generate allomorphs for ARGUMENTS, which should consist * of a file name and a line number. */ { int_t line; line = parse_cardinal( &arguments ); read_lexicon_file_name( &arguments ); parse_end( &arguments ); lex_tree_to_output = FALSE; generate_allos_for_line( lexicon_file, line ); lex_tree_to_output = TRUE; } /*---------------------------------------------------------------------------*/ static void do_ga_line( string_t arguments ) /* Generate allomorphs for ARGUMENTS, which should consist * of a file name and a line number. */ { assert_not_in_debug_mode(); set_debug_mode( RUN_MODE, NULL ); generate_allomorphs_for_line( arguments ); display_result(); } static command_t ga_line_command = { "ga-line gal", do_ga_line, "Generate allomorphs from a single entry in a file.\n" "Usage: ga-line LINE [FILE]\n" "The first lexicon entry at or behind LINE in FILE is read in.\n" "If FILE is omitted, the previous file name is used.\n" "\"ga-line\" can't be used in debug mode.\n" }; /*---------------------------------------------------------------------------*/ static void do_debug_ga_line( string_t arguments ) /* Generate an allomorph for ARGUMENTS, which should consist * of a file name and a line number, in debugger mode. */ { assert_not_in_debug_mode(); set_debug_mode( WALK_MODE, allo_rule_sys ); generate_allomorphs_for_line( arguments ); } static command_t debug_ga_line_command = { "debug-ga-line dgal", do_debug_ga_line, "Generate allomorphs from a single entry in a file.\n" "Execute allomorph rules in debug mode.\n" "Usage: debug-ga-line LINE [FILE]\n" "The first lexicon entry at or behind LINE in FILE is read in.\n" "Allomorph rule execution stops at the first statement.\n" "If FILE is omitted, the previous file name is used.\n" "\"debug-line\" can't be used in debug mode.\n" }; /*---------------------------------------------------------------------------*/ static void generate_allomorphs( string_t arguments ) /* Generate allomorphs for lexicon entry ARGUMENTS. */ { /* If no argument given, re-analyze last argument */ if (*arguments == EOS) { if (base_feat_string == NULL) complain( "No previous base feature structure." ); } else { free_mem( &base_feat_string ); base_feat_string = new_string( arguments, NULL ); } lex_tree_to_output = FALSE; generate_allos_for_string( base_feat_string ); lex_tree_to_output = TRUE; } /*---------------------------------------------------------------------------*/ static void do_ga( string_t arguments ) /* Generate allomorphs for ARGUMENTS. */ { assert_not_in_debug_mode(); set_debug_mode( RUN_MODE, NULL ); generate_allomorphs( arguments ); display_result(); } static command_t ga_command = { "ga", do_ga, "Generate allomorphs from a feature structure argument.\n" "Usage:\n" " ga FEAT -- Generate allomorphs for feature structure FEAT.\n" " ga -- Re-generate allomorphs for the last argument.\n" "The allomorphs are shown on screen.\n" "\"ga\" can't be used in debug mode.\n" }; /*---------------------------------------------------------------------------*/ static void do_debug_ga( string_t arguments ) /* Generate allomorphs for ARGUMENTS. * Execute allomorph rules in debug mode. */ { assert_not_in_debug_mode(); set_debug_mode( WALK_MODE, allo_rule_sys ); generate_allomorphs( arguments ); } static command_t debug_ga_command = { "debug-ga dga ga-debug gad", do_debug_ga, "Generate allomorphs from the feature structure argument. " "Execute allomorph rules in debug mode.\n" "Usage:\n" " debug-ga FEAT -- Generate allomorphs for feature structure FEAT.\n" " debug-ga -- Re-generate allomorphs for the last argument.\n" "Rule execution stops at the first statement.\n" "The allomorphs are shown on screen.\n" "\"debug-ga\" can't be used in debug mode.\n" }; /*---------------------------------------------------------------------------*/ static void do_allo_format_option( string_t arguments ) /* Change allomorph output line to "arguments" */ { string_t format; if (*arguments == EOS) { format = new_string_readable( allo_format, NULL ); printf( "allo-format: %s\n", format ); free_mem( &format ); } else { format = parse_word( &arguments ); free_mem( &allo_format ); allo_format = format; } } static command_t allo_format_option = { "allo-format", do_allo_format_option, "Describe the format in which generated allomorphs will be printed.\n" "Usage: allo-format STRING\n" "STRING may contain the following special sequences:\n" " %f -- Allomorph feature structure.\n" " %n -- Allomorph number.\n" " %s -- Allomorph surface.\n" }; /* Commands. ================================================================*/ static command_t *mallex_options[] = { &alias_option, &allo_format_option, &auto_variables_option, &display_cmd_option, &hidden_option, &roman_hangul_option, &sort_records_option, &switch_option, &transmit_cmd_option, &use_display_option, NULL }; static command_t *mallex_commands[] = { &backtrace_command, &break_command, &continue_command, &debug_ga_command, &debug_ga_file_command, &debug_ga_line_command, &delete_command, &down_command, &finish_command, &frame_command, &ga_command, &ga_file_command, &ga_line_command, &get_command, &help_command, &list_command, &next_command, &print_command, &quit_command, &read_constants_command, &result_command, &run_command, &set_command, &step_command, &transmit_command, &up_command, &variables_command, &walk_command, &where_command, NULL }; /*---------------------------------------------------------------------------*/ static void read_project_file( string_t file_name ) /* Read the project file FILE_NAME. */ { FILE *project_stream; string_t include_file; string_t project_line_p, argument, extension; char_t *project_line; string_t *name_p; volatile bool_t binary = FALSE; volatile int_t line_count; static bool_t err_pos_printed; err_pos_printed = FALSE; project_stream = open_stream( file_name, "r" ); line_count = 0; while (TRUE) { TRY project_line = read_line( project_stream ); IF_ERROR { print_text( error_text, " (\"%s\", line %d)", name_in_path( file_name ), line_count + 1 ); err_pos_printed = TRUE; } END_TRY; if (project_line == NULL) break; line_count++; cut_comment( project_line ); project_line_p = project_line; if (*project_line_p != EOS) { argument = NULL; TRY { argument = parse_word( &project_line_p ); extension = NULL; name_p = NULL; if (strcmp_no_case( argument, "sym:" ) == 0) { name_p = &symbol_file; extension = "sym"; binary = TRUE; } else if (strcmp_no_case( argument, "lex:" ) == 0) { name_p = &lexicon_file; extension = "lex"; binary = FALSE; } else if (strcmp_no_case( argument, "all:" ) == 0) { name_p = &rule_file; extension = "all"; binary = TRUE; } else if (strcmp_no_case( argument, "prelex:" ) == 0) { if (prelex_file != NULL) complain( "Prelex file already defined." ); name_p = &prelex_file; extension = "prelex"; binary = TRUE; } else if (strcmp_no_case( argument, "include:" ) == 0) { include_file = parse_absolute_path( &project_line_p, file_name ); parse_end( &project_line_p ); read_project_file( include_file ); free_mem( &include_file ); } free_mem( &argument ); if (name_p != NULL && *name_p == NULL && *project_line_p != EOS) { argument = parse_absolute_path( &project_line_p, file_name ); if (! has_extension( argument, extension )) { complain( "\"%s\" should have extension \"%s\".", name_in_path( argument ), extension ); } if (binary) set_binary_file_name( name_p, argument ); else set_file_name( name_p, argument ); free_mem( &argument ); } } IF_ERROR { if (! err_pos_printed) { print_text( error_text, " (\"%s\", line %d)", name_in_path( file_name ), line_count ); err_pos_printed = TRUE; } } END_TRY; } free_mem( &project_line ); } close_stream( &project_stream, file_name ); } /*---------------------------------------------------------------------------*/ int main( int argc, char *argv[] ) /* The main function of "mallex". */ { volatile enum {INTERACTIVE_MODE, BINARY_MODE, TEXT_MODE, PRELEX_MODE} mallex_mode; int_t i; string_t malagarc_path, s; rule_sys_name_t rule_systems[1]; /* Rule system for debugger. */ string_t object_file = NULL; /* Object file for binary and prelex mode. */ mallex_mode = INTERACTIVE_MODE; init_basic( "mallex" ); init_input(); /* Parse arguments. */ if (argc == 2) { if (strcmp_no_case( argv[1], "--version" ) == 0 || strcmp_no_case( argv[1], "-version" ) == 0 || strcmp_no_case( argv[1], "-v" ) == 0) { program_message(); exit( 0 ); } else if (strcmp_no_case( argv[1], "--help" ) == 0 || strcmp_no_case( argv[1], "-help" ) == 0 || strcmp_no_case( argv[1], "-h" ) == 0) { printf( "Apply the allomorph rules on the entries of a Malaga lexicon.\n" "\n" "Usage:\n" "mallex GRAMMAR " "-- Start interactive mallex.\n" "mallex GRAMMAR -b[inary] " "-- Create binary allomorph lexicon.\n" "mallex GRAMMAR -r[eadable] " "-- Output readable allomorph lexicon.\n" "mallex GRAMMAR -p[relex] " "-- Output precompiled lexicon.\n" "mallex -v[ersion] " "-- Print version information.\n" "mallex -h[elp] " "-- Print this help.\n\n" "GRAMMAR may be \"PROJECT_FILE\" " "or \"SYM_FILE ALLO_FILE LEX_FILE [PRELEX_FILE]\".\n" "PROJECT_FILE must end on \".pro\".\n" "SYM_FILE must end on \".sym\".\n" "ALLO_FILE must end on \".all\".\n" "LEX_FILE must end on \".lex\".\n" "PRELEX_FILE must end on \".prelex\".\n" ); exit( 0 ); } } for (i = 1; i < argc; i++) { if (has_extension( argv[i], "pro" )) set_file_name( &project_file, argv[i] ); else if (has_extension( argv[i], "lex" )) set_file_name( &lexicon_file, argv[i] ); else if (has_extension( argv[i], "all" )) set_binary_file_name( &rule_file, argv[i] ); else if (has_extension( argv[i], "sym" )) set_binary_file_name( &symbol_file, argv[i] ); else if (has_extension( argv[i], "prelex") ) set_binary_file_name( &prelex_file, argv[i] ); else if (strcmp_no_case( argv[i], "-binary" ) == 0 || strcmp_no_case( argv[i], "-b" ) == 0) { mallex_mode = BINARY_MODE; } else if (strcmp_no_case( argv[i], "-readable" ) == 0 || strcmp_no_case( argv[i], "-r" ) == 0) { mallex_mode = TEXT_MODE; } else if (strcmp_no_case( argv[i], "-prelex" ) == 0 || strcmp_no_case( argv[i], "-p" ) == 0) { mallex_mode = PRELEX_MODE; } else complain( "Illegal argument \"%s\".", argv[i] ); } if (project_file != NULL) read_project_file( project_file ); if (rule_file == NULL) complain( "Missing allomorph rule file name." ); if (symbol_file == NULL) complain( "Missing symbol file name." ); /* Init modules. */ init_values(); init_symbols( symbol_file ); init_hangul(); init_transmit(); init_lex_compiler( rule_file ); init_scanner(); /* Set mallex options to default values. */ options = mallex_options; allo_format = new_string( "%s: %f", NULL ); use_display = FALSE; /* Set mallex options by user scripts. */ if (project_file != NULL) execute_set_commands( project_file, "mallex:" ); malagarc_path = NULL; #ifdef POSIX TRY malagarc_path = absolute_path( "~/.malagarc", NULL ); IF_ERROR RESUME; END_TRY; #endif #ifdef WIN32 TRY malagarc_path = absolute_path( "~\\malaga.ini", NULL ); IF_ERROR RESUME; END_TRY; #endif if (malagarc_path != NULL && file_exists( malagarc_path )) execute_set_commands( malagarc_path, "mallex:" ); free_mem( &malagarc_path ); if (mallex_mode == INTERACTIVE_MODE) { init_debugger( display_where, mallex_commands ); rule_systems[0].rule_sys = allo_rule_sys; rule_systems[0].name = "all"; init_breakpoints( 1, rule_systems ); program_message(); command_loop( program_name, mallex_commands ); terminate_breakpoints(); terminate_debugger(); } else { if (lexicon_file == NULL) complain( "missing lexicon file name" ); switch (mallex_mode) { case TEXT_MODE: generate_allos_for_file( lexicon_file, NULL, TRUE ); print_lex_buffer( stdout, allo_format ); break; case BINARY_MODE: generate_allos_for_file( lexicon_file, prelex_file, TRUE ); set_binary_file_name( &object_file, lexicon_file ); write_lex_buffer( object_file ); free_mem( &object_file ); break; case PRELEX_MODE: generate_allos_for_file( lexicon_file, prelex_file, FALSE ); s = replace_extension( lexicon_file, "prelex" ); set_binary_file_name( &object_file, s ); free_mem( &s ); write_prelex_file( object_file ); free_mem( &object_file ); break; default: complain( "Internal error." ); } print_lex_statistics( stderr ); } free_aliases(); free_mem( &base_feat_string ); free_mem( &allo_format ); stop_display_process(); terminate_lex_compiler(); terminate_hangul(); terminate_symbols(); terminate_transmit(); terminate_values(); terminate_scanner(); terminate_patterns(); free_switches(); free_mem( &rule_file ); free_mem( &symbol_file ); free_mem( &lexicon_file ); free_mem( &project_file ); terminate_input(); terminate_basic(); return 0; } /* End of file. =============================================================*/