/* This file is part of Malaga, a system for Natural Language Analysis.
 * Copyright (C) 1995-1999 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* description ==============================================================*/

/* This program takes a lexicon file and compiles it to internal format. */

/* includes =================================================================*/

#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include <setjmp.h>
#include <time.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "symbols.h"
#include "scanner.h"
#include "rule_type.h"
#include "rules.h"
#include "files.h"
#include "lex_compiler.h"
#include "input.h"
#include "commands.h"
#include "options.h"
#include "breakpoints.h"
#include "debugger.h"
#include "display.h"

#ifdef HANGUL
#include "hangul.h"
#endif

#undef GLOBAL
#define GLOBAL

/* constants ================================================================*/

LOCAL string_t standard_allo_format = "\"%s\": %c";

/* variables ================================================================*/

LOCAL string_t allo_format;
/* format of allomorph output, %s for surf, %c for cat */

LOCAL bool_t lex_buffer_to_output = FALSE;
/* indicates whether the lexicon buffer can be printed. */

LOCAL string_t program_name; /* this is set to argv[0] by main */

LOCAL string_t lexicon_file, rule_file, symbol_file, project_file;

LOCAL string_t base_category_string; /* last base category */

LOCAL FILE *output_stream; /* where a visible output will be printed to */

/* functions ================================================================*/

LOCAL void error_handler (string_t format, ...) NO_RETURN;
LOCAL void error_handler (string_t format, ...)
/* Print an error message and exit or return to command loop.
 * <format> and the following arguments are used like in "printf".
 * The message is preceded by the best source position that is known:
 * - the rule source position (and the lexicon entry position) if the error
 *   occurred while a rule was executed,
 * - the position of the current lexicon entry, or
 * - the current scanner position.
 * If <error_jump_point> is set, the message is written to stdout and
 * control jumps to <*error_jump_point>. Otherwise the message is written
 * to stderr and the program exits with status 1. */
{
  va_list arg;
  string_t file_name, rule_name;
  int_t line, column;
  FILE *stream;

  /* Make sure that pending regular output precedes the error message. */
  fflush (stdout);

  if (error_jump_point != NULL)
  {
    stream = stdout;
    fprintf (stream, "error: ");
  }
  else
  {
    stream = stderr;
    fprintf (stream, "%s: ", program_name);
  }

  if (executing_rule) 
  {
    /* Get source file position.
     * (Subtract 1 from <pc> because when an instruction is executed,
     * <pc> usually already points to next instruction.) */
    source_of_instr (executed_rule_sys, pc - 1, &line, NULL, &file_name,
                     &rule_name);
    column = 0;
    
    if (line != -1)
      fprintf (stream, "file \"%s\", line %ld, rule \"%s\": ", 
               name_in_path (file_name), line, rule_name);

    /* Also show the lexicon entry that is being processed.
     * <lex_entry_file_name> may be NULL (it is reset below), so never
     * hand it to "name_in_path" or "%s" without checking it. */
    if (current_line_number () != -1 && lex_entry_file_name != NULL)
      fprintf (stream, "(file \"%s\", line %ld): ", 
               name_in_path (lex_entry_file_name), lex_entry_line_number);
    
    executing_rule = FALSE;
  } 
  else if (! in_debugger && lex_entry_file_name != NULL)
  {
    /* An entry has been run through allomorph rules. */
    fprintf (stream, "file \"%s\", line %ld: ", 
             name_in_path (lex_entry_file_name), lex_entry_line_number);
    file_name = lex_entry_file_name;
    line = lex_entry_line_number;
    column = 0;
  }
  else if (! in_debugger && current_line_number () != -1)
  {
    /* An entry has been parsed. */
    fprintf (stream, "file \"%s\", line %ld, column %ld: ", 
             name_in_path (current_file_name ()), current_line_number (), 
             current_column ());
    file_name = current_file_name ();
    line = current_line_number ();
    column = current_column ();
  }
  else
  {
    /* No position known; <file_name> is not used if <line> is -1. */
    line = -1;
    column = 0;
  }

  va_start (arg, format);
  vfprintf (stream, format, arg);
  va_end (arg);

  fprintf (stream, "\n");
  
  /* If the environment variable MALAGA_MODE is set, additionally print
   * the error position as a "SHOW" line on stdout. */
  if (getenv ("MALAGA_MODE") != NULL && line != -1)
    printf ("SHOW \"%s\":%ld:%ld\n", file_name, line, column);

  /* Outside the debugger, stop scanning and forget the entry position. */
  if (! in_debugger && current_line_number () != -1) 
    {
      stop_scanner ();
      lex_entry_file_name = NULL;
      lex_entry_line_number = -1;
    }

  if (error_jump_point != NULL)
    longjmp (*error_jump_point, 1);
  else
    exit (1);
}

/*---------------------------------------------------------------------------*/

GLOBAL void print_rule (void)
/* Write the name of the rule with index <executed_rule_number> in
 * <executed_rule_sys> to stdout. */
{
  rule_t *current_rule;

  current_rule = &executed_rule_sys->rules[executed_rule_number];

  /* The rule's <name> is an index into the string table of the rule system. */
  printf ("at rule \"%s\"\n", &executed_rule_sys->strings[current_rule->name]);
}

/*---------------------------------------------------------------------------*/

LOCAL void do_rule (string_t arguments)
/* Command "rule": print the name of the executed rule.
 * It is an error if <pc> is -1. <arguments> is checked by "parse_end". */
{
  if (pc == -1)
  {
    error ("no rule executed");
  }

  parse_end (arguments);
  print_rule ();
}

/* Command descriptor for "rule". */
LOCAL command_t rule_command =
{
  "rule", do_rule,
  "Show the name of the executed rule.\n"
  "Usage: rule\n"
  "\"rule\" can only be used in debug mode or after a rule execution error.\n"
};

/*---------------------------------------------------------------------------*/

LOCAL void display_allomorph (void)
/* Start the display process and send it the contents of the lexicon buffer,
 * enclosed in an "allomorph" line and an "end" line. */
{
  start_display_process ();

  /* Header line. */
  fprintf (display_stream, "allomorph\n");

  /* One item per allomorph: number, quoted surface, category in braces. */
  print_lex_buffer (display_stream, "%n \"%s\" {%c}");

  /* Trailer line; flush so that everything is passed on at once. */
  fprintf (display_stream, "end\n");
  fflush (display_stream);
}

/*---------------------------------------------------------------------------*/

LOCAL void display_result (void)
/* Show the generated allomorphs in every mode that is switched on:
 * as text on stdout if <show_output> is set,
 * via the display process if <show_result> is set. */
{
  if (show_output) 
  {
    print_lex_buffer (stdout, standard_allo_format);
  }

  if (show_result)
  {
    display_allomorph ();
  }
}

/*---------------------------------------------------------------------------*/

LOCAL void do_output (string_t arguments)
/* Command "output": print the allomorphs of the last generation on stdout,
 * using the standard allomorph format.
 * It is an error if there is no completed generation. */
{
  parse_end (arguments);

  if (lex_buffer_to_output)
  {
    print_lex_buffer (stdout, standard_allo_format);
    return;
  }

  error ("no previous allomorph generation");
}

/* Command descriptor for "output". */
LOCAL command_t output_command = 
{
  "output o", do_output,
  "Show output of last allomorph generation.\n"
  "Usage: output\n"
};

/*---------------------------------------------------------------------------*/

LOCAL void do_result (string_t arguments)
/* Command "result": send the allomorphs of the last generation to the
 * display process. It is an error if there is no completed generation. */
{
  parse_end (arguments);

  if (lex_buffer_to_output)
  {
    display_allomorph ();
    return;
  }

  error ("no previous allomorph generation");
}

/* Command descriptor for "result". */
LOCAL command_t result_command = 
{
  "result res", do_result,
  "Show result of last allomorph generation.\n"
  "Usage: result\n"
};

/*---------------------------------------------------------------------------*/

LOCAL void do_ga_file (string_t arguments)
/* Command "ga-file": generate allomorphs for all entries of a lexicon file.
 * <arguments> consists of the lexicon file name, optionally followed by the
 * name of the file the allomorphs are written to. If the second name is
 * missing, the allomorphs are written to "<lexicon-file>.cat".
 * The command is rejected in debug mode. */
{
  string_t word, source_path, target_path;

  if (in_debugger)
    error ("in debug mode");

  /* First argument: the lexicon file. */
  word = parse_word (&arguments);
  source_path = absolute_path (word, NULL);
  free_mem (&word);

  /* Optional second argument: the allomorph file. */
  if (*arguments == EOS)
    target_path = concat_strings (source_path, ".cat", NULL);
  else
  {
    word = parse_word (&arguments);
    target_path = absolute_path (word, NULL);
    free_mem (&word);
  }

  parse_end (arguments);

  /* Close an output stream that may still be open. */
  close_stream (&output_stream, NULL);

  /* The buffer is marked as not printable while it is being rebuilt. */
  set_debug_mode (RUN_MODE, NULL);
  lex_buffer_to_output = FALSE;
  generate_allos_for_file (source_path);
  lex_buffer_to_output = TRUE;

  /* Write the allomorphs to the file and the statistics to the screen. */
  output_stream = open_stream (target_path, "w");
  print_lex_buffer (output_stream, allo_format);
  print_lex_statistics (stdout);
  close_stream (&output_stream, target_path);

  free_mem (&target_path);
  free_mem (&source_path);
}

/* Command descriptor for "ga-file". */
LOCAL command_t ga_file_command =
{
  "ga-file gaf", do_ga_file,
  "Generate allomorphs from the entries in a lexicon file.\n"
  "Usage: ga-file <lexicon-file> [<allo-file>]\n"
  "The results are written to \"<allo-file>\".\n"
  "If <allo-file> is missing, they are written to \"<lexicon-file>.cat\".\n"
  "\"ga-file\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

LOCAL void do_debug_file (string_t arguments)
/* Command "debug-file": generate allomorphs for all entries of the lexicon
 * file whose name is given in <arguments>, with the allomorph rule system
 * in step mode. The command is rejected in debug mode. */
{
  string_t word, source_path;

  if (in_debugger)
    error ("in debug mode");

  /* The only argument is the lexicon file. */
  word = parse_word (&arguments);
  source_path = absolute_path (word, NULL);
  free_mem (&word);
  parse_end (arguments);

  set_debug_mode (STEP_MODE, allo_rule_sys);

  /* The buffer is marked as not printable while it is being rebuilt. */
  lex_buffer_to_output = FALSE;
  generate_allos_for_file (source_path);
  lex_buffer_to_output = TRUE;

  free_mem (&source_path);
}

/* Command descriptor for "debug-file". */
LOCAL command_t debug_file_command =
{
  "debug-file df", do_debug_file,
  "Generate allomorphs from the entries in a lexicon file.\n"
  "Execute the rules in debug mode.\n"
  "Usage: debug-file <lexicon-file>\n"
  "\"debug-file\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

LOCAL void generate_allomorphs_for_line (string_t arguments)
/* Generate allomorphs for a single lexicon entry.
 * <arguments> must consist of a file name and a line number.
 * It is an error to call this in debug mode. */
{
  string_t word, source_path;
  int_t line_number;

  if (in_debugger)
    error ("in debug mode");
  
  /* First argument: the lexicon file. */
  word = parse_word (&arguments);
  source_path = absolute_path (word, NULL);
  free_mem (&word);

  /* Second argument: the line number; nothing else is parsed after it. */
  line_number = parse_int (&arguments);
  parse_end (arguments);

  /* The buffer is marked as not printable while it is being rebuilt. */
  lex_buffer_to_output = FALSE;
  generate_allos_for_line (source_path, line_number);
  lex_buffer_to_output = TRUE;

  free_mem (&source_path);
}

/*---------------------------------------------------------------------------*/

LOCAL void do_ga_line (string_t arguments)
/* Command "ga-line": generate allomorphs for <arguments>, which should
 * consist of a file name and a line number, and show them in the modes
 * that are switched on. The command is rejected in debug mode. */
{
  /* Reject the command before the debug mode is changed, like "ga" and
   * "ga-file" do. Otherwise a rejected "ga-line" would still switch the
   * running debug session to run mode. */
  if (in_debugger)
    error ("in debug mode");

  set_debug_mode (RUN_MODE, NULL);
  generate_allomorphs_for_line (arguments);
  display_result ();
}

/* Command descriptor for "ga-line". */
LOCAL command_t ga_line_command = 
{
  "ga-line gal", do_ga_line,
  "Generate allomorphs from a single entry in a file.\n"
  "Usage: ga-line <file> <line>\n"
  "The first lexicon entry at or behind <line> in <file> is read in.\n"
  "\"ga-line\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

LOCAL void do_debug_line (string_t arguments)
/* Command "debug-line": generate allomorphs for <arguments>, which should
 * consist of a file name and a line number, with the allomorph rule system
 * in step mode. The command is rejected in debug mode. */
{
  /* Reject the command before the debug mode is changed, like
   * "debug-entry" and "debug-file" do. Otherwise a rejected "debug-line"
   * would still alter the debug mode of the running debug session. */
  if (in_debugger)
    error ("in debug mode");

  set_debug_mode (STEP_MODE, allo_rule_sys);
  generate_allomorphs_for_line (arguments);
}

/* Command descriptor for "debug-line". */
LOCAL command_t debug_line_command = 
{
  "debug-line dl", do_debug_line,
  "Generate allomorphs from a single entry in a file.\n"
  "Execute allomorph rules in debug mode.\n"
  "Usage: debug-line <file> <line>\n"
  "The first lexicon entry at or behind <line> in <file> is read in.\n"
  "Allomorph rule execution stops at the first statement.\n"
  "\"debug-line\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

LOCAL void generate_allomorphs (string_t arguments)
/* Generate allomorphs for the base category in <arguments>.
 * If <arguments> is empty, the base category of the previous call is used
 * again; it is an error if there is none. */
{
  if (*arguments != EOS)
  {
    /* Remember the new base category for later re-generation. */
    free_mem (&base_category_string);
    base_category_string = new_string (arguments, NULL);
  }
  else if (base_category_string == NULL)
  {
    error ("no previous base category");
  }

  /* The buffer is marked as not printable while it is being rebuilt. */
  lex_buffer_to_output = FALSE;
  generate_allos_for_string (base_category_string);
  lex_buffer_to_output = TRUE;
}

/*---------------------------------------------------------------------------*/

LOCAL void do_ga (string_t arguments)
/* Command "ga": generate allomorphs for the base category in <arguments>
 * (or for the previous one if <arguments> is empty) and show them in the
 * modes that are switched on. The command is rejected in debug mode. */
{
  if (in_debugger)
  {
    error ("in debug mode");
  }

  set_debug_mode (RUN_MODE, NULL);
  generate_allomorphs (arguments);
  display_result ();
}

/* Command descriptor for "ga". */
LOCAL command_t ga_command =
{
  "ga", do_ga,
  "Generate allomorphs from a category argument.\n"
  "Usage:\n"
  "  ga <category> -- generate allomorphs for <category>\n"
  "  ga -- re-generate allomorphs for the last argument\n"
  "The allomorphs are printed on screen.\n"
  "\"ga\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

LOCAL void do_debug_entry (string_t arguments)
/* Command "debug-entry": generate allomorphs for the base category in
 * <arguments> (or for the previous one if <arguments> is empty), with the
 * allomorph rule system in step mode.
 * The command is rejected in debug mode. */
{
  if (in_debugger)
    error ("in debug mode");

  set_debug_mode (STEP_MODE, allo_rule_sys);
  generate_allomorphs (arguments);
}

/* Command descriptor for "debug-entry".
 * The last help line names the command by its primary name, as the help
 * texts of "debug-file" and "debug-line" do. */
LOCAL command_t debug_entry_command =
{
  "debug-entry ga-debug gad", do_debug_entry,
  "Generate allomorphs from the category argument. "
  "Execute allomorph rules in debug mode.\n"
  "Usage:\n"
  "  debug-entry <category> -- generate allomorphs for <category>\n"
  "  debug-entry -- re-generate allomorphs for the last argument\n"
  "Rule execution stops at the first statement.\n"
  "The allomorphs are printed on screen.\n"
  "\"debug-entry\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

LOCAL void do_allo_format_option (string_t arguments) 
/* Option "allo-format": if <arguments> is empty, print the current
 * allomorph format in readable form. Otherwise make the first word of
 * <arguments> the new allomorph format. */
{
  string_t text;

  if (*arguments != EOS) 
  {
    /* Parse the new format first, then replace the old one by it. */
    text = parse_word (&arguments);
    free_mem (&allo_format);
    allo_format = text;
    return;
  }

  text = new_string_readable (allo_format, NULL);
  printf ("allo-format: %s\n", text);
  free_mem (&text);
}

/* Option descriptor for "allo-format". */
LOCAL command_t allo_format_option =
{
  "allo-format", do_allo_format_option,
  "Describe the format in which generated allomorphs will be printed.\n"
  "Usage: allo-format <string>\n"
  "<string> may contain the following special sequences:\n"
  "  %c -- allomorph category\n"
  "  %n -- allomorph number\n"
  "  %s -- allomorph surface\n"
};

/* mallex commands ==========================================================*/

/* The options of mallex; the table ends with NULL. */
LOCAL command_t *mallex_options[] = 
{
  &alias_option,
  &allo_format_option,
  &display_option,
#ifdef HANGUL
  &hangul_option,
#endif
  &hidden_option,
  &output_option,
  &result_option,
  &sort_records_option, 
  &switch_option,
  &variables_option,
  NULL
};

/* The commands of mallex; the table ends with NULL. */
LOCAL command_t *mallex_commands[] = 
{
  &break_command,
  &debug_entry_command,
  &debug_file_command, 
  &debug_line_command,
  &delete_command,
  &ga_command,
  &ga_file_command, 
  &ga_line_command,
  &get_command,
  &go_command,
  &help_command,
  &list_command, 
  &next_command,
  &output_command,
  &print_command,
  &quit_command, 
  &result_command,
  &rule_command,
  &run_command,
  &set_command,
  &step_command, 
  &trace_command,
  &variables_command,
  &walk_command,
  NULL
};

/*---------------------------------------------------------------------------*/

LOCAL void program_message (void)
/* Print program name, version, copyright and license note on stdout. */
{
  /* Only the first line contains variable parts. */
  printf ("%s (%s) - Copyright (C) 1995-1999 Bjoern Beutel\n",
          program_name, MALAGA_VERSION);

  fputs ("It is part of Malaga, a system for Natural Language Analysis.\n",
         stdout);
  fputs ("This program comes with ABSOLUTELY NO WARRANTY.\n", stdout);
  fputs ("This is free software which you may redistribute "
         "under certain conditions.\n", stdout);
  fputs ("For details, refer to the GNU General Public License.\n", stdout);
}

/*---------------------------------------------------------------------------*/

LOCAL void read_project_file (string_t project_file)
/* Read the project file <project_file> line by line.
 * A line starting with "sym:", "lex:" or "all:" names the symbol file,
 * the lexicon file or the allomorph rule file, respectively. The name is
 * only taken if the corresponding file name has not been set before.
 * A line starting with "include:" names another project file, which is
 * read recursively. All other lines are ignored. */
{
  FILE *stream;

  stream = open_stream (project_file, "r");
  while (! feof (stream))  
  {
    string_t line_buffer, cursor;
    
    line_buffer = read_line (stream);
    cursor = line_buffer;
    
    if (*cursor != EOS)
    {
      string_t *target = NULL; /* the file name variable to be set, if any */
      string_t default_extension = NULL; /* passed on to "set_file_name" */
      string_t keyword = parse_word (&cursor);
      
      if (strcmp_no_case (keyword, "sym:") == 0) 
      {
        target = &symbol_file;
        default_extension = "sym_c";
      } 
      else if (strcmp_no_case (keyword, "lex:") == 0) 
      {
        target = &lexicon_file;
      } 
      else if (strcmp_no_case (keyword, "all:") == 0) 
      {
        target = &rule_file;
        default_extension = "all_c";
      } 
      else if (strcmp_no_case (keyword, "include:") == 0) 
      {
        /* The included file is located using the path of this file. */
        string_t include_name = parse_word (&cursor);
        string_t include_path = absolute_path (include_name, project_file);
        
        parse_end (cursor);
        read_project_file (include_path);
        free_mem (&include_path);
        free_mem (&include_name);
      }
      
      free_mem (&keyword);
      
      /* Set the file name unless it is already set or missing in the line. */
      if (target != NULL && *target == NULL && *cursor != EOS) 
      {
        string_t name, full_path;

        name = parse_word (&cursor);
        full_path = absolute_path (name, project_file);
        free_mem (&name);
        set_file_name (target, full_path, default_extension);
        free_mem (&full_path);
      }
    }

    free_mem (&line_buffer);
  }
  
  close_stream (&stream, project_file);
}

/*---------------------------------------------------------------------------*/

GLOBAL int main (int argc, string_t argv[])
/* The main function of "mallex".
 * Command line arguments are file names, recognised by their extensions
 * ("pro", "lex", "all_c", "sym_c"), and the options
 *   -version, -v     -- print the program message and exit,
 *   -interactive, -i -- start the command loop,
 *   -readable, -r    -- print the allomorphs on stdout in readable form.
 * Without -i and -r, the lexicon is compiled and written to a file with
 * the extension "lex_c". File names that are missing on the command line
 * may be given in the project file. Returns 0 on success; errors are
 * reported via <error_handler>. */
{
  enum {INTERACTIVE_MODE, LEXICON_MODE, READABLE_MODE} mallex_mode;
  int_t i;
  string_t malagarc_name;

  error = error_handler;
  mallex_mode = LEXICON_MODE;
  program_name = name_in_path (argv[0]);

#ifdef HANGUL
  init_hangul ();
#endif

  if (argc == 2 && (strcmp_no_case (argv[1], "-version") == 0
                    || strcmp_no_case (argv[1], "-v") == 0))
  {
    program_message ();
    exit (0);
  }

  /* Evaluate the command line arguments. */
  for (i = 1; i < argc; i++) 
  {
    string_t argument;

    /* Options are copied as they are, file names are made absolute. */
    if (*argv[i] == '-') 
      argument = new_string (argv[i], NULL);
    else
      argument = absolute_path (argv[i], NULL);
    
    if (has_extension (argument, "pro"))
      set_file_name (&project_file, argument, NULL);
    else if (has_extension (argument, "lex"))
      set_file_name (&lexicon_file, argument, NULL);
    else if (has_extension (argument, "all_c"))
      set_file_name (&rule_file, argument, NULL);
    else if (has_extension (argument, "sym_c"))
      set_file_name (&symbol_file, argument, NULL);
    else if (strcmp_no_case (argument, "-interactive") == 0
             || strcmp_no_case (argument, "-i") == 0)
      mallex_mode = INTERACTIVE_MODE;
    else if (strcmp_no_case (argument, "-readable") == 0
             || strcmp_no_case (argument, "-r") == 0)
      mallex_mode = READABLE_MODE;
    else
      error ("illegal argument \"%s\"", argument);

    free_mem (&argument);
  }

  /* The project file only sets file names that are not set yet. */
  if (project_file != NULL) 
    read_project_file (project_file);

  if (rule_file == NULL) 
    error ("missing allomorph rule file name");

  if (symbol_file == NULL) 
    error ("missing symbol file name");

  init_values ();
  init_symbols (symbol_file);
  init_lex_compiler (rule_file);
  options = mallex_options;

  /* The user-settable format starts out as a copy of the standard format.
   * It is a copy because <allo_format> is freed when it is replaced. */
  allo_format = new_string (standard_allo_format, NULL);
  show_output = TRUE;
  show_result = FALSE;

  /* Execute the "mallex:" lines of the project file and of "~/.malagarc". */
  if (project_file != NULL) 
    execute_set_commands (project_file, "mallex:");
  malagarc_name = absolute_path ("~/.malagarc", NULL);
  if (file_exists (malagarc_name))
    execute_set_commands (malagarc_name, "mallex:");
  free_mem (&malagarc_name);
      
  if (mallex_mode == INTERACTIVE_MODE) 
  {
    rule_sys_name_t rule_systems[1];
    
    program_message ();
    init_debugger (print_rule, mallex_commands);
    
    /* The allomorph rule system is the only one breakpoints can be set in. */
    rule_systems[0].rule_sys = allo_rule_sys;
    rule_systems[0].name = "all";
    init_breakpoints (1, rule_systems);
    
    command_loop (program_name, mallex_commands);
    
    terminate_breakpoints ();
    terminate_debugger ();
  } 
  else 
  {
    if (lexicon_file == NULL) error ("missing lexicon file name");
    
    generate_allos_for_file (lexicon_file);

    if (mallex_mode == READABLE_MODE)
      print_lex_buffer (stdout, allo_format);
    else 
    {
      string_t object_file;
    
      /* Write the compiled lexicon next to the source lexicon. */
      object_file = replace_extension (lexicon_file, "lex_c");
      write_lex_buffer (object_file);
      free_mem (&object_file);
    }

    /* Statistics go to stderr since stdout may carry the allomorphs. */
    print_lex_statistics (stderr);
  }

  /* Release everything that has been allocated. */
  free_mem (&base_category_string);
  free_mem (&allo_format);
  terminate_lex_compiler ();
  terminate_symbols ();
  terminate_values ();
  free_switches ();

  free_mem (&rule_file);
  free_mem (&symbol_file);
  free_mem (&lexicon_file);
  free_mem (&project_file);
  close_stream (&output_stream, NULL);

#ifdef HANGUL
  terminate_hangul ();
#endif

  return 0;
}

/* end of file ==============================================================*/
