/* this file contains code derived from : */
/***********************************************************************/
/*                                                                     */
/*                           Objective Caml                            */
/*                                                                     */
/*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
/*                                                                     */
/*  Copyright 1996 Institut National de Recherche en Informatique et   */
/*  en Automatique.  All rights reserved.  This file is distributed    */
/*  under the terms of the GNU Library General Public License.         */
/*                                                                     */
/***********************************************************************/

/* $Id: lexing.c,v 1.14 1999/11/17 18:57:11 xleroy Exp $ */

/* The table-driven automaton for lexers generated by camllex. */

#include "caml/fail.h"
#include "caml/mlvalues.h"
#include "caml/callback.h"
#include "caml/memory.h"

struct lexer_buffer {
  value refill_buff;
  value lex_buffer;
  value lex_buffer_len;
  value lex_abs_pos;
  value lex_start_pos;
  value lex_curr_pos;
  value lex_last_pos;
  value lex_last_action;
  value lex_eof_reached;
  value lex_mem;
};

struct lexing_table {
  value lex_base;
  value lex_backtrk;
  value lex_default;
  value lex_trans;
  value lex_check;
  value lex_base_code;
  value lex_backtrk_code;
  value lex_default_code;
  value lex_trans_code;
  value lex_check_code;
  value lex_code;
};

#if defined(ARCH_BIG_ENDIAN) || SIZEOF_SHORT != 2
#define Short(tbl,n) \
  (*((unsigned char *)((tbl) + (n) * 2)) + \
          (*((schar *)((tbl) + (n) * 2 + 1)) << 8))
#else
#define Short(tbl,n) (((short *)(tbl))[n])
#endif

#define ENGINE_PREFIX \
  int state, base, backtrk, c;						\
									\
  state = Int_val(start_state);						\
  lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos;	\
  lexbuf->lex_last_action = Val_int(-1);				\
									\
  while(1) {								\
    /* Lookup base address or action number for current state */	\
    base = Short(tbl->lex_base, state);					\
    if (base < 0) CAMLreturn (Val_int(-base-1));                        \
    /* See if it's a backtrack point */					\
    backtrk = Short(tbl->lex_backtrk, state);				\
    if (backtrk >= 0) {							\
      lexbuf->lex_last_pos = lexbuf->lex_curr_pos;			\
      lexbuf->lex_last_action = Val_int(backtrk);			\
    }                                                                   \
    if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len)	{               \
      if (lexbuf->lex_eof_reached == Val_true)			        \
	{ c = 0; goto suffix; }                              	        \
      else								\
        { 								\
          callback(lexbuf->refill_buff, (value)lexbuf);                 \
	  if (lexbuf->lex_eof_reached == Val_true)			\
	    { c = 0; goto suffix; }					\
	}                                                               \
    }
	      
	  

 
#define ENGINE_SUFFIX							\
    suffix:                                                             \
    /* Determine next state */						\
    if (Short(tbl->lex_check, base + c) == state)			\
      state = Short(tbl->lex_trans, base + c);				\
    else								\
      state = Short(tbl->lex_default, state);				\
    /* If no transition on this char, return to last backtrack point */	\
    if (state < 0) {							\
      lexbuf->lex_curr_pos = lexbuf->lex_last_pos;			\
      CAMLreturn (lexbuf->lex_last_action);				\
    }else{								\
      /* Erase the EOF condition only if the EOF pseudo-character was	\
         consumed by the automaton (i.e. there was no backtrack above)	\
       */								\
      if (c == 0) lexbuf->lex_eof_reached = Val_bool (0);		\
    }									\
  }									\

#define cur_char(res,lexbuf) \
   res = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos)); \
   lexbuf->lex_curr_pos += 2

#define get_and_refill(res,lexbuf)	     				 \
  if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len) {	        	 \
    if (lexbuf->lex_eof_reached == Val_true)				 \
      failwith("Unexpected end of stream during lexing");		 \
    else								 \
      {									 \
        callback(lexbuf->refill_buff, (value)lexbuf);                    \
	if (lexbuf->lex_eof_reached == Val_true)			 \
	  failwith("Unexpected end of stream during lexing");		 \
      }									 \
  }                                                                      \
  cur_char(res,lexbuf)
  
value lex_engine_classify_fun(
	       value classify,
	       struct lexing_table *tbl, 
	       value start_state, 
	       struct lexer_buffer *lexbuf)     /* ML */
{
  CAMLparam3(classify, *((value*)&tbl), *((value*)&lexbuf));
  ENGINE_PREFIX
    c = Int_val( callback(classify, (value)lexbuf) );
  ENGINE_SUFFIX
}

value lex_engine_8bit(
	       value class_table,
	       struct lexing_table *tbl, 
	       value start_state, 
	       struct lexer_buffer *lexbuf)     /* ML */
{
  CAMLparam3(class_table, *((value*)&tbl), *((value*)&lexbuf));
  ENGINE_PREFIX
    cur_char(c,lexbuf);
    c = Int_val(Field(class_table, c));
  ENGINE_SUFFIX
}

value lex_engine_tiny_8bit(
	       value class_table,
	       struct lexing_table *tbl, 
	       value start_state, 
	       struct lexer_buffer *lexbuf)     /* ML */
{
  CAMLparam3(class_table, *((value*)&tbl), *((value*)&lexbuf));
  ENGINE_PREFIX
    cur_char(c,lexbuf);
    c = Byte_u(class_table, c);
  ENGINE_SUFFIX
}

value lex_engine_tiny_utf8(
	       value class_table,
	       value others,
	       struct lexing_table *tbl, 
	       value start_state, 
	       struct lexer_buffer *lexbuf)     /* ML */
{
  CAMLparam4(class_table, others, *((value*)&tbl), *((value*)&lexbuf));
  int val;
  int classes_in_table = string_length(class_table);

  ENGINE_PREFIX
#define next(dec)				\
	get_and_refill(c,lexbuf);		\
	if ( c >= 0x80 && c < 0xc0 )		\
	  val |= (c-0x80)<<dec;	\
	else					\
	  goto invalid_utf

    cur_char(c,lexbuf);
    if (c < 0x80)
      val = c;
    else if ( c >= 0xc0 && c < 0xe0 )
      {
	val = (c - 0xc0)<<6;
	next(0);
      }
    else if ( c >= 0xe0 && c < 0xf0 )
      {
	val = (c-0xe0)<<12;
	next(6);
	next(0);
      }
    else if ( c >= 0xf0 && c < 0xf8 )
      {
	val = (c-0xf0)<<18;
	next(12);
	next(6);
	next(0);
      }
    else 
      goto invalid_utf;
#undef next

    if (val >= classes_in_table)
      c = Int_val(callback(others, Val_int(val)));
    else
      c = Byte_u(class_table, val);
    // printf("UTF-8 code point: %i   class: %i\n", val, c);
  ENGINE_SUFFIX

invalid_utf:
  failwith ("Invalid UTF-8 stream");
}

