(*pp ocamlrun ../common/version_filter -pp "camlp4o -impl" *)
(************************************************************
 *
 * A part of Regexp/OCaml module.
 * 
 * (c) 2002-2003 Yutaka Oiwa. All Rights Reserved.
 *
 * This file is distributed under the terms of the Q Public
 * License version 1.0.
 *
 ************************************************************)
(* $Id: translator.ml,v 1.20 2004/06/09 21:29:47 oiwa Exp $ *)

open Printf

open Pcaml

open Parse_regexp

#load "pa_extend.cmo";;
#load "q_MLast.cmo";;

#if 3.06 | 3.07pre | 3.07 | 3.07_5
#else
open Lexing
#endif

let package = "regexp"

let debug = 
  try ignore (Sys.getenv "MR_DEBUG"); true with _ -> false

let no_compile = ref 
  (try ignore (Sys.getenv "MR_NO_COMPILE"); true with _ -> false)

let debug_no_compile () = 
  if !no_compile then "true" else "false"

let _ = if debug then
  (Printf.eprintf "LOADED translator (%s).\n" (debug_no_compile ()); flush stderr)

let auto_converter = 
  ref
    [ "int", "int_of_string";
      "float", "float_of_string";
      "bool", "bool_of_string";
      "string", "";
      "int32", "Int32.of_string";
      "int64", "Int64.of_string";
      "nativeint", "Nativeint.of_string";
    ]

let cflags_table = []
  

(*--- main body ---*)

type cvt_pattern = 
  | Raw
  | TypeConvert of MLast.ctyp * bool
  | FuncConvert of MLast.expr

type bind_pattern = 
    Bind of string * cvt_pattern * MLast.expr option
  | Discard

type patmatch_clause = 
    { pm_options : string list;
      pm_regexp : string;
      pm_regexp_loc : MLast.loc;
      pm_pattern : (bind_pattern list) option;
      pm_when : MLast.expr option;
      pm_body : MLast.expr;
      pm_loc : MLast.loc }

type clause = 
    Wildcard of string option * MLast.expr
  | PatMatch of patmatch_clause

let compiled_regexp_nocache_ast re ~loc = 
  let re = pcre_string_of_regexp re in
  <:expr<Pcre.regexp ~flags : [`DOLLAR_ENDONLY; `DOTALL] $str: String.escaped re$>>

let compiled_regexp_ast flags re ~loc = 
  let re_str = pcre_string_of_regexp re in
  match
    Declare_once.lookup_cached ~package re_str
  with
    Some s -> <:expr< $lid:s$ >>
  | None -> begin
      let loc = Declare_once.dummy_loc in
      let re_exp = compiled_regexp_nocache_ast re loc in
      let name = Declare_once.gensym ~package in
      let exp = <:expr< $lid:name$ >> in
      Declare_once.declare ~package ~tag:re_str name
	(Declare_once.Expr re_exp);
      if debug then
	(Printf.eprintf "PRECOMPILE(%s): %s for /%s/\n"
	   (debug_no_compile ()) name re_str; flush stderr);
      exp
  end

let rec convert_modpath_to_expr ~loc str = 
  try
    let p = String.index str '.' in
    let before, after =
      String.sub str 0 p, 
      String.sub str (p + 1) (String.length str - p - 1) in
    <:expr< $uid:before$ . $ convert_modpath_to_expr ~loc after $ >>
  with
    Not_found ->
      <:expr< $lid:str$ >>

let get_converter ~loc typ = 
  let rec iter = function
      <:ctyp< t >> ->
	<:expr< of_string >>
    | <:ctyp< $lid:typ$ >> -> begin
	try
	  let fname = List.assoc typ !auto_converter in
	  convert_modpath_to_expr fname ~loc
	with
	  Not_found ->
	    <:expr< $lid:(typ ^ "_of_string")$ >>
    end
    | <:ctyp< $uid:m$ . $t$ >> ->
	<:expr< $uid:m$ . $ iter t $ >>
    | ty -> 
	Stdpp.raise_with_loc (MLast.loc_of_ctyp ty)
	  (Failure "unknown type kind: cannot generate auto-conversion")
  in
  match typ with
    <:ctyp< string >> -> None
  | _ -> Some (iter typ)

let get_converted_exp ~loc typ exp = 
  match get_converter ~loc typ with
    None -> <:expr< ( $exp$ : $typ$ ) >>
  | Some f -> <:expr< ( $f$ $exp$ : $typ$ ) >>

let compiled_regexp flags re ~loc = 
  if !no_compile then
    compiled_regexp_nocache_ast re ~loc
  else
    compiled_regexp_ast flags re ~loc

let convert_pattern_match p ~argvar ~tempname rest = 
  let loc = p.pm_loc in
  let failwith s = 
    Stdpp.raise_with_loc loc (Failure s)
  in
  let parsed_regexp = 
    try parse_regexp p.pm_regexp with
      Parse_regexp.Parse_error(st,ed,s) ->
#if 3.06 | 3.07pre | 3.07 | 3.07_5
	let cpos_start = fst p.pm_regexp_loc in
#else
        let loc_start = fst p.pm_regexp_loc in
	let cpos_start = loc_start.pos_cnum in
#endif
	let cpos = cpos_start + 1 in
#if 3.06 | 3.07pre | 3.07 | 3.07_5
	let loc = (cpos + st, cpos + ed) in
#else
	let loc = ({ loc_start with pos_cnum = cpos + st},
		   { loc_start with pos_cnum = cpos + ed}) in
#endif
	Stdpp.raise_with_loc loc
	  (Failure (sprintf "bad regexp: %s" s))
    | Failure s -> 
	  Stdpp.raise_with_loc p.pm_regexp_loc
	  (Failure ("bad regexp: " ^ s))
  in
  let backref_type = make_backref_table parsed_regexp in
  let re_exp = compiled_regexp [](*flags*) parsed_regexp ~loc in
  let bindings = 
    match (p.pm_pattern : bind_pattern list option) with
      None -> []
    | Some bind_patterns ->
	if List.length bind_patterns <> Array.length backref_type then
	  failwith (sprintf "invalid regexp capture count: %d should be %d"
				     (List.length bind_patterns) (Array.length backref_type)) else ();
	snd (List.fold_left
	  (fun (cnt, prev_binds) var ->
	    match var with
	      Discard -> cnt + 1, prev_binds
	    | Bind(id, conversion, default) ->
		let rhs = 
		  match backref_type.(cnt) with
		    BR_List ->
		      failwith 
			(sprintf "regexp binding #%d may match repeatedly: cannot be bound to variable"
			   (cnt + 1))
		  | BR_Normal -> begin
		      (
		       match default with
			 Some _ ->
			   failwith 
			     (sprintf 
				"regexp binding #%d is not optional: default value not allowed"
				(cnt + 1))
		       | None -> ()
		      );
		      let rhs_str = 
			<:expr< Pcre.get_substring
			  $lid:tempname$
			$int:(string_of_int (cnt + 1))$ >> in
		      match conversion with
			Raw -> rhs_str
		      | TypeConvert(typ,false) ->
			  get_converted_exp ~loc typ rhs_str
		      | TypeConvert(_,true) ->
			  failwith
			    (sprintf 
			       "regexp binding #%d is not optional: option type not allowed"
			       (cnt + 1))
		      | FuncConvert(exp) ->
			  <:expr< ($exp$ $rhs_str$) >>
		    end
		  | BR_Optional -> begin
		      let rhs_str = 
			<:expr< try
			  Some (do { ignore (Pcre.get_substring_ofs
					       $lid:tempname$
					     $int:(string_of_int (cnt + 1))$); 
				     Pcre.get_substring
				       $lid:tempname$
				     $int:(string_of_int (cnt + 1))$ })
			with [ Not_found -> None ] >> in
		      match default, conversion with
			None, Raw -> rhs_str
		      |	Some d, Raw ->
			  ( <:expr< match $rhs_str$ with [ Some s -> s | None -> $d$ ] >> )
		      | None, TypeConvert(_, false) ->
			  failwith
			    (sprintf "regexp binding #%d is optional: option type required"
			       (cnt + 1))
		      | None, TypeConvert(typ, true) -> begin
			  match get_converter ~loc typ with
			    None -> <:expr< $rhs_str$ >>
			  | Some convert_func ->
			      ( <:expr< 
				match $rhs_str$ with 
				  [ Some $lid:tempname$ ->
				    Some ($convert_func$ $lid:tempname$ : $typ$)
				| None -> None ] >> )
		      end
		      | Some d, TypeConvert(typ, false) ->
			  let converted = get_converted_exp ~loc typ <:expr<$lid:tempname$>> in
			  ( <:expr< 
			    match $rhs_str$ with 
			      [ Some $lid:tempname$ -> $converted$
			    | None -> $d$ ] >> )
		      | Some _, TypeConvert(_, true) ->
			  failwith
			    (sprintf "regexp binding #%d has default value: option type not allowed"
			       (cnt + 1))
		      | Some d, FuncConvert(exp) ->
			  ( <:expr< 
			    match $rhs_str$ with 
			      [ Some $lid:tempname$ -> ($exp$ $lid:tempname$)
			    | None -> $d$ ] >> )
		      |	None, FuncConvert(exp) ->
			  ( <:expr< 
			    match $rhs_str$ with 
			      [ Some $lid:tempname$ -> Some ($exp$ $lid:tempname$)
			    | None -> None ] >> )
		  end
		in
		cnt + 1, (prev_binds @ [id, rhs]))
	  (0, []) bind_patterns)
  in
  match p.pm_when with
    None -> begin
      (* no when clause: simple version *)
      let body_with_bind = 
	List.fold_right 
	  (fun (id,exp) b ->
	    <:expr< let $lid:id$ = $exp$ in $b$ >>)
	  bindings p.pm_body
      in
      <:expr<
      match 
	try Some (Pcre.exec ~rex : $re_exp$ $lid:argvar$) with [Not_found -> None]
      with
	[ Some($lid:tempname$) -> $body_with_bind$
      | None -> $rest$ ]
	  >>
    end
  | Some e -> begin
      (* we have when clause: use tuple *)
      let idlist, rhslist = List.split bindings in
      let idlistp = List.map (fun id -> <:patt<$lid:id$>>) idlist in
      let idlistv = List.map (fun id -> <:expr<$lid:id$>>) idlist in
      let idlist_with_bind = 
	List.fold_right 
	  (fun (id,exp) b ->
	    <:expr< let $lid:id$ = $exp$ in $b$ >>)
	  bindings <:expr< Some ($list:idlistv$) >>
      in
      <:expr<
      match
	match 
	  try 
	    Some (Pcre.exec ~rex :$re_exp$ $lid:argvar$)
	  with
	    [ Not_found -> None ]
	with
	  [ Some ($lid:tempname$) -> $idlist_with_bind$
	| None -> None ]
      with
	[ Some($list:idlistp$) when $e$ -> $p.pm_body$
      | _ -> $rest$ ]
	  >>
  end

let parse_optional = function
    <:ctyp< option $t$ >> -> t, true
  | t -> t, false

let _ = EXTEND
  GLOBAL: expr;
  expr: LEVEL "expr1"
    [[ "match"; "with_regexp"; m = regexpmatch_clause -> m ] |
     [ UIDENT "Regexp"; "."; "match"; m = regexpmatch_clause -> m ]];
  regexpmatch_clause:
    [[ e = expr; "with"; 
      OPT "|"; c = LIST1 match_regexp_match_clause SEP "|"
      ->
       let argvar = "_regexp_str" in
       let tempname = "_regexp_sub" in
       ignore (List.fold_left
		 (fun ok (clause, loc) ->
		   if not ok then 
		     Stdpp.raise_with_loc loc (Failure "wildcard pattern must be last clause");
		   match clause with
		     Wildcard _ -> false
		   | PatMatch _ -> ok) true c);
       let body = List.fold_right
	   (fun clause rest ->
	     match clause with
	       Wildcard(Some p, body), _ ->
		 <:expr<let $lid:p$ = $lid:argvar$ in $body$ >>
	     | Wildcard(None, body), _ ->
		 <:expr< $body$ >>
	     | PatMatch p, loc ->
		 convert_pattern_match p ~argvar ~tempname rest
	   )
	   c
(*	   <:expr< raise (Match_failure($str:!Pcaml.input_file$, 
					$int:string_of_int (fst loc)$,
					$int:string_of_int (snd loc)$)) >>*)
	   <:expr< match $lid:argvar$ with [] >>
       in
       <:expr< let $lid:argvar$ = $e$ in $body$>>
   ]]
    ;
  expr: LEVEL "apply"
    [[ "compile_regexp";
       o = OPT [ "["; opts = LIST0 re_opts SEP ";"; "]" -> opts ];
       r = STRING ->
	 let re_opts = match o with None -> [] | Some l -> l in
	 (* let cflags_now = List.map (fun f -> List.assoc f cflags_table) re_opts in *)
	 let re_now =
	   try parse_regexp r
	   with _ -> 
	     failwith (sprintf "invalid regexp \"%s\": cannot be parsed" r)
	 in
	 compiled_regexp re_opts re_now ~loc
     ]]
    ;
  match_regexp_match_clause:
    [[ 
     (o, r, lr, c, l) = [ 
     o = OPT [ "["; opts = LIST0 re_opts SEP ";"; "]" -> opts ];
     (r, lr) = [ r = STRING -> r, loc ]; 
     c = OPT [ "as"; c = LIST1 match_regexp_var_pattern SEP "," -> c ]
     -> (o, r, lr, c, loc) ];
     w = OPT [ "when"; w = expr -> w ]; "->"; b = expr 
     -> PatMatch
	 { 
	   pm_options = (match o with None -> [] | Some l -> l);
	   pm_regexp = r;
	   pm_regexp_loc = lr;
	   pm_pattern = c;
	   pm_when = w;
	   pm_body = b;
	   pm_loc = l
	 }, loc
   | p = ident_pattern; "->"; e = expr
     -> Wildcard(p, e), loc ]]
    ;
  match_regexp_var_pattern:
    [[
     i = LIDENT;
     c1 = OPT  
       [ ">"; t = expr LEVEL "simple" -> FuncConvert(t) 
       | ":"; t = ctyp LEVEL "star"
	 ->
	   let base, is_option =
	     parse_optional t in
	   TypeConvert(base, is_option)];
     d = OPT
       [ "="; e = expr LEVEL "simple" -> e ];
     c2 = OPT 
       [ ":"; t = ctyp LEVEL "star"
	 ->
	   let base, is_option =
	     parse_optional t in
	   TypeConvert(base, is_option) ]
     -> Bind 
	 (i,
	  (match c1, c2 with 
	    None, None -> Raw 
	  | Some c1, None -> c1
	  | None, Some c2 -> c2
	  | Some c1, Some c2 ->
	      failwith "duplicated type specifier"
	  ), d)
    | "_" -> Discard 
    | "("; c = SELF; ")" -> c
   ]]
    ;
  ident_pattern:
    [[
     c = LIDENT -> Some c
  | "_" -> None ]]
    ;
  re_opts:
    [[ "`"; i = UIDENT -> 
      if List.mem_assoc i cflags_table then i 
      else failwith ("unknown regexp option " ^ i) ]]
    ;
END

let _ = Pcaml.add_option "-no-precompile-regexp" 
    (Arg.Unit
       (fun () ->
	 if debug then (Printf.eprintf "NO COMPILE (%s) -> " (debug_no_compile ()));
	 no_compile := true;
	 if debug then (Printf.eprintf "(%s).\n" (debug_no_compile ()); flush stderr)
       ))
    "not pre-compiling regexps."

