(* $Id: netcgi_env.mli,v 1.7 2002/10/24 23:47:48 stolpmann Exp $
 * ----------------------------------------------------------------------
 *
 *)


(* How the request is delivered on the input channel. Only `Standard is
 * currently part of the type; the `Direct tag is commented out in the
 * definition and is described below for reference only.
 *)
type input_mode = 
    [ `Standard (* | `Direct *) ]

    (* `Standard: conforms to CGI
     * `Direct: input channel reads the complete HTTP message including header
     *   (not available: the tag is commented out in the type above)
     *)

(* Tracks which parts of the request have already been read.
 * NOTE(review): the tag names suggest the order
 *   `Start -> `Receiving_header -> `Received_header ->
 *   `Receiving_body -> `Received_body
 * but the permitted transitions are not spelled out in this interface;
 * confirm against the implementation.
 *)
type input_state =
    [ `Start | 
      `Receiving_header | `Received_header |
      `Receiving_body | `Received_body
    ]

(* How the response is written to the output channel. Only `Standard is
 * currently part of the type; the `Direct tag is commented out in the
 * definition and is described below for reference only.
 *)
type output_mode =
    [ `Standard (* | `Direct *) ]

    (* `Standard: conforms to CGI
     * `Direct: the full HTTP message is written to the output channel
     *    ("non-parsed header"); not available, since the tag is commented
     *    out in the type above
     *)

(* Tracks which parts of the response have already been sent. The
 * permitted transitions are listed in the comment following the type.
 *)
type output_state =
    [ `Start | 
      `Sending_header      | `Sent_header | 
      `Sending_body        | `Sent_body |
      `Sending_part_header | `Sent_part_header |
      `Sending_part_body   | `Sent_part_body |
      `End
    ]

   (* For single-part messages, the transitions are:
    *   `Start -> `Sending_header -> `Sent_header -> `Sending_body ->
    *   `Sent_body
    * For multi-part messages (e.g. "server push"):
    *   `Start -> `Sending_header -> `Sent_header -> ( `Sending_part_header ->
    *   `Sent_part_header -> `Sending_part_body -> `Sent_part_body )* ->
    *   `Sent_body
    * From the state `Sent_body it is possible to go back to `Sending_body:
    *   `Sent_body -> `Sending_body -> ... -> `Sent_body
    * This may happen if data is committed multiple times. `Sent_body
    * is something like a checkpoint.
    * The state `End means that no data must follow; it is the definite
    * end of the output stream:
    *   `Sent_body -> `End
    *)

(* Version of a protocol as a pair of integers; used as the version
 * component of the `Http case of the type protocol.
 *)
type protocol_version = 
    int * int           (* (major,minor) number *)

(* Additional attributes of the protocol; carried as a list in the `Http
 * case of the type protocol.
 * NOTE(review): `Secure_https presumably marks a connection secured by
 * SSL/TLS (compare the method cgi_https) -- confirm against the
 * implementation.
 *)
type protocol_attribute =
  [ `Secure_https
  ]

(* The protocol of the request, as returned by the method protocol of
 * cgi_environment:
 * - `Http((major,minor), attributes): HTTP with the given version and
 *   attribute list
 * - `Other: anything else
 *)
type protocol =
  [ `Http of (protocol_version * protocol_attribute list)
  | `Other
  ]

(* Workarounds that can be enabled in the field workarounds of cgi_config.
 * NOTE(review): the exact effect of each workaround is not described in
 * this interface; the tag names refer to a content-type bug of MSIE and
 * to a backslash bug. Confirm the details against the implementation.
 *)
type workaround =
  [ `Work_around_MSIE_Content_type_bug
  | `Work_around_backslash_bug
  ]


(* Configuration record of a CGI environment. The value default_config
 * provides the defaults.
 *)
type cgi_config =
    { (* System: *)
      tmp_directory : string;
        (* NOTE(review): presumably the directory where temporary files
         * are created -- confirm.
         *)
      tmp_prefix : string;
        (* NOTE(review): presumably the name prefix of temporary files
         * -- confirm.
         *)
      (* Limits: *)
      permitted_http_methods : string list;
        (* The HTTP methods that are accepted, e.g. "GET", "POST". *)
      permitted_input_content_types : string list;
        (* Content type parameters (like "charset") are ignored.
         * If the list is empty, all content types are allowed.
         *)
      input_content_length_limit : int;
        (* NOTE(review): presumably the maximum accepted value of the
         * content-length of the request -- confirm unit and enforcement.
         *)
      workarounds : workaround list;
        (* The workarounds that are enabled. *)
    }

(* The default configuration; its field values are listed in the comment
 * following the declaration.
 *)
val default_config : cgi_config

(* The default configuration is:
 * - tmp_directory: one of /var/tmp, /tmp, C:\temp, .
 * - tmp_prefix: "netstring"
 * - permitted_http_methods: "GET", "POST"
 * - permitted_input_content_types: "multipart/form-data", 
 *     "application/x-www-form-urlencoded"
 * - input_content_length_limit: max_int
 * - workarounds: all
 *)

(* DISCUSS: Is the configuration part of cgi_environment or cgi_activation?
 *)

(* The class type cgi_environment contains the operating means by which
 * the CGI activation is connected to the "outer world". In particular,
 * the following applies:
 *
 * - CGI properties: These are the global properties of the CGI request
 *   such as the HTTP method, which HTTP server serves the request, and
 *   which client sent the request.
 *   For a classic CGI environment, the properties are the environment
 *   variables not beginning with "HTTP_", and neither "CONTENT_LENGTH"
 *   nor "CONTENT_TYPE".
 * - Input header: The header of the HTTP request. 
 *   For a classic CGI environment, the input header can be extracted
 *   from the process environment. It consists of all variables beginning
 *   with "HTTP_" and the variables "CONTENT_LENGTH" and "CONTENT_TYPE".
 * - Input channel: Over the input channel the HTTP request can be read in.
 *   The input state tracks which parts of the request have already been
 *   read.
 *   For a classic CGI environment, the input channel contains only the
 *   body of the request, and the (required) header field content-length
 *   specifies the length of the body in bytes.
 * - Output header: The header of the HTTP response.
 * - Output channel: Over the output channel the HTTP response is sent.
 *   The output state tracks which parts of the response have already been
 *   sent.
 *
 * The CGI environment can be used not only for classic CGI but also for
 * non-standard ways of communicating with the HTTP server. By design,
 * the header and the body of both the request and the response are
 * separated, and because of this each of these message parts can be
 * processed independently of the other parts.
 *
 * There are ways of using the CGI environment that are regarded as bad
 * practice:
 * - Avoid sending the response header directly to the
 *   output channel. Instead, call the method
 *   send_output_header. This method may choose another way of transmitting
 *   the output header to the HTTP server.
 *
 * - The CGI activation objects expect that the input state is at least
 *   `Received_header, i.e. it is the task of the environment object to read
 *   the request header. It is bad practice to read the request header
 *   directly from the input channel.
 *)

(* The interface of a CGI environment: access to the configuration, the
 * CGI properties, the input header and input channel of the request, and
 * the output header and output channel of the response, together with
 * the input and output states.
 *)
class type cgi_environment =
object
  (* Configuration: *)
  method config : cgi_config
    (* Returns the configuration record of this environment. *)

  (* Standard CGI environment: *)
  method cgi_gateway_interface  : string
  method cgi_server_software    : string
  method cgi_server_name        : string
  method cgi_server_protocol    : string
  method cgi_server_port        : int option
  method cgi_request_method     : string
  method cgi_path_info          : string
  method cgi_path_translated    : string
  method cgi_script_name        : string
  method cgi_query_string       : string
  method cgi_remote_host        : string
  method cgi_remote_addr        : string
  method cgi_auth_type          : string
  method cgi_remote_user        : string
  method cgi_remote_ident       : string
    (* These methods return "" (or None in the case of cgi_server_port) if
     * the property is not available. This is possible because it is known
     * for these properties that "" has no meaning.
     *)

  (* Extensions: *)
  method cgi_property          : ?default:string -> string -> string
    (* Returns a (possibly non-standard) environment property. If the property
     * is not set, Not_found will be raised unless the ~default argument is 
     * passed, in which case the value of ~default is returned.
     * The passed unnamed string is the name of the property. The name is
     * case-sensitive.
     *
     * For example, cgi_gateway_interface is the same as
     * cgi_property ~default:"" "GATEWAY_INTERFACE".
     *
     * You can normally not access the HTTP header by calling cgi_property.
     * Use the method input_header_field instead.
     *)

  method cgi_https              : bool
    (* A well-known extension is the HTTPS property. It indicates whether
     * a secure connection is used (SSL/TLS). This method interprets this
     * property and returns true if the connection is secure.
     * This method fails if there is a HTTPS property with an unknown value.
     *)

  (* Convenience: *)
  method protocol : protocol
    (* Returns the protocol of the request as a value of type protocol.
     * NOTE(review): presumably derived from cgi_server_protocol and
     * cgi_https -- confirm against the implementation.
     *)

  (* Input header: *)

  method input_header : Netmime.mime_header
    (* The whole input header, i.e. the header of the HTTP request. *)

  method input_header_field : ?default:string -> string -> string
    (* Returns the value of a field of the input MIME header. If the field
     * does not exist, Not_found will be raised unless the ~default argument
     * is passed, in which case the value of ~default is returned.
     * If there are several fields with the passed name only the first field
     * will be returned.
     * The passed string is the name of the field. The name is
     * case-insensitive. If the name is a compound name, the parts are
     * separated by "-", e.g. "content-length". (These are the normal rules
     * for MIME headers.)
     *)

  method multiple_input_header_field : string -> string list
    (* Returns the values of all fields with the passed name of the input
     * MIME header.
     *)

  method input_header_fields : (string * string) list
    (* Returns the input header as (name,value) pairs. The names may consist
     * of lowercase or uppercase letters.
     *)

  (* Convenience: *)
  method user_agent : string
    (* NOTE(review): presumably the value of the "user-agent" input header
     * field; the result for a missing field is not documented here --
     * confirm against the implementation.
     *)
  method cookies : (string * string) list
    (* NOTE(review): presumably the cookies sent with the request as
     * (name,value) pairs -- confirm against the implementation.
     *)

  (* Input selection: *)

  method input_ch : Netchannels.in_obj_channel
    (* The input channel over which the HTTP request can be read in. For a
     * classic CGI environment, this channel contains only the body of the
     * request. It is bad practice to read the request header directly from
     * this channel.
     *)

  method input_content_length : int
    (* Returns the "content-length" input header field, or raises Not_found
     * if it is not set
     *)

  method input_content_type_string : string 
    (* Returns the "content-type" input header field or "" if it is not set *)

  method input_content_type : (string * (string * Mimestring.s_param) list)
    (* Returns the parsed "content-type" input header field, or raises
     * Not_found if it is not set. The result is a pair of a string and a
     * list of (name, parameter) pairs.
     * See Mimestring.scan_mime_type_ep.
     *)

  method input_state : input_state
    (* The input state tracks which parts of the request have already been
     * read.
     *)
  method set_input_state : input_state -> unit
      (* Should only be called by cgi_activation *)

  (* Output selection: *)

  method output_ch : Netchannels.out_obj_channel
    (* The output channel over which the HTTP response is sent. The
     * response header should not be written directly to this channel;
     * call send_output_header instead.
     *)

  (* Output header: *)

  method output_header : Netmime.mime_header
    (* The whole output header, i.e. the header of the HTTP response. *)

  method output_header_field : ?default:string -> string -> string
    (* Returns the value of a field of the output MIME header. If the field
     * does not exist, Not_found will be raised unless the ~default argument
     * is passed, in which case the value of ~default is returned.
     * If there are several fields with the passed name only the first field
     * will be returned.
     * The passed string is the name of the field. The name is
     * case-insensitive. If the name is a compound name, the parts are
     * separated by "-", e.g. "content-length". (These are the normal rules
     * for MIME headers.)
     *)

  method multiple_output_header_field : string -> string list
    (* Returns the values of all fields with the passed name of the output
     * MIME header.
     *)

  method output_header_fields : (string * string) list
    (* Returns the output header as (name,value) pairs. The names may consist
     * of lowercase or uppercase letters.
     *)

  method set_output_header_field : string -> string -> unit
    (* Sets the value of a field of the output header. The previous value, if 
     * any, is overwritten. If there have been multiple values, all values
     * will be removed and replaced by the single new value.
     * The first argument is the name of the field, the second the value.
     *)

  method set_multiple_output_header_field : string -> string list -> unit
    (* Sets multiple values of a field of the output header. Any previous
     * values are removed and replaced by the new values.
     *)

  method set_output_header_fields : (string * string) list -> unit
    (* Sets the complete output header at once. The argument is the list
     * of (name,value) pairs.
     *)

  method send_output_header : unit -> unit
    (* If the output state is `Start, this method will encode and send
     * the output header to the output channel, and the state will be
     * changed to `Sent_header.
     * The method will fail if the output state is not `Start.
     * Note that this method is usually automatically called by the
     * cgi_activation object.
     *)

  method output_state : output_state
    (* The output state tracks which parts of the response have already
     * been sent.
     *)
  method set_output_state : output_state -> unit
      (* Should only be called by cgi_activation *)

      (* Note: Setting the output state to `End causes the output
       * channel to be closed. This is the preferred way of closing the
       * channel.
       *)

end


(* Raised when an object of class std_environment is created but the
 * typical CGI environment variables are not set.
 *)
exception Std_environment_not_found
class std_environment : ?config:cgi_config -> unit -> cgi_environment
  (* The environment for classic CGI:
   * The input channel is stdin, the input environment comes from the process
   * environment, the output channel is stdout, the output content type
   * is text/html.
   * The new operator will raise Std_environment_not_found if the typical
   * CGI environment variables are not set.
   * NOTE(review): if ~config is omitted, default_config is presumably
   * used -- confirm against the implementation.
   *)

class test_environment : ?config:cgi_config -> unit -> cgi_environment
  (* An environment for testing:
   * Interprets command-line arguments from Sys.argv as test arguments.
   * If there are no arguments, the user is asked interactively
   * for arguments.
   * NOTE(review): the accepted argument syntax is not described in this
   * interface; if ~config is omitted, default_config is presumably used
   * -- confirm against the implementation.
   *)


(* An environment that is set up by the caller: in addition to the
 * methods of cgi_environment it has setter methods for the CGI
 * properties, the input header, and the input and output channels.
 * After setup_finished has been called, the setter methods are disabled.
 *)
class custom_environment : ?config:cgi_config -> unit ->
object 
  inherit cgi_environment

  method set_cgi :
           ?gateway_interface:string ->
           ?server_software:string ->
           ?server_name:string ->
           ?server_protocol:string ->
           ?server_port:int option ->
           ?request_method:string ->
           ?path_info:string ->
           ?path_translated:string ->
           ?script_name:string ->
           ?query_string:string ->
           ?remote_host:string ->
           ?remote_addr:string ->
           ?auth_type:string ->
           ?remote_user:string ->
           ?remote_ident:string ->
	   ?https:bool ->
           ?property:(string * string) ->
	   unit ->
	     unit
      (* Sets CGI properties. The labels correspond to the cgi_* methods
       * of cgi_environment with the same names.
       * Because ?server_port has the type int option, a port must be
       * passed as ~server_port:(Some n).
       * NOTE(review): ?property presumably sets one additional property
       * given as (name,value) pair, and properties whose argument is
       * omitted presumably remain unchanged -- confirm against the
       * implementation.
       *)

  method set_input_header_field : string -> string -> unit
  method set_multiple_input_header_field : string -> string list -> unit
  method set_input_header_fields : (string * string) list -> unit
      (* NOTE(review): these presumably modify the input header in the
       * same way as the corresponding set_output_header_field,
       * set_multiple_output_header_field and set_output_header_fields
       * methods modify the output header -- confirm.
       *)
  method set_input_ch : Netchannels.in_obj_channel -> unit
      (* Sets the input channel (see the method input_ch). *)
  method set_input_content_length : int -> unit
  method set_input_content_type : string -> unit
      (* NOTE(review): presumably set the "content-length" and
       * "content-type" input header fields -- confirm.
       *)
  method set_output_ch : Netchannels.out_obj_channel -> unit
      (* Sets the output channel (see the method output_ch). *)

  method setup_finished : unit -> unit
      (* Disable further invocations of 'set_*' methods *)
end

(* ======================================================================
 * History:
 * 
 * $Log: netcgi_env.mli,v $
 * Revision 1.7  2002/10/24 23:47:48  stolpmann
 * 	Support for the HEAD method.
 * 	Workaround for a bug in MSIE: Empty cookies are represented
 * in the wrong way
 *
 * Revision 1.6  2002/01/14 01:12:10  stolpmann
 * 	Representing input and output headers as Netmime.mime_header.
 * 	Removed the method _cgi, and many references to the Cgi module.
 *
 * Revision 1.5  2001/10/04 01:04:58  stolpmann
 * 	Moved from directory /src/netstring to /src/cgi.
 *
 * Revision 1.4  2001/10/04 00:56:12  stolpmann
 * 	Implemented class [custom_environment].
 * 	Fixed method [user_agent].
 *
 * Revision 1.3  2001/09/30 00:03:28  stolpmann
 * 	Documentation only
 *
 * Revision 1.2  2001/09/27 22:00:43  stolpmann
 * 	Changed type protocol_attribute.
 *
 * Revision 1.1  2001/09/24 21:26:54  stolpmann
 * 	Initial revision (compiles, but untested)
 *
 * 
 *)
