MODULE XML:Basic:Parser [OOC_EXTENSIONS];
(**This parser builds on the non-validating parser @omodule{*XML:Parser}.  It
   uses the element description facilities from @omodule{*XML:Basic:Element} to
   perform syntactical and semantical checks, as well as to construct a data
   structure from an XML document.  It is intended for small XML documents,
   like configuration files or messages.

   This version of the ``basic'' parser does not support namespaces.  *)

IMPORT
  Ch := Channel, Msg, URI,
  XMLError := XML:Error, XML:DTD, XML:Builder, XML:UnicodeCodec, 
  XML:Locator, XML:Parser, XML:Builder:Validation, XML:Error,
  XML:Basic:Element;

CONST
  initialNestingDepth = 4;

CONST  (* legal values for `flags' argument of `Parse': *)
  validate* = 0;
  
TYPE
  ElementStack = POINTER TO ARRAY OF Element.Element;
  Build = POINTER TO BuildDesc;
  BuildDesc = RECORD
    (Builder.BuilderDesc)
    errorListener: Locator.ErrorListener;
    locator: Locator.Locator;
    noerr: BOOLEAN;
    
    rootFactory: Element.RootFactory;
    stack: ElementStack;
    nestingLevel: LONGINT;
    (* stack[nestingLevel] holds the current top-most element; nestingLevel=-1
       means that the parser is before the root element, nestingLevel=-2 means
       that it is behind the root element *)
    maxNestingDepth: LONGINT;
    
    root: Element.Element;
  END;

CONST (* error codes *)
  invalidRootElement = 1;
  junkAfterDocument = 2;
  namespacesNotSupported = 3;
  nestingLevelOverflow = 4;

  invalidAttributeName = 10;
  invalidAttributeValue = 11;
  requiredAttributeMissing = 12;
  
  invalidChildName = 20;
  invalidChildElement = 21;
  requiredChildMissing = 22;
  nonWhitespaceContent = 23;
  elementCheckFailed = 24;

VAR
  msgParserContext: Error.Context;
  (* the context in which all error messages generated by this module
     are interpreted *)


(* ------------------------------------------------------------------------ *)

PROCEDURE InitBuild (b: Build; rootFactory: Element.RootFactory;
                    maxNestingDepth: LONGINT);
(* maximum number of nested elements below the root element; a value of zero
   means that there are no elements below the root, one means that the child
   element of root have no childs, and so on *)
  VAR
    i: LONGINT;
  BEGIN
    Builder.Init (b);
    b. rootFactory := rootFactory;
    b. locator := NIL;
    b. errorListener := NIL;
    b. noerr := TRUE;
    b. maxNestingDepth := maxNestingDepth;
    NEW (b. stack, initialNestingDepth+1);
    FOR i := 0 TO initialNestingDepth DO
      b. stack[i] := NIL
    END;
    b. nestingLevel := -1;
    b. root := NIL;
  END InitBuild;

PROCEDURE (b: Build) Error (code: Error.Code);
  VAR
    msg: Error.Msg;
  BEGIN
    IF b. noerr THEN
      msg := b. errorListener. Error (msgParserContext, code, TRUE, b. locator^);
      b. noerr := FALSE
    END
  END Error;
  
PROCEDURE (b: Build) ErrorName (code: Error.Code; name: DTD.String);
  VAR
    msg: Error.Msg;
  BEGIN
    IF b. noerr THEN
      msg := b. errorListener. Error (msgParserContext, code, TRUE, b. locator^);
      msg. SetLStringAttrib ("name", Msg.GetLStringPtr (name^));
      b. noerr := FALSE
    END
  END ErrorName;
  
PROCEDURE (b: Build) ErrorString (code: Error.Code; string: DTD.String);
  VAR
    msg: Error.Msg;
  BEGIN
    IF b. noerr THEN
      msg := b. errorListener. Error (msgParserContext, code, TRUE, b. locator^);
      msg. SetLStringAttrib ("string", Msg.GetLStringPtr (string^));
      b. noerr := FALSE
    END
  END ErrorString;
  
PROCEDURE (b: Build) SetLocator* (locator: Locator.Locator);
  BEGIN
    b. locator := locator
  END SetLocator;
  
PROCEDURE (b: Build) SetErrorListener* (el: Locator.ErrorListener);
  BEGIN
    b. errorListener := el
  END SetErrorListener;
  
PROCEDURE (b: Build) StartElement* (namespaceDecl: DTD.NamespaceDeclaration; localName: DTD.String);
  VAR
    newStack: ElementStack;
    i: LONGINT;
  BEGIN
    IF (namespaceDecl # NIL) THEN
      b. Error (namespacesNotSupported)
    END;
    IF (b. nestingLevel = -1) THEN
      b. stack[0] := b. rootFactory. NewElement (localName);
      IF (b. stack[0] = NIL) THEN
        b. Error (invalidRootElement)
      ELSE
        INC (b. nestingLevel)
      END
    ELSIF (b. nestingLevel = -2) THEN
      b. Error (junkAfterDocument)
    ELSIF (b. maxNestingDepth >= 0) &
          (b. nestingLevel = b. maxNestingDepth) THEN
      b. Error (nestingLevelOverflow)
    ELSIF ~b. stack[b. nestingLevel]. IsChildName (localName) THEN
      b. Error (invalidChildName)
    ELSE
      IF (b. nestingLevel+1 = LEN (b. stack^)) THEN
        NEW (newStack, LEN (b. stack^)*2);
        FOR i := 0 TO LEN (b. stack^)*2-1 DO
          IF (i <= b. nestingLevel) THEN
            newStack[i] := b. stack[i]
          ELSE
            newStack[i] := NIL
          END
        END
      END;
      
      b. stack[b. nestingLevel+1] :=
        b. stack[b. nestingLevel]. NewChild (localName);
      IF (b. stack[b. nestingLevel+1] = NIL) THEN
        b. Error (invalidChildElement)
      ELSE
        INC (b. nestingLevel);
        b. stack[b. nestingLevel]. SetLocator (b. locator)
      END
    END
  END StartElement;

PROCEDURE (b: Build) Attribute* (namespaceDecl: DTD.NamespaceDeclaration; 
                                 localName: DTD.String;
                                 attrDecl: DTD.AttrDecl;
                                 value: DTD.AttValue;
                                 specified: BOOLEAN);
  BEGIN
    IF (namespaceDecl # NIL) THEN
      b. Error (namespacesNotSupported)
    ELSIF ~b. stack[b. nestingLevel]. IsAttributeName (localName) THEN
      b. Error (invalidAttributeName)
    ELSIF ~b. stack[b. nestingLevel]. AddAttribute (localName, value) THEN
      b. Error (invalidAttributeValue)
    END
  END Attribute;

PROCEDURE (b: Build) AttributesDone*;
  VAR
    name: DTD.String;
  BEGIN
    name := b. stack[b. nestingLevel]. MissingAttribute();
    IF (name # NIL) THEN
      b. ErrorName (requiredAttributeMissing, name)
    END
  END AttributesDone;
  
PROCEDURE (b: Build) Characters* (VAR chars: DTD.StringVar;
                                 charsStart, charsEnd: DTD.CharPos;
                                 elementWhitespace: DTD.ElementWhitespaceType);
  PROCEDURE IsWhitespace (): BOOLEAN;
    VAR
      i: DTD.CharPos;
    BEGIN
      i := charsStart;
      WHILE (i # charsEnd) & (chars[i] <= " ") DO
        INC (i)
      END;
      RETURN (i = charsEnd)
    END IsWhitespace;
  
  BEGIN
    IF b. stack[b. nestingLevel]. HasMixedContent() THEN
      b. stack[b. nestingLevel]. AddCharacters (chars, charsStart, charsEnd)
    ELSIF ~IsWhitespace() THEN
      b. Error (nonWhitespaceContent)
    END
  END Characters;
  
PROCEDURE (b: Build) EndElement* (namespaceDecl: DTD.NamespaceDeclaration; localName: DTD.String);
  VAR
    name, errorMsg: DTD.String;
  BEGIN
    name := b. stack[b. nestingLevel]. MissingChild();
    IF (name # NIL) THEN
      b. ErrorName (requiredChildMissing, name)
    END;
    
    errorMsg := b. stack[b. nestingLevel]. CheckElement();
    IF (errorMsg # NIL) THEN
      b. ErrorString (elementCheckFailed, errorMsg)
    END;
    
    IF (b. nestingLevel = 0) THEN
      b. root := b. stack[b. nestingLevel];
      b. stack[b. nestingLevel] := NIL;
      b. nestingLevel := -1              (* the DEC at the end makes this -2 *)
    ELSE
      b. stack[b. nestingLevel-1]. AddChild (b. stack[b. nestingLevel]);
      b. stack[b. nestingLevel] := NIL;
    END;
    DEC (b. nestingLevel)
  END EndElement;


(*PROCEDURE ParseMIMEHeader* (textReader: TextRider.Reader;
                            VAR unicodeFactory: UnicodeCodec.Factory;
                            VAR byteReader: Ch.Reader);
  VAR
    line: ARRAY 1024 OF CHAR;
  BEGIN
    REPEAT
      textReader. ReadLine (line);
      (* should do proper parsing of MIME header here, but this is overkill
         for the usual case... *)
      Strings.Capitalize (line);
      ASSERT ((line = "") OR
              (line = "CONTENT-TYPE: APPLICATION/BEEP+XML"));
    UNTIL (textReader. res # TextRider.done) OR (line = "");
    unicodeFactory := UTF8.factory;      (* default encoding *)
    byteReader := textReader. byteReader;
    byteReader. SetPos (textReader. Pos())
  END ParseMIMEHeader;*)

PROCEDURE Parse* (byteReader: Ch.Reader;
                  baseURI: URI.URI;
                  unicodeFactory: UnicodeCodec.Factory;
                  rootFactory: Element.RootFactory;
                  maxNestingDepth: LONGINT;
                  flags: SET;
                  VAR errList: XMLError.List): Element.Element;
(**Parses an XML document from reader @samp{byteReader} request.  If
   @oparam{baseURI} is not @code{NIL}, or if @oconst{validate} is in
   @oparam{flags}, external references in the XML document are followed.  In
   this case, relative URIs are interpreted using the base URI
   @oparam{baseURI}.  If @oparam{unicodeFactory} is given, then the document is
   assumed to use its character encoding.

   The root element of the document is created by selecting the matching
   factory function from @oparam{rootFactory}.  An error is signalled if the
   nesting level of an element exceeds @oparam{maxNestingDepth}.

   The parser acts as a validating parser if @oconst{validate} is in
   @oparam{flags}.

   In case of an error, @oparam{errList} refers to the error list and result is
   @code{NIL}.  Otherwise, @oparam{errList} is @code{NIL} and an instance of
   the root element is returned.  *)
  VAR
    b: Build;
    parser: Parser.Parser;
    builder: Builder.Builder;
  BEGIN
    NEW (b);
    InitBuild (b, rootFactory, maxNestingDepth);
    
    IF (validate IN flags) THEN
      builder := Validation.New (b)
    ELSE
      builder := b
    END;
    
    parser := Parser.NewReader (byteReader, baseURI, unicodeFactory, NIL, builder);
    (* ... should disable XMLDecl and DoctypeDecl if flags are set *)
    parser. followExternalRef := (validate IN flags) OR (baseURI # NIL);
    parser. validating := (validate IN flags);
    
    parser. ParseDocument();
    IF (parser. errList. msgCount = 0) THEN
      errList := NIL;
      ASSERT (b. root # NIL);
      RETURN b. root
    ELSE
      errList := parser. errList;
      RETURN NIL
    END
  END Parse;

BEGIN
  msgParserContext := Error.NewContext ("XML:Basic:Parser");
  
  msgParserContext. SetString (invalidRootElement,
    "Unknown root element name");
  msgParserContext. SetString (junkAfterDocument,
    "Junk after root element");
  msgParserContext. SetString (namespacesNotSupported,
    "XML namespaces are not supported");
  msgParserContext. SetString (nestingLevelOverflow,
    "This child element exceeds the maximum nesting depth");
  
  msgParserContext. SetString (invalidAttributeName,
    "Invalid attribute name for this element");
  msgParserContext. SetString (invalidAttributeValue,
    "Invalid value for this attribute");
  msgParserContext. SetString (requiredAttributeMissing,
    "Attribute `${name}' is required for this element");
  
  msgParserContext. SetString (invalidChildName,
    "Invalid name for child element");
  msgParserContext. SetString (invalidChildElement,
    "This child element cannot appear at this position");
  msgParserContext. SetString (requiredChildMissing,
    "Child element `${name}' is required for this element");
  msgParserContext. SetString (nonWhitespaceContent,
    "Only whitespace allowed as content of this element");
  msgParserContext. SetString (elementCheckFailed,
    "${string}");
END XML:Basic:Parser.
