
module REParser (RETree(..), Collection, parse) where

import Data.Char (isDigit, isAlpha, isAlphaNum, isLower, isUpper,
                  isHexDigit, isSpace, isControl, isPrint)

type Collection = Int

data RETree = Epsilon
            | Char (Char -> Bool)       -- a, [abc]
            | Start                     -- ^
            | End                       -- $
            | Concat RETree RETree      -- a b
            | AtLeastOne RETree         -- a +
            | AnyNumber RETree          -- a *
            | Optional RETree           -- a ?
            | Exactly Int RETree        -- a { m }
            | AtLeast Int RETree        -- a { m, }
            | FromTo Int Int RETree     -- a { m, n }   (m >= n)
            | Or RETree RETree          -- a | b
            | Capture Collection RETree -- ( a )

instance Show RETree where
    show Epsilon = ""
    show (Char _) = "."
    show Start = "^"
    show End = "$"
    show (Concat t1 t2) = show t1 ++ show t2
    show (AtLeastOne t) = show t ++ "+"
    show (AnyNumber t) = show t ++ "*"
    show (Optional t) = show t ++ "?"
    show (Exactly t m) = show t ++ "{" ++ show m ++ "}"
    show (AtLeast t m) = show t ++ "{" ++ show m ++ ",}"
    show (FromTo t m n) = show t ++ "{" ++ show m ++ "," ++ show n ++ "}"
    show (Or t1 t2) = show t1 ++ "|" ++ show t2
    show (Capture _ t) = "(" ++ show t ++ ")"

parse :: String -> Either (RETree, Collection) String
parse xs = run_parser 0 (parse_regexp False <*  eoi) xs

parse_regexp :: Bool -> Parser RETree
parse_regexp in_parens
             = foldr1 Or <$> pSepList1 (pChar '|') (parse_branch in_parens)
           <|> pSucceed Epsilon

parse_branch :: Bool -> Parser RETree
parse_branch in_parens = foldr1 Concat <$> pMany1 (parse_atomop in_parens)
                     <|> pSucceed Epsilon

parse_atomop :: Bool -> Parser RETree
parse_atomop in_parens = flip ($) <$> parse_atom in_parens <*> parse_op
                     <|> Start <$  pChar '^'
                     <|> End <$  pChar '$'

parse_atom :: Bool -> Parser RETree
parse_atom in_parens
           = Capture <$> pCapture <*> pChar '('  *> parse_regexp True
                                  <*  pChar ')'
         <|> pChar '['  *> parse_group <*  pChar ']'
         <|> Char . (==) <$> pPred (\x -> x `notElem` special)
         <|> Char . (==) <$  pChar '\\' <*> pPred (\x -> x `elem` special)
         <|> Char ('\0' /=) <$  pChar '.'
    where special_base = ".[\\(*+?{|^$"
          special = if in_parens then ')':special_base else special_base

parse_group :: Parser RETree
parse_group = (Char . (not .)) <$  pChar '^' <*> parse_negated_group
          <|> Char <$> parse_negated_group
    where parse_negated_group :: Parser (Char -> Bool)
          parse_negated_group = (\p c -> c == ']' || p c)
                            <$  pChar ']'
                            <*> parse_group_body
                        <|>     parse_group_body
          parse_group_body :: Parser (Char -> Bool)
          parse_group_body = (\p1 p2 -> \c -> p1 c || p2 c)
                         <$> (    isAlphaNum     <$  pString "[:alnum:]"
                              <|> isAlpha        <$  pString "[:alpha:]"
                              <|> (`elem` " \t") <$  pString "[:blank:]"
                              <|> isControl      <$  pString "[:cntrl:]"
                              <|> isDigit        <$  pString "[:digit:]"
                              <|> (\c -> isPrint c && not (isSpace c))
                                                 <$  pString "[:graph:]"
                              <|> isLower        <$  pString "[:lower:]"
                              <|> isPrint        <$  pString "[:print:]"
                              <|> (\c -> isPrint c
                                      && not (isAlphaNum c || isSpace c))
                                                 <$  pString "[:punct:]"
                              <|> isSpace        <$  pString "[:space:]"
                              <|> isUpper        <$  pString "[:upper:]"
                              <|> isHexDigit     <$  pString "[:xdigit:]"
                              <|> pString "[:"  *> pFailCut
                              <|>     (\c1 c2 -> \c -> c1 <= c && c <= c2)
                                  <$> pPred (']' /=)
                                  <*  pChar '-'
                                  <*> pPred (']' /=)
                              <|> (==) <$> pPred (']' /=))
                          <*> parse_group_body
                      <|> pSucceed (const False)

parse_op :: Parser (RETree -> RETree)
parse_op = AtLeastOne <$  pChar '+'
       <|> AnyNumber <$  pChar '*'
       <|> Optional <$  pChar '?'
       <|>     (\m -> Exactly (read m))
           <$  pChar '{' <*> pMany1 pDigit <*  pChar '}'
       <|>     (\m -> AtLeast (read m))
           <$  pChar '{' <*> pMany1 pDigit <*  pString ",}"
       <|?>    ((\m n -> let m' = read m
                             n' = read n
                         in if m' <= n' then Just (FromTo m' n')
                                        else Nothing)
           <$  pChar '{' <*> pMany1 pDigit <*  pChar ','
                         <*> pMany1 pDigit <*  pChar '}')
       <|> pSucceed id

data Pos = Pos LinePos CharPos
         | EOI
    deriving (Eq, Ord)
type LinePos = Integer
type CharPos = Integer
type Parser a = Collection -> [(Char, Pos)] -> Res a
-- Pos in Succ is furthest we got before failing
data Res a = Succ !Pos Collection a [(Char, Pos)]
           | Fail !Pos
           | FailCut !Pos
    deriving Show

instance Show Pos where
    show (Pos l c) = "line " ++ show l ++ ", char " ++ show c
    show EOI = "end of input"

-- Primitives:

run_parser :: Collection -> Parser a -> String -> Either (a, Collection) String
run_parser n p xs = case p n (posify xs) of
                        Succ _ c x [] -> Left (x, c)
                        Succ pos _ _ _ -> Right (input_at pos)
                        Fail pos -> Right (err_at pos)
                        FailCut pos -> Right (err_at pos)
    where err_at pos = "Syntax error at " ++ show pos
          input_at pos = "Unconsumed input at " ++ show pos

posify :: String -> [(Char, Pos)]
posify = f (Pos 1 1)
    where f _ "" = []
          f p@(Pos l _) ('\n':xs) = ('\n', p):f (Pos (l+1) 1) xs
          f p@(Pos l c) (x:xs) = (x, p):f (Pos l (c+1)) xs
          f EOI _ = error "posify: Can't happen"

pos_of :: [(Char, Pos)] -> Pos
pos_of [] = EOI
pos_of ((_, p):_) = p

eoi :: Parser ()
eoi = \c xs -> case xs of
                   ((_, p):_) -> Fail p
                   [] -> Succ EOI c () []

pCapture :: Parser Collection
pCapture = \c xs -> Succ (pos_of xs) (c+1) c xs

(<*>) :: Parser (a -> b) -> Parser a -> Parser b
p <*> q = \c xs -> case p c xs of
                       Succ pp cp rp xs' ->
                          case q cp xs' of
                              Succ pq cq rq xs'' ->
                                  Succ (pp `max` pq) cq (rp rq) xs''
                              Fail pos -> Fail (pp `max` pos)
                              FailCut pos -> FailCut (pp `max` pos)
                       Fail pos -> Fail pos
                       FailCut pos -> FailCut pos

(<|>) :: Parser a -> Parser a -> Parser a
p <|> q = \c xs -> case p c xs of
                       FailCut p1 -> FailCut p1
                       Fail p1 ->
                           case q c xs of
                               Fail p2 -> Fail (p1 `max` p2)
                               FailCut p2 -> FailCut (p1 `max` p2)
                               Succ p2 c' x xs' -> Succ (p1 `max` p2) c' x xs'
                       s -> s

(<|?>) :: Parser a -> Parser (Maybe a) -> Parser a
p <|?> q = \c xs -> case p c xs of
                        FailCut p1 -> FailCut p1
                        Fail p1 ->
                            case q c xs of
                                Fail p2 -> Fail (p1 `max` p2)
                                FailCut p2 -> FailCut (p1 `max` p2)
                                Succ p2 _ Nothing _ -> Fail (p1 `max` p2)
                                Succ p2 c' (Just x) xs' ->
                                    Succ (p1 `max` p2) c' x xs'
                        s -> s

pSucceed :: a -> Parser a
pSucceed x = \c xs -> Succ (pos_of xs) c x xs

pFailCut :: Parser a
pFailCut = \_ xs -> FailCut (pos_of xs)

pPred :: (Char -> Bool) -> Parser Char
pPred p = \c xs -> case xs of
                       ((x, _):xs')
                        | p x -> Succ (pos_of xs') c x xs'
                       _ -> Fail (pos_of xs)

-- Derived:

(<$>) :: (a -> b) -> Parser a -> Parser b
f <$> q = pSucceed f <*> q

(<*) :: Parser a -> Parser b -> Parser a
p <* q = const <$> p <*> q

(*>) :: Parser a -> Parser b -> Parser b
p *> q = flip const <$> p <*> q

(<$) :: a -> Parser b -> Parser a
f <$ q = pSucceed f <*  q

pChar :: Char -> Parser Char
pChar c = pPred (c ==)

pString :: String -> Parser String
pString "" = pSucceed ""
pString (x:xs) = (:) <$> pChar x <*> pString xs

pDigit :: Parser Char
pDigit = pPred isDigit

pSepList1 :: Parser a -> Parser b -> Parser [b]
pSepList1 s p = (:) <$> p <*> pMany (s  *> p)

pMany1 :: Parser a -> Parser [a]
pMany1 p = (:) <$> p <*> pMany p

pMany :: Parser a -> Parser [a]
pMany p = (:) <$> p <*> pMany p
      <|> pSucceed []

infixl 3 <|>, <|?>
infixl 5 <*>, <$>, <*, <$

