Blame src/Scan.x.boot

Packit 2cbdf3
-------------------------------------------------------------------------------
Packit 2cbdf3
--                  ALEX SCANNER AND LITERATE PREPROCESSOR
Packit 2cbdf3
--
Packit 2cbdf3
-- This Script defines the grammar used to generate the Alex scanner and a
Packit 2cbdf3
-- preprocessing scanner for dealing with literate scripts.  The actions for
Packit 2cbdf3
-- the Alex scanner are given separately in the Alex module.
Packit 2cbdf3
--
Packit 2cbdf3
-- See the Alex manual for a discussion of the scanners defined here.
Packit 2cbdf3
--
Packit 2cbdf3
-- Chris Dornan, Aug-95, 4-Jun-96, 10-Jul-96, 29-Sep-97
Packit 2cbdf3
-------------------------------------------------------------------------------
Packit 2cbdf3
Packit 2cbdf3
{
Packit 2cbdf3
module Scan (lexer, AlexPosn(..), Token(..), Tkn(..), tokPosn) where
Packit 2cbdf3
Packit 2cbdf3
import Data.Char
Packit 2cbdf3
import ParseMonad
Packit 2cbdf3
--import Debug.Trace
Packit 2cbdf3
}
Packit 2cbdf3
Packit 2cbdf3
$digit    = 0-9
Packit 2cbdf3
$hexdig   = [0-9 A-F a-f]
Packit 2cbdf3
$octal    = 0-7
Packit 2cbdf3
$lower    = a-z
Packit 2cbdf3
$upper    = A-Z
Packit 2cbdf3
$alpha    = [$upper $lower]
Packit 2cbdf3
$alphanum = [$alpha $digit]
Packit 2cbdf3
$idchar   = [$alphanum \_ \']
Packit 2cbdf3
Packit 2cbdf3
$special    = [\.\;\,\$\|\*\+\?\#\~\-\{\}\(\)\[\]\^\/]
Packit 2cbdf3
$graphic    = $printable # $white
Packit 2cbdf3
$nonspecial = $graphic # [$special \%]
Packit 2cbdf3
Packit 2cbdf3
@id     = $alpha $idchar*
Packit 2cbdf3
@smac   = \$ @id | \$ \{ @id \}
Packit 2cbdf3
@rmac   = \@ @id | \@ \{ @id \}
Packit 2cbdf3
Packit 2cbdf3
@comment = "--".*
Packit 2cbdf3
@ws      = $white+ | @comment
Packit 2cbdf3
Packit 2cbdf3
alex :-
Packit 2cbdf3
Packit 2cbdf3
@ws                             { skip }     -- white space; ignore
Packit 2cbdf3
Packit 2cbdf3
<0> \" [^\"]* \"                { string }
Packit 2cbdf3
<0> (@id @ws?)? \:\-            { bind }
Packit 2cbdf3
<0> \{ / (\n | [^$digit])       { code }
Packit 2cbdf3
<0> $special                    { special }  -- note: matches {
Packit 2cbdf3
<0> \% "wrapper"                { wrapper }
Packit 2cbdf3
<0> \% "encoding"               { encoding }
Packit 2cbdf3
<0> \% "action"                 { actionty }
Packit 2cbdf3
<0> \% "token"                  { tokenty }
Packit 2cbdf3
<0> \% "typeclass"              { typeclass }
Packit 2cbdf3
Packit 2cbdf3
<0> \\ $digit+                  { decch }
Packit 2cbdf3
<0> \\ x $hexdig+               { hexch }
Packit 2cbdf3
<0> \\ o $octal+                { octch }
Packit 2cbdf3
<0> \\ $printable               { escape }
Packit 2cbdf3
<0> $nonspecial # [\<]          { char }
Packit 2cbdf3
<0> @smac                       { smac }
Packit 2cbdf3
<0> @rmac                       { rmac }
Packit 2cbdf3
Packit 2cbdf3
<0> @smac @ws? \=               { smacdef }
Packit 2cbdf3
<0> @rmac @ws? \=               { rmacdef }
Packit 2cbdf3
Packit 2cbdf3
-- identifiers are allowed to be unquoted in startcode lists
Packit 2cbdf3
<0>             \<              { special `andBegin` startcodes }
Packit 2cbdf3
<startcodes>    0               { zero }
Packit 2cbdf3
<startcodes>    @id             { startcode }
Packit 2cbdf3
<startcodes>    \,              { special }
Packit 2cbdf3
<startcodes>    \>              { special `andBegin` afterstartcodes }
Packit 2cbdf3
Packit 2cbdf3
-- After a <..> startcode sequence, we can have a {...} grouping of rules,
Packit 2cbdf3
-- so don't try to interpret the opening { as a code block.
Packit 2cbdf3
<afterstartcodes> \{ (\n | [^$digit ])  { special `andBegin` 0 }
Packit 2cbdf3
<afterstartcodes> ()            { skip `andBegin` 0 }  -- note: empty pattern
Packit 2cbdf3
{
Packit 2cbdf3
Packit 2cbdf3
-- -----------------------------------------------------------------------------
Packit 2cbdf3
-- Token type
Packit 2cbdf3
Packit 2cbdf3
data Token = T AlexPosn Tkn
Packit 2cbdf3
  deriving Show
Packit 2cbdf3
Packit 2cbdf3
tokPosn (T p _) = p
Packit 2cbdf3
Packit 2cbdf3
data Tkn
Packit 2cbdf3
  = SpecialT Char
Packit 2cbdf3
  | CodeT String
Packit 2cbdf3
  | ZeroT
Packit 2cbdf3
  | IdT String
Packit 2cbdf3
  | StringT String
Packit 2cbdf3
  | BindT String
Packit 2cbdf3
  | CharT Char
Packit 2cbdf3
  | SMacT String
Packit 2cbdf3
  | RMacT String
Packit 2cbdf3
  | SMacDefT String
Packit 2cbdf3
  | RMacDefT String
Packit 2cbdf3
  | NumT Int
Packit 2cbdf3
  | WrapperT
Packit 2cbdf3
  | EncodingT
Packit 2cbdf3
  | ActionTypeT
Packit 2cbdf3
  | TokenTypeT
Packit 2cbdf3
  | TypeClassT
Packit 2cbdf3
  | EOFT
Packit 2cbdf3
  deriving Show
Packit 2cbdf3
Packit 2cbdf3
-- -----------------------------------------------------------------------------
Packit 2cbdf3
-- Token functions
Packit 2cbdf3
Packit 2cbdf3
special, zero, string, bind, escape, decch, hexch, octch, char :: Action
Packit 2cbdf3
smac, rmac, smacdef, rmacdef, startcode, wrapper, encoding :: Action
Packit 2cbdf3
actionty, tokenty, typeclass :: Action
Packit 2cbdf3
special   (p,_,str) _  = return $ T p (SpecialT  (head str))
Packit 2cbdf3
zero      (p,_,_)   _  = return $ T p ZeroT
Packit 2cbdf3
string    (p,_,str) ln = return $ T p (StringT (extract ln str))
Packit 2cbdf3
bind      (p,_,str) _  = return $ T p (BindT (takeWhile isIdChar str))
Packit 2cbdf3
escape    (p,_,str) _  = return $ T p (CharT (esc str))
Packit 2cbdf3
decch     (p,_,str) ln = return $ T p (CharT (do_ech 10 ln (take (ln-1) (tail str))))
Packit 2cbdf3
hexch     (p,_,str) ln = return $ T p (CharT (do_ech 16 ln (take (ln-2) (drop 2 str))))
Packit 2cbdf3
octch     (p,_,str) ln = return $ T p (CharT (do_ech 8  ln (take (ln-2) (drop 2 str))))
Packit 2cbdf3
char      (p,_,str) _  = return $ T p (CharT (head str))
Packit 2cbdf3
smac      (p,_,str) ln = return $ T p (SMacT (mac ln str))
Packit 2cbdf3
rmac      (p,_,str) ln = return $ T p (RMacT (mac ln str))
Packit 2cbdf3
smacdef   (p,_,str) ln = return $ T p (SMacDefT (macdef ln str))
Packit 2cbdf3
rmacdef   (p,_,str) ln = return $ T p (RMacDefT (macdef ln str))
Packit 2cbdf3
startcode (p,_,str) ln = return $ T p (IdT (take ln str))
Packit 2cbdf3
wrapper   (p,_,_)   _  = return $ T p WrapperT
Packit 2cbdf3
encoding  (p,_,_)   _  = return $ T p EncodingT
Packit 2cbdf3
actionty  (p,_,_)   _  = return $ T p ActionTypeT
Packit 2cbdf3
tokenty   (p,_,_)   _  = return $ T p TokenTypeT
Packit 2cbdf3
typeclass (p,_,_)   _  = return $ T p TypeClassT
Packit 2cbdf3
Packit 2cbdf3
isIdChar :: Char -> Bool
Packit 2cbdf3
isIdChar c = isAlphaNum c || c `elem` "_'"
Packit 2cbdf3
Packit 2cbdf3
extract :: Int -> String -> String
Packit 2cbdf3
extract ln str = take (ln-2) (tail str)
Packit 2cbdf3
Packit 2cbdf3
do_ech :: Int -> Int -> String -> Char
Packit 2cbdf3
do_ech radix _ln str = chr (parseInt radix str)
Packit 2cbdf3
Packit 2cbdf3
mac :: Int -> String -> String
Packit 2cbdf3
mac ln str = take (ln-1) $ tail str
Packit 2cbdf3
Packit 2cbdf3
-- TODO : replace not . isSpace with (\c -> not (isSpace c) && c /= '=')
Packit 2cbdf3
macdef :: Int -> String -> String
Packit 2cbdf3
macdef _ln str = takeWhile (\c -> not (isSpace c) && c /= '=') $ tail str
Packit 2cbdf3
Packit 2cbdf3
esc :: String -> Char
Packit 2cbdf3
esc str =
Packit 2cbdf3
  case head $ tail str of
Packit 2cbdf3
    'a' -> '\a'
Packit 2cbdf3
    'b' -> '\b'
Packit 2cbdf3
    'f' -> '\f'
Packit 2cbdf3
    'n' -> '\n'
Packit 2cbdf3
    'r' -> '\r'
Packit 2cbdf3
    't' -> '\t'
Packit 2cbdf3
    'v' -> '\v'
Packit 2cbdf3
    c   ->  c
Packit 2cbdf3
Packit 2cbdf3
parseInt :: Int -> String -> Int
Packit 2cbdf3
parseInt radix ds = foldl1 (\n d -> n * radix + d) (map digitToInt ds)
Packit 2cbdf3
Packit 2cbdf3
-- In brace-delimited code, we have to be careful to match braces
Packit 2cbdf3
-- within the code, but ignore braces inside strings and character
Packit 2cbdf3
-- literals.  We do an approximate job (doing it properly requires
Packit 2cbdf3
-- implementing a large chunk of the Haskell lexical syntax).
Packit 2cbdf3
Packit 2cbdf3
code :: Action
Packit 2cbdf3
code (p,_,_inp) _ = do
Packit 2cbdf3
  currentInput <- getInput
Packit 2cbdf3
  go currentInput 1 ""
Packit 2cbdf3
  where
Packit 2cbdf3
    go :: AlexInput -> Int -> String -> P Token
Packit 2cbdf3
    go inp 0 cs = do
Packit 2cbdf3
      setInput inp
Packit 2cbdf3
      return (T p (CodeT (reverse (tail cs))))
Packit 2cbdf3
    go inp n cs = do
Packit 2cbdf3
      case alexGetChar inp of
Packit 2cbdf3
        Nothing       -> err inp
Packit 2cbdf3
        Just (c,inp2) ->
Packit 2cbdf3
          case c of
Packit 2cbdf3
            '{'  -> go inp2 (n+1) (c:cs)
Packit 2cbdf3
            '}'  -> go inp2 (n-1) (c:cs)
Packit 2cbdf3
            '\'' -> go_char inp2 n (c:cs)
Packit 2cbdf3
            '\"' -> go_str inp2 n (c:cs) '\"'
Packit 2cbdf3
            c2   -> go inp2 n (c2:cs)
Packit 2cbdf3
Packit 2cbdf3
    go_char :: AlexInput -> Int -> String -> P Token
Packit 2cbdf3
    -- try to catch multiple occurrences of ' at identifier end
Packit 2cbdf3
    go_char inp n cs@('\'':'\'':_) = go inp n cs
Packit 2cbdf3
    -- try to catch occurrences of ' within an identifier
Packit 2cbdf3
    go_char inp n cs@('\'':c2:_)
Packit 2cbdf3
      | isAlphaNum c2              = go inp n cs
Packit 2cbdf3
    go_char inp n cs               = go_str inp n cs '\''
Packit 2cbdf3
Packit 2cbdf3
    go_str :: AlexInput -> Int -> String -> Char -> P Token
Packit 2cbdf3
    go_str inp n cs end = do
Packit 2cbdf3
      case alexGetChar inp of
Packit 2cbdf3
          Nothing -> err inp
Packit 2cbdf3
          Just (c,inp2)
Packit 2cbdf3
            | c == end  -> go inp2 n (c:cs)
Packit 2cbdf3
            | otherwise ->
Packit 2cbdf3
              case c of
Packit 2cbdf3
                '\\' -> case alexGetChar inp2 of
Packit 2cbdf3
                          Nothing       -> err inp2
Packit 2cbdf3
                          Just (d,inp3) -> go_str inp3 n (d:c:cs) end
Packit 2cbdf3
                c2   -> go_str inp2 n (c2:cs) end
Packit 2cbdf3
Packit 2cbdf3
    err inp = do setInput inp; lexError "lexical error in code fragment"
Packit 2cbdf3
Packit 2cbdf3
lexError :: String -> P a
Packit 2cbdf3
lexError s = do
Packit 2cbdf3
  (_,_,_,input) <- getInput
Packit 2cbdf3
  failP (s ++ (if (not (null input))
Packit 2cbdf3
                  then " at " ++ show (head input)
Packit 2cbdf3
                  else " at end of file"))
Packit 2cbdf3
Packit 2cbdf3
lexer :: (Token -> P a) -> P a
Packit 2cbdf3
lexer cont = lexToken >>= cont
Packit 2cbdf3
Packit 2cbdf3
lexToken :: P Token
Packit 2cbdf3
lexToken = do
Packit 2cbdf3
  inp@(p,c,_,s) <- getInput
Packit 2cbdf3
  sc <- getStartCode
Packit 2cbdf3
  case alexScan inp sc of
Packit 2cbdf3
    AlexEOF -> return (T p EOFT)
Packit 2cbdf3
    AlexError _ -> lexError "lexical error"
Packit 2cbdf3
    AlexSkip inp1 _ -> do
Packit 2cbdf3
      setInput inp1
Packit 2cbdf3
      lexToken
Packit 2cbdf3
    AlexToken inp1 len t -> do
Packit 2cbdf3
      setInput inp1
Packit 2cbdf3
      t (p,c,s) len
Packit 2cbdf3
Packit 2cbdf3
type Action = (AlexPosn,Char,String) -> Int -> P Token
Packit 2cbdf3
Packit 2cbdf3
skip :: Action
Packit 2cbdf3
skip _ _ = lexToken
Packit 2cbdf3
Packit 2cbdf3
andBegin :: Action -> StartCode -> Action
Packit 2cbdf3
andBegin act sc inp len = setStartCode sc >> act inp len
Packit 2cbdf3
}