|
Packit |
2cbdf3 |
-------------------------------------------------------------------------------
|
|
Packit |
2cbdf3 |
-- ALEX SCANNER AND LITERATE PREPROCESSOR
|
|
Packit |
2cbdf3 |
--
|
|
Packit |
2cbdf3 |
-- This Script defines the grammar used to generate the Alex scanner and a
|
|
Packit |
2cbdf3 |
-- preprocessing scanner for dealing with literate scripts. The actions for
|
|
Packit |
2cbdf3 |
-- the Alex scanner are given separately in the Alex module.
|
|
Packit |
2cbdf3 |
--
|
|
Packit |
2cbdf3 |
-- See the Alex manual for a discussion of the scanners defined here.
|
|
Packit |
2cbdf3 |
--
|
|
Packit |
2cbdf3 |
-- Chris Dornan, Aug-95, 4-Jun-96, 10-Jul-96, 29-Sep-97
|
|
Packit |
2cbdf3 |
-------------------------------------------------------------------------------
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
{
|
|
Packit |
2cbdf3 |
module Scan (lexer, AlexPosn(..), Token(..), Tkn(..), tokPosn) where
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
import Data.Char
|
|
Packit |
2cbdf3 |
import ParseMonad
|
|
Packit |
2cbdf3 |
--import Debug.Trace
|
|
Packit |
2cbdf3 |
}
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
$digit = 0-9
|
|
Packit |
2cbdf3 |
$hexdig = [0-9 A-F a-f]
|
|
Packit |
2cbdf3 |
$octal = 0-7
|
|
Packit |
2cbdf3 |
$lower = a-z
|
|
Packit |
2cbdf3 |
$upper = A-Z
|
|
Packit |
2cbdf3 |
$alpha = [$upper $lower]
|
|
Packit |
2cbdf3 |
$alphanum = [$alpha $digit]
|
|
Packit |
2cbdf3 |
$idchar = [$alphanum \_ \']
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
$special = [\.\;\,\$\|\*\+\?\#\~\-\{\}\(\)\[\]\^\/]
|
|
Packit |
2cbdf3 |
$graphic = $printable # $white
|
|
Packit |
2cbdf3 |
$nonspecial = $graphic # [$special \%]
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
@id = $alpha $idchar*
|
|
Packit |
2cbdf3 |
@smac = \$ @id | \$ \{ @id \}
|
|
Packit |
2cbdf3 |
@rmac = \@ @id | \@ \{ @id \}
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
@comment = "--".*
|
|
Packit |
2cbdf3 |
@ws = $white+ | @comment
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
alex :-
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
@ws { skip } -- white space; ignore
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
<0> \" [^\"]* \" { string }
|
|
Packit |
2cbdf3 |
<0> (@id @ws?)? \:\- { bind }
|
|
Packit |
2cbdf3 |
<0> \{ / (\n | [^$digit]) { code }
|
|
Packit |
2cbdf3 |
<0> $special { special } -- note: matches {
|
|
Packit |
2cbdf3 |
<0> \% "wrapper" { wrapper }
|
|
Packit |
2cbdf3 |
<0> \% "encoding" { encoding }
|
|
Packit |
2cbdf3 |
<0> \% "action" { actionty }
|
|
Packit |
2cbdf3 |
<0> \% "token" { tokenty }
|
|
Packit |
2cbdf3 |
<0> \% "typeclass" { typeclass }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
<0> \\ $digit+ { decch }
|
|
Packit |
2cbdf3 |
<0> \\ x $hexdig+ { hexch }
|
|
Packit |
2cbdf3 |
<0> \\ o $octal+ { octch }
|
|
Packit |
2cbdf3 |
<0> \\ $printable { escape }
|
|
Packit |
2cbdf3 |
<0> $nonspecial # [\<] { char }
|
|
Packit |
2cbdf3 |
<0> @smac { smac }
|
|
Packit |
2cbdf3 |
<0> @rmac { rmac }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
<0> @smac @ws? \= { smacdef }
|
|
Packit |
2cbdf3 |
<0> @rmac @ws? \= { rmacdef }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
-- identifiers are allowed to be unquoted in startcode lists
|
|
Packit |
2cbdf3 |
<0> \< { special `andBegin` startcodes }
|
|
Packit |
2cbdf3 |
<startcodes> 0 { zero }
|
|
Packit |
2cbdf3 |
<startcodes> @id { startcode }
|
|
Packit |
2cbdf3 |
<startcodes> \, { special }
|
|
Packit |
2cbdf3 |
<startcodes> \> { special `andBegin` afterstartcodes }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
-- After a <..> startcode sequence, we can have a {...} grouping of rules,
|
|
Packit |
2cbdf3 |
-- so don't try to interpret the opening { as a code block.
|
|
Packit |
2cbdf3 |
<afterstartcodes> \{ (\n | [^$digit ]) { special `andBegin` 0 }
|
|
Packit |
2cbdf3 |
<afterstartcodes> () { skip `andBegin` 0 } -- note: empty pattern
|
|
Packit |
2cbdf3 |
{
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
-- -----------------------------------------------------------------------------
|
|
Packit |
2cbdf3 |
-- Token type
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
data Token = T AlexPosn Tkn
|
|
Packit |
2cbdf3 |
deriving Show
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
tokPosn (T p _) = p
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
data Tkn
|
|
Packit |
2cbdf3 |
= SpecialT Char
|
|
Packit |
2cbdf3 |
| CodeT String
|
|
Packit |
2cbdf3 |
| ZeroT
|
|
Packit |
2cbdf3 |
| IdT String
|
|
Packit |
2cbdf3 |
| StringT String
|
|
Packit |
2cbdf3 |
| BindT String
|
|
Packit |
2cbdf3 |
| CharT Char
|
|
Packit |
2cbdf3 |
| SMacT String
|
|
Packit |
2cbdf3 |
| RMacT String
|
|
Packit |
2cbdf3 |
| SMacDefT String
|
|
Packit |
2cbdf3 |
| RMacDefT String
|
|
Packit |
2cbdf3 |
| NumT Int
|
|
Packit |
2cbdf3 |
| WrapperT
|
|
Packit |
2cbdf3 |
| EncodingT
|
|
Packit |
2cbdf3 |
| ActionTypeT
|
|
Packit |
2cbdf3 |
| TokenTypeT
|
|
Packit |
2cbdf3 |
| TypeClassT
|
|
Packit |
2cbdf3 |
| EOFT
|
|
Packit |
2cbdf3 |
deriving Show
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
-- -----------------------------------------------------------------------------
|
|
Packit |
2cbdf3 |
-- Token functions
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
special, zero, string, bind, escape, decch, hexch, octch, char :: Action
|
|
Packit |
2cbdf3 |
smac, rmac, smacdef, rmacdef, startcode, wrapper, encoding :: Action
|
|
Packit |
2cbdf3 |
actionty, tokenty, typeclass :: Action
|
|
Packit |
2cbdf3 |
special (p,_,str) _ = return $ T p (SpecialT (head str))
|
|
Packit |
2cbdf3 |
zero (p,_,_) _ = return $ T p ZeroT
|
|
Packit |
2cbdf3 |
string (p,_,str) ln = return $ T p (StringT (extract ln str))
|
|
Packit |
2cbdf3 |
bind (p,_,str) _ = return $ T p (BindT (takeWhile isIdChar str))
|
|
Packit |
2cbdf3 |
escape (p,_,str) _ = return $ T p (CharT (esc str))
|
|
Packit |
2cbdf3 |
decch (p,_,str) ln = return $ T p (CharT (do_ech 10 ln (take (ln-1) (tail str))))
|
|
Packit |
2cbdf3 |
hexch (p,_,str) ln = return $ T p (CharT (do_ech 16 ln (take (ln-2) (drop 2 str))))
|
|
Packit |
2cbdf3 |
octch (p,_,str) ln = return $ T p (CharT (do_ech 8 ln (take (ln-2) (drop 2 str))))
|
|
Packit |
2cbdf3 |
char (p,_,str) _ = return $ T p (CharT (head str))
|
|
Packit |
2cbdf3 |
smac (p,_,str) ln = return $ T p (SMacT (mac ln str))
|
|
Packit |
2cbdf3 |
rmac (p,_,str) ln = return $ T p (RMacT (mac ln str))
|
|
Packit |
2cbdf3 |
smacdef (p,_,str) ln = return $ T p (SMacDefT (macdef ln str))
|
|
Packit |
2cbdf3 |
rmacdef (p,_,str) ln = return $ T p (RMacDefT (macdef ln str))
|
|
Packit |
2cbdf3 |
startcode (p,_,str) ln = return $ T p (IdT (take ln str))
|
|
Packit |
2cbdf3 |
wrapper (p,_,_) _ = return $ T p WrapperT
|
|
Packit |
2cbdf3 |
encoding (p,_,_) _ = return $ T p EncodingT
|
|
Packit |
2cbdf3 |
actionty (p,_,_) _ = return $ T p ActionTypeT
|
|
Packit |
2cbdf3 |
tokenty (p,_,_) _ = return $ T p TokenTypeT
|
|
Packit |
2cbdf3 |
typeclass (p,_,_) _ = return $ T p TypeClassT
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
isIdChar :: Char -> Bool
|
|
Packit |
2cbdf3 |
isIdChar c = isAlphaNum c || c `elem` "_'"
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
extract :: Int -> String -> String
|
|
Packit |
2cbdf3 |
extract ln str = take (ln-2) (tail str)
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
do_ech :: Int -> Int -> String -> Char
|
|
Packit |
2cbdf3 |
do_ech radix _ln str = chr (parseInt radix str)
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
mac :: Int -> String -> String
|
|
Packit |
2cbdf3 |
mac ln str = take (ln-1) $ tail str
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
-- TODO : replace not . isSpace with (\c -> not (isSpace c) && c /= '=')
|
|
Packit |
2cbdf3 |
macdef :: Int -> String -> String
|
|
Packit |
2cbdf3 |
macdef _ln str = takeWhile (\c -> not (isSpace c) && c /= '=') $ tail str
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
esc :: String -> Char
|
|
Packit |
2cbdf3 |
esc str =
|
|
Packit |
2cbdf3 |
case head $ tail str of
|
|
Packit |
2cbdf3 |
'a' -> '\a'
|
|
Packit |
2cbdf3 |
'b' -> '\b'
|
|
Packit |
2cbdf3 |
'f' -> '\f'
|
|
Packit |
2cbdf3 |
'n' -> '\n'
|
|
Packit |
2cbdf3 |
'r' -> '\r'
|
|
Packit |
2cbdf3 |
't' -> '\t'
|
|
Packit |
2cbdf3 |
'v' -> '\v'
|
|
Packit |
2cbdf3 |
c -> c
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
parseInt :: Int -> String -> Int
|
|
Packit |
2cbdf3 |
parseInt radix ds = foldl1 (\n d -> n * radix + d) (map digitToInt ds)
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
-- In brace-delimited code, we have to be careful to match braces
|
|
Packit |
2cbdf3 |
-- within the code, but ignore braces inside strings and character
|
|
Packit |
2cbdf3 |
-- literals. We do an approximate job (doing it properly requires
|
|
Packit |
2cbdf3 |
-- implementing a large chunk of the Haskell lexical syntax).
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
code :: Action
|
|
Packit |
2cbdf3 |
code (p,_,_inp) _ = do
|
|
Packit |
2cbdf3 |
currentInput <- getInput
|
|
Packit |
2cbdf3 |
go currentInput 1 ""
|
|
Packit |
2cbdf3 |
where
|
|
Packit |
2cbdf3 |
go :: AlexInput -> Int -> String -> P Token
|
|
Packit |
2cbdf3 |
go inp 0 cs = do
|
|
Packit |
2cbdf3 |
setInput inp
|
|
Packit |
2cbdf3 |
return (T p (CodeT (reverse (tail cs))))
|
|
Packit |
2cbdf3 |
go inp n cs = do
|
|
Packit |
2cbdf3 |
case alexGetChar inp of
|
|
Packit |
2cbdf3 |
Nothing -> err inp
|
|
Packit |
2cbdf3 |
Just (c,inp2) ->
|
|
Packit |
2cbdf3 |
case c of
|
|
Packit |
2cbdf3 |
'{' -> go inp2 (n+1) (c:cs)
|
|
Packit |
2cbdf3 |
'}' -> go inp2 (n-1) (c:cs)
|
|
Packit |
2cbdf3 |
'\'' -> go_char inp2 n (c:cs)
|
|
Packit |
2cbdf3 |
'\"' -> go_str inp2 n (c:cs) '\"'
|
|
Packit |
2cbdf3 |
c2 -> go inp2 n (c2:cs)
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
go_char :: AlexInput -> Int -> String -> P Token
|
|
Packit |
2cbdf3 |
-- try to catch multiple occurrences of ' at identifier end
|
|
Packit |
2cbdf3 |
go_char inp n cs@('\'':'\'':_) = go inp n cs
|
|
Packit |
2cbdf3 |
-- try to catch occurrences of ' within an identifier
|
|
Packit |
2cbdf3 |
go_char inp n cs@('\'':c2:_)
|
|
Packit |
2cbdf3 |
| isAlphaNum c2 = go inp n cs
|
|
Packit |
2cbdf3 |
go_char inp n cs = go_str inp n cs '\''
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
go_str :: AlexInput -> Int -> String -> Char -> P Token
|
|
Packit |
2cbdf3 |
go_str inp n cs end = do
|
|
Packit |
2cbdf3 |
case alexGetChar inp of
|
|
Packit |
2cbdf3 |
Nothing -> err inp
|
|
Packit |
2cbdf3 |
Just (c,inp2)
|
|
Packit |
2cbdf3 |
| c == end -> go inp2 n (c:cs)
|
|
Packit |
2cbdf3 |
| otherwise ->
|
|
Packit |
2cbdf3 |
case c of
|
|
Packit |
2cbdf3 |
'\\' -> case alexGetChar inp2 of
|
|
Packit |
2cbdf3 |
Nothing -> err inp2
|
|
Packit |
2cbdf3 |
Just (d,inp3) -> go_str inp3 n (d:c:cs) end
|
|
Packit |
2cbdf3 |
c2 -> go_str inp2 n (c2:cs) end
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
err inp = do setInput inp; lexError "lexical error in code fragment"
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
lexError :: String -> P a
|
|
Packit |
2cbdf3 |
lexError s = do
|
|
Packit |
2cbdf3 |
(_,_,_,input) <- getInput
|
|
Packit |
2cbdf3 |
failP (s ++ (if (not (null input))
|
|
Packit |
2cbdf3 |
then " at " ++ show (head input)
|
|
Packit |
2cbdf3 |
else " at end of file"))
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
lexer :: (Token -> P a) -> P a
|
|
Packit |
2cbdf3 |
lexer cont = lexToken >>= cont
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
lexToken :: P Token
|
|
Packit |
2cbdf3 |
lexToken = do
|
|
Packit |
2cbdf3 |
inp@(p,c,_,s) <- getInput
|
|
Packit |
2cbdf3 |
sc <- getStartCode
|
|
Packit |
2cbdf3 |
case alexScan inp sc of
|
|
Packit |
2cbdf3 |
AlexEOF -> return (T p EOFT)
|
|
Packit |
2cbdf3 |
AlexError _ -> lexError "lexical error"
|
|
Packit |
2cbdf3 |
AlexSkip inp1 _ -> do
|
|
Packit |
2cbdf3 |
setInput inp1
|
|
Packit |
2cbdf3 |
lexToken
|
|
Packit |
2cbdf3 |
AlexToken inp1 len t -> do
|
|
Packit |
2cbdf3 |
setInput inp1
|
|
Packit |
2cbdf3 |
t (p,c,s) len
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
type Action = (AlexPosn,Char,String) -> Int -> P Token
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
skip :: Action
|
|
Packit |
2cbdf3 |
skip _ _ = lexToken
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
andBegin :: Action -> StartCode -> Action
|
|
Packit |
2cbdf3 |
andBegin act sc inp len = setStartCode sc >> act inp len
|
|
Packit |
2cbdf3 |
}
|