|
Packit |
2cbdf3 |
--
|
|
Packit |
2cbdf3 |
-- Lexical syntax for Haskell 98.
|
|
Packit |
2cbdf3 |
--
|
|
Packit |
2cbdf3 |
-- (c) Simon Marlow 2003, with the caveat that much of this is
|
|
Packit |
2cbdf3 |
-- translated directly from the syntax in the Haskell 98 report.
|
|
Packit |
2cbdf3 |
--
|
|
Packit |
2cbdf3 |
-- This isn't a complete Haskell 98 lexer - it doesn't handle layout
|
|
Packit |
2cbdf3 |
-- for one thing. However, it could be adapted with a small
|
|
Packit |
2cbdf3 |
-- amount of effort.
|
|
Packit |
2cbdf3 |
--
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
{
|
|
Packit |
2cbdf3 |
module Main (main) where
|
|
Packit |
2cbdf3 |
import Data.Char (chr)
|
|
Packit |
2cbdf3 |
}
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
%wrapper "monad"
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
$whitechar = [ \t\n\r\f\v]
|
|
Packit |
2cbdf3 |
$special = [\(\)\,\;\[\]\`\{\}]
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
$ascdigit = 0-9
|
|
Packit |
2cbdf3 |
$unidigit = [] -- TODO
|
|
Packit |
2cbdf3 |
$digit = [$ascdigit $unidigit]
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
|
|
Packit |
2cbdf3 |
$unisymbol = [] -- TODO
|
|
Packit |
2cbdf3 |
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
$large = [A-Z \xc0-\xd6 \xd8-\xde]
|
|
Packit |
2cbdf3 |
$small = [a-z \xdf-\xf6 \xf8-\xff \_]
|
|
Packit |
2cbdf3 |
$alpha = [$small $large]
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
$graphic = [$small $large $symbol $digit $special \:\"\']
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
$octit = 0-7
|
|
Packit |
2cbdf3 |
$hexit = [0-9 A-F a-f]
|
|
Packit |
2cbdf3 |
$idchar = [$alpha $digit \']
|
|
Packit |
2cbdf3 |
$symchar = [$symbol \:]
|
|
Packit |
2cbdf3 |
$nl = [\n\r]
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
@reservedid =
|
|
Packit |
2cbdf3 |
as|case|class|data|default|deriving|do|else|hiding|if|
|
|
Packit |
2cbdf3 |
import|in|infix|infixl|infixr|instance|let|module|newtype|
|
|
Packit |
2cbdf3 |
of|qualified|then|type|where
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
@reservedop =
|
|
Packit |
2cbdf3 |
".." | ":" | "::" | "=" | \\ | "|" | "<-" | "->" | "@" | "~" | "=>"
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
@varid = $small $idchar*
|
|
Packit |
2cbdf3 |
@conid = $large $idchar*
|
|
Packit |
2cbdf3 |
@varsym = $symbol $symchar*
|
|
Packit |
2cbdf3 |
@consym = \: $symchar*
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
@decimal = $digit+
|
|
Packit |
2cbdf3 |
@octal = $octit+
|
|
Packit |
2cbdf3 |
@hexadecimal = $hexit+
|
|
Packit |
2cbdf3 |
@exponent = [eE] [\-\+] @decimal
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
$cntrl = [$large \@\[\\\]\^\_]
|
|
Packit |
2cbdf3 |
@ascii = \^ $cntrl | NUL | SOH | STX | ETX | EOT | ENQ | ACK
|
|
Packit |
2cbdf3 |
| BEL | BS | HT | LF | VT | FF | CR | SO | SI | DLE
|
|
Packit |
2cbdf3 |
| DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | EM
|
|
Packit |
2cbdf3 |
| SUB | ESC | FS | GS | RS | US | SP | DEL
|
|
Packit |
2cbdf3 |
$charesc = [abfnrtv\\\"\'\&]
|
|
Packit |
2cbdf3 |
@escape = \\ ($charesc | @ascii | @decimal | o @octal | x @hexadecimal)
|
|
Packit |
2cbdf3 |
@gap = \\ $whitechar+ \\
|
|
Packit |
2cbdf3 |
@string = $graphic # [\"\\] | " " | @escape | @gap
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
haskell :-
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
<0> $white+ { skip }
|
|
Packit |
2cbdf3 |
<0> "--"\-*[^$symbol].* { skip }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
"{-" { nested_comment }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
<0> $special { mkL LSpecial }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
<0> @reservedid { mkL LReservedId }
|
|
Packit |
2cbdf3 |
<0> @conid \. @varid { mkL LQVarId }
|
|
Packit |
2cbdf3 |
<0> @conid \. @conid { mkL LQConId }
|
|
Packit |
2cbdf3 |
<0> @varid { mkL LVarId }
|
|
Packit |
2cbdf3 |
<0> @conid { mkL LConId }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
<0> @reservedop { mkL LReservedOp }
|
|
Packit |
2cbdf3 |
<0> @conid \. @varsym { mkL LVarSym }
|
|
Packit |
2cbdf3 |
<0> @conid \. @consym { mkL LConSym }
|
|
Packit |
2cbdf3 |
<0> @varsym { mkL LVarSym }
|
|
Packit |
2cbdf3 |
<0> @consym { mkL LConSym }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
<0> @decimal
|
|
Packit |
2cbdf3 |
| 0[oO] @octal
|
|
Packit |
2cbdf3 |
| 0[xX] @hexadecimal { mkL LInteger }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
<0> @decimal \. @decimal @exponent?
|
|
Packit |
2cbdf3 |
| @decimal @exponent { mkL LFloat }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
<0> \' ($graphic # [\'\\] | " " | @escape) \'
|
|
Packit |
2cbdf3 |
{ mkL LChar }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
<0> \" @string* \" { mkL LString }
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
{
|
|
Packit |
2cbdf3 |
data Lexeme = L AlexPosn LexemeClass String
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
data LexemeClass
|
|
Packit |
2cbdf3 |
= LInteger
|
|
Packit |
2cbdf3 |
| LFloat
|
|
Packit |
2cbdf3 |
| LChar
|
|
Packit |
2cbdf3 |
| LString
|
|
Packit |
2cbdf3 |
| LSpecial
|
|
Packit |
2cbdf3 |
| LReservedId
|
|
Packit |
2cbdf3 |
| LReservedOp
|
|
Packit |
2cbdf3 |
| LVarId
|
|
Packit |
2cbdf3 |
| LQVarId
|
|
Packit |
2cbdf3 |
| LConId
|
|
Packit |
2cbdf3 |
| LQConId
|
|
Packit |
2cbdf3 |
| LVarSym
|
|
Packit |
2cbdf3 |
| LQVarSym
|
|
Packit |
2cbdf3 |
| LConSym
|
|
Packit |
2cbdf3 |
| LQConSym
|
|
Packit |
2cbdf3 |
| LEOF
|
|
Packit |
2cbdf3 |
deriving Eq
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
mkL :: LexemeClass -> AlexInput -> Int -> Alex Lexeme
|
|
Packit |
2cbdf3 |
mkL c (p,_,_,str) len = return (L p c (take len str))
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
nested_comment :: AlexInput -> Int -> Alex Lexeme
|
|
Packit |
2cbdf3 |
nested_comment _ _ = do
|
|
Packit |
2cbdf3 |
input <- alexGetInput
|
|
Packit |
2cbdf3 |
go 1 input
|
|
Packit |
2cbdf3 |
where go 0 input = do alexSetInput input; alexMonadScan
|
|
Packit |
2cbdf3 |
go n input = do
|
|
Packit |
2cbdf3 |
case alexGetByte input of
|
|
Packit |
2cbdf3 |
Nothing -> err input
|
|
Packit |
2cbdf3 |
Just (c,input) -> do
|
|
Packit |
2cbdf3 |
case chr (fromIntegral c) of
|
|
Packit |
2cbdf3 |
'-' -> do
|
|
Packit |
2cbdf3 |
case alexGetByte input of
|
|
Packit |
2cbdf3 |
Nothing -> err input
|
|
Packit |
2cbdf3 |
Just (125,input) -> go (n-1) input
|
|
Packit |
2cbdf3 |
Just (c,input) -> go n input
|
|
Packit |
2cbdf3 |
'\123' -> do
|
|
Packit |
2cbdf3 |
case alexGetByte input of
|
|
Packit |
2cbdf3 |
Nothing -> err input
|
|
Packit |
2cbdf3 |
Just (c,input) | c == fromIntegral (ord '-') -> go (n+1) input
|
|
Packit |
2cbdf3 |
Just (c,input) -> go n input
|
|
Packit |
2cbdf3 |
c -> go n input
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
err input = do alexSetInput input; lexError "error in nested comment"
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
lexError s = do
|
|
Packit |
2cbdf3 |
(p,c,_,input) <- alexGetInput
|
|
Packit |
2cbdf3 |
alexError (showPosn p ++ ": " ++ s ++
|
|
Packit |
2cbdf3 |
(if (not (null input))
|
|
Packit |
2cbdf3 |
then " before " ++ show (head input)
|
|
Packit |
2cbdf3 |
else " at end of file"))
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
scanner str = runAlex str $ do
|
|
Packit |
2cbdf3 |
let loop i = do tok@(L _ cl _) <- alexMonadScan;
|
|
Packit |
2cbdf3 |
if cl == LEOF
|
|
Packit |
2cbdf3 |
then return i
|
|
Packit |
2cbdf3 |
else do loop $! (i+1)
|
|
Packit |
2cbdf3 |
loop 0
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
alexEOF = return (L undefined LEOF "")
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
showPosn (AlexPn _ line col) = show line ++ ':': show col
|
|
Packit |
2cbdf3 |
|
|
Packit |
2cbdf3 |
main = do
|
|
Packit |
2cbdf3 |
s <- getContents
|
|
Packit |
2cbdf3 |
print (scanner s)
|
|
Packit |
2cbdf3 |
}
|