Blob Blame History Raw
--
-- Lexical syntax for Haskell 98.
--
-- (c) Simon Marlow 2003, with the caveat that much of this is
-- translated directly from the syntax in the Haskell 98 report.
--
-- This isn't a complete Haskell 98 lexer - it doesn't handle layout
-- for one thing.  However, it could be adapted with a small
-- amount of effort.
--

{
module Main (main) where
import Data.Char (chr)
}

%wrapper "monad"

$whitechar = [ \t\n\r\f\v]
$special   = [\(\)\,\;\[\]\`\{\}]

$ascdigit  = 0-9
$unidigit  = [] -- TODO
$digit     = [$ascdigit $unidigit]

$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
$unisymbol = [] -- TODO
$symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']

$large     = [A-Z \xc0-\xd6 \xd8-\xde]
$small     = [a-z \xdf-\xf6 \xf8-\xff \_]
$alpha     = [$small $large]

$graphic   = [$small $large $symbol $digit $special \:\"\']

$octit	   = 0-7
$hexit     = [0-9 A-F a-f]
$idchar    = [$alpha $digit \']
$symchar   = [$symbol \:]
$nl        = [\n\r]

@reservedid = 
	as|case|class|data|default|deriving|do|else|hiding|if|
	import|in|infix|infixl|infixr|instance|let|module|newtype|
	of|qualified|then|type|where

@reservedop =
	".." | ":" | "::" | "=" | \\ | "|" | "<-" | "->" | "@" | "~" | "=>"

@varid  = $small $idchar*
@conid  = $large $idchar*
@varsym = $symbol $symchar*
@consym = \: $symchar*

@decimal     = $digit+
@octal       = $octit+
@hexadecimal = $hexit+
@exponent    = [eE] [\-\+] @decimal

$cntrl   = [$large \@\[\\\]\^\_]
@ascii   = \^ $cntrl | NUL | SOH | STX | ETX | EOT | ENQ | ACK
	 | BEL | BS | HT | LF | VT | FF | CR | SO | SI | DLE
	 | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | EM
	 | SUB | ESC | FS | GS | RS | US | SP | DEL
$charesc = [abfnrtv\\\"\'\&]
@escape  = \\ ($charesc | @ascii | @decimal | o @octal | x @hexadecimal)
@gap     = \\ $whitechar+ \\
@string  = $graphic # [\"\\] | " " | @escape | @gap

haskell :-

<0> $white+			{ skip }
<0> "--"\-*[^$symbol].*		{ skip }

"{-"				{ nested_comment }

<0> $special			{ mkL LSpecial }

<0> @reservedid			{ mkL LReservedId }
<0> @conid \. @varid		{ mkL LQVarId }
<0> @conid \. @conid		{ mkL LQConId }
<0> @varid			{ mkL LVarId }
<0> @conid			{ mkL LConId }

<0> @reservedop			{ mkL LReservedOp }
<0> @conid \. @varsym		{ mkL LVarSym }
<0> @conid \. @consym		{ mkL LConSym }
<0> @varsym			{ mkL LVarSym }
<0> @consym			{ mkL LConSym }

<0> @decimal 
  | 0[oO] @octal
  | 0[xX] @hexadecimal		{ mkL LInteger }

<0> @decimal \. @decimal @exponent?
  | @decimal @exponent		{ mkL LFloat }

<0> \' ($graphic # [\'\\] | " " | @escape) \'
				{ mkL LChar }

<0> \" @string* \"		{ mkL LString }

{
data Lexeme = L AlexPosn LexemeClass String

data LexemeClass
  = LInteger
  | LFloat
  | LChar
  | LString
  | LSpecial
  | LReservedId
  | LReservedOp
  | LVarId
  | LQVarId
  | LConId
  | LQConId
  | LVarSym
  | LQVarSym
  | LConSym
  | LQConSym
  | LEOF
  deriving Eq
  
mkL :: LexemeClass -> AlexInput -> Int -> Alex Lexeme
mkL c (p,_,_,str) len = return (L p c (take len str))

nested_comment :: AlexInput -> Int -> Alex Lexeme
nested_comment _ _ = do
  input <- alexGetInput
  go 1 input
  where go 0 input = do alexSetInput input; alexMonadScan
	go n input = do
          case alexGetByte input of
	    Nothing  -> err input
	    Just (c,input) -> do
              case chr (fromIntegral c) of
	    	'-' -> do
                  case alexGetByte input of
		    Nothing  -> err input
                    Just (125,input) -> go (n-1) input
                    Just (c,input)   -> go n input
	     	'\123' -> do
                  case alexGetByte input of
		    Nothing  -> err input
                    Just (c,input) | c == fromIntegral (ord '-') -> go (n+1) input
		    Just (c,input)   -> go n input
	    	c -> go n input

        err input = do alexSetInput input; lexError "error in nested comment"  

lexError s = do
  (p,c,_,input) <- alexGetInput
  alexError (showPosn p ++ ": " ++ s ++ 
		   (if (not (null input))
		     then " before " ++ show (head input)
		     else " at end of file"))

scanner str = runAlex str $ do
  let loop i = do tok@(L _ cl _) <- alexMonadScan; 
		  if cl == LEOF
			then return i
			else do loop $! (i+1)
  loop 0

alexEOF = return (L undefined LEOF "")

showPosn (AlexPn _ line col) = show line ++ ':': show col

main = do
  s <- getContents
  print (scanner s)
}