Blob Blame History Raw
{-# LANGUAGE CPP #-}
-- -----------------------------------------------------------------------------
--
-- Main.hs, part of Alex
--
-- (c) Chris Dornan 1995-2000, Simon Marlow 2003
--
-- ----------------------------------------------------------------------------}

module Main (main) where

import AbsSyn
import CharSet
import DFA
import DFAMin
import NFA
import Info
import Map ( Map )
import qualified Map hiding ( Map )
import Output
import ParseMonad ( runP )
import Parser
import Scan
import Util ( hline )
import Paths_alex ( version, getDataDir )

#if __GLASGOW_HASKELL__ < 610
import Control.Exception as Exception ( block, unblock, catch, throw )
#endif
#if __GLASGOW_HASKELL__ >= 610
import Control.Exception ( bracketOnError )
#endif
import Control.Monad ( when, liftM )
import Data.Char ( chr )
import Data.List ( isSuffixOf, nub )
import Data.Maybe ( isJust, fromJust )
import Data.Version ( showVersion )
import System.Console.GetOpt ( getOpt, usageInfo, ArgOrder(..), OptDescr(..), ArgDescr(..) )
import System.Directory ( removeFile )
import System.Environment ( getProgName, getArgs )
import System.Exit ( ExitCode(..), exitWith )
import System.IO ( stderr, Handle, IOMode(..), openFile, hClose, hPutStr, hPutStrLn )
#if __GLASGOW_HASKELL__ >= 612
import System.IO ( hGetContents, hSetEncoding, utf8 )
#endif

-- We need to force every file we open to be read in
-- as UTF8
alexReadFile :: FilePath -> IO String
#if __GLASGOW_HASKELL__ >= 612
alexReadFile file = do
  h <- alexOpenFile file ReadMode
  hGetContents h
#else
alexReadFile = readFile
#endif

-- We need to force every file we write to be written
-- to as UTF8
alexOpenFile :: FilePath -> IOMode -> IO Handle
#if __GLASGOW_HASKELL__ >= 612
alexOpenFile file mode = do
  h <- openFile file mode
  hSetEncoding h utf8
  return h
#else
alexOpenFile = openFile
#endif

-- `main' decodes the command line arguments and calls `alex'.

main:: IO ()
main =  do
 args <- getArgs
 case getOpt Permute argInfo args of
    (cli,_,[]) | DumpHelp `elem` cli -> do
        prog <- getProgramName
        bye (usageInfo (usageHeader prog) argInfo)
    (cli,_,[]) | DumpVersion `elem` cli ->
        bye copyright
    (cli,[file],[]) ->
        runAlex cli file
    (_,_,errors) -> do
        prog <- getProgramName
        die (concat errors ++ usageInfo (usageHeader prog) argInfo)

projectVersion :: String
projectVersion = showVersion version

copyright :: String
copyright = "Alex version " ++ projectVersion ++ ", (c) 2003 Chris Dornan and Simon Marlow\n"

usageHeader :: String -> String
usageHeader prog = "Usage: " ++ prog ++ " [OPTION...] file\n"

runAlex :: [CLIFlags] -> FilePath -> IO ()
runAlex cli file = do
  basename <- case (reverse file) of
                'x':'.':r -> return (reverse r)
                _         -> die (file ++ ": filename must end in \'.x\'\n")

  prg <- alexReadFile file
  script <- parseScript file prg
  alex cli file basename script

parseScript :: FilePath -> String
  -> IO (Maybe (AlexPosn,Code), [Directive], Scanner, Maybe (AlexPosn,Code))
parseScript file prg =
  case runP prg initialParserEnv parse of
        Left (Just (AlexPn _ line col),err) ->
                die (file ++ ":" ++ show line ++ ":" ++ show col
                                 ++ ": " ++ err ++ "\n")
        Left (Nothing, err) ->
                die (file ++ ": " ++ err ++ "\n")

        Right script -> return script

alex :: [CLIFlags] -> FilePath -> FilePath
     -> (Maybe (AlexPosn, Code), [Directive], Scanner, Maybe (AlexPosn, Code))
     -> IO ()
alex cli file basename script = do
   (put_info, finish_info) <-
      case [ f | OptInfoFile f <- cli ] of
           []  -> return (\_ -> return (), return ())
           [Nothing] -> infoStart file (basename ++ ".info")
           [Just f]  -> infoStart file f
           _   -> dieAlex "multiple -i/--info options"

   o_file <- case [ f | OptOutputFile f <- cli ] of
                []  -> return (basename ++ ".hs")
                [f] -> return f
                _   -> dieAlex "multiple -o/--outfile options"

   tab_size <- case [ s | OptTabSize s <- cli ] of
                []  -> return (8 :: Int)
                [s] -> case reads s of
                        [(n,"")] -> return n
                        _        -> dieAlex "-s/--tab-size option is not a valid integer"
                _   -> dieAlex "multiple -s/--tab-size options"

   let target
        | OptGhcTarget `elem` cli = GhcTarget
        | otherwise               = HaskellTarget

   let encodingsCli
        | OptLatin1 `elem` cli = [Latin1]
        | otherwise            = []

   template_dir  <- templateDir getDataDir cli

   let (maybe_header, directives, scanner1, maybe_footer) = script

   scheme <- getScheme directives

   -- open the output file; remove it if we encounter an error
   bracketOnError
        (alexOpenFile o_file WriteMode)
        (\h -> do hClose h; removeFile o_file)
        $ \out_h -> do

   let
         wrapper_name = wrapperFile template_dir scheme
         (scanner2, scs, sc_hdr) = encodeStartCodes scanner1
         (scanner_final, actions) = extractActions scheme scanner2
         encodingsScript = [ e | EncodingDirective e <- directives ]

   encoding <- case nub (encodingsCli ++ encodingsScript) of
     []  -> return UTF8 -- default
     [e] -> return e
     _ | null encodingsCli -> dieAlex "conflicting %encoding directives"
       | otherwise -> dieAlex "--latin1 flag conflicts with %encoding directive"

   hPutStr out_h (optsToInject target cli)
   injectCode maybe_header file out_h

   hPutStr out_h (importsToInject target cli)

   -- add the wrapper, if necessary
   when (isJust wrapper_name) $
        do str <- alexReadFile (fromJust wrapper_name)
           hPutStr out_h str

   -- Inject the tab size
   hPutStrLn out_h $ "alex_tab_size :: Int"
   hPutStrLn out_h $ "alex_tab_size = " ++ show (tab_size :: Int)

   let dfa = scanner2dfa encoding scanner_final scs
       min_dfa = minimizeDFA dfa
       nm  = scannerName scanner_final
       usespreds = usesPreds min_dfa


   put_info "\nStart codes\n"
   put_info (show $ scs)
   put_info "\nScanner\n"
   put_info (show $ scanner_final)
   put_info "\nNFA\n"
   put_info (show $ scanner2nfa encoding scanner_final scs)
   put_info "\nDFA"
   put_info (infoDFA 1 nm dfa "")
   put_info "\nMinimized DFA"
   put_info (infoDFA 1 nm min_dfa "")
   hPutStr out_h (outputDFA target 1 nm scheme min_dfa "")

   injectCode maybe_footer file out_h

   hPutStr out_h (sc_hdr "")
   hPutStr out_h (actions "")

   -- add the template
   let template_name = templateFile template_dir target usespreds cli
   tmplt <- alexReadFile template_name
   hPutStr out_h tmplt

   hClose out_h
   finish_info

getScheme :: [Directive] -> IO Scheme
getScheme directives =
  do
    token <- case [ ty | TokenType ty <- directives ] of
      [] -> return Nothing
      [res] -> return (Just res)
      _ -> dieAlex "multiple %token directives"

    action <- case [ ty | ActionType ty <- directives ] of
      [] -> return Nothing
      [res] -> return (Just res)
      _ -> dieAlex "multiple %action directives"

    typeclass <- case [ tyclass | TypeClass tyclass <- directives ] of
      [] -> return Nothing
      [res] -> return (Just res)
      _ -> dieAlex "multiple %typeclass directives"

    case [ f | WrapperDirective f <- directives ] of
        []  ->
          case (typeclass, token, action) of
            (Nothing, Nothing, Nothing) ->
              return Default { defaultTypeInfo = Nothing }
            (Nothing, Nothing, Just actionty) ->
              return Default { defaultTypeInfo = Just (Nothing, actionty) }
            (Just _, Nothing, Just actionty) ->
              return Default { defaultTypeInfo = Just (typeclass, actionty) }
            (_, Just _, _) ->
              dieAlex "%token directive only allowed with a wrapper"
            (Just _, Nothing, Nothing) ->
              dieAlex "%typeclass directive without %token directive"
        [single]
          | single == "gscan" ->
            case (typeclass, token, action) of
              (Nothing, Nothing, Nothing) ->
                return GScan { gscanTypeInfo = Nothing }
              (Nothing, Just tokenty, Nothing) ->
                return GScan { gscanTypeInfo = Just (Nothing, tokenty) }
              (Just _, Just tokenty, Nothing) ->
                return GScan { gscanTypeInfo = Just (typeclass, tokenty) }
              (_, _, Just _) ->
                dieAlex "%action directive not allowed with a wrapper"
              (Just _, Nothing, Nothing) ->
                dieAlex "%typeclass directive without %token directive"
          | single == "basic" || single == "basic-bytestring" ||
            single == "strict-bytestring" ->
            let
              strty = case single of
                "basic" -> Str
                "basic-bytestring" -> Lazy
                "strict-bytestring" -> Strict
                _ -> error "Impossible case"
            in case (typeclass, token, action) of
              (Nothing, Nothing, Nothing) ->
                return Basic { basicStrType = strty,
                               basicTypeInfo = Nothing }
              (Nothing, Just tokenty, Nothing) ->
                return Basic { basicStrType = strty,
                               basicTypeInfo = Just (Nothing, tokenty) }
              (Just _, Just tokenty, Nothing) ->
                return Basic { basicStrType = strty,
                               basicTypeInfo = Just (typeclass, tokenty) }
              (_, _, Just _) ->
                dieAlex "%action directive not allowed with a wrapper"
              (Just _, Nothing, Nothing) ->
                dieAlex "%typeclass directive without %token directive"
          | single == "posn" || single == "posn-bytestring" ->
            let
              isByteString = single == "posn-bytestring"
            in case (typeclass, token, action) of
              (Nothing, Nothing, Nothing) ->
                return Posn { posnByteString = isByteString,
                              posnTypeInfo = Nothing }
              (Nothing, Just tokenty, Nothing) ->
                return Posn { posnByteString = isByteString,
                              posnTypeInfo = Just (Nothing, tokenty) }
              (Just _, Just tokenty, Nothing) ->
                return Posn { posnByteString = isByteString,
                              posnTypeInfo = Just (typeclass, tokenty) }
              (_, _, Just _) ->
                  dieAlex "%action directive not allowed with a wrapper"
              (Just _, Nothing, Nothing) ->
                dieAlex "%typeclass directive without %token directive"
          | single == "monad" || single == "monad-bytestring" ||
            single == "monadUserState" ||
            single == "monadUserState-bytestring" ->
            let
              isByteString = single == "monad-bytestring" ||
                             single == "monadUserState-bytestring"
              userState = single == "monadUserState" ||
                          single == "monadUserState-bytestring"
            in case (typeclass, token, action) of
              (Nothing, Nothing, Nothing) ->
                return Monad { monadByteString = isByteString,
                               monadUserState = userState,
                               monadTypeInfo = Nothing }
              (Nothing, Just tokenty, Nothing) ->
                return Monad { monadByteString = isByteString,
                               monadUserState = userState,
                               monadTypeInfo = Just (Nothing, tokenty) }
              (Just _, Just tokenty, Nothing) ->
                return Monad { monadByteString = isByteString,
                               monadUserState = userState,
                               monadTypeInfo = Just (typeclass, tokenty) }
              (_, _, Just _) ->
                  dieAlex "%action directive not allowed with a wrapper"
              (Just _, Nothing, Nothing) ->
                dieAlex "%typeclass directive without %token directive"
          | otherwise -> dieAlex ("unknown wrapper type " ++ single)
        _many -> dieAlex "multiple %wrapper directives"

-- inject some code, and add a {-# LINE #-} pragma at the top
injectCode :: Maybe (AlexPosn,Code) -> FilePath -> Handle -> IO ()
injectCode Nothing _ _ = return ()
injectCode (Just (AlexPn _ ln _,code)) filename hdl = do
  hPutStrLn hdl ("{-# LINE " ++ show ln ++ " \"" ++ filename ++ "\" #-}")
  hPutStrLn hdl code

optsToInject :: Target -> [CLIFlags] -> String
optsToInject GhcTarget _ = optNoWarnings ++ "{-# LANGUAGE CPP,MagicHash #-}\n"
optsToInject _         _ = optNoWarnings ++ "{-# LANGUAGE CPP #-}\n"

optNoWarnings :: String
optNoWarnings = "{-# OPTIONS_GHC -fno-warn-unused-binds -fno-warn-missing-signatures #-}\n"

importsToInject :: Target -> [CLIFlags] -> String
importsToInject _ cli = always_imports ++ debug_imports ++ glaexts_import
  where
        glaexts_import | OptGhcTarget `elem` cli    = import_glaexts
                       | otherwise                  = ""

        debug_imports  | OptDebugParser `elem` cli = import_debug
                       | otherwise                 = ""

-- CPP is turned on for -fglasogw-exts, so we can use conditional
-- compilation.  We need to #include "config.h" to get hold of
-- WORDS_BIGENDIAN (see GenericTemplate.hs).

always_imports :: String
always_imports = "#if __GLASGOW_HASKELL__ >= 603\n" ++
                 "#include \"ghcconfig.h\"\n" ++
                 "#elif defined(__GLASGOW_HASKELL__)\n" ++
                 "#include \"config.h\"\n" ++
                 "#endif\n" ++
                 "#if __GLASGOW_HASKELL__ >= 503\n" ++
                 "import Data.Array\n" ++
                 "import Data.Array.Base (unsafeAt)\n" ++
                 "#else\n" ++
                 "import Array\n" ++
                 "#endif\n"

import_glaexts :: String
import_glaexts = "#if __GLASGOW_HASKELL__ >= 503\n" ++
                 "import GHC.Exts\n" ++
                 "#else\n" ++
                 "import GlaExts\n" ++
                 "#endif\n"

import_debug :: String
import_debug   = "#if __GLASGOW_HASKELL__ >= 503\n" ++
                 "import System.IO\n" ++
                 "import System.IO.Unsafe\n" ++
                 "import Debug.Trace\n" ++
                 "#else\n" ++
                 "import IO\n" ++
                 "import IOExts\n" ++
                 "#endif\n"

templateDir :: IO FilePath -> [CLIFlags] -> IO FilePath
templateDir def cli
  = case [ d | OptTemplateDir d <- cli ] of
      [] -> def
      ds -> return (last ds)

templateFile :: FilePath -> Target -> UsesPreds -> [CLIFlags] -> FilePath
templateFile dir target usespreds cli
  = dir ++ "/AlexTemplate" ++ maybe_ghc ++ maybe_debug ++ maybe_nopred
  where
        maybe_ghc = case target of
                      GhcTarget -> "-ghc"
                      _         -> ""

        maybe_debug
          | OptDebugParser `elem` cli  = "-debug"
          | otherwise                  = ""

        maybe_nopred =
          case usespreds of
            DoesntUsePreds | not (null maybe_ghc)
                          && null maybe_debug -> "-nopred"
            _                                 -> ""

wrapperFile :: FilePath -> Scheme -> Maybe FilePath
wrapperFile dir scheme =
  do
    f <- wrapperName scheme
    return (dir ++ "/AlexWrapper-" ++ f)

infoStart :: FilePath -> FilePath -> IO (String -> IO (), IO ())
infoStart x_file info_file = do
  bracketOnError
        (alexOpenFile info_file WriteMode)
        (\h -> do hClose h; removeFile info_file)
        (\h -> do infoHeader h x_file
                  return (hPutStr h, hClose h)
        )

infoHeader :: Handle -> FilePath -> IO ()
infoHeader h file = do
--  hSetBuffering h NoBuffering
  hPutStrLn h ("Info file produced by Alex version " ++ projectVersion ++
                ", from " ++ file)
  hPutStrLn h hline
  hPutStr h "\n"

initialParserEnv :: (Map String CharSet, Map String RExp)
initialParserEnv = (initSetEnv, initREEnv)

initSetEnv :: Map String CharSet
initSetEnv = Map.fromList [("white", charSet " \t\n\v\f\r"),
                           ("printable", charSetRange (chr 32) (chr 0x10FFFF)), -- FIXME: Look it up the unicode standard
                           (".", charSetComplement emptyCharSet
                            `charSetMinus` charSetSingleton '\n')]

initREEnv :: Map String RExp
initREEnv = Map.empty

-- -----------------------------------------------------------------------------
-- Command-line flags

data CLIFlags
  = OptDebugParser
  | OptGhcTarget
  | OptOutputFile FilePath
  | OptInfoFile (Maybe FilePath)
  | OptTabSize String
  | OptTemplateDir FilePath
  | OptLatin1
  | DumpHelp
  | DumpVersion
  deriving Eq

argInfo :: [OptDescr CLIFlags]
argInfo  = [
   Option ['o'] ["outfile"] (ReqArg OptOutputFile "FILE")
        "write the output to FILE (default: file.hs)",
   Option ['i'] ["info"] (OptArg OptInfoFile "FILE")
        "put detailed state-machine info in FILE (or file.info)",
   Option ['t'] ["template"] (ReqArg OptTemplateDir "DIR")
        "look in DIR for template files",
   Option ['g'] ["ghc"]    (NoArg OptGhcTarget)
        "use GHC extensions",
   Option ['l'] ["latin1"]    (NoArg OptLatin1)
        "generated lexer will use the Latin-1 encoding instead of UTF-8",
   Option ['s'] ["tab-size"] (ReqArg OptTabSize "NUMBER")
        "set tab size to be used in the generated lexer (default: 8)",
   Option ['d'] ["debug"] (NoArg OptDebugParser)
        "produce a debugging scanner",
   Option ['?'] ["help"] (NoArg DumpHelp)
        "display this help and exit",
   Option ['V','v'] ["version"] (NoArg DumpVersion)  -- ToDo: -v is deprecated!
        "output version information and exit"
  ]

-- -----------------------------------------------------------------------------
-- Utils

getProgramName :: IO String
getProgramName = liftM (`withoutSuffix` ".bin") getProgName
   where str `withoutSuffix` suff
            | suff `isSuffixOf` str = take (length str - length suff) str
            | otherwise             = str

bye :: String -> IO a
bye s = putStr s >> exitWith ExitSuccess

die :: String -> IO a
die s = hPutStr stderr s >> exitWith (ExitFailure 1)

dieAlex :: String -> IO a
dieAlex s = getProgramName >>= \prog -> die (prog ++ ": " ++ s)

#if __GLASGOW_HASKELL__ < 610
bracketOnError
        :: IO a         -- ^ computation to run first (\"acquire resource\")
        -> (a -> IO b)  -- ^ computation to run last (\"release resource\")
        -> (a -> IO c)  -- ^ computation to run in-between
        -> IO c         -- returns the value from the in-between computation
bracketOnError before after thing =
  block (do
    a <- before
    r <- Exception.catch
           (unblock (thing a))
           (\e -> do { after a; throw e })
    return r
 )
#endif