Blob Blame History Raw
-- -----------------------------------------------------------------------------
--
-- Output.hs, part of Alex
--
-- (c) Simon Marlow 2003
--
-- Code-outputing and table-generation routines
--
-- ----------------------------------------------------------------------------}

module Output (outputDFA) where

import AbsSyn
import CharSet
import Util
import qualified Map
import qualified Data.IntMap as IntMap

import Control.Monad.ST ( ST, runST )
import Data.Array ( Array )
import Data.Array.Base ( unsafeRead )
import Data.Array.ST ( STUArray, newArray, readArray, writeArray, freeze )
import Data.Array.Unboxed ( UArray, elems, (!), array, listArray )
import Data.Maybe (isJust)
import Data.Bits
import Data.Char ( ord, chr )
import Data.List ( maximumBy, sortBy, groupBy, mapAccumR )

-- -----------------------------------------------------------------------------
-- Printing the output

outputDFA :: Target -> Int -> String -> Scheme -> DFA SNum Code -> ShowS
outputDFA target _ _ scheme dfa
  = interleave_shows nl
        [outputBase, outputTable, outputCheck, outputDefault,
         outputAccept, outputActions, outputSigs]
  where
    (base, table, check, deflt, accept) = mkTables dfa

    intty = case target of
      GhcTarget -> "Int#"
      HaskellTarget -> "Int"

    table_size = length table - 1
    n_states   = length base - 1

    base_nm   = "alex_base"
    table_nm  = "alex_table"
    check_nm  = "alex_check"
    deflt_nm  = "alex_deflt"
    accept_nm = "alex_accept"
    actions_nm = "alex_actions"

    outputBase    = do_array hexChars32 base_nm  n_states   base
    outputTable   = do_array hexChars16 table_nm table_size table
    outputCheck   = do_array hexChars16 check_nm table_size check
    outputDefault = do_array hexChars16 deflt_nm n_states   deflt

    formatArray :: String -> Int -> [ShowS] -> ShowS
    formatArray constructFunction size contents =
        str constructFunction
      . str " (0 :: Int, " . shows size . str ")\n"
      . str "  [ "
      . interleave_shows (str "\n  , ") contents
      . str "\n  ]"

    do_array hex_chars nm upper_bound ints = -- trace ("do_array: " ++ nm) $
     case target of
      GhcTarget ->
          str nm . str " :: AlexAddr\n"
        . str nm . str " = AlexA#\n"
        . str "  \"" . str (hex_chars ints) . str "\"#\n"

      _ ->
          str nm . str " :: Array Int Int\n"
        . str nm . str " = "
        . formatArray "listArray" upper_bound (map shows ints)
        . nl

    outputAccept :: ShowS
    outputAccept =
      -- Don't emit explicit type signature as it contains unknown user type,
      -- see: https://github.com/simonmar/alex/issues/98
      -- str accept_nm . str " :: Array Int (AlexAcc " . str userStateTy . str ")\n"
        str accept_nm . str " = "
      . formatArray "listArray" n_states (snd (mapAccumR outputAccs 0 accept))
      . nl

    gscanActionType res =
        str "AlexPosn -> Char -> String -> Int -> ((Int, state) -> "
      . str res . str ") -> (Int, state) -> " . str res

    outputActions = signature . body
      where
        (nacts, acts) = mapAccumR outputActs 0 accept
        actionsArray :: ShowS
        actionsArray = formatArray "array" nacts (concat acts)
        body :: ShowS
        body = str actions_nm . str " = " . actionsArray . nl
        signature :: ShowS
        signature = case scheme of
          Default { defaultTypeInfo = Just (Nothing, actionty) } ->
              str actions_nm . str " :: Array Int (" . str actionty . str ")\n"
          Default { defaultTypeInfo = Just (Just tyclasses, actionty) } ->
              str actions_nm . str " :: (" . str tyclasses
            . str ") => Array Int (" . str actionty . str ")\n"
          GScan { gscanTypeInfo = Just (Nothing, toktype) } ->
              str actions_nm . str " :: Array Int ("
            . gscanActionType toktype . str ")\n"
          GScan { gscanTypeInfo = Just (Just tyclasses, toktype) } ->
              str actions_nm . str " :: (" . str tyclasses
            . str ") => Array Int ("
            . gscanActionType toktype . str ")\n"
          Basic { basicStrType = strty,
                  basicTypeInfo = Just (Nothing, toktype) } ->
              str actions_nm . str " :: Array Int ("
            . str (show strty) . str " -> " . str toktype
            . str ")\n"
          Basic { basicStrType = strty,
                  basicTypeInfo = Just (Just tyclasses, toktype) } ->
              str actions_nm . str " :: (" . str tyclasses
            . str ") => Array Int ("
            . str (show strty) . str " -> " . str toktype
            . str ")\n"
          Posn { posnByteString = isByteString,
                 posnTypeInfo = Just (Nothing, toktype) } ->
              str actions_nm . str " :: Array Int (AlexPosn -> "
            . str (strtype isByteString) . str " -> " . str toktype
            . str ")\n"
          Posn { posnByteString = isByteString,
                 posnTypeInfo = Just (Just tyclasses, toktype) } ->
              str actions_nm . str " :: (" . str tyclasses
            . str ") => Array Int (AlexPosn -> "
            . str (strtype isByteString) . str " -> " . str toktype
            . str ")\n"
          Monad { monadByteString = isByteString,
                  monadTypeInfo = Just (Nothing, toktype) } ->
            let
              actintty = if isByteString then "Int64" else "Int"
            in
              str actions_nm . str " :: Array Int (AlexInput -> "
            . str actintty . str " -> Alex(" . str toktype . str "))\n"
          Monad { monadByteString = isByteString,
                  monadTypeInfo = Just (Just tyclasses, toktype) } ->
            let
              actintty = if isByteString then "Int64" else "Int"
            in
              str actions_nm . str " :: (" . str tyclasses
            . str ") => Array Int (AlexInput -> "
            . str actintty . str " -> Alex(" . str toktype . str "))\n"
          _ ->
              -- No type signature: we don't know what the type of the actions is.
              -- str accept_nm . str " :: Array Int (Accept Code)\n"
              id


    outputSigs
        = case scheme of
          Default { defaultTypeInfo = Just (Nothing, toktype) } ->
              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
            . str " -> " . str "AlexInput -> " . str intty
            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
            . str "alexScanUser :: () -> AlexInput -> Int -> AlexReturn ("
            . str toktype . str ")\n"
            . str "alexScan :: AlexInput -> Int -> AlexReturn ("
            . str toktype . str ")\n"
          Default { defaultTypeInfo = Just (Just tyclasses, toktype) } ->
              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
            . str " -> " . str "AlexInput -> " . str intty
            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
            . str "alexScanUser :: (" . str tyclasses
            . str ") => () -> AlexInput -> Int -> AlexReturn ("
            . str toktype . str ")\n"
            . str "alexScan :: (" . str tyclasses
            . str ") => AlexInput -> Int -> AlexReturn ("
            . str toktype . str ")\n"
          GScan { gscanTypeInfo = Just (Nothing, toktype) } ->
              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
            . str " -> " . str "AlexInput -> " . str intty
            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
            . str "alexScanUser :: () -> AlexInput -> Int -> "
            . str "AlexReturn (" . gscanActionType toktype . str ")\n"
            . str "alexScan :: AlexInput -> Int -> AlexReturn ("
            . gscanActionType toktype . str ")\n"
          GScan { gscanTypeInfo = Just (Just tyclasses, toktype) } ->
              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
            . str " -> " . str "AlexInput -> " . str intty
            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
            . str "alexScanUser :: (" . str tyclasses
            . str ") => () -> AlexInput -> Int -> AlexReturn ("
            . gscanActionType toktype . str ")\n"
            . str "alexScan :: (" . str tyclasses
            . str ") => AlexInput -> Int -> AlexReturn ("
            . gscanActionType toktype . str ")\n"
          Basic { basicStrType = strty,
                  basicTypeInfo = Just (Nothing, toktype) } ->
              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
            . str " -> " . str "AlexInput -> " . str intty
            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
            . str "alexScanUser :: () -> AlexInput -> Int -> AlexReturn ("
            . str (show strty) . str " -> " . str toktype . str ")\n"
            . str "alexScan :: AlexInput -> Int -> AlexReturn ("
            . str (show strty) . str " -> " . str toktype . str ")\n"
          Basic { basicStrType = strty,
                  basicTypeInfo = Just (Just tyclasses, toktype) } ->
              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
            . str " -> " . str "AlexInput -> " . str intty
            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
            . str "alexScanUser :: (" . str tyclasses
            . str ") => () -> AlexInput -> Int -> AlexReturn ("
            . str (show strty) . str " -> " . str toktype . str ")\n"
            . str "alexScan :: (" . str tyclasses
            . str ") => AlexInput -> Int -> AlexReturn ("
            . str (show strty) . str " -> " . str toktype . str ")\n"
          Posn { posnByteString = isByteString,
                 posnTypeInfo = Just (Nothing, toktype) } ->
              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
            . str " -> " . str "AlexInput -> " . str intty
            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
            . str "alexScanUser :: () -> AlexInput -> Int -> AlexReturn (AlexPosn -> "
            . str (strtype isByteString) . str " -> " . str toktype . str ")\n"
            . str "alexScan :: AlexInput -> Int -> AlexReturn (AlexPosn -> "
            . str (strtype isByteString) . str " -> " . str toktype . str ")\n"
          Posn { posnByteString = isByteString,
                 posnTypeInfo = Just (Just tyclasses, toktype) } ->
              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
            . str " -> " . str "AlexInput -> " . str intty
            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
            . str "alexScanUser :: (" . str tyclasses
            . str ") => () -> AlexInput -> Int -> AlexReturn (AlexPosn -> "
            . str (strtype isByteString) . str " -> " . str toktype . str ")\n"
            . str "alexScan :: (" . str tyclasses
            . str ") => AlexInput -> Int -> AlexReturn (AlexPosn -> "
            . str (strtype isByteString) . str " -> " . str toktype . str ")\n"
          Monad { monadTypeInfo = Just (Nothing, toktype),
                  monadByteString = isByteString,
                  monadUserState = userState } ->
            let
              actintty = if isByteString then "Int64" else "Int"
              userStateTy | userState = "AlexUserState"
                          | otherwise = "()"
            in
              str "alex_scan_tkn :: " . str userStateTy
            . str " -> AlexInput -> " . str intty
            . str " -> " . str "AlexInput -> " . str intty
            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
            . str "alexScanUser :: " . str userStateTy
            . str " -> AlexInput -> Int -> AlexReturn ("
            . str "AlexInput -> " . str actintty . str " -> Alex ("
            . str toktype . str "))\n"
            . str "alexScan :: AlexInput -> Int -> AlexReturn ("
            . str "AlexInput -> " . str actintty
            . str " -> Alex (" . str toktype . str "))\n"
            . str "alexMonadScan :: Alex (" . str toktype . str ")\n"
          Monad { monadTypeInfo = Just (Just tyclasses, toktype),
                  monadByteString = isByteString,
                  monadUserState = userState } ->
            let
              actintty = if isByteString then "Int64" else "Int"
              userStateTy | userState = "AlexUserState"
                          | otherwise = "()"
            in
              str "alex_scan_tkn :: " . str userStateTy
            . str " -> AlexInput -> " . str intty
            . str " -> " . str "AlexInput -> " . str intty
            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
            . str "alexScanUser :: (" . str tyclasses . str ") => "
            . str userStateTy . str " -> AlexInput -> Int -> AlexReturn ("
            . str "AlexInput -> " . str actintty
            . str " -> Alex (" . str toktype . str "))\n"
            . str "alexScan :: (" . str tyclasses
            . str ") => AlexInput -> Int -> AlexReturn ("
            . str "AlexInput -> " . str actintty
            . str " -> Alex (" . str toktype . str "))\n"
            . str "alexMonadScan :: (" . str tyclasses
            . str ") => Alex (" . str toktype . str ")\n"
          _ ->
              str ""

    outputAccs :: Int -> [Accept Code] -> (Int, ShowS)
    outputAccs idx [] = (idx, str "AlexAccNone")
    outputAccs idx (Acc _ Nothing Nothing NoRightContext : [])
      = (idx, str "AlexAccSkip")
    outputAccs idx (Acc _ (Just _) Nothing NoRightContext : [])
      = (idx + 1, str "AlexAcc " . str (show idx))
    outputAccs idx (Acc _ Nothing lctx rctx : rest)
      = let (idx', rest') = outputAccs idx rest
        in (idx', str "AlexAccSkipPred" . space
                 . paren (outputPred lctx rctx)
                 . paren rest')
    outputAccs idx (Acc _ (Just _) lctx rctx : rest)
      = let (idx', rest') = outputAccs idx rest
        in (idx' + 1, str "AlexAccPred" . space
                      . str (show idx') . space
                      . paren (outputPred lctx rctx)
                      . paren rest')

    outputActs :: Int -> [Accept Code] -> (Int, [ShowS])
    outputActs idx =
      let
        outputAct _ (Acc _ Nothing _ _) = error "Shouldn't see this"
        outputAct inneridx (Acc _ (Just act) _ _) =
          (inneridx + 1, paren (shows inneridx . str "," . str act))
      in
        mapAccumR outputAct idx . filter (\(Acc _ act _ _) -> isJust act)

    outputPred (Just set) NoRightContext
        = outputLCtx set
    outputPred Nothing rctx
        = outputRCtx rctx
    outputPred (Just set) rctx
        = outputLCtx set
        . str " `alexAndPred` "
        . outputRCtx rctx

    outputLCtx set = str "alexPrevCharMatches" . str (charSetQuote set)

    outputRCtx NoRightContext = id
    outputRCtx (RightContextRExp sn)
        = str "alexRightContext " . shows sn
    outputRCtx (RightContextCode code)
        = str code

    -- outputArr arr
        -- = str "array " . shows (bounds arr) . space
        -- . shows (assocs arr)

-- -----------------------------------------------------------------------------
-- Generating arrays.

-- Here we use the table-compression algorithm described in section
-- 3.9 of the dragon book, which is a common technique used by lexical
-- analyser generators.

-- We want to generate:
--
--    base :: Array SNum Int
--              maps the current state to an offset in the main table
--
--    table :: Array Int SNum
--              maps (base!state + char) to the next state
--
--    check :: Array Int SNum
--              maps (base!state + char) to state if table entry is valid,
--              otherwise we use the default for this state
--
--    default :: Array SNum SNum
--              default production for this state
--
--    accept :: Array SNum [Accept Code]
--              maps state to list of accept codes for this state
--
-- For each state, we decide what will be the default symbol (pick the
-- most common).  We now have a mapping Char -> SNum, with one special
-- state reserved as the default.


mkTables :: DFA SNum Code
         -> (
              [Int],            -- base
              [Int],            -- table
              [Int],            -- check
              [Int],            -- default
              [[Accept Code]]   -- accept
            )
mkTables dfa = -- trace (show (defaults)) $
               -- trace (show (fmap (length . snd)  dfa_no_defaults)) $
  ( elems base_offs,
     take max_off (elems table),
     take max_off (elems check),
     elems defaults,
     accept
  )
 where
        accept   = [ as | State as _ <- elems dfa_arr ]

        state_assocs = Map.toAscList (dfa_states dfa)
        n_states = length state_assocs
        top_state = n_states - 1

        dfa_arr :: Array SNum (State SNum Code)
        dfa_arr = array (0,top_state) state_assocs

        -- fill in all the error productions
        expand_states =
           [ expand (dfa_arr!state) | state <- [0..top_state] ]

        expand (State _ out) =
           [(i, lookup' out i) | i <- [0..0xff]]
           where lookup' out' i = case IntMap.lookup i out' of
                                        Nothing -> -1
                                        Just s  -> s

        defaults :: UArray SNum SNum
        defaults = listArray (0,top_state) (map best_default expand_states)

        -- find the most common destination state in a given state, and
        -- make it the default.
        best_default :: [(Int,SNum)] -> SNum
        best_default prod_list
           | null sorted = -1
           | otherwise   = snd (head (maximumBy lengths eq))
           where sorted  = sortBy compareSnds prod_list
                 compareSnds (_,a) (_,b) = compare a b
                 eq = groupBy (\(_,a) (_,b) -> a == b) sorted
                 lengths  a b = length a `compare` length b

        -- remove all the default productions from the DFA
        dfa_no_defaults =
          [ (s, prods_without_defaults s out)
          | (s, out) <- zip [0..] expand_states
          ]

        prods_without_defaults s out
          = [ (fromIntegral c, dest) | (c,dest) <- out, dest /= defaults!s ]

        (base_offs, table, check, max_off)
           = runST (genTables n_states 255 dfa_no_defaults)


genTables
         :: Int                         -- number of states
         -> Int                         -- maximum token no.
         -> [(SNum,[(Int,SNum)])]       -- entries for the table
         -> ST s (UArray Int Int,       -- base
                  UArray Int Int,       -- table
                  UArray Int Int,       -- check
                  Int                   -- highest offset in table
            )

genTables n_states max_token entries = do

  base       <- newArray (0, n_states-1) 0
  table      <- newArray (0, mAX_TABLE_SIZE) 0
  check      <- newArray (0, mAX_TABLE_SIZE) (-1)
  off_arr    <- newArray (-max_token, mAX_TABLE_SIZE) 0

  max_off    <- genTables' base table check off_arr entries max_token

  base'      <- freeze base
  table'     <- freeze table
  check'     <- freeze check
  return (base', table',check',max_off+1)

  where mAX_TABLE_SIZE = n_states * (max_token + 1)


genTables'
         :: STUArray s Int Int          -- base
         -> STUArray s Int Int          -- table
         -> STUArray s Int Int          -- check
         -> STUArray s Int Int          -- offset array
         -> [(SNum,[(Int,SNum)])]       -- entries for the table
         -> Int                         -- maximum token no.
         -> ST s Int                    -- highest offset in table

genTables' base table check off_arr entries max_token
        = fit_all entries 0 1
  where

         fit_all [] max_off _ = return max_off
         fit_all (s:ss) max_off fst_zero = do
           (off, new_max_off, new_fst_zero) <- fit s max_off fst_zero
           writeArray off_arr off 1
           fit_all ss new_max_off new_fst_zero

         -- fit a vector into the table.  Return the offset of the vector,
         -- the maximum offset used in the table, and the offset of the first
         -- entry in the table (used to speed up the lookups a bit).
         fit (_,[]) max_off fst_zero = return (0,max_off,fst_zero)

         fit (state_no, state@((t,_):_)) max_off fst_zero = do
                 -- start at offset 1 in the table: all the empty states
                 -- (states with just a default reduction) are mapped to
                 -- offset zero.
           off <- findFreeOffset (-t + fst_zero) check off_arr state
           let new_max_off | furthest_right > max_off = furthest_right
                           | otherwise                = max_off
               furthest_right = off + max_token

           --trace ("fit: state " ++ show state_no ++ ", off " ++ show off ++ ", elems " ++ show state) $ do

           writeArray base state_no off
           addState off table check state
           new_fst_zero <- findFstFreeSlot check fst_zero
           return (off, new_max_off, new_fst_zero)


-- Find a valid offset in the table for this state.
findFreeOffset :: Int
               -> STUArray s Int Int
               -> STUArray s Int Int
               -> [(Int, Int)]
               -> ST s Int
findFreeOffset off check off_arr state = do
    -- offset 0 isn't allowed
  if off == 0 then try_next else do

    -- don't use an offset we've used before
  b <- readArray off_arr off
  if b /= 0 then try_next else do

    -- check whether the actions for this state fit in the table
  ok <- fits off state check
  if ok then return off else try_next
 where
        try_next = findFreeOffset (off+1) check off_arr state

-- This is an inner loop, so we use some strictness hacks, and avoid
-- array bounds checks (unsafeRead instead of readArray) to speed
-- things up a bit.
fits :: Int -> [(Int,Int)] -> STUArray s Int Int -> ST s Bool
fits off [] check = off `seq` check `seq` return True -- strictness hacks
fits off ((t,_):rest) check = do
  i <- unsafeRead check (off+t)
  if i /= -1 then return False
             else fits off rest check

addState :: Int -> STUArray s Int Int -> STUArray s Int Int -> [(Int, Int)]
         -> ST s ()
addState _   _     _     [] = return ()
addState off table check ((t,val):state) = do
   writeArray table (off+t) val
   writeArray check (off+t) t
   addState off table check state

findFstFreeSlot :: STUArray s Int Int -> Int -> ST s Int
findFstFreeSlot table n = do
         i <- readArray table n
         if i == -1 then return n
                    else findFstFreeSlot table (n+1)

-----------------------------------------------------------------------------
-- Convert an integer to a 16-bit number encoded in \xNN\xNN format suitable
-- for placing in a string (copied from Happy's ProduceCode.lhs)

hexChars16 :: [Int] -> String
hexChars16 acts = concat (map conv16 acts)
  where
    conv16 i | i > 0x7fff || i < -0x8000
                = error ("Internal error: hexChars16: out of range: " ++ show i)
             | otherwise
                = hexChar16 i

hexChars32 :: [Int] -> String
hexChars32 acts = concat (map conv32 acts)
  where
    conv32 i = hexChar16 (i .&. 0xffff) ++
                hexChar16 ((i `shiftR` 16) .&. 0xffff)

hexChar16 :: Int -> String
hexChar16 i = toHex (i .&. 0xff)
                 ++ toHex ((i `shiftR` 8) .&. 0xff)  -- force little-endian

toHex :: Int -> String
toHex i = ['\\','x', hexDig (i `div` 16), hexDig (i `mod` 16)]

hexDig :: Int -> Char
hexDig i | i <= 9    = chr (i + ord '0')
         | otherwise = chr (i - 10 + ord 'a')