-- ------------------------------------------------------------
{- |
Module : ColorizeSourceCode
Copyright : Copyright (C) 2009 Uwe Schmidt
License : BSD3
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Portability: portable
Colorize Source Code
Supports Java and Haskell
-}
-- ------------------------------------------------------------
module Main
where
import Control.Arrow
import Data.List
import System.Environment
import System.IO -- import the IO and commandline option stuff
import System.Console.GetOpt
import System.Exit
import Text.Regex.XMLSchema.Generic
import Text.XML.HXT.Core
import Text.XML.HXT.Parser.XhtmlEntities
-- ------------------------------------------------------------
data Process = P { inFilter :: String -> String
, tokenRE :: Regex
, markupRE :: Regex -> Regex
, formatToken :: (String, String) -> String
, formatDoc :: [String] -> String
, outFilter :: String -> String
, input :: Handle
, output :: Handle
, inputFile :: String
}
defaultProcess :: Process
defaultProcess = P { inFilter = id
, tokenRE = plainRE
, markupRE = id
, formatToken = uncurry (++)
, formatDoc = unlines
, outFilter = id
, input = stdin
, output = stdout
, inputFile = " "
}
-- ------------------------------------------------------------
main :: IO ()
main = do
argv <- getArgs
p <- evalArgs (getOpt Permute options argv)
s <- hGetContents (input p)
hPutStr (output p) (process p s)
hFlush (output p)
hClose (output p)
exitWith ExitSuccess
options :: [OptDescr (String, String)]
options = [ Option "h?" ["help"] (NoArg ("help", "1")) "this message"
, Option "" ["plain"] (NoArg ("plain", "1")) "don't colorize lines"
, Option "" ["haskell"] (NoArg ("haskell", "1")) "colorize haskell"
, Option "" ["java"] (NoArg ("java", "1")) "colorize java"
, Option "" ["cpp"] (NoArg ("cpp", "1")) "colorize C or C++"
, Option "" ["sh"] (NoArg ("sh", "1")) "colorize sh or bash"
, Option "" ["ruby"] (NoArg ("ruby", "1")) "colorize ruby"
, Option "" ["bnf"] (NoArg ("bnf", "1")) "colorize extended BNF grammar rules"
, Option "" ["ppl"] (NoArg ("ppl", "1")) "colorize ppl"
, Option "" ["pplass"] (NoArg ("pplass", "1")) "colorize ppl assembler"
, Option "n" ["number"] (NoArg ("number", "1")) "with line numbers"
, Option "t" ["tabs"] (NoArg ("tabs", "1")) "substitute tabs by blanks"
, Option "m" ["markup"] (NoArg ("markup", "1")) "text contains embedded markup"
, Option "e" ["erefs"] (NoArg ("erefs", "1")) "resolve HTML entity refs before processing"
, Option "o" ["output"] (ReqArg ((,) "output") "FILE") "output file, \"-\" stands for stdout"
, Option "s" ["scan"] (NoArg ("scan", "1")) "just scan input, for testing"
, Option "x" ["html"] (NoArg ("html", "1")) "html output"
, Option "f" ["full"] (NoArg ("full", "1")) "full HTML document with header and css"
]
exitErr :: String -> IO a
exitErr msg = do
hPutStrLn stderr msg
usage
exitWith (ExitFailure 1)
evalArgs :: ([(String, String)], [FilePath], [String]) -> IO Process
evalArgs (opts, files, errs)
| not (null errs) = exitErr ("illegal arguments " ++ show errs)
| null files = evalOpts opts defaultProcess
| not (null fns) = exitErr ("only one input file allowed")
| otherwise = do
inp <- openFile fn ReadMode
evalOpts opts (defaultProcess { input = inp
, inputFile = fn
}
)
where
(fn:fns) = files
evalOpts :: [(String, String)] -> Process -> IO Process
evalOpts [] res = return res
evalOpts (o:os) res = do
res' <- evalOpt o res
evalOpts os res'
evalOpt :: (String, String) -> Process -> IO Process
evalOpt ("help","1") _ = do
usage
exitWith ExitSuccess
evalOpt ("output", "-") p = return $ p {output = stdout}
evalOpt ("output", fn) p = do
outp <- openFile fn WriteMode
return $ p {output = outp}
evalOpt ("haskell","1") p = return $ p { tokenRE = haskellRE }
evalOpt ("java", "1") p = return $ p { tokenRE = javaRE }
evalOpt ("cpp", "1") p = return $ p { tokenRE = cppRE }
evalOpt ("sh", "1") p = return $ p { tokenRE = shRE }
evalOpt ("ruby", "1") p = return $ p { tokenRE = rubyRE }
evalOpt ("bnf", "1") p = return $ p { tokenRE = bnfRE }
evalOpt ("ppl", "1") p = return $ p { tokenRE = pplRE }
evalOpt ("pplass", "1") p = return $ p { tokenRE = pplassRE }
evalOpt ("plain", "1") p = return $ p { tokenRE = plainRE }
evalOpt ("scan", "1") p = return $ p { tokenRE = plainRE
, formatToken = uncurry formatTok
, formatDoc = formatHList }
evalOpt ("number", "1") p = return $ p { formatDoc = numberLines >>> formatDoc p }
evalOpt ("tabs", "1") p = return $ p { inFilter = inFilter p >>> substTabs }
evalOpt ("erefs", "1") p = return $ p { inFilter = resolveHtmlEntities >>> inFilter p }
evalOpt ("markup", "1") p = return $ p { markupRE = addMarkup }
evalOpt ("html", "1") p = return $ p { formatToken = formatHtmlTok
, formatDoc = formatHtmlDoc }
evalOpt ("full", "1") p = return $ p { outFilter = outFilter p >>> fullHtml (inputFile p) }
evalOpt (opt, _v ) p = exitErr ("illegal option " ++ show opt) >> return p
usage :: IO ()
usage = hPutStrLn stderr use
where
use = usageInfo header options
header = "colorizeSourceCode - colorize source code with HTML, version 0.1.1"
-- ------------------------------------------------------------
process :: Process -> String -> String
process p = inFilter p
>>> tokenizeSubexRE (markupRE p (tokenRE p))
>>> map (formatToken p)
>>> concat
>>> lines
>>> formatDoc p
>>> outFilter p
addMarkup :: Regex -> Regex
addMarkup = mkElse (parseRegexExt . mkLE $ markupT)
tokenizeLines :: String -> [(String, String)]
tokenizeLines = map (\ l -> ("",l ++ "\n")) . lines
numberLines :: [String] -> [String]
numberLines = zipWith addNum [(1::Int)..]
where
addNum i l = "<span class=\"linenr\">" ++ fmt 4 i ++ "</span>" ++ l
fmt l = sed (const " ") " "
. reverse
. take l
. reverse
. (replicate l ' ' ++)
. show
substTabs :: String -> String
substTabs = subs 0
subs :: Int -> String -> String
subs _ "" = ""
subs i (x:xs)
| x == '\t' = replicate (8 - (i `mod` 8)) ' ' ++ subs 0 xs
| x == '\n' = x : subs 0 xs
| otherwise = x : subs (i+1) xs
-- ------------------------------------------------------------
resolveHtmlEntities :: String -> String
resolveHtmlEntities = sed (replaceEntity . drop 1 . init) "&\\i\\c*;"
where
replaceEntity e = maybe ("&" ++ e ++ ";") ((:[]) . toEnum)
. lookup e $ xhtmlEntities
-- ------------------------------------------------------------
formatHList :: [String] -> String
formatHList = ("[" ++) . (++ "\n]") . intercalate "\n, "
formatTok :: String -> String -> String
formatTok kw tok = " (" ++ show kw ++ ",\t" ++ show tok ++ "\t)\n"
formatHtmlDoc :: [String] -> String
formatHtmlDoc = map (("<div class=\"codeline\">" ++) . (++ "</div>") . preserveEmptyLines)
>>> ("<div class=\"codeblock\">" :)
>>> (++ ["</div>"])
>>> unlines
where
preserveEmptyLines "" = " "
preserveEmptyLines l = l
formatHtmlTok :: (String, String) -> String
formatHtmlTok ("markup", t@(x:_))
| x `elem` "<&" = t
formatHtmlTok (m, t)
| otherwise = colorizeTokens m (escapeText >>> sed (const " ") " " $ t)
escapeText :: String -> String
escapeText = foldr cquote ""
where
cquote = fst escapeHtmlRefs
-- escapeText = concat . runLA (xshowEscapeXml mkText)
fullHtml :: String -> String -> String
fullHtml fn s = unlines
[ "<html>"
, "<head>"
, "<title>" ++ fn ++ "</title>"
, "<style>"
, css
, "</style>"
, "</head>"
, "<body>"
, s
, "</body>"
, "</html>"
]
css :: String
css = unlines
[ ".typename { color: #0000dd; }"
, ".varname { color: #000000; }"
, ".opname { color: #770000; }"
, ".operator { color: #770000; /* font-weight:bold; */ }"
, ".keyglyph { color: #3070A0; /* font-weight:bold; */ }"
, ".par { }"
, ""
, ".keyword { color: #3070A0; /* font-weight:bold; */ }"
, ".typekeyword { color: #3070A0; /* font-weight:bold; */ }"
, ".strconst { color: #228B22; }"
, ".charconst { color: #228B22; }"
, ".labelname { color: #FF00FF; font-weight:bold; }"
, ".cppcommand { color: #0000CD; }"
, ".specialword { color: #c80000; }"
, ".classname { color: #8B2323; }"
, ".comment { color: #00008B; }"
, ".bnfnt { color: #0000CD; }"
, ".bnfmeta { color: #ff0000; font-weight:bold; }"
, ".bnfterminal { color: #008800; font-weight:bold; }"
, ".tclproc { color: #FF6000; }"
, ".tclvar { color: #0000CD; }"
, ".tclcomment { color: #c80000; }"
, ""
, ".linenr { color: #909090; padding-right: 2em; }"
, "div.codeline { font-family: monospace; width: 100%; white-space: pre; border-width: 1px; border-style: solid; border-color: transparent; padding-left: 0.3em; }"
, "div.codeline:hover { background-color:#ddddff; color:#c80000; border-width: 1px; border-style: solid; border-color: #c80000; }"
]
-- ------------------------------------------------------------
colorizeTokens :: String -> String -> String
colorizeTokens tok
| tok `elem` [ "comment"
, "keyword"
, "keyglyph"
, "typekeyword"
, "varname", "typename", "labelname", "instancename", "globalname"
, "opname"
, "par"
, "operator"
, "strconst", "charconst"
, "bnfnt", "bnfmeta"
, "cppcommand"
, "specialword"
]
= wrap
| tok == "longcomment" = wrap' "comment" . mlc
| tok == "bnfterminal" = wrap . drop 1 . init
-- | tok == "markupstart" = (("<span class=\"" ) ++) . (++ ("\">")) . drop 4 . init
-- | tok == "markupend" = const "</span>"
| null tok = const ""
| otherwise = id
where
wrap = wrap' tok
wrap' tok' = (("<span class=\"" ++ tok' ++ "\">") ++) . (++ "</span>")
mlc = sed (("</span>" ++) . (++ "<span class=\"comment\">")) "(\\n\r?)"
-- ------------------------------------------------------------
buildRegex :: [(String, String)] -> Regex
buildRegex = foldr1 mkElse . map (uncurry mkBr') . map (second parseRegexExt)
where
mkBr' "" = id
mkBr' l = mkBr l
buildKeywords :: [String] -> String
buildKeywords = intercalate "|"
untilRE :: String -> String
untilRE re = "(\\A{" ++ "\\}\\A" ++ re ++ "\\A)" ++ re
mkLE :: (String, String) -> String
mkLE (l, re) = "({" ++ l ++ "}(" ++ re ++ "))"
ws1RE, ws1RE',ws0RE :: String
ws1RE = "\\s+"
ws1RE' = "[ \t]+"
ws0RE = "[ \t]*"
ws, ws', javacmt1, javacmt, shcmt1, strconst,
markupT,
charconst, number,
par, xxx :: (String, String)
-- markupS = ("markupstart", "<[a-zA-Z0-9]+>" )
-- markupE = ("markupend", "</[a-zA-Z0-9]+>" )
markupT = ("markup", ( "</?" ++ xname ++ "(" ++ xattr ++ ")*" ++ "\\s*>"
++ "|" ++
"&" ++ xname ++ ";"
)
)
where
xname = "[A-Za-z][-_:A-Za-z0-9]*"
xattr = ws1RE ++ xname ++ eq ++ "(" ++ dq ++ "|" ++ sq ++ ")"
eq = "\\s*=\\s*"
dq = "\"[^\"]*\""
sq = "\'[^\']*\'"
ws = ("ws", ws1RE )
ws' = ("ws", ws1RE' )
javacmt1 = ("comment", "//.*" )
javacmt = ("longcomment", "/\\*" ++ untilRE "\\*/" )
shcmt1 = ("comment", "#.*" )
strconst = ("strconst", "\"([^\"\\\\\n\r]|\\\\.)*\"" )
charconst = ("charconst", "\'([^\'\\\\\n\r]|\\\\.)*\'" )
number = ("number", "[0-9]+(\\.[0-9]*([eE][-+]?[0-9]+)?)?" )
par = ("par", "[\\(\\)\\[\\]\\{\\}]" )
xxx = ("xxx", "." )
-- ------------------------------------------------------------
plainRE :: Regex
plainRE = buildRegex
[ ("xxx", "[^<&\n]+" )
, ("xxx", "[<&\n]" )
]
-- ------------------------------------------------------------
haskellRE :: Regex
haskellRE = buildRegex
[ ws
, ("comment", "(-)- .*" )
, ("longcomment", "\\{" ++ untilRE "-\\}" )
, ("keyword", buildKeywords
[ "case", "class"
, "data", "default", "deriving", "do"
, "else"
, "forall"
, "if", "import", "in"
, "infix", "infixl", "infixr"
, "instance"
, "let"
, "module"
, "newtype"
, "of"
, "qualified"
, "then", "type"
, "where"
, "_"
, "as", "ccall", "foreign", "hiding", "proc", "safe", "unsafe"
]
)
, ("keyglyph", buildKeywords
["\\.\\.","::","=","\\\\","\\|","<-","->","-<","@","~","=>","!",",",";"]
)
, ("varname" , varname )
, ("typename", "[A-Z_][a-zA-Z0-9_]*[']*" )
, ("opname", "`" ++ varname ++ "`" )
, strconst
, charconst
, number
, par
, ("operator", "[-!#$%&\\*\\+./<=>\\?@\\\\^\\|~]+")
, xxx
]
where
varname = "[a-z_][a-zA-Z0-9_]*[']*"
-- ------------------------------------------------------------
javaRE :: Regex
javaRE = buildRegex
[ ws
, javacmt1
, javacmt
, ("keyword", buildKeywords
[ "abstract", "assert"
, "break"
, "case", "catch", "class", "continue"
, "default", "do"
, "else", "extends"
, "final", "finally", "for"
, "if", "implements", "import", "instanceof", "interface"
, "native", "new"
, "package", "private", "protected", "public"
, "return"
, "static", "super", "switch", "synchronized"
, "this", "throw", "throws", "transient", "try"
, "volatile"
, "while"
] )
, ("typekeyword",
buildKeywords
[ "boolean", "byte"
, "char"
, "double"
, "false", "float"
, "int"
, "long"
, "null"
, "short"
, "true"
, "void"
] )
, ("labelname", "(" ++ varname ++ "{\\}default):" )
, ("", ( mkLE ("keyword", "break|continue")
++ mkLE ws ++
mkLE ("labelname", varname)
)
)
, ("varname", varname )
, ("typename", "[A-Z][a-zA-Z0-9_]*" )
, strconst
, charconst
, number
, par
, ("delimiter", "[.,;]" )
, ("operator", "[-+!%&/=\\*\\?~|<>:]+" )
, xxx
]
where
varname = "[a-z][a-zA-Z0-9_]*"
-- ------------------------------------------------------------
bnfRE :: Regex
bnfRE = buildRegex
[ ws
, ("bnfnt" , "[A-Z][a-zA-Z0-9_]*" )
, ("bnfterminal", "\"([^\"\\\\\n\r]|\\\\.)*\"" )
, ("bnfmeta", buildKeywords
[ "\\["
, "\\]"
, "::="
, "\\|"
, "\\{"
, "\\}"
]
)
, xxx
]
-- ------------------------------------------------------------
cppRE :: Regex
cppRE = buildRegex
[ ws
, javacmt1
, javacmt
, ("keyword", buildKeywords
[ "asm" , "auto"
, "break"
, "case" , "catch" , "class" , "const" , "continue"
, "default" , "delete" , "do"
, "else" , "extern"
, "for" , "friend"
, "goto"
, "if" , "inline"
, "new"
, "operator" , "overload"
, "private" , "protected" , "public"
, "register" , "return"
, "sizeof" , "static" , "switch"
, "template" , "this" , "typedef" , "throw" , "try"
, "virtual" , "volatile"
, "while"
]
)
, ("typekeyword",
buildKeywords
[ "char"
, "double"
, "enum"
, "float"
, "int"
, "long"
, "short"
, "signed"
, "struct"
, "union"
, "unsigned"
, "void"
]
)
, ("cppcommand", ( "#" ++ ws0RE ++ "("
++
buildKeywords
[ "define"
, "else"
, "endif"
, "if"
, "ifdef"
, "ifndef"
, "(include[ \t].*)"
, "undef"
]
++ ")"
)
)
, ("specialword", buildKeywords
[ "assert"
, "exit"
, "free"
, "main"
, "malloc"
]
)
, ("varname", varname )
, ("typename", "[A-Z][a-zA-Z0-9_]*" )
, strconst
, charconst
, number
, par
, ("delimiter", "[.,;]" )
, ("operator", "[-+!%&/=\\*\\?~|<>:]+" )
, xxx
]
where
varname = "[a-z][a-zA-Z0-9_]*"
-- ------------------------------------------------------------
shRE :: Regex
shRE = buildRegex
[ ws
, shcmt1
, ("keyword", buildKeywords
[ "alias"
, "break" , "bg"
, "case" , "cd" , "continue"
, "declare" , "do" , "done"
, "echo" , "elif" , "else" , "env" , "esac" , "eval" , "exec" , "exit" , "export"
, "false" , "fg" , "fi" , "for" , "function"
, "if" , "in"
, "jobs"
, "kill"
, "local"
, "pwd"
, "return"
, "set" , "shift"
, "test" , "then" , "trap" , "true"
, "unalias" , "unset"
, "while" , "wait"
]
)
, ("varname", "[A-Za-z_][a-zA-Z0-9_]*" )
, ("operator", "[-+!%&=\\\\\\*\\?~|<>:@$]+" )
, ("operator", "[\\(\\)\\[\\]\\{\\}]+" )
, strconst
, charconst
, xxx
]
-- ------------------------------------------------------------
rubyRE :: Regex
rubyRE = buildRegex
[ ws
, rubycmt
, ("keyword", buildKeywords
[ "begin" , "break"
, "catch" , "case" , "class"
, "def" , "do"
, "else" , "elif" , "end" , "ensure"
, "false" , "for"
, "if" , "in" , "include" , "initialize"
, "loop"
, "module"
, "new" , "nil"
, "raise" , "require" , "rescue"
, "self"
, "then" , "true" , "type"
, "until"
, "when" , "while"
, "yield"
]
)
, ("typename", "[A-Z][A-Za-z0-9]*" )
, ("varname", "[A-Za-z_][a-zA-Z0-9_]*(!|\\?)?" )
, ("instancename", "(@{1,2}|$)[A-Za-z_][a-zA-Z0-9_]*" )
, ("strconst", "%[qQx]\\{.*\\}" )
, ("strconst", "#\\{.*\\}" )
, ("strconst", ":[a-z][A-Za-z0-9]*" )
, strconst
, charconst
, regex
, xxx
]
where
rubycmt = ("comment", "#(.{\\}\\{)*" )
regex = ("strconst", "/([^/\\\\\n\r]|\\\\.)*/" )
-- ------------------------------------------------------------
pplRE :: Regex
pplRE = buildRegex
[ ws
, ("comment", "(-)- .*" )
, ("keyword", buildKeywords
[ "and"
, "begin"
, "div" , "do"
, "else" , "elseif" , "endif" , "endwhile" , "end"
, "function"
, "if"
, "max" , "min" , "mod"
, "not"
, "of" , "or"
, "procedure"
, "repeat"
, "return"
, "then"
, "until"
, "var"
, "while"
, "xor"
]
)
, ("typekeyword", buildKeywords
[ "boolean"
, "false" , "float"
, "int"
, "list"
, "picture"
, "string"
, "true"
]
)
, ("varname", "[A-Za-z_][a-zA-Z0-9_]*" )
, strconst
, number
, xxx
]
-- ------------------------------------------------------------
pplassRE :: Regex
pplassRE = buildRegex
[ ws
, ("comment", "(-)- .*" )
, ("keyword", buildKeywords
[ "loadi" , "loadf"
, "loads" , "emptyl"
, "undef" , "load"
]
)
, ("typename", buildKeywords
[ "store"
, "pop"
]
)
, ("typekeyword", buildKeywords
[ "jmp"
, "brfalse"
, "brtrue"
, "pushj"
, "popj"
, "svc"
]
)
, ("labelname", "(l[0-9]+:?)|([se]?_[A-Za-z0-9]*:?)" )
, ("varname", "[A-Za-z_][a-zA-Z0-9_]*" )
, strconst
, xxx
]
-- ------------------------------------------------------------