diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..11b706d --- /dev/null +++ b/LICENSE @@ -0,0 +1,9 @@ +The MIT License + +Copyright (c) 2005 Uwe Schmidt, Martin Schmidt, Torben Kuseler + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..cd7dc32 --- /dev/null +++ b/Setup.hs @@ -0,0 +1,3 @@ +#!/usr/bin/env runhaskell +import Distribution.Simple +main = defaultMain diff --git a/examples/RegexXMLSchema/Makefile b/examples/RegexXMLSchema/Makefile new file mode 100644 index 0000000..0204cbc --- /dev/null +++ b/examples/RegexXMLSchema/Makefile @@ -0,0 +1,128 @@ +# $Id: Makefile,v 1.9 2006/11/11 15:36:03 hxml Exp $ + +HXT_HOME = ../../.. +PKGFLAGS = +GHCFLAGS = -Wall -O2 +GHC = ghc $(GHCFLAGS) $(PKGFLAGS) + +DIST = $(HXT_HOME)/dist/examples/arrows +DIST_DIR = $(DIST)/RegexXMLSchema + +CNT = 3 + +ropts = +RTS -s -RTS + +prog = ./REtest +prog2 = ./Lines +prog3 = ./RElines +prog4 = ./Words +prog5 = ./REwords +prog0 = ./Copy + +progs = $(prog) $(prog0) $(prog2) $(prog3) $(prog4) $(prog5) + +all : $(progs) + +$(prog) : $(prog).hs + $(GHC) --make -o $@ $< + +local : + $(GHC) --make -o $(prog) -fglasgow-exts -ignore-package hxt -i../../../src $(prog).hs + +$(prog2) : $(prog) + ln -f $(prog) $(prog2) + +$(prog3) : $(prog) + ln -f $(prog) $(prog3) + +$(prog4) : $(prog) + ln -f $(prog) $(prog4) + +$(prog5) : $(prog) + ln -f $(prog) $(prog5) + +$(prog0) : $(prog) + ln -f $(prog) $(prog0) + +# generate and read documents containing a binary tree +# with 2^i leaf nodes containing the numbers 1 to 2^i +# for i up to at least 22 (8M XML elements) output works fine +# for i up to 19 (1M XML elements) input works without swapping +# with i=20 swapping starts, but the program it still terminates +# the size of the XML file for i=20 is about 36Mb +# these tests have run on a box with 1Gb memory + +tests = 18 + +test : $(prog) + $(MAKE) genfiles tests="$(tests)" + $(MAKE) copy tests="$(tests)" + $(MAKE) lines tests="$(tests)" + $(MAKE) relines tests="$(tests)" + $(MAKE) words tests="$(tests)" + $(MAKE) rewords tests="$(tests)" + +perftest : $(prog) + $(MAKE) test tests="2 3 10 11 12 13 14 15 16 17 18 19 20" + +genfiles : + @for i in $(tests) ; \ + do \ + echo time $(prog) $(ropts) $$i ; \ + time $(prog) $(ropts) $$i ; \ + ls -l tree-*$$i.xml ; \ + echo ; \ + done + +copy : + @for i in $(tests) ; \ + do \ + echo time $(prog0) $(ropts) $$i ; \ + time $(prog0) $(ropts) $$i ; \ + ls -l tree-*$$i.xml.copy ; \ + echo ; \ + done + +lines : + @for i in $(tests) ; \ + do \ + echo time $(prog2) $(ropts) $$i ; \ + time $(prog2) $(ropts) $$i ; \ + ls -l tree-*$$i.xml.lines ; \ + echo ; \ + done + +relines : + @for i in $(tests) ; \ + do \ + echo time $(prog3) $(ropts) $$i ; \ + time $(prog3) $(ropts) $$i ; \ + ls -l tree-*$$i.xml.relines ; \ + echo ; \ + done + +words : + @for i in $(tests) ; \ + do \ + echo time $(prog4) $(ropts) $$i ; \ + time $(prog4) $(ropts) $$i ; \ + ls -l tree-*$$i.xml.words ; \ + echo ; \ + done + +rewords : + @for i in $(tests) ; \ + do \ + echo time $(prog5) $$i ; \ + time $(prog5) $(ropts) $$i ; \ + ls -l tree-*$$i.xml.rewords ; \ + echo ; \ + done + +dist : + [ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR) + cp Makefile REtest.hs $(DIST_DIR) + +clean : + rm -f $(progs) *.o *.hi *.xml *.xml.* + diff --git a/examples/RegexXMLSchema/REtest.hs b/examples/RegexXMLSchema/REtest.hs new file mode 100644 index 0000000..4c054a8 --- /dev/null +++ b/examples/RegexXMLSchema/REtest.hs @@ -0,0 +1,170 @@ +{-# LANGUAGE BangPatterns#-} + +-- ---------------------------------------- + +module Main(main) +where + +import Text.XML.HXT.Core +import Text.Regex.XMLSchema.Generic + +import Data.String.Unicode + ( unicodeToXmlEntity + ) + +import Control.Monad.State.Strict hiding (when) + +import Data.Maybe + +import System.IO -- import the IO and commandline option stuff +import System.Environment + +-- ---------------------------------------- + +main :: IO () +main + = do + p <- getProgName + al <- getArgs + let i = if null al + then 4 + else (read . head $ al)::Int + main' p i + where + main' p' = fromMaybe main1 . lookup (pn p') $ mpt + mpt = [ ("REtest", main1) + , ("Copy", main2 "copy" (:[])) + , ("Lines", main2 "lines" lines) + , ("RElines", main2 "relines" relines) + , ("Words", main2 "words" words) + , ("REwords", main2 "rewords" rewords) + ] + +-- ---------------------------------------- + +-- generate a document containing a binary tree of 2^i leafs (= 2^(i-1) XML elements) + +main1 :: Int -> IO () +main1 i + = runX (genDoc i (fn i)) + >> return () + +-- ---------------------------------------- + +-- read a document containing a binary tree of 2^i leafs + +main2 :: String -> (String -> [String]) -> Int -> IO () +main2 ext lines' i + = do + hPutStrLn stderr "start processing" + h <- openBinaryFile (fn i) ReadMode + c <- hGetContents h + let ls = lines' c + o <- openBinaryFile (fn i ++ "." ++ ext) WriteMode + mapM_ (hPutStrLn o) ls + hClose o + hClose h + hPutStrLn stderr "end processing" + +relines :: String -> [String] +relines = tokenize "[^\n\r]*" + +rewords :: String -> [String] +rewords = tokenize "[^ \t\n\r]+" + +-- ---------------------------------------- + +pn :: String -> String +pn = reverse . takeWhile (/= '/') . reverse + +fn :: Int -> String +fn = ("tree-" ++) . (++ ".xml") . reverse . take 4 . reverse . ((replicate 4 '0') ++ ) . show + +-- ---------------------------------------- + +genDoc :: Int -> String -> IOSArrow b XmlTree +genDoc d out = constA (mkBTree d) + >>> + xpickleVal xpickle + >>> + indentDoc + >>> + putDoc out + +-- ---------------------------------------- + +type Counter a = State Int a + +incr :: Counter Int +incr = do + modify (+1) + get + +-- ---------------------------------------- + +data BTree = Leaf Int + | Fork BTree BTree + deriving (Show) + +instance XmlPickler BTree where + xpickle = xpAlt tag ps + where + tag (Leaf _ ) = 0 + tag (Fork _ _ ) = 1 + ps = [ xpWrap ( Leaf, \ (Leaf i) -> i) + ( xpElem "leaf" $ xpAttr "value" $ xpickle ) + + , xpWrap ( uncurry Fork, \ (Fork l r) -> (l, r)) + ( xpElem "fork" $ xpPair xpickle xpickle ) + ] + +-- ---------------------------------------- + +mkBTree :: Int -> BTree +mkBTree depth = evalState (mkT depth) 0 + +mkT :: Int -> Counter BTree +mkT 0 = do + i <- incr + return (Leaf i) +mkT n = do + l <- mkT (n-1) + r <- mkT (n-1) + return (Fork l r) + +-- ---------------------------------------- + +-- output is done with low level ops to write the +-- document i a lazy manner +-- adding an xml pi and encoding is done "by hand" +-- latin1 decoding is the identity, so please generate the +-- docs with latin1 encoding. Here ist done even with ASCCI +-- every none ASCII char is represented by a char ref (&nnn;) + +putDoc :: String -> IOStateArrow s XmlTree XmlTree +putDoc dst + = addXmlPi + >>> + addXmlPiEncoding isoLatin1 + >>> + xshow getChildren + >>> + arr unicodeToXmlEntity + >>> + arrIO (\ s -> hPutDocument (\h -> hPutStrLn h s)) + >>> + none + where + isStdout = null dst || dst == "-" + + hPutDocument :: (Handle -> IO()) -> IO() + hPutDocument action + | isStdout + = action stdout + | otherwise + = do + handle <- openBinaryFile dst WriteMode + action handle + hClose handle + +-- ---------------------------------------- diff --git a/examples/colorizeProgs/ColorizeSourceCode.hs b/examples/colorizeProgs/ColorizeSourceCode.hs new file mode 100644 index 0000000..371a425 --- /dev/null +++ b/examples/colorizeProgs/ColorizeSourceCode.hs @@ -0,0 +1,711 @@ +-- ------------------------------------------------------------ + +{- | + Module : ColorizeSourceCode + Copyright : Copyright (C) 2009 Uwe Schmidt + License : BSD3 + + Maintainer : Uwe Schmidt (uwe@fh-wedel.de) + Portability: portable + + Colorize Source Code + + Supports Java and Haskell + +-} + +-- ------------------------------------------------------------ + +module Main +where + +import Control.Arrow + +import Data.List + +import System.Environment +import System.IO -- import the IO and commandline option stuff +import System.Console.GetOpt +import System.Exit + +import Text.Regex.XMLSchema.Generic +import Text.XML.HXT.Core +import Text.XML.HXT.Parser.XhtmlEntities + + +-- ------------------------------------------------------------ + +data Process = P { inFilter :: String -> String + , tokenRE :: Regex + , markupRE :: Regex -> Regex + , formatToken :: (String, String) -> String + , formatDoc :: [String] -> String + , outFilter :: String -> String + , input :: Handle + , output :: Handle + , inputFile :: String + } + +defaultProcess :: Process +defaultProcess = P { inFilter = id + , tokenRE = plainRE + , markupRE = id + , formatToken = uncurry (++) + , formatDoc = unlines + , outFilter = id + , input = stdin + , output = stdout + , inputFile = " " + } + +-- ------------------------------------------------------------ + +main :: IO () +main = do + argv <- getArgs + p <- evalArgs (getOpt Permute options argv) + s <- hGetContents (input p) + hPutStr (output p) (process p s) + hFlush (output p) + hClose (output p) + exitWith ExitSuccess + +options :: [OptDescr (String, String)] +options = [ Option "h?" ["help"] (NoArg ("help", "1")) "this message" + , Option "" ["plain"] (NoArg ("plain", "1")) "don't colorize lines" + , Option "" ["haskell"] (NoArg ("haskell", "1")) "colorize haskell" + , Option "" ["java"] (NoArg ("java", "1")) "colorize java" + , Option "" ["cpp"] (NoArg ("cpp", "1")) "colorize C or C++" + , Option "" ["sh"] (NoArg ("sh", "1")) "colorize sh or bash" + , Option "" ["ruby"] (NoArg ("ruby", "1")) "colorize ruby" + , Option "" ["bnf"] (NoArg ("bnf", "1")) "colorize extended BNF grammar rules" + , Option "" ["ppl"] (NoArg ("ppl", "1")) "colorize ppl" + , Option "" ["pplass"] (NoArg ("pplass", "1")) "colorize ppl assembler" + , Option "n" ["number"] (NoArg ("number", "1")) "with line numbers" + , Option "t" ["tabs"] (NoArg ("tabs", "1")) "substitute tabs by blanks" + , Option "m" ["markup"] (NoArg ("markup", "1")) "text contains embedded markup" + , Option "e" ["erefs"] (NoArg ("erefs", "1")) "resolve HTML entity refs before processing" + , Option "o" ["output"] (ReqArg ((,) "output") "FILE") "output file, \"-\" stands for stdout" + , Option "s" ["scan"] (NoArg ("scan", "1")) "just scan input, for testing" + , Option "x" ["html"] (NoArg ("html", "1")) "html output" + , Option "f" ["full"] (NoArg ("full", "1")) "full HTML document with header and css" + ] + +exitErr :: String -> IO a +exitErr msg = do + hPutStrLn stderr msg + usage + exitWith (ExitFailure 1) + +evalArgs :: ([(String, String)], [FilePath], [String]) -> IO Process +evalArgs (opts, files, errs) + | not (null errs) = exitErr ("illegal arguments " ++ show errs) + | null files = evalOpts opts defaultProcess + | not (null fns) = exitErr ("only one input file allowed") + | otherwise = do + inp <- openFile fn ReadMode + evalOpts opts (defaultProcess { input = inp + , inputFile = fn + } + ) + where + (fn:fns) = files + +evalOpts :: [(String, String)] -> Process -> IO Process +evalOpts [] res = return res +evalOpts (o:os) res = do + res' <- evalOpt o res + evalOpts os res' + +evalOpt :: (String, String) -> Process -> IO Process +evalOpt ("help","1") _ = do + usage + exitWith ExitSuccess + +evalOpt ("output", "-") p = return $ p {output = stdout} + +evalOpt ("output", fn) p = do + outp <- openFile fn WriteMode + return $ p {output = outp} + +evalOpt ("haskell","1") p = return $ p { tokenRE = haskellRE } +evalOpt ("java", "1") p = return $ p { tokenRE = javaRE } +evalOpt ("cpp", "1") p = return $ p { tokenRE = cppRE } +evalOpt ("sh", "1") p = return $ p { tokenRE = shRE } +evalOpt ("ruby", "1") p = return $ p { tokenRE = rubyRE } +evalOpt ("bnf", "1") p = return $ p { tokenRE = bnfRE } +evalOpt ("ppl", "1") p = return $ p { tokenRE = pplRE } +evalOpt ("pplass", "1") p = return $ p { tokenRE = pplassRE } +evalOpt ("plain", "1") p = return $ p { tokenRE = plainRE } +evalOpt ("scan", "1") p = return $ p { tokenRE = plainRE + , formatToken = uncurry formatTok + , formatDoc = formatHList } +evalOpt ("number", "1") p = return $ p { formatDoc = numberLines >>> formatDoc p } +evalOpt ("tabs", "1") p = return $ p { inFilter = inFilter p >>> substTabs } +evalOpt ("erefs", "1") p = return $ p { inFilter = resolveHtmlEntities >>> inFilter p } +evalOpt ("markup", "1") p = return $ p { markupRE = addMarkup } +evalOpt ("html", "1") p = return $ p { formatToken = formatHtmlTok + , formatDoc = formatHtmlDoc } +evalOpt ("full", "1") p = return $ p { outFilter = outFilter p >>> fullHtml (inputFile p) } +evalOpt (opt, _v ) p = exitErr ("illegal option " ++ show opt) >> return p + +usage :: IO () +usage = hPutStrLn stderr use + where + use = usageInfo header options + header = "colorizeSourceCode - colorize source code with HTML, version 0.1.1" + +-- ------------------------------------------------------------ + +process :: Process -> String -> String +process p = inFilter p + >>> tokenizeSubexRE (markupRE p (tokenRE p)) + >>> map (formatToken p) + >>> concat + >>> lines + >>> formatDoc p + >>> outFilter p + +addMarkup :: Regex -> Regex +addMarkup = mkElse (parseRegexExt . mkLE $ markupT) + +tokenizeLines :: String -> [(String, String)] +tokenizeLines = map (\ l -> ("",l ++ "\n")) . lines + +numberLines :: [String] -> [String] +numberLines = zipWith addNum [(1::Int)..] + where + addNum i l = "" ++ fmt 4 i ++ "" ++ l + fmt l = sed (const " ") " " + . reverse + . take l + . reverse + . (replicate l ' ' ++) + . show + +substTabs :: String -> String +substTabs = subs 0 + +subs :: Int -> String -> String +subs _ "" = "" +subs i (x:xs) + | x == '\t' = replicate (8 - (i `mod` 8)) ' ' ++ subs 0 xs + | x == '\n' = x : subs 0 xs + | otherwise = x : subs (i+1) xs + +-- ------------------------------------------------------------ + +resolveHtmlEntities :: String -> String +resolveHtmlEntities = sed (replaceEntity . drop 1 . init) "&\\i\\c*;" + where + replaceEntity e = maybe ("&" ++ e ++ ";") ((:[]) . toEnum) + . lookup e $ xhtmlEntities + +-- ------------------------------------------------------------ + +formatHList :: [String] -> String +formatHList = ("[" ++) . (++ "\n]") . intercalate "\n, " + +formatTok :: String -> String -> String +formatTok kw tok = " (" ++ show kw ++ ",\t" ++ show tok ++ "\t)\n" + +formatHtmlDoc :: [String] -> String +formatHtmlDoc = map (("