|
Packit |
247f4e |
{-# LANGUAGE CPP #-}
|
|
Packit |
247f4e |
{-# OPTIONS_GHC -fno-warn-orphans #-} -- test file, so OK
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
module TagSoup.Benchmark where
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
import Text.HTML.TagSoup
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
import Control.DeepSeq
|
|
Packit |
247f4e |
import Control.Monad
|
|
Packit |
247f4e |
import Data.List
|
|
Packit |
247f4e |
import Data.Maybe
|
|
Packit |
247f4e |
import System.IO.Unsafe(unsafeInterleaveIO)
|
|
Packit |
247f4e |
import qualified Data.ByteString.Char8 as BS
|
|
Packit |
247f4e |
import qualified Data.ByteString.Lazy.Char8 as LBS
|
|
Packit |
247f4e |
import Data.Time.Clock.POSIX(getPOSIXTime)
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
conf = 0.95
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
timefile :: FilePath -> IO ()
|
|
Packit |
247f4e |
timefile file = do
|
|
Packit |
247f4e |
-- use LBS to be most representative of real life
|
|
Packit |
247f4e |
lbs <- LBS.readFile file
|
|
Packit |
247f4e |
let str = LBS.unpack lbs
|
|
Packit |
247f4e |
bs = BS.concat $ LBS.toChunks lbs
|
|
Packit |
247f4e |
() <- LBS.length lbs `seq` length str `seq` BS.length bs `seq` return ()
|
|
Packit |
247f4e |
benchWith (const str, const bs, const lbs) $ benchStatic (toInteger $ LBS.length lbs)
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
sample :: String
|
|
Packit |
247f4e |
sample = "<this is a test with='attributes' and other=\"things"tested\" /><neil> is </here>" ++
|
|
Packit |
247f4e |
" and some just random & test ><<foo></bar><bar><bob href=no>"
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
nsample = genericLength sample :: Integer
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
time :: IO ()
|
|
Packit |
247f4e |
time = benchWith (str,bs,lbs) benchVariable
|
|
Packit |
247f4e |
where
|
|
Packit |
247f4e |
str = \i -> concat $ genericReplicate i sample
|
|
Packit |
247f4e |
bs = let s = BS.pack sample in \i -> BS.concat (genericReplicate i s)
|
|
Packit |
247f4e |
lbs = let s = LBS.pack sample in \i -> LBS.concat (genericReplicate i s)
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
benchWith :: (Integer -> String, Integer -> BS.ByteString, Integer -> LBS.ByteString)
|
|
Packit |
247f4e |
-> ((Integer -> ()) -> IO [String]) -> IO ()
|
|
Packit |
247f4e |
benchWith (str,bs,lbs) bench = do
|
|
Packit |
247f4e |
putStrLn "Timing parseTags in characters/second"
|
|
Packit |
247f4e |
let header = map (:[]) ["(" ++ show (round $ conf * 100) ++ "% confidence)","String","BS","LBS"]
|
|
Packit |
247f4e |
rows <- mapM row $ replicateM 3 [False,True]
|
|
Packit |
247f4e |
mapM_ (putStrLn . strict . grid) $ delay2 $ header : rows
|
|
Packit |
247f4e |
where
|
|
Packit |
247f4e |
row [a,b,c] = do
|
|
Packit |
247f4e |
let header = intercalate "," [g a "pos", g b "warn", g c "merge"]
|
|
Packit |
247f4e |
g b x = (if b then ' ' else '!') : x
|
|
Packit |
247f4e |
f x = bench $ \i -> rnf $ parseTagsOptions parseOptions{optTagPosition=a,optTagWarning=b,optTagTextMerge=c} $ x i
|
|
Packit |
247f4e |
c1 <- f str
|
|
Packit |
247f4e |
c2 <- f bs
|
|
Packit |
247f4e |
c3 <- f lbs
|
|
Packit |
247f4e |
return [[header],c1,c2,c3]
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
strict = reverse . reverse
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
---------------------------------------------------------------------
|
|
Packit |
247f4e |
-- BENCHMARK ON THE SAMPLE INPUT
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
disp xs = showUnit (floor xbar) ++ " (~" ++ rng ++ "%)"
|
|
Packit |
247f4e |
where xbar = mean xs
|
|
Packit |
247f4e |
rng = if length xs <= 1 then "?" else show (ceiling $ (range conf xs) * 100 / xbar)
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
cons x = fmap (x:)
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
aimTime = 0.3 :: Double -- seconds to aim for
|
|
Packit |
247f4e |
minTime = 0.2 :: Double -- below this a test is considered invalid
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
-- given a number of times to repeat sample, return a list of what
|
|
Packit |
247f4e |
-- to display
|
|
Packit |
247f4e |
benchVariable :: (Integer -> ()) -> IO [String]
|
|
Packit |
247f4e |
benchVariable op = cons "?" $ f 10 []
|
|
Packit |
247f4e |
where
|
|
Packit |
247f4e |
f i seen | length seen > 9 = cons (" " ++ disp seen) $ return []
|
|
Packit |
247f4e |
| otherwise = unsafeInterleaveIO $ do
|
|
Packit |
247f4e |
now <- timer $ op i
|
|
Packit |
247f4e |
let cps = if now == 0 then 0 else fromInteger (i * nsample) / now
|
|
Packit |
247f4e |
if now < minTime || (null seen && now < aimTime) then do
|
|
Packit |
247f4e |
let factor = min 7 $ max 2 $ floor $ aimTime / now
|
|
Packit |
247f4e |
cons ("? " ++ disp [cps]) $ f (i * factor) []
|
|
Packit |
247f4e |
else
|
|
Packit |
247f4e |
cons (show (9 - length seen) ++ " " ++ disp (cps:seen)) $ f i (cps:seen)
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
benchStatic :: Integer -> (Integer -> ()) -> IO [String]
|
|
Packit |
247f4e |
benchStatic nsample op = cons "?" $ f []
|
|
Packit |
247f4e |
where
|
|
Packit |
247f4e |
f seen | length seen > 9 = cons (" " ++ disp seen) $ return []
|
|
Packit |
247f4e |
| otherwise = unsafeInterleaveIO $ do
|
|
Packit |
247f4e |
now <- timer $ op $ genericLength seen
|
|
Packit |
247f4e |
let cps = if now == 0 then 0 else fromInteger nsample / now
|
|
Packit |
247f4e |
cons (show (9 - length seen) ++ " " ++ disp (cps:seen)) $ f (cps:seen)
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
---------------------------------------------------------------------
|
|
Packit |
247f4e |
-- UTILITY FUNCTIONS
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
-- | Given a number, show it using a unit and decimal place
|
|
Packit |
247f4e |
showUnit :: Integer -> String
|
|
Packit |
247f4e |
showUnit x = num ++ unit
|
|
Packit |
247f4e |
where
|
|
Packit |
247f4e |
units = " KMGTPEZY"
|
|
Packit |
247f4e |
(use,skip) = splitAt 3 $ show x
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
unit = [units !! ((length skip + 2) `div` 3)]
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
dot = ((length skip - 1) `mod` 3) + 1
|
|
Packit |
247f4e |
num = a ++ ['.' | b /= ""] ++ b
|
|
Packit |
247f4e |
where (a,b) = splitAt dot use
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
-- copied from the criterion package
|
|
Packit |
247f4e |
getTime :: IO Double
|
|
Packit |
247f4e |
getTime = (fromRational . toRational) `fmap` getPOSIXTime
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
timer :: () -> IO Double
|
|
Packit |
247f4e |
timer x = do
|
|
Packit |
247f4e |
start <- getTime
|
|
Packit |
247f4e |
() <- return x
|
|
Packit |
247f4e |
end <- getTime
|
|
Packit |
247f4e |
return $ end - start
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
-- display a grid
|
|
Packit |
247f4e |
grid :: [[String]] -> String
|
|
Packit |
247f4e |
grid xs = unlines $ map (concat . zipWith f cols) xs
|
|
Packit |
247f4e |
where cols = map (maximum . map length) $ transpose xs
|
|
Packit |
247f4e |
f n x = x ++ replicate (n+1 - length x) ' '
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
-- display a series of grids over time
|
|
Packit |
247f4e |
-- when a grid gets to [] keep its value at that
|
|
Packit |
247f4e |
-- when all grids get to [] return []
|
|
Packit |
247f4e |
delay2 :: [[[String]]] -> [[[String]]]
|
|
Packit |
247f4e |
delay2 xs = map (map head) xs : (if all (null . tail) (concat xs) then [] else delay2 $ map (map tl) xs)
|
|
Packit |
247f4e |
where tl (x:xs) = if null xs then x:xs else xs
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
---------------------------------------------------------------------
|
|
Packit |
247f4e |
-- INSTANCES
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
instance NFData a => NFData (Tag a) where
|
|
Packit |
247f4e |
rnf (TagOpen x y) = rnf x `seq` rnf y
|
|
Packit |
247f4e |
rnf (TagClose x) = rnf x
|
|
Packit |
247f4e |
rnf (TagText x) = rnf x
|
|
Packit |
247f4e |
rnf (TagComment x) = rnf x
|
|
Packit |
247f4e |
rnf (TagWarning x) = rnf x
|
|
Packit |
247f4e |
rnf (TagPosition x y) = () -- both are already ! bound
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
#ifndef BYTESTRING_HAS_NFDATA
|
|
Packit |
247f4e |
# ifdef MIN_VERSION_bytestring
|
|
Packit |
247f4e |
# define BYTESTRING_HAS_NFDATA (MIN_VERSION_bytestring(0,10,0))
|
|
Packit |
247f4e |
# else
|
|
Packit |
247f4e |
# define BYTESTRING_HAS_NFDATA (__GLASGOW_HASKELL__ >= 706)
|
|
Packit |
247f4e |
# endif
|
|
Packit |
247f4e |
#endif
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
#if !BYTESTRING_HAS_NFDATA
|
|
Packit |
247f4e |
instance NFData LBS.ByteString where
|
|
Packit |
247f4e |
rnf x = LBS.length x `seq` ()
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
instance NFData BS.ByteString where
|
|
Packit |
247f4e |
rnf x = BS.length x `seq` ()
|
|
Packit |
247f4e |
#endif
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
---------------------------------------------------------------------
|
|
Packit |
247f4e |
-- STATISTICS
|
|
Packit |
247f4e |
-- Provided by Emily Mitchell
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
confNs = let (*) = (,) in
|
|
Packit |
247f4e |
[0.95 * 1.96
|
|
Packit |
247f4e |
,0.90 * 1.644]
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
size :: [Double] -> Double
|
|
Packit |
247f4e |
size = genericLength
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
mean :: [Double] -> Double
|
|
Packit |
247f4e |
mean xs = sum xs / size xs
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
stddev :: [Double] -> Double
|
|
Packit |
247f4e |
stddev xs = sqrt $ sum [sqr (x - xbar) | x <- xs] / size xs
|
|
Packit |
247f4e |
where xbar = mean xs
|
|
Packit |
247f4e |
sqr x = x * x
|
|
Packit |
247f4e |
|
|
Packit |
247f4e |
-- given a sample, and a required confidence
|
|
Packit |
247f4e |
-- of the mean (i.e. 2.5% = 0.025)
|
|
Packit |
247f4e |
range ::Double -> [Double] -> Double
|
|
Packit |
247f4e |
range conf xs = conf2 * stddev xs / sqrt (size xs)
|
|
Packit |
247f4e |
where conf2 = fromMaybe (error $ "Unknown confidence interval: " ++ show conf) $ lookup conf confNs
|