Blame test/TagSoup/Benchmark.hs

Packit 247f4e
{-# LANGUAGE CPP #-}
Packit 247f4e
{-# OPTIONS_GHC -fno-warn-orphans #-} -- test file, so OK
Packit 247f4e
Packit 247f4e
module TagSoup.Benchmark where
Packit 247f4e
Packit 247f4e
import Text.HTML.TagSoup
Packit 247f4e
Packit 247f4e
import Control.DeepSeq
Packit 247f4e
import Control.Monad
Packit 247f4e
import Data.List
Packit 247f4e
import Data.Maybe
Packit 247f4e
import System.IO.Unsafe(unsafeInterleaveIO)
Packit 247f4e
import qualified Data.ByteString.Char8 as BS
Packit 247f4e
import qualified Data.ByteString.Lazy.Char8 as LBS
Packit 247f4e
import Data.Time.Clock.POSIX(getPOSIXTime)
Packit 247f4e
Packit 247f4e
conf = 0.95
Packit 247f4e
Packit 247f4e
Packit 247f4e
timefile :: FilePath -> IO ()
Packit 247f4e
timefile file = do
Packit 247f4e
    -- use LBS to be most representative of real life
Packit 247f4e
    lbs <- LBS.readFile file
Packit 247f4e
    let str = LBS.unpack lbs
Packit 247f4e
        bs = BS.concat $ LBS.toChunks lbs
Packit 247f4e
    () <- LBS.length lbs `seq` length str `seq` BS.length bs `seq` return ()
Packit 247f4e
    benchWith (const str, const bs, const lbs) $ benchStatic (toInteger $ LBS.length lbs)
Packit 247f4e
Packit 247f4e
Packit 247f4e
sample :: String
Packit 247f4e
sample = "<this is a test with='attributes' and other=\"things"tested\" /><neil> is </here>" ++
Packit 247f4e
         " and some just random & test ><<foo></bar><bar><bob href=no>"
Packit 247f4e
Packit 247f4e
nsample = genericLength sample :: Integer
Packit 247f4e
Packit 247f4e
time :: IO ()
Packit 247f4e
time = benchWith (str,bs,lbs) benchVariable
Packit 247f4e
    where
Packit 247f4e
        str = \i -> concat $ genericReplicate i sample
Packit 247f4e
        bs  = let s = BS.pack sample in \i -> BS.concat (genericReplicate i s)
Packit 247f4e
        lbs = let s = LBS.pack sample in \i -> LBS.concat (genericReplicate i s)
Packit 247f4e
Packit 247f4e
Packit 247f4e
Packit 247f4e
benchWith :: (Integer -> String, Integer -> BS.ByteString, Integer -> LBS.ByteString)
Packit 247f4e
          -> ((Integer -> ()) -> IO [String]) -> IO ()
Packit 247f4e
benchWith (str,bs,lbs) bench = do
Packit 247f4e
        putStrLn "Timing parseTags in characters/second"
Packit 247f4e
        let header = map (:[]) ["(" ++ show (round $ conf * 100) ++ "% confidence)","String","BS","LBS"]
Packit 247f4e
        rows <- mapM row $ replicateM 3 [False,True]
Packit 247f4e
        mapM_ (putStrLn . strict . grid) $ delay2 $ header : rows
Packit 247f4e
    where
Packit 247f4e
        row [a,b,c] = do
Packit 247f4e
            let header = intercalate "," [g a "pos", g b "warn", g c "merge"]
Packit 247f4e
                g b x = (if b then ' ' else '!') : x
Packit 247f4e
                f x = bench $ \i -> rnf $ parseTagsOptions parseOptions{optTagPosition=a,optTagWarning=b,optTagTextMerge=c} $ x i
Packit 247f4e
            c1 <- f str
Packit 247f4e
            c2 <- f bs
Packit 247f4e
            c3 <- f lbs
Packit 247f4e
            return [[header],c1,c2,c3]
Packit 247f4e
Packit 247f4e
        strict = reverse . reverse
Packit 247f4e
Packit 247f4e
Packit 247f4e
---------------------------------------------------------------------
Packit 247f4e
-- BENCHMARK ON THE SAMPLE INPUT
Packit 247f4e
Packit 247f4e
disp xs = showUnit (floor xbar) ++ " (~" ++ rng ++ "%)"
Packit 247f4e
    where xbar = mean xs
Packit 247f4e
          rng = if length xs <= 1 then "?" else show (ceiling $ (range conf xs) * 100 / xbar) 
Packit 247f4e
Packit 247f4e
cons x = fmap (x:)
Packit 247f4e
Packit 247f4e
Packit 247f4e
aimTime = 0.3 :: Double -- seconds to aim for
Packit 247f4e
minTime = 0.2 :: Double -- below this a test is considered invalid
Packit 247f4e
Packit 247f4e
Packit 247f4e
-- given a number of times to repeat sample, return a list of what
Packit 247f4e
-- to display
Packit 247f4e
benchVariable :: (Integer -> ()) -> IO [String]
Packit 247f4e
benchVariable op = cons "?" $ f 10 []
Packit 247f4e
    where
Packit 247f4e
        f i seen | length seen > 9 = cons ("  " ++ disp seen) $ return []
Packit 247f4e
                 | otherwise = unsafeInterleaveIO $ do
Packit 247f4e
            now <- timer $ op i
Packit 247f4e
            let cps = if now == 0 then 0 else fromInteger (i * nsample) / now
Packit 247f4e
            if now < minTime || (null seen && now < aimTime) then do
Packit 247f4e
                let factor = min 7 $ max 2 $ floor $ aimTime / now
Packit 247f4e
                cons ("? " ++ disp [cps]) $ f (i * factor) []
Packit 247f4e
             else
Packit 247f4e
                cons (show (9 - length seen) ++ " " ++ disp (cps:seen)) $ f i (cps:seen)
Packit 247f4e
Packit 247f4e
Packit 247f4e
Packit 247f4e
benchStatic :: Integer -> (Integer -> ()) -> IO [String]
Packit 247f4e
benchStatic nsample op = cons "?" $ f []
Packit 247f4e
    where
Packit 247f4e
        f seen | length seen > 9 = cons ("  " ++ disp seen) $ return []
Packit 247f4e
               | otherwise = unsafeInterleaveIO $ do
Packit 247f4e
            now <- timer $ op $ genericLength seen
Packit 247f4e
            let cps = if now == 0 then 0 else fromInteger nsample / now
Packit 247f4e
            cons (show (9 - length seen) ++ " " ++ disp (cps:seen)) $ f (cps:seen)
Packit 247f4e
Packit 247f4e
Packit 247f4e
---------------------------------------------------------------------
Packit 247f4e
-- UTILITY FUNCTIONS
Packit 247f4e
Packit 247f4e
-- | Given a number, show it using a unit and decimal place
Packit 247f4e
showUnit :: Integer -> String
Packit 247f4e
showUnit x = num ++ unit
Packit 247f4e
    where
Packit 247f4e
        units = " KMGTPEZY"
Packit 247f4e
        (use,skip) = splitAt 3 $ show x
Packit 247f4e
Packit 247f4e
        unit = [units !! ((length skip + 2) `div` 3)]
Packit 247f4e
Packit 247f4e
        dot = ((length skip - 1) `mod` 3) + 1
Packit 247f4e
        num = a ++ ['.' | b /= ""] ++ b
Packit 247f4e
            where (a,b) = splitAt dot use
Packit 247f4e
Packit 247f4e
Packit 247f4e
-- copied from the criterion package
Packit 247f4e
getTime :: IO Double
Packit 247f4e
getTime = (fromRational . toRational) `fmap` getPOSIXTime
Packit 247f4e
Packit 247f4e
timer :: () -> IO Double
Packit 247f4e
timer x = do
Packit 247f4e
    start <- getTime
Packit 247f4e
    () <- return x
Packit 247f4e
    end <- getTime
Packit 247f4e
    return $ end - start
Packit 247f4e
Packit 247f4e
Packit 247f4e
-- display a grid
Packit 247f4e
grid :: [[String]] -> String
Packit 247f4e
grid xs = unlines $ map (concat . zipWith f cols) xs
Packit 247f4e
    where cols = map (maximum . map length) $ transpose xs
Packit 247f4e
          f n x = x ++ replicate (n+1 - length x) ' '
Packit 247f4e
Packit 247f4e
Packit 247f4e
-- display a series of grids over time
Packit 247f4e
-- when a grid gets to [] keep its value at that
Packit 247f4e
-- when all grids get to [] return []
Packit 247f4e
delay2 :: [[[String]]] -> [[[String]]]
Packit 247f4e
delay2 xs = map (map head) xs : (if all (null . tail) (concat xs) then [] else delay2 $ map (map tl) xs)
Packit 247f4e
    where tl (x:xs) = if null xs then x:xs else xs
Packit 247f4e
Packit 247f4e
Packit 247f4e
---------------------------------------------------------------------
Packit 247f4e
-- INSTANCES
Packit 247f4e
Packit 247f4e
instance NFData a => NFData (Tag a) where
Packit 247f4e
    rnf (TagOpen x y) = rnf x `seq` rnf y
Packit 247f4e
    rnf (TagClose x) = rnf x
Packit 247f4e
    rnf (TagText x) = rnf x
Packit 247f4e
    rnf (TagComment x) = rnf x
Packit 247f4e
    rnf (TagWarning x) = rnf x
Packit 247f4e
    rnf (TagPosition x y) = () -- both are already ! bound
Packit 247f4e
Packit 247f4e
Packit 247f4e
#ifndef BYTESTRING_HAS_NFDATA
Packit 247f4e
# ifdef MIN_VERSION_bytestring
Packit 247f4e
#  define BYTESTRING_HAS_NFDATA (MIN_VERSION_bytestring(0,10,0))
Packit 247f4e
# else
Packit 247f4e
#  define BYTESTRING_HAS_NFDATA (__GLASGOW_HASKELL__ >= 706)
Packit 247f4e
# endif
Packit 247f4e
#endif
Packit 247f4e
Packit 247f4e
#if !BYTESTRING_HAS_NFDATA
Packit 247f4e
instance NFData LBS.ByteString where
Packit 247f4e
    rnf x = LBS.length x `seq` ()
Packit 247f4e
Packit 247f4e
instance NFData BS.ByteString where
Packit 247f4e
    rnf x = BS.length x `seq` ()
Packit 247f4e
#endif
Packit 247f4e
Packit 247f4e
Packit 247f4e
---------------------------------------------------------------------
Packit 247f4e
-- STATISTICS
Packit 247f4e
-- Provided by Emily Mitchell
Packit 247f4e
Packit 247f4e
confNs = let (*) = (,) in
Packit 247f4e
    [0.95 * 1.96
Packit 247f4e
    ,0.90 * 1.644]
Packit 247f4e
Packit 247f4e
size :: [Double] -> Double
Packit 247f4e
size = genericLength
Packit 247f4e
Packit 247f4e
mean :: [Double] -> Double
Packit 247f4e
mean xs = sum xs / size xs
Packit 247f4e
Packit 247f4e
stddev :: [Double] -> Double
Packit 247f4e
stddev xs = sqrt $ sum [sqr (x - xbar) | x <- xs] / size xs
Packit 247f4e
    where xbar = mean xs
Packit 247f4e
          sqr x = x * x
Packit 247f4e
Packit 247f4e
-- given a sample, and a required confidence
Packit 247f4e
-- of the mean (i.e. 2.5% = 0.025)
Packit 247f4e
range ::Double -> [Double] -> Double
Packit 247f4e
range conf xs = conf2 * stddev xs / sqrt (size xs)
Packit 247f4e
    where conf2 = fromMaybe (error $ "Unknown confidence interval: " ++ show conf) $ lookup conf confNs