|
Packit |
4b2029 |
{-# LANGUAGE FlexibleContexts, OverloadedStrings #-}
|
|
Packit |
4b2029 |
module Data.Conduit.TextSpec (spec) where
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
import qualified Data.Conduit.Text as CT
|
|
Packit |
4b2029 |
import qualified Data.Conduit as C
|
|
Packit |
4b2029 |
import qualified Data.Conduit.Lift as C
|
|
Packit |
4b2029 |
import qualified Data.Conduit.List as CL
|
|
Packit |
4b2029 |
import Test.Hspec
|
|
Packit |
4b2029 |
import Test.Hspec.QuickCheck
|
|
Packit |
4b2029 |
import Data.Monoid
|
|
Packit |
4b2029 |
import Control.Monad.ST
|
|
Packit |
4b2029 |
import qualified Data.Text as T
|
|
Packit |
4b2029 |
import qualified Data.Text.Encoding as TE
|
|
Packit |
4b2029 |
import qualified Data.Text.Encoding.Error as TEE
|
|
Packit |
4b2029 |
import qualified Data.Text.Lazy.Encoding as TLE
|
|
Packit |
4b2029 |
import Data.Functor.Identity
|
|
Packit |
4b2029 |
import Control.Arrow
|
|
Packit |
4b2029 |
import Control.Applicative
|
|
Packit |
4b2029 |
import Control.Monad.Trans.Resource
|
|
Packit |
4b2029 |
import qualified Data.ByteString as S
|
|
Packit |
4b2029 |
import qualified Data.Text.Lazy as TL
|
|
Packit |
4b2029 |
import qualified Data.ByteString.Lazy as L
|
|
Packit |
4b2029 |
import Control.Monad.Trans.Resource (runExceptionT_)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
spec :: Spec
|
|
Packit |
4b2029 |
spec = describe "Data.Conduit.Text" $ do
|
|
Packit |
4b2029 |
describe "text" $ do
|
|
Packit |
4b2029 |
let go enc tenc tdec cenc = describe enc $ do
|
|
Packit |
4b2029 |
prop "single chunk" $ \chars -> runST $ runExceptionT_ $ do
|
|
Packit |
4b2029 |
let tl = TL.pack chars
|
|
Packit |
4b2029 |
lbs = tenc tl
|
|
Packit |
4b2029 |
src = CL.sourceList $ L.toChunks lbs
|
|
Packit |
4b2029 |
ts <- src C.$= CT.decode cenc C.$$ CL.consume
|
|
Packit |
4b2029 |
return $ TL.fromChunks ts == tl
|
|
Packit |
4b2029 |
prop "many chunks" $ \chars -> runIdentity $ runExceptionT_ $ do
|
|
Packit |
4b2029 |
let tl = TL.pack chars
|
|
Packit |
4b2029 |
lbs = tenc tl
|
|
Packit |
4b2029 |
src = mconcat $ map (CL.sourceList . return . S.singleton) $ L.unpack lbs
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
ts <- src C.$= CT.decode cenc C.$$ CL.consume
|
|
Packit |
4b2029 |
return $ TL.fromChunks ts == tl
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- Check whether raw bytes are decoded correctly, in
|
|
Packit |
4b2029 |
-- particular that Text decoding produces an error if
|
|
Packit |
4b2029 |
-- and only if Conduit does.
|
|
Packit |
4b2029 |
prop "raw bytes" $ \bytes ->
|
|
Packit |
4b2029 |
let lbs = L.pack bytes
|
|
Packit |
4b2029 |
src = CL.sourceList $ L.toChunks lbs
|
|
Packit |
4b2029 |
etl = runException $ src C.$= CT.decode cenc C.$$ CL.consume
|
|
Packit |
4b2029 |
tl' = tdec lbs
|
|
Packit |
4b2029 |
in case etl of
|
|
Packit |
4b2029 |
(Left _) -> (return $! TL.toStrict tl') `shouldThrow` anyException
|
|
Packit |
4b2029 |
(Right tl) -> TL.fromChunks tl `shouldBe` tl'
|
|
Packit |
4b2029 |
prop "encoding" $ \chars -> runIdentity $ runExceptionT_ $ do
|
|
Packit |
4b2029 |
let tss = map T.pack chars
|
|
Packit |
4b2029 |
lbs = tenc $ TL.fromChunks tss
|
|
Packit |
4b2029 |
src = mconcat $ map (CL.sourceList . return) tss
|
|
Packit |
4b2029 |
bss <- src C.$= CT.encode cenc C.$$ CL.consume
|
|
Packit |
4b2029 |
return $ L.fromChunks bss == lbs
|
|
Packit |
4b2029 |
prop "valid then invalid" $ \x y chars -> runIdentity $ runExceptionT_ $ do
|
|
Packit |
4b2029 |
let tss = map T.pack ([x, y]:chars)
|
|
Packit |
4b2029 |
ts = T.concat tss
|
|
Packit |
4b2029 |
lbs = tenc (TL.fromChunks tss) `L.append` "\0\0\0\0\0\0\0"
|
|
Packit |
4b2029 |
src = mapM_ C.yield $ L.toChunks lbs
|
|
Packit |
4b2029 |
Just x' <- src C.$$ CT.decode cenc C.=$ C.await
|
|
Packit |
4b2029 |
return $ x' `T.isPrefixOf` ts
|
|
Packit |
4b2029 |
go "utf8" TLE.encodeUtf8 TLE.decodeUtf8 CT.utf8
|
|
Packit |
4b2029 |
go "utf16_le" TLE.encodeUtf16LE TLE.decodeUtf16LE CT.utf16_le
|
|
Packit |
4b2029 |
go "utf16_be" TLE.encodeUtf16BE TLE.decodeUtf16BE CT.utf16_be
|
|
Packit |
4b2029 |
go "utf32_le" TLE.encodeUtf32LE TLE.decodeUtf32LE CT.utf32_le
|
|
Packit |
4b2029 |
go "utf32_be" TLE.encodeUtf32BE TLE.decodeUtf32BE CT.utf32_be
|
|
Packit |
4b2029 |
it "mixed utf16 and utf8" $ do
|
|
Packit |
4b2029 |
let bs = "8\NUL:\NULu\NUL\215\216\217\218"
|
|
Packit |
4b2029 |
src = C.yield bs C.$= CT.decode CT.utf16_le
|
|
Packit |
4b2029 |
text <- src C.$$ C.await
|
|
Packit |
4b2029 |
text `shouldBe` Just "8:u"
|
|
Packit |
4b2029 |
(src C.$$ CL.sinkNull) `shouldThrow` anyException
|
|
Packit |
4b2029 |
it "invalid utf8" $ do
|
|
Packit |
4b2029 |
let bs = S.pack [0..255]
|
|
Packit |
4b2029 |
src = C.yield bs C.$= CT.decode CT.utf8
|
|
Packit |
4b2029 |
text <- src C.$$ C.await
|
|
Packit |
4b2029 |
text `shouldBe` Just (T.pack $ map toEnum [0..127])
|
|
Packit |
4b2029 |
(src C.$$ CL.sinkNull) `shouldThrow` anyException
|
|
Packit |
4b2029 |
it "catch UTF8 exceptions" $ do
|
|
Packit |
4b2029 |
let badBS = "this is good\128\128\0that was bad"
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
grabExceptions inner = C.catchC
|
|
Packit |
4b2029 |
(inner C.=$= CL.map Right)
|
|
Packit |
4b2029 |
(\e -> C.yield (Left (e :: CT.TextException)))
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
res <- C.yield badBS C.$$ (,)
|
|
Packit |
4b2029 |
<$> (grabExceptions (CT.decode CT.utf8) C.=$ CL.consume)
|
|
Packit |
4b2029 |
<*> CL.consume
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
first (map (either (Left . show) Right)) res `shouldBe`
|
|
Packit |
4b2029 |
( [ Right "this is good"
|
|
Packit |
4b2029 |
, Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
|
|
Packit |
4b2029 |
]
|
|
Packit |
4b2029 |
, ["\128\128\0that was bad"]
|
|
Packit |
4b2029 |
)
|
|
Packit |
4b2029 |
it "catch UTF8 exceptions, pure" $ do
|
|
Packit |
4b2029 |
let badBS = "this is good\128\128\0that was bad"
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
grabExceptions inner = do
|
|
Packit |
4b2029 |
res <- C.runCatchC $ inner C.=$= CL.map Right
|
|
Packit |
4b2029 |
case res of
|
|
Packit |
4b2029 |
Left e -> C.yield $ Left e
|
|
Packit |
4b2029 |
Right () -> return ()
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
let res = runIdentity $ C.yield badBS C.$$ (,)
|
|
Packit |
4b2029 |
<$> (grabExceptions (CT.decode CT.utf8) C.=$ CL.consume)
|
|
Packit |
4b2029 |
<*> CL.consume
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
first (map (either (Left . show) Right)) res `shouldBe`
|
|
Packit |
4b2029 |
( [ Right "this is good"
|
|
Packit |
4b2029 |
, Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
|
|
Packit |
4b2029 |
]
|
|
Packit |
4b2029 |
, ["\128\128\0that was bad"]
|
|
Packit |
4b2029 |
)
|
|
Packit |
4b2029 |
it "catch UTF8 exceptions, catchExceptionC" $ do
|
|
Packit |
4b2029 |
let badBS = "this is good\128\128\0that was bad"
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
grabExceptions inner = C.catchCatchC
|
|
Packit |
4b2029 |
(inner C.=$= CL.map Right)
|
|
Packit |
4b2029 |
(\e -> C.yield $ Left e)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
let res = runException_ $ C.yield badBS C.$$ (,)
|
|
Packit |
4b2029 |
<$> (grabExceptions (CT.decode CT.utf8) C.=$ CL.consume)
|
|
Packit |
4b2029 |
<*> CL.consume
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
first (map (either (Left . show) Right)) res `shouldBe`
|
|
Packit |
4b2029 |
( [ Right "this is good"
|
|
Packit |
4b2029 |
, Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
|
|
Packit |
4b2029 |
]
|
|
Packit |
4b2029 |
, ["\128\128\0that was bad"]
|
|
Packit |
4b2029 |
)
|
|
Packit |
4b2029 |
it "catch UTF8 exceptions, catchExceptionC, decodeUtf8" $ do
|
|
Packit |
4b2029 |
let badBS = "this is good\128\128\0that was bad"
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
grabExceptions inner = C.catchCatchC
|
|
Packit |
4b2029 |
(inner C.=$= CL.map Right)
|
|
Packit |
4b2029 |
(\e -> C.yield $ Left e)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
let res = runException_ $ C.yield badBS C.$$ (,)
|
|
Packit |
4b2029 |
<$> (grabExceptions CT.decodeUtf8 C.=$ CL.consume)
|
|
Packit |
4b2029 |
<*> CL.consume
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
first (map (either (Left . show) Right)) res `shouldBe`
|
|
Packit |
4b2029 |
( [ Right "this is good"
|
|
Packit |
4b2029 |
, Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
|
|
Packit |
4b2029 |
]
|
|
Packit |
4b2029 |
, ["\128\128\0that was bad"]
|
|
Packit |
4b2029 |
)
|
|
Packit |
4b2029 |
prop "lenient UTF8 decoding" $ \good1 good2 -> do
|
|
Packit |
4b2029 |
let bss = [TE.encodeUtf8 $ T.pack good1, "\128\129\130", TE.encodeUtf8 $ T.pack good2]
|
|
Packit |
4b2029 |
bs = S.concat bss
|
|
Packit |
4b2029 |
expected = TE.decodeUtf8With TEE.lenientDecode bs
|
|
Packit |
4b2029 |
actual = runIdentity $ mapM_ C.yield bss C.$$ CT.decodeUtf8Lenient C.=$ CL.consume
|
|
Packit |
4b2029 |
T.concat actual `shouldBe` expected
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
describe "text lines" $ do
|
|
Packit |
4b2029 |
it "yields nothing given nothing" $
|
|
Packit |
4b2029 |
(CL.sourceList [] C.$= CT.lines C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[[]]
|
|
Packit |
4b2029 |
it "yields nothing given only empty text" $
|
|
Packit |
4b2029 |
(CL.sourceList [""] C.$= CT.lines C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[[]]
|
|
Packit |
4b2029 |
it "works across split lines" $
|
|
Packit |
4b2029 |
(CL.sourceList ["abc", "d\nef"] C.$= CT.lines C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["abcd", "ef"]]
|
|
Packit |
4b2029 |
it "works with multiple lines in an item" $
|
|
Packit |
4b2029 |
(CL.sourceList ["ab\ncd\ne"] C.$= CT.lines C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["ab", "cd", "e"]]
|
|
Packit |
4b2029 |
it "works with ending on a newline" $
|
|
Packit |
4b2029 |
(CL.sourceList ["ab\n"] C.$= CT.lines C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["ab"]]
|
|
Packit |
4b2029 |
it "works with ending a middle item on a newline" $
|
|
Packit |
4b2029 |
(CL.sourceList ["ab\n", "cd\ne"] C.$= CT.lines C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["ab", "cd", "e"]]
|
|
Packit |
4b2029 |
it "works with empty text" $
|
|
Packit |
4b2029 |
(CL.sourceList ["ab", "", "cd"] C.$= CT.lines C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["abcd"]]
|
|
Packit |
4b2029 |
it "works with empty lines" $
|
|
Packit |
4b2029 |
(CL.sourceList ["\n\n"] C.$= CT.lines C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["", ""]]
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
describe "text lines bounded" $ do
|
|
Packit |
4b2029 |
it "yields nothing given nothing" $
|
|
Packit |
4b2029 |
(CL.sourceList [] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[[]]
|
|
Packit |
4b2029 |
it "yields nothing given only empty text" $
|
|
Packit |
4b2029 |
(CL.sourceList [""] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[[]]
|
|
Packit |
4b2029 |
it "works across split lines" $
|
|
Packit |
4b2029 |
(CL.sourceList ["abc", "d\nef"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["abcd", "ef"]]
|
|
Packit |
4b2029 |
it "works with multiple lines in an item" $
|
|
Packit |
4b2029 |
(CL.sourceList ["ab\ncd\ne"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["ab", "cd", "e"]]
|
|
Packit |
4b2029 |
it "works with ending on a newline" $
|
|
Packit |
4b2029 |
(CL.sourceList ["ab\n"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["ab"]]
|
|
Packit |
4b2029 |
it "works with ending a middle item on a newline" $
|
|
Packit |
4b2029 |
(CL.sourceList ["ab\n", "cd\ne"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["ab", "cd", "e"]]
|
|
Packit |
4b2029 |
it "works with empty text" $
|
|
Packit |
4b2029 |
(CL.sourceList ["ab", "", "cd"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["abcd"]]
|
|
Packit |
4b2029 |
it "works with empty lines" $
|
|
Packit |
4b2029 |
(CL.sourceList ["\n\n"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
|
|
Packit |
4b2029 |
[["", ""]]
|
|
Packit |
4b2029 |
it "throws an exception when lines are too long" $ do
|
|
Packit |
4b2029 |
x <- runExceptionT $ CL.sourceList ["hello\nworld"] C.$$ CT.linesBounded 4 C.=$ CL.consume
|
|
Packit |
4b2029 |
show x `shouldBe` show (Left $ CT.LengthExceeded 4 :: Either CT.TextException ())
|
|
Packit |
4b2029 |
it "works with infinite input" $ do
|
|
Packit |
4b2029 |
x <- runExceptionT $ CL.sourceList (cycle ["hello"]) C.$$ CT.linesBounded 256 C.=$ CL.consume
|
|
Packit |
4b2029 |
show x `shouldBe` show (Left $ CT.LengthExceeded 256 :: Either CT.TextException ())
|
|
Packit |
4b2029 |
describe "text decode" $ do
|
|
Packit |
4b2029 |
it' "doesn't throw runtime exceptions" $ do
|
|
Packit |
4b2029 |
let x = runIdentity $ runExceptionT $ C.yield "\x89\x243" C.$$ CT.decode CT.utf8 C.=$ CL.consume
|
|
Packit |
4b2029 |
case x of
|
|
Packit |
4b2029 |
Left _ -> return ()
|
|
Packit |
4b2029 |
Right t -> error $ "This should have failed: " ++ show t
|
|
Packit |
4b2029 |
it "is not too eager" $ do
|
|
Packit |
4b2029 |
x <- CL.sourceList ["foobarbaz", error "ignore me"] C.$$ CT.decode CT.utf8 C.=$ CL.head
|
|
Packit |
4b2029 |
x `shouldBe` Just "foobarbaz"
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
it' :: String -> IO () -> Spec
|
|
Packit |
4b2029 |
it' = it
|