Blame test/Data/Conduit/TextSpec.hs

Packit 4b2029
{-# LANGUAGE FlexibleContexts, OverloadedStrings #-}
Packit 4b2029
module Data.Conduit.TextSpec (spec) where
Packit 4b2029
Packit 4b2029
import qualified Data.Conduit.Text as CT
Packit 4b2029
import qualified Data.Conduit as C
Packit 4b2029
import qualified Data.Conduit.Lift as C
Packit 4b2029
import qualified Data.Conduit.List as CL
Packit 4b2029
import Test.Hspec
Packit 4b2029
import Test.Hspec.QuickCheck
Packit 4b2029
import Data.Monoid
Packit 4b2029
import Control.Monad.ST
Packit 4b2029
import qualified Data.Text as T
Packit 4b2029
import qualified Data.Text.Encoding as TE
Packit 4b2029
import qualified Data.Text.Encoding.Error as TEE
Packit 4b2029
import qualified Data.Text.Lazy.Encoding as TLE
Packit 4b2029
import Data.Functor.Identity
Packit 4b2029
import Control.Arrow
Packit 4b2029
import Control.Applicative
Packit 4b2029
import Control.Monad.Trans.Resource
Packit 4b2029
import qualified Data.ByteString as S
Packit 4b2029
import qualified Data.Text.Lazy as TL
Packit 4b2029
import qualified Data.ByteString.Lazy as L
Packit 4b2029
import Control.Monad.Trans.Resource (runExceptionT_)
Packit 4b2029
Packit 4b2029
spec :: Spec
Packit 4b2029
spec = describe "Data.Conduit.Text" $ do
Packit 4b2029
    describe "text" $ do
Packit 4b2029
        let go enc tenc tdec cenc = describe enc $ do
Packit 4b2029
                prop "single chunk" $ \chars -> runST $ runExceptionT_ $ do
Packit 4b2029
                    let tl = TL.pack chars
Packit 4b2029
                        lbs = tenc tl
Packit 4b2029
                        src = CL.sourceList $ L.toChunks lbs
Packit 4b2029
                    ts <- src C.$= CT.decode cenc C.$$ CL.consume
Packit 4b2029
                    return $ TL.fromChunks ts == tl
Packit 4b2029
                prop "many chunks" $ \chars -> runIdentity $ runExceptionT_ $ do
Packit 4b2029
                    let tl = TL.pack chars
Packit 4b2029
                        lbs = tenc tl
Packit 4b2029
                        src = mconcat $ map (CL.sourceList . return . S.singleton) $ L.unpack lbs
Packit 4b2029
Packit 4b2029
                    ts <- src C.$= CT.decode cenc C.$$ CL.consume
Packit 4b2029
                    return $ TL.fromChunks ts == tl
Packit 4b2029
Packit 4b2029
                -- Check whether raw bytes are decoded correctly, in
Packit 4b2029
                -- particular that Text decoding produces an error if
Packit 4b2029
                -- and only if Conduit does.
Packit 4b2029
                prop "raw bytes" $ \bytes ->
Packit 4b2029
                    let lbs = L.pack bytes
Packit 4b2029
                        src = CL.sourceList $ L.toChunks lbs
Packit 4b2029
                        etl = runException $ src C.$= CT.decode cenc C.$$ CL.consume
Packit 4b2029
                        tl' = tdec lbs
Packit 4b2029
                    in  case etl of
Packit 4b2029
                          (Left _) -> (return $! TL.toStrict tl') `shouldThrow` anyException
Packit 4b2029
                          (Right tl) -> TL.fromChunks tl `shouldBe` tl'
Packit 4b2029
                prop "encoding" $ \chars -> runIdentity $ runExceptionT_ $ do
Packit 4b2029
                    let tss = map T.pack chars
Packit 4b2029
                        lbs = tenc $ TL.fromChunks tss
Packit 4b2029
                        src = mconcat $ map (CL.sourceList . return) tss
Packit 4b2029
                    bss <- src C.$= CT.encode cenc C.$$ CL.consume
Packit 4b2029
                    return $ L.fromChunks bss == lbs
Packit 4b2029
                prop "valid then invalid" $ \x y chars -> runIdentity $ runExceptionT_ $ do
Packit 4b2029
                    let tss = map T.pack ([x, y]:chars)
Packit 4b2029
                        ts = T.concat tss
Packit 4b2029
                        lbs = tenc (TL.fromChunks tss) `L.append` "\0\0\0\0\0\0\0"
Packit 4b2029
                        src = mapM_ C.yield $ L.toChunks lbs
Packit 4b2029
                    Just x' <- src C.$$ CT.decode cenc C.=$ C.await
Packit 4b2029
                    return $ x' `T.isPrefixOf` ts
Packit 4b2029
        go "utf8" TLE.encodeUtf8 TLE.decodeUtf8 CT.utf8
Packit 4b2029
        go "utf16_le" TLE.encodeUtf16LE TLE.decodeUtf16LE CT.utf16_le
Packit 4b2029
        go "utf16_be" TLE.encodeUtf16BE TLE.decodeUtf16BE CT.utf16_be
Packit 4b2029
        go "utf32_le" TLE.encodeUtf32LE TLE.decodeUtf32LE CT.utf32_le
Packit 4b2029
        go "utf32_be" TLE.encodeUtf32BE TLE.decodeUtf32BE CT.utf32_be
Packit 4b2029
        it "mixed utf16 and utf8" $ do
Packit 4b2029
            let bs = "8\NUL:\NULu\NUL\215\216\217\218"
Packit 4b2029
                src = C.yield bs C.$= CT.decode CT.utf16_le
Packit 4b2029
            text <- src C.$$ C.await
Packit 4b2029
            text `shouldBe` Just "8:u"
Packit 4b2029
            (src C.$$ CL.sinkNull) `shouldThrow` anyException
Packit 4b2029
        it "invalid utf8" $ do
Packit 4b2029
            let bs = S.pack [0..255]
Packit 4b2029
                src = C.yield bs C.$= CT.decode CT.utf8
Packit 4b2029
            text <- src C.$$ C.await
Packit 4b2029
            text `shouldBe` Just (T.pack $ map toEnum [0..127])
Packit 4b2029
            (src C.$$ CL.sinkNull) `shouldThrow` anyException
Packit 4b2029
        it "catch UTF8 exceptions" $ do
Packit 4b2029
            let badBS = "this is good\128\128\0that was bad"
Packit 4b2029
Packit 4b2029
                grabExceptions inner = C.catchC
Packit 4b2029
                    (inner C.=$= CL.map Right)
Packit 4b2029
                    (\e -> C.yield (Left (e :: CT.TextException)))
Packit 4b2029
Packit 4b2029
            res <- C.yield badBS C.$$ (,)
Packit 4b2029
                <$> (grabExceptions (CT.decode CT.utf8) C.=$ CL.consume)
Packit 4b2029
                <*> CL.consume
Packit 4b2029
Packit 4b2029
            first (map (either (Left . show) Right)) res `shouldBe`
Packit 4b2029
                ( [ Right "this is good"
Packit 4b2029
                  , Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
Packit 4b2029
                  ]
Packit 4b2029
                , ["\128\128\0that was bad"]
Packit 4b2029
                )
Packit 4b2029
        it "catch UTF8 exceptions, pure" $ do
Packit 4b2029
            let badBS = "this is good\128\128\0that was bad"
Packit 4b2029
Packit 4b2029
                grabExceptions inner = do
Packit 4b2029
                    res <- C.runCatchC $ inner C.=$= CL.map Right
Packit 4b2029
                    case res of
Packit 4b2029
                        Left e -> C.yield $ Left e
Packit 4b2029
                        Right () -> return ()
Packit 4b2029
Packit 4b2029
            let res = runIdentity $ C.yield badBS C.$$ (,)
Packit 4b2029
                        <$> (grabExceptions (CT.decode CT.utf8) C.=$ CL.consume)
Packit 4b2029
                        <*> CL.consume
Packit 4b2029
Packit 4b2029
            first (map (either (Left . show) Right)) res `shouldBe`
Packit 4b2029
                ( [ Right "this is good"
Packit 4b2029
                  , Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
Packit 4b2029
                  ]
Packit 4b2029
                , ["\128\128\0that was bad"]
Packit 4b2029
                )
Packit 4b2029
        it "catch UTF8 exceptions, catchExceptionC" $ do
Packit 4b2029
            let badBS = "this is good\128\128\0that was bad"
Packit 4b2029
Packit 4b2029
                grabExceptions inner = C.catchCatchC
Packit 4b2029
                    (inner C.=$= CL.map Right)
Packit 4b2029
                    (\e -> C.yield $ Left e)
Packit 4b2029
Packit 4b2029
            let res = runException_ $ C.yield badBS C.$$ (,)
Packit 4b2029
                        <$> (grabExceptions (CT.decode CT.utf8) C.=$ CL.consume)
Packit 4b2029
                        <*> CL.consume
Packit 4b2029
Packit 4b2029
            first (map (either (Left . show) Right)) res `shouldBe`
Packit 4b2029
                ( [ Right "this is good"
Packit 4b2029
                  , Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
Packit 4b2029
                  ]
Packit 4b2029
                , ["\128\128\0that was bad"]
Packit 4b2029
                )
Packit 4b2029
        it "catch UTF8 exceptions, catchExceptionC, decodeUtf8" $ do
Packit 4b2029
            let badBS = "this is good\128\128\0that was bad"
Packit 4b2029
Packit 4b2029
                grabExceptions inner = C.catchCatchC
Packit 4b2029
                    (inner C.=$= CL.map Right)
Packit 4b2029
                    (\e -> C.yield $ Left e)
Packit 4b2029
Packit 4b2029
            let res = runException_ $ C.yield badBS C.$$ (,)
Packit 4b2029
                        <$> (grabExceptions CT.decodeUtf8 C.=$ CL.consume)
Packit 4b2029
                        <*> CL.consume
Packit 4b2029
Packit 4b2029
            first (map (either (Left . show) Right)) res `shouldBe`
Packit 4b2029
                ( [ Right "this is good"
Packit 4b2029
                  , Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
Packit 4b2029
                  ]
Packit 4b2029
                , ["\128\128\0that was bad"]
Packit 4b2029
                )
Packit 4b2029
        prop "lenient UTF8 decoding" $ \good1 good2 -> do
Packit 4b2029
            let bss = [TE.encodeUtf8 $ T.pack good1, "\128\129\130", TE.encodeUtf8 $ T.pack good2]
Packit 4b2029
                bs = S.concat bss
Packit 4b2029
                expected = TE.decodeUtf8With TEE.lenientDecode bs
Packit 4b2029
                actual = runIdentity $ mapM_ C.yield bss C.$$ CT.decodeUtf8Lenient C.=$ CL.consume
Packit 4b2029
            T.concat actual `shouldBe` expected
Packit 4b2029
Packit 4b2029
    describe "text lines" $ do
Packit 4b2029
        it "yields nothing given nothing" $
Packit 4b2029
            (CL.sourceList [] C.$= CT.lines C.$$ CL.consume) ==
Packit 4b2029
                [[]]
Packit 4b2029
        it "yields nothing given only empty text" $
Packit 4b2029
            (CL.sourceList [""] C.$= CT.lines C.$$ CL.consume) ==
Packit 4b2029
                [[]]
Packit 4b2029
        it "works across split lines" $
Packit 4b2029
            (CL.sourceList ["abc", "d\nef"] C.$= CT.lines C.$$ CL.consume) ==
Packit 4b2029
                [["abcd", "ef"]]
Packit 4b2029
        it "works with multiple lines in an item" $
Packit 4b2029
            (CL.sourceList ["ab\ncd\ne"] C.$= CT.lines C.$$ CL.consume) ==
Packit 4b2029
                [["ab", "cd", "e"]]
Packit 4b2029
        it "works with ending on a newline" $
Packit 4b2029
            (CL.sourceList ["ab\n"] C.$= CT.lines C.$$ CL.consume) ==
Packit 4b2029
                [["ab"]]
Packit 4b2029
        it "works with ending a middle item on a newline" $
Packit 4b2029
            (CL.sourceList ["ab\n", "cd\ne"] C.$= CT.lines C.$$ CL.consume) ==
Packit 4b2029
                [["ab", "cd", "e"]]
Packit 4b2029
        it "works with empty text" $
Packit 4b2029
            (CL.sourceList ["ab", "", "cd"] C.$= CT.lines C.$$ CL.consume) ==
Packit 4b2029
                [["abcd"]]
Packit 4b2029
        it "works with empty lines" $
Packit 4b2029
            (CL.sourceList ["\n\n"] C.$= CT.lines C.$$ CL.consume) ==
Packit 4b2029
                [["", ""]]
Packit 4b2029
Packit 4b2029
    describe "text lines bounded" $ do
Packit 4b2029
        it "yields nothing given nothing" $
Packit 4b2029
            (CL.sourceList [] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
Packit 4b2029
                [[]]
Packit 4b2029
        it "yields nothing given only empty text" $
Packit 4b2029
            (CL.sourceList [""] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
Packit 4b2029
                [[]]
Packit 4b2029
        it "works across split lines" $
Packit 4b2029
            (CL.sourceList ["abc", "d\nef"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
Packit 4b2029
                [["abcd", "ef"]]
Packit 4b2029
        it "works with multiple lines in an item" $
Packit 4b2029
            (CL.sourceList ["ab\ncd\ne"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
Packit 4b2029
                [["ab", "cd", "e"]]
Packit 4b2029
        it "works with ending on a newline" $
Packit 4b2029
            (CL.sourceList ["ab\n"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
Packit 4b2029
                [["ab"]]
Packit 4b2029
        it "works with ending a middle item on a newline" $
Packit 4b2029
            (CL.sourceList ["ab\n", "cd\ne"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
Packit 4b2029
                [["ab", "cd", "e"]]
Packit 4b2029
        it "works with empty text" $
Packit 4b2029
            (CL.sourceList ["ab", "", "cd"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
Packit 4b2029
                [["abcd"]]
Packit 4b2029
        it "works with empty lines" $
Packit 4b2029
            (CL.sourceList ["\n\n"] C.$= CT.linesBounded 80 C.$$ CL.consume) ==
Packit 4b2029
                [["", ""]]
Packit 4b2029
        it "throws an exception when lines are too long" $ do
Packit 4b2029
            x <- runExceptionT $ CL.sourceList ["hello\nworld"] C.$$ CT.linesBounded 4 C.=$ CL.consume
Packit 4b2029
            show x `shouldBe` show (Left $ CT.LengthExceeded 4 :: Either CT.TextException ())
Packit 4b2029
        it "works with infinite input" $ do
Packit 4b2029
            x <- runExceptionT $ CL.sourceList (cycle ["hello"]) C.$$ CT.linesBounded 256 C.=$ CL.consume
Packit 4b2029
            show x `shouldBe` show (Left $ CT.LengthExceeded 256 :: Either CT.TextException ())
Packit 4b2029
    describe "text decode" $ do
Packit 4b2029
        it' "doesn't throw runtime exceptions" $ do
Packit 4b2029
            let x = runIdentity $ runExceptionT $ C.yield "\x89\x243" C.$$ CT.decode CT.utf8 C.=$ CL.consume
Packit 4b2029
            case x of
Packit 4b2029
                Left _ -> return ()
Packit 4b2029
                Right t -> error $ "This should have failed: " ++ show t
Packit 4b2029
        it "is not too eager" $ do
Packit 4b2029
            x <- CL.sourceList ["foobarbaz", error "ignore me"] C.$$ CT.decode CT.utf8 C.=$ CL.head
Packit 4b2029
            x `shouldBe` Just "foobarbaz"
Packit 4b2029
Packit 4b2029
it' :: String -> IO () -> Spec
Packit 4b2029
it' = it