Blame Data/Conduit/Text.hs

Packit 4b2029
{-# LANGUAGE DeriveDataTypeable, RankNTypes #-}
Packit 4b2029
-- |
Packit 4b2029
-- Copyright: 2011 Michael Snoyman, 2010-2011 John Millikin
Packit 4b2029
-- License: MIT
Packit 4b2029
--
Packit 4b2029
-- Handle streams of text.
Packit 4b2029
--
Packit 4b2029
-- Parts of this code were taken from enumerator and adapted for conduits.
Packit 4b2029
--
Packit 4b2029
-- For many purposes, it's recommended to use the conduit-combinators library,
Packit 4b2029
-- which provides a more complete set of functions.
Packit 4b2029
module Data.Conduit.Text
Packit 4b2029
    (
Packit 4b2029
Packit 4b2029
    -- * Text codecs
Packit 4b2029
      Codec
Packit 4b2029
    , encode
Packit 4b2029
    , decode
Packit 4b2029
    , utf8
Packit 4b2029
    , utf16_le
Packit 4b2029
    , utf16_be
Packit 4b2029
    , utf32_le
Packit 4b2029
    , utf32_be
Packit 4b2029
    , ascii
Packit 4b2029
    , iso8859_1
Packit 4b2029
    , lines
Packit 4b2029
    , linesBounded
Packit 4b2029
    , TextException (..)
Packit 4b2029
    , takeWhile
Packit 4b2029
    , dropWhile
Packit 4b2029
    , take
Packit 4b2029
    , drop
Packit 4b2029
    , foldLines
Packit 4b2029
    , withLine
Packit 4b2029
    , Data.Conduit.Text.decodeUtf8
Packit 4b2029
    , decodeUtf8Lenient
Packit 4b2029
    , encodeUtf8
Packit 4b2029
    , detectUtf
Packit 4b2029
    ) where
Packit 4b2029
Packit 4b2029
import           Prelude hiding (head, drop, takeWhile, lines, zip, zip3, zipWith, zipWith3, take, dropWhile)
Packit 4b2029
Packit 4b2029
import qualified Control.Exception as Exc
Packit 4b2029
import qualified Data.ByteString as B
Packit 4b2029
import qualified Data.ByteString.Char8 as B8
Packit 4b2029
import           Data.Char (ord)
Packit 4b2029
import qualified Data.Text as T
Packit 4b2029
import qualified Data.Text.Encoding as TE
Packit 4b2029
import           Data.Word (Word8)
Packit 4b2029
import           Data.Typeable (Typeable)
Packit 4b2029
Packit 4b2029
import Data.Conduit
Packit 4b2029
import qualified Data.Conduit.List as CL
Packit 4b2029
import Control.Monad.Trans.Class (lift)
Packit 4b2029
import Control.Monad.Trans.Resource (MonadThrow, monadThrow)
Packit 4b2029
import Control.Monad (unless)
Packit 4b2029
import Data.Streaming.Text
Packit 4b2029
Packit 4b2029
-- | A specific character encoding.
Packit 4b2029
--
Packit 4b2029
-- Since 0.3.0
Packit 4b2029
data Codec = Codec
Packit 4b2029
    { _codecName :: T.Text
Packit 4b2029
    , codecEncode
Packit 4b2029
        :: T.Text
Packit 4b2029
        -> (B.ByteString, Maybe (TextException, T.Text))
Packit 4b2029
    , codecDecode
Packit 4b2029
        :: B.ByteString
Packit 4b2029
        -> (T.Text, Either
Packit 4b2029
            (TextException, B.ByteString)
Packit 4b2029
            B.ByteString)
Packit 4b2029
    }
Packit 4b2029
    | NewCodec T.Text (T.Text -> B.ByteString) (B.ByteString -> DecodeResult)
Packit 4b2029
Packit 4b2029
instance Show Codec where
Packit 4b2029
    showsPrec d c =
Packit 4b2029
        let (cnst, name) = case c of
Packit 4b2029
                Codec t _ _    -> ("Codec ", t)
Packit 4b2029
                NewCodec t _ _ -> ("NewCodec ", t)
Packit 4b2029
        in showParen (d > 10) $ showString cnst . shows name
Packit 4b2029
Packit 4b2029
Packit 4b2029
Packit 4b2029
-- | Emit each line separately
Packit 4b2029
--
Packit 4b2029
-- Since 0.4.1
Packit 4b2029
lines :: Monad m => Conduit T.Text m T.Text
Packit 4b2029
lines =
Packit 4b2029
    awaitText T.empty
Packit 4b2029
  where
Packit 4b2029
    awaitText buf = await >>= maybe (finish buf) (process buf)
Packit 4b2029
Packit 4b2029
    finish buf = unless (T.null buf) (yield buf)
Packit 4b2029
Packit 4b2029
    process buf text = yieldLines $ buf `T.append` text
Packit 4b2029
Packit 4b2029
    yieldLines buf =
Packit 4b2029
      let (line, rest) = T.break (== '\n') buf
Packit 4b2029
      in  case T.uncons rest of
Packit 4b2029
            Just (_, rest') -> yield line >> yieldLines rest'
Packit 4b2029
            _ -> awaitText line
Packit 4b2029
Packit 4b2029
Packit 4b2029
Packit 4b2029
-- | Variant of the lines function with an integer parameter.
Packit 4b2029
-- The text length of any emitted line
Packit 4b2029
-- never exceeds the value of the parameter. Whenever
Packit 4b2029
-- this is about to happen a LengthExceeded exception
Packit 4b2029
-- is thrown. This function should be used instead
Packit 4b2029
-- of the lines function whenever we are dealing with
Packit 4b2029
-- user input (e.g. a file upload) because we can't be sure that
Packit 4b2029
-- user input won't have extraordinarily large lines which would
Packit 4b2029
-- require large amounts of memory if consumed.
Packit 4b2029
linesBounded :: MonadThrow m => Int -> Conduit T.Text m T.Text
Packit 4b2029
linesBounded maxLineLen =
Packit 4b2029
    awaitText 0 T.empty
Packit 4b2029
  where
Packit 4b2029
    awaitText len buf = await >>= maybe (finish buf) (process len buf)
Packit 4b2029
Packit 4b2029
    finish buf = unless (T.null buf) (yield buf)
Packit 4b2029
Packit 4b2029
    process len buf text =
Packit 4b2029
      let (line, rest) = T.break (== '\n') text
Packit 4b2029
          len' = len + T.length line
Packit 4b2029
      in  if len' > maxLineLen
Packit 4b2029
            then lift $ monadThrow (LengthExceeded maxLineLen)
Packit 4b2029
            else case T.uncons rest of
Packit 4b2029
                   Just (_, rest') ->
Packit 4b2029
                     yield (buf `T.append` line) >> process 0 T.empty rest'
Packit 4b2029
                   _ ->
Packit 4b2029
                     awaitText len' $ buf `T.append` text
Packit 4b2029
Packit 4b2029
Packit 4b2029
Packit 4b2029
-- | Convert text into bytes, using the provided codec. If the codec is
Packit 4b2029
-- not capable of representing an input character, an exception will be thrown.
Packit 4b2029
--
Packit 4b2029
-- Since 0.3.0
Packit 4b2029
encode :: MonadThrow m => Codec -> Conduit T.Text m B.ByteString
Packit 4b2029
encode (NewCodec _ enc _) = CL.map enc
Packit 4b2029
encode codec = CL.mapM $ \t -> do
Packit 4b2029
    let (bs, mexc) = codecEncode codec t
Packit 4b2029
    maybe (return bs) (monadThrow . fst) mexc
Packit 4b2029
Packit 4b2029
decodeNew
Packit 4b2029
    :: Monad m
Packit 4b2029
    => (Int -> B.ByteString -> T.Text -> B.ByteString -> Conduit B.ByteString m T.Text)
Packit 4b2029
    -> t
Packit 4b2029
    -> Int
Packit 4b2029
    -> (B.ByteString -> DecodeResult)
Packit 4b2029
    -> Conduit B.ByteString m T.Text
Packit 4b2029
decodeNew onFailure _name =
Packit 4b2029
    loop
Packit 4b2029
  where
Packit 4b2029
    loop consumed dec =
Packit 4b2029
        await >>= maybe finish go
Packit 4b2029
      where
Packit 4b2029
        finish =
Packit 4b2029
            case dec B.empty of
Packit 4b2029
                DecodeResultSuccess _ _ -> return ()
Packit 4b2029
                DecodeResultFailure t rest -> onFailure consumed B.empty t rest
Packit 4b2029
        {-# INLINE finish #-}
Packit 4b2029
Packit 4b2029
        go bs | B.null bs = loop consumed dec
Packit 4b2029
        go bs =
Packit 4b2029
            case dec bs of
Packit 4b2029
                DecodeResultSuccess t dec' -> do
Packit 4b2029
                    let consumed' = consumed + B.length bs
Packit 4b2029
                        next = do
Packit 4b2029
                            unless (T.null t) (yield t)
Packit 4b2029
                            loop consumed' dec'
Packit 4b2029
                     in consumed' `seq` next
Packit 4b2029
                DecodeResultFailure t rest -> onFailure consumed bs t rest
Packit 4b2029
Packit 4b2029
-- | Decode a stream of UTF8 data, and replace invalid bytes with the Unicode
Packit 4b2029
-- replacement character.
Packit 4b2029
--
Packit 4b2029
-- Since 1.1.1
Packit 4b2029
decodeUtf8Lenient :: Monad m => Conduit B.ByteString m T.Text
Packit 4b2029
decodeUtf8Lenient =
Packit 4b2029
    decodeNew onFailure "UTF8-lenient" 0 Data.Streaming.Text.decodeUtf8
Packit 4b2029
  where
Packit 4b2029
    onFailure _consumed _bs t rest = do
Packit 4b2029
        unless (T.null t) (yield t)
Packit 4b2029
        case B.uncons rest of
Packit 4b2029
            Nothing -> return ()
Packit 4b2029
            Just (_, rest') -> do
Packit 4b2029
                unless (B.null rest') (leftover rest')
Packit 4b2029
                yield $ T.singleton '\xFFFD'
Packit 4b2029
        decodeUtf8Lenient
Packit 4b2029
Packit 4b2029
-- | Convert bytes into text, using the provided codec. If the codec is
Packit 4b2029
-- not capable of decoding an input byte sequence, an exception will be thrown.
Packit 4b2029
--
Packit 4b2029
-- Since 0.3.0
Packit 4b2029
decode :: MonadThrow m => Codec -> Conduit B.ByteString m T.Text
Packit 4b2029
decode (NewCodec name _ start) =
Packit 4b2029
    decodeNew onFailure name 0 start
Packit 4b2029
  where
Packit 4b2029
    onFailure consumed bs t rest = do
Packit 4b2029
        unless (T.null t) (yield t)
Packit 4b2029
        leftover rest -- rest will never be null, no need to check
Packit 4b2029
        let consumed' = consumed + B.length bs - B.length rest
Packit 4b2029
        monadThrow $ NewDecodeException name consumed' (B.take 4 rest)
Packit 4b2029
    {-# INLINE onFailure #-}
Packit 4b2029
decode codec =
Packit 4b2029
    loop id
Packit 4b2029
  where
Packit 4b2029
    loop front = await >>= maybe (finish front) (go front)
Packit 4b2029
Packit 4b2029
    finish front =
Packit 4b2029
        case B.uncons $ front B.empty of
Packit 4b2029
            Nothing -> return ()
Packit 4b2029
            Just (w, _) -> lift $ monadThrow $ DecodeException codec w
Packit 4b2029
Packit 4b2029
    go front bs' =
Packit 4b2029
        case extra of
Packit 4b2029
            Left (exc, _) -> lift $ monadThrow exc
Packit 4b2029
            Right bs'' -> yield text >> loop (B.append bs'')
Packit 4b2029
      where
Packit 4b2029
        (text, extra) = codecDecode codec bs
Packit 4b2029
        bs = front bs'
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
-- Since 0.3.0
Packit 4b2029
data TextException = DecodeException Codec Word8
Packit 4b2029
                   | EncodeException Codec Char
Packit 4b2029
                   | LengthExceeded Int
Packit 4b2029
                   | TextException Exc.SomeException
Packit 4b2029
                   | NewDecodeException !T.Text !Int !B.ByteString
Packit 4b2029
    deriving Typeable
Packit 4b2029
instance Show TextException where
Packit 4b2029
    show (DecodeException codec w) = concat
Packit 4b2029
        [ "Error decoding legacy Data.Conduit.Text codec "
Packit 4b2029
        , show codec
Packit 4b2029
        , " when parsing byte: "
Packit 4b2029
        , show w
Packit 4b2029
        ]
Packit 4b2029
    show (EncodeException codec c) = concat
Packit 4b2029
        [ "Error encoding legacy Data.Conduit.Text codec "
Packit 4b2029
        , show codec
Packit 4b2029
        , " when parsing char: "
Packit 4b2029
        , show c
Packit 4b2029
        ]
Packit 4b2029
    show (LengthExceeded i) = "Data.Conduit.Text.linesBounded: line too long: " ++ show i
Packit 4b2029
    show (TextException se) = "Data.Conduit.Text.TextException: " ++ show se
Packit 4b2029
    show (NewDecodeException codec consumed next) = concat
Packit 4b2029
        [ "Data.Conduit.Text.decode: Error decoding stream of "
Packit 4b2029
        , T.unpack codec
Packit 4b2029
        , " bytes. Error encountered in stream at offset "
Packit 4b2029
        , show consumed
Packit 4b2029
        , ". Encountered at byte sequence "
Packit 4b2029
        , show next
Packit 4b2029
        ]
Packit 4b2029
instance Exc.Exception TextException
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
-- Since 0.3.0
Packit 4b2029
utf8 :: Codec
Packit 4b2029
utf8 = NewCodec (T.pack "UTF-8") TE.encodeUtf8 Data.Streaming.Text.decodeUtf8
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
-- Since 0.3.0
Packit 4b2029
utf16_le :: Codec
Packit 4b2029
utf16_le = NewCodec (T.pack "UTF-16-LE") TE.encodeUtf16LE decodeUtf16LE
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
-- Since 0.3.0
Packit 4b2029
utf16_be :: Codec
Packit 4b2029
utf16_be = NewCodec (T.pack "UTF-16-BE") TE.encodeUtf16BE decodeUtf16BE
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
-- Since 0.3.0
Packit 4b2029
utf32_le :: Codec
Packit 4b2029
utf32_le = NewCodec (T.pack "UTF-32-LE") TE.encodeUtf32LE decodeUtf32LE
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
-- Since 0.3.0
Packit 4b2029
utf32_be :: Codec
Packit 4b2029
utf32_be = NewCodec (T.pack "UTF-32-BE") TE.encodeUtf32BE decodeUtf32BE
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
-- Since 0.3.0
Packit 4b2029
ascii :: Codec
Packit 4b2029
ascii = Codec name enc dec where
Packit 4b2029
    name = T.pack "ASCII"
Packit 4b2029
    enc text = (bytes, extra) where
Packit 4b2029
        (safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
Packit 4b2029
        bytes = B8.pack (T.unpack safe)
Packit 4b2029
        extra = if T.null unsafe
Packit 4b2029
            then Nothing
Packit 4b2029
            else Just (EncodeException ascii (T.head unsafe), unsafe)
Packit 4b2029
Packit 4b2029
    dec bytes = (text, extra) where
Packit 4b2029
        (safe, unsafe) = B.span (<= 0x7F) bytes
Packit 4b2029
        text = T.pack (B8.unpack safe)
Packit 4b2029
        extra = if B.null unsafe
Packit 4b2029
            then Right B.empty
Packit 4b2029
            else Left (DecodeException ascii (B.head unsafe), unsafe)
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
-- Since 0.3.0
Packit 4b2029
iso8859_1 :: Codec
Packit 4b2029
iso8859_1 = Codec name enc dec where
Packit 4b2029
    name = T.pack "ISO-8859-1"
Packit 4b2029
    enc text = (bytes, extra) where
Packit 4b2029
        (safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
Packit 4b2029
        bytes = B8.pack (T.unpack safe)
Packit 4b2029
        extra = if T.null unsafe
Packit 4b2029
            then Nothing
Packit 4b2029
            else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
Packit 4b2029
Packit 4b2029
    dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
--
Packit 4b2029
-- Since 1.0.8
Packit 4b2029
takeWhile :: Monad m
Packit 4b2029
          => (Char -> Bool)
Packit 4b2029
          -> Conduit T.Text m T.Text
Packit 4b2029
takeWhile p =
Packit 4b2029
    loop
Packit 4b2029
  where
Packit 4b2029
    loop = await >>= maybe (return ()) go
Packit 4b2029
    go t =
Packit 4b2029
        case T.span p t of
Packit 4b2029
            (x, y)
Packit 4b2029
                | T.null y -> yield x >> loop
Packit 4b2029
                | otherwise -> yield x >> leftover y
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
--
Packit 4b2029
-- Since 1.0.8
Packit 4b2029
dropWhile :: Monad m
Packit 4b2029
          => (Char -> Bool)
Packit 4b2029
          -> Consumer T.Text m ()
Packit 4b2029
dropWhile p =
Packit 4b2029
    loop
Packit 4b2029
  where
Packit 4b2029
    loop = await >>= maybe (return ()) go
Packit 4b2029
    go t
Packit 4b2029
        | T.null x = loop
Packit 4b2029
        | otherwise = leftover x
Packit 4b2029
      where
Packit 4b2029
        x = T.dropWhile p t
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
--
Packit 4b2029
-- Since 1.0.8
Packit 4b2029
take :: Monad m => Int -> Conduit T.Text m T.Text
Packit 4b2029
take =
Packit 4b2029
    loop
Packit 4b2029
  where
Packit 4b2029
    loop i = await >>= maybe (return ()) (go i)
Packit 4b2029
    go i t
Packit 4b2029
        | diff == 0 = yield t
Packit 4b2029
        | diff < 0 =
Packit 4b2029
            let (x, y) = T.splitAt i t
Packit 4b2029
             in yield x >> leftover y
Packit 4b2029
        | otherwise = yield t >> loop diff
Packit 4b2029
      where
Packit 4b2029
        diff = i - T.length t
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
--
Packit 4b2029
-- Since 1.0.8
Packit 4b2029
drop :: Monad m => Int -> Consumer T.Text m ()
Packit 4b2029
drop =
Packit 4b2029
    loop
Packit 4b2029
  where
Packit 4b2029
    loop i = await >>= maybe (return ()) (go i)
Packit 4b2029
    go i t
Packit 4b2029
        | diff == 0 = return ()
Packit 4b2029
        | diff < 0 = leftover $ T.drop i t
Packit 4b2029
        | otherwise = loop diff
Packit 4b2029
      where
Packit 4b2029
        diff = i - T.length t
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
--
Packit 4b2029
-- Since 1.0.8
Packit 4b2029
foldLines :: Monad m
Packit 4b2029
          => (a -> ConduitM T.Text o m a)
Packit 4b2029
          -> a
Packit 4b2029
          -> ConduitM T.Text o m a
Packit 4b2029
foldLines f =
Packit 4b2029
    start
Packit 4b2029
  where
Packit 4b2029
    start a = CL.peek >>= maybe (return a) (const $ loop $ f a)
Packit 4b2029
Packit 4b2029
    loop consumer = do
Packit 4b2029
        a <- takeWhile (/= '\n') =$= do
Packit 4b2029
            a <- CL.map (T.filter (/= '\r')) =$= consumer
Packit 4b2029
            CL.sinkNull
Packit 4b2029
            return a
Packit 4b2029
        drop 1
Packit 4b2029
        start a
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
--
Packit 4b2029
-- Since 1.0.8
Packit 4b2029
withLine :: Monad m
Packit 4b2029
         => Sink T.Text m a
Packit 4b2029
         -> Consumer T.Text m (Maybe a)
Packit 4b2029
withLine consumer = toConsumer $ do
Packit 4b2029
    mx <- CL.peek
Packit 4b2029
    case mx of
Packit 4b2029
        Nothing -> return Nothing
Packit 4b2029
        Just _ -> do
Packit 4b2029
            x <- takeWhile (/= '\n') =$ do
Packit 4b2029
                x <- CL.map (T.filter (/= '\r')) =$ consumer
Packit 4b2029
                CL.sinkNull
Packit 4b2029
                return x
Packit 4b2029
            drop 1
Packit 4b2029
            return $ Just x
Packit 4b2029
Packit 4b2029
-- | Decode a stream of UTF8-encoded bytes into a stream of text, throwing an
Packit 4b2029
-- exception on invalid input.
Packit 4b2029
--
Packit 4b2029
-- Since 1.0.15
Packit 4b2029
decodeUtf8 :: MonadThrow m => Conduit B.ByteString m T.Text
Packit 4b2029
decodeUtf8 = decode utf8
Packit 4b2029
    {- no meaningful performance advantage
Packit 4b2029
    CI.ConduitM (loop 0 decodeUtf8)
Packit 4b2029
  where
Packit 4b2029
    loop consumed dec =
Packit 4b2029
        CI.NeedInput go finish
Packit 4b2029
      where
Packit 4b2029
        finish () =
Packit 4b2029
            case dec B.empty of
Packit 4b2029
                DecodeResultSuccess _ _ -> return ()
Packit 4b2029
                DecodeResultFailure t rest -> onFailure B.empty t rest
Packit 4b2029
        {-# INLINE finish #-}
Packit 4b2029
Packit 4b2029
        go bs | B.null bs = CI.NeedInput go finish
Packit 4b2029
        go bs =
Packit 4b2029
            case dec bs of
Packit 4b2029
                DecodeResultSuccess t dec' -> do
Packit 4b2029
                    let consumed' = consumed + B.length bs
Packit 4b2029
                        next' = loop consumed' dec'
Packit 4b2029
                        next
Packit 4b2029
                            | T.null t = next'
Packit 4b2029
                            | otherwise = CI.HaveOutput next' (return ()) t
Packit 4b2029
                     in consumed' `seq` next
Packit 4b2029
                DecodeResultFailure t rest -> onFailure bs t rest
Packit 4b2029
Packit 4b2029
        onFailure bs t rest = do
Packit 4b2029
            unless (T.null t) (CI.yield t)
Packit 4b2029
            unless (B.null rest) (CI.leftover rest)
Packit 4b2029
            let consumed' = consumed + B.length bs - B.length rest
Packit 4b2029
            monadThrow $ NewDecodeException (T.pack "UTF-8") consumed' (B.take 4 rest)
Packit 4b2029
        {-# INLINE onFailure #-}
Packit 4b2029
    -}
Packit 4b2029
{-# INLINE decodeUtf8 #-}
Packit 4b2029
Packit 4b2029
-- | Encode a stream of text into a stream of bytes.
Packit 4b2029
--
Packit 4b2029
-- Since 1.0.15
Packit 4b2029
encodeUtf8 :: Monad m => Conduit T.Text m B.ByteString
Packit 4b2029
encodeUtf8 = CL.map TE.encodeUtf8
Packit 4b2029
{-# INLINE encodeUtf8 #-}
Packit 4b2029
Packit 4b2029
-- | Automatically determine which UTF variant is being used. This function
Packit 4b2029
-- checks for BOMs, removing them as necessary. It defaults to assuming UTF-8.
Packit 4b2029
--
Packit 4b2029
-- Since 1.1.9
Packit 4b2029
detectUtf :: MonadThrow m => Conduit B.ByteString m T.Text
Packit 4b2029
detectUtf =
Packit 4b2029
    go id
Packit 4b2029
  where
Packit 4b2029
    go front = await >>= maybe (close front) (push front)
Packit 4b2029
Packit 4b2029
    push front bs'
Packit 4b2029
        | B.length bs < 4 = go $ B.append bs
Packit 4b2029
        | otherwise       = leftDecode bs
Packit 4b2029
      where bs = front bs'
Packit 4b2029
Packit 4b2029
    close front = leftDecode $ front B.empty
Packit 4b2029
Packit 4b2029
    leftDecode bs = leftover bsOut >> decode codec
Packit 4b2029
      where
Packit 4b2029
        bsOut = B.append (B.drop toDrop x) y
Packit 4b2029
        (x, y) = B.splitAt 4 bs
Packit 4b2029
        (toDrop, codec) =
Packit 4b2029
            case B.unpack x of
Packit 4b2029
                [0x00, 0x00, 0xFE, 0xFF] -> (4, utf32_be)
Packit 4b2029
                [0xFF, 0xFE, 0x00, 0x00] -> (4, utf32_le)
Packit 4b2029
                0xFE : 0xFF: _           -> (2, utf16_be)
Packit 4b2029
                0xFF : 0xFE: _           -> (2, utf16_le)
Packit 4b2029
                0xEF : 0xBB: 0xBF : _    -> (3, utf8)
Packit 4b2029
                _                        -> (0, utf8) -- Assuming UTF-8
Packit 4b2029
{-# INLINE detectUtf #-}