|
Packit |
4b2029 |
{-# LANGUAGE DeriveDataTypeable, RankNTypes #-}
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
-- Copyright: 2011 Michael Snoyman, 2010-2011 John Millikin
|
|
Packit |
4b2029 |
-- License: MIT
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Handle streams of text.
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Parts of this code were taken from enumerator and adapted for conduits.
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- For many purposes, it's recommended to use the conduit-combinators library,
|
|
Packit |
4b2029 |
-- which provides a more complete set of functions.
|
|
Packit |
4b2029 |
module Data.Conduit.Text
|
|
Packit |
4b2029 |
(
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- * Text codecs
|
|
Packit |
4b2029 |
Codec
|
|
Packit |
4b2029 |
, encode
|
|
Packit |
4b2029 |
, decode
|
|
Packit |
4b2029 |
, utf8
|
|
Packit |
4b2029 |
, utf16_le
|
|
Packit |
4b2029 |
, utf16_be
|
|
Packit |
4b2029 |
, utf32_le
|
|
Packit |
4b2029 |
, utf32_be
|
|
Packit |
4b2029 |
, ascii
|
|
Packit |
4b2029 |
, iso8859_1
|
|
Packit |
4b2029 |
, lines
|
|
Packit |
4b2029 |
, linesBounded
|
|
Packit |
4b2029 |
, TextException (..)
|
|
Packit |
4b2029 |
, takeWhile
|
|
Packit |
4b2029 |
, dropWhile
|
|
Packit |
4b2029 |
, take
|
|
Packit |
4b2029 |
, drop
|
|
Packit |
4b2029 |
, foldLines
|
|
Packit |
4b2029 |
, withLine
|
|
Packit |
4b2029 |
, Data.Conduit.Text.decodeUtf8
|
|
Packit |
4b2029 |
, decodeUtf8Lenient
|
|
Packit |
4b2029 |
, encodeUtf8
|
|
Packit |
4b2029 |
, detectUtf
|
|
Packit |
4b2029 |
) where
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
import Prelude hiding (head, drop, takeWhile, lines, zip, zip3, zipWith, zipWith3, take, dropWhile)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
import qualified Control.Exception as Exc
|
|
Packit |
4b2029 |
import qualified Data.ByteString as B
|
|
Packit |
4b2029 |
import qualified Data.ByteString.Char8 as B8
|
|
Packit |
4b2029 |
import Data.Char (ord)
|
|
Packit |
4b2029 |
import qualified Data.Text as T
|
|
Packit |
4b2029 |
import qualified Data.Text.Encoding as TE
|
|
Packit |
4b2029 |
import Data.Word (Word8)
|
|
Packit |
4b2029 |
import Data.Typeable (Typeable)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
import Data.Conduit
|
|
Packit |
4b2029 |
import qualified Data.Conduit.List as CL
|
|
Packit |
4b2029 |
import Control.Monad.Trans.Class (lift)
|
|
Packit |
4b2029 |
import Control.Monad.Trans.Resource (MonadThrow, monadThrow)
|
|
Packit |
4b2029 |
import Control.Monad (unless)
|
|
Packit |
4b2029 |
import Data.Streaming.Text
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | A specific character encoding.
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 0.3.0
|
|
Packit |
4b2029 |
data Codec = Codec
|
|
Packit |
4b2029 |
{ _codecName :: T.Text
|
|
Packit |
4b2029 |
, codecEncode
|
|
Packit |
4b2029 |
:: T.Text
|
|
Packit |
4b2029 |
-> (B.ByteString, Maybe (TextException, T.Text))
|
|
Packit |
4b2029 |
, codecDecode
|
|
Packit |
4b2029 |
:: B.ByteString
|
|
Packit |
4b2029 |
-> (T.Text, Either
|
|
Packit |
4b2029 |
(TextException, B.ByteString)
|
|
Packit |
4b2029 |
B.ByteString)
|
|
Packit |
4b2029 |
}
|
|
Packit |
4b2029 |
| NewCodec T.Text (T.Text -> B.ByteString) (B.ByteString -> DecodeResult)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
instance Show Codec where
|
|
Packit |
4b2029 |
showsPrec d c =
|
|
Packit |
4b2029 |
let (cnst, name) = case c of
|
|
Packit |
4b2029 |
Codec t _ _ -> ("Codec ", t)
|
|
Packit |
4b2029 |
NewCodec t _ _ -> ("NewCodec ", t)
|
|
Packit |
4b2029 |
in showParen (d > 10) $ showString cnst . shows name
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Emit each line separately
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 0.4.1
|
|
Packit |
4b2029 |
lines :: Monad m => Conduit T.Text m T.Text
|
|
Packit |
4b2029 |
lines =
|
|
Packit |
4b2029 |
awaitText T.empty
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
awaitText buf = await >>= maybe (finish buf) (process buf)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
finish buf = unless (T.null buf) (yield buf)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
process buf text = yieldLines $ buf `T.append` text
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
yieldLines buf =
|
|
Packit |
4b2029 |
let (line, rest) = T.break (== '\n') buf
|
|
Packit |
4b2029 |
in case T.uncons rest of
|
|
Packit |
4b2029 |
Just (_, rest') -> yield line >> yieldLines rest'
|
|
Packit |
4b2029 |
_ -> awaitText line
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Variant of the lines function with an integer parameter.
|
|
Packit |
4b2029 |
-- The text length of any emitted line
|
|
Packit |
4b2029 |
-- never exceeds the value of the parameter. Whenever
|
|
Packit |
4b2029 |
-- this is about to happen a LengthExceeded exception
|
|
Packit |
4b2029 |
-- is thrown. This function should be used instead
|
|
Packit |
4b2029 |
-- of the lines function whenever we are dealing with
|
|
Packit |
4b2029 |
-- user input (e.g. a file upload) because we can't be sure that
|
|
Packit |
4b2029 |
-- user input won't have extraordinarily large lines which would
|
|
Packit |
4b2029 |
-- require large amounts of memory if consumed.
|
|
Packit |
4b2029 |
linesBounded :: MonadThrow m => Int -> Conduit T.Text m T.Text
|
|
Packit |
4b2029 |
linesBounded maxLineLen =
|
|
Packit |
4b2029 |
awaitText 0 T.empty
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
awaitText len buf = await >>= maybe (finish buf) (process len buf)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
finish buf = unless (T.null buf) (yield buf)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
process len buf text =
|
|
Packit |
4b2029 |
let (line, rest) = T.break (== '\n') text
|
|
Packit |
4b2029 |
len' = len + T.length line
|
|
Packit |
4b2029 |
in if len' > maxLineLen
|
|
Packit |
4b2029 |
then lift $ monadThrow (LengthExceeded maxLineLen)
|
|
Packit |
4b2029 |
else case T.uncons rest of
|
|
Packit |
4b2029 |
Just (_, rest') ->
|
|
Packit |
4b2029 |
yield (buf `T.append` line) >> process 0 T.empty rest'
|
|
Packit |
4b2029 |
_ ->
|
|
Packit |
4b2029 |
awaitText len' $ buf `T.append` text
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Convert text into bytes, using the provided codec. If the codec is
|
|
Packit |
4b2029 |
-- not capable of representing an input character, an exception will be thrown.
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 0.3.0
|
|
Packit |
4b2029 |
encode :: MonadThrow m => Codec -> Conduit T.Text m B.ByteString
|
|
Packit |
4b2029 |
encode (NewCodec _ enc _) = CL.map enc
|
|
Packit |
4b2029 |
encode codec = CL.mapM $ \t -> do
|
|
Packit |
4b2029 |
let (bs, mexc) = codecEncode codec t
|
|
Packit |
4b2029 |
maybe (return bs) (monadThrow . fst) mexc
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
decodeNew
|
|
Packit |
4b2029 |
:: Monad m
|
|
Packit |
4b2029 |
=> (Int -> B.ByteString -> T.Text -> B.ByteString -> Conduit B.ByteString m T.Text)
|
|
Packit |
4b2029 |
-> t
|
|
Packit |
4b2029 |
-> Int
|
|
Packit |
4b2029 |
-> (B.ByteString -> DecodeResult)
|
|
Packit |
4b2029 |
-> Conduit B.ByteString m T.Text
|
|
Packit |
4b2029 |
decodeNew onFailure _name =
|
|
Packit |
4b2029 |
loop
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
loop consumed dec =
|
|
Packit |
4b2029 |
await >>= maybe finish go
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
finish =
|
|
Packit |
4b2029 |
case dec B.empty of
|
|
Packit |
4b2029 |
DecodeResultSuccess _ _ -> return ()
|
|
Packit |
4b2029 |
DecodeResultFailure t rest -> onFailure consumed B.empty t rest
|
|
Packit |
4b2029 |
{-# INLINE finish #-}
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
go bs | B.null bs = loop consumed dec
|
|
Packit |
4b2029 |
go bs =
|
|
Packit |
4b2029 |
case dec bs of
|
|
Packit |
4b2029 |
DecodeResultSuccess t dec' -> do
|
|
Packit |
4b2029 |
let consumed' = consumed + B.length bs
|
|
Packit |
4b2029 |
next = do
|
|
Packit |
4b2029 |
unless (T.null t) (yield t)
|
|
Packit |
4b2029 |
loop consumed' dec'
|
|
Packit |
4b2029 |
in consumed' `seq` next
|
|
Packit |
4b2029 |
DecodeResultFailure t rest -> onFailure consumed bs t rest
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Decode a stream of UTF8 data, and replace invalid bytes with the Unicode
|
|
Packit |
4b2029 |
-- replacement character.
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 1.1.1
|
|
Packit |
4b2029 |
decodeUtf8Lenient :: Monad m => Conduit B.ByteString m T.Text
|
|
Packit |
4b2029 |
decodeUtf8Lenient =
|
|
Packit |
4b2029 |
decodeNew onFailure "UTF8-lenient" 0 Data.Streaming.Text.decodeUtf8
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
onFailure _consumed _bs t rest = do
|
|
Packit |
4b2029 |
unless (T.null t) (yield t)
|
|
Packit |
4b2029 |
case B.uncons rest of
|
|
Packit |
4b2029 |
Nothing -> return ()
|
|
Packit |
4b2029 |
Just (_, rest') -> do
|
|
Packit |
4b2029 |
unless (B.null rest') (leftover rest')
|
|
Packit |
4b2029 |
yield $ T.singleton '\xFFFD'
|
|
Packit |
4b2029 |
decodeUtf8Lenient
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Convert bytes into text, using the provided codec. If the codec is
|
|
Packit |
4b2029 |
-- not capable of decoding an input byte sequence, an exception will be thrown.
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 0.3.0
|
|
Packit |
4b2029 |
decode :: MonadThrow m => Codec -> Conduit B.ByteString m T.Text
|
|
Packit |
4b2029 |
decode (NewCodec name _ start) =
|
|
Packit |
4b2029 |
decodeNew onFailure name 0 start
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
onFailure consumed bs t rest = do
|
|
Packit |
4b2029 |
unless (T.null t) (yield t)
|
|
Packit |
4b2029 |
leftover rest -- rest will never be null, no need to check
|
|
Packit |
4b2029 |
let consumed' = consumed + B.length bs - B.length rest
|
|
Packit |
4b2029 |
monadThrow $ NewDecodeException name consumed' (B.take 4 rest)
|
|
Packit |
4b2029 |
{-# INLINE onFailure #-}
|
|
Packit |
4b2029 |
decode codec =
|
|
Packit |
4b2029 |
loop id
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
loop front = await >>= maybe (finish front) (go front)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
finish front =
|
|
Packit |
4b2029 |
case B.uncons $ front B.empty of
|
|
Packit |
4b2029 |
Nothing -> return ()
|
|
Packit |
4b2029 |
Just (w, _) -> lift $ monadThrow $ DecodeException codec w
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
go front bs' =
|
|
Packit |
4b2029 |
case extra of
|
|
Packit |
4b2029 |
Left (exc, _) -> lift $ monadThrow exc
|
|
Packit |
4b2029 |
Right bs'' -> yield text >> loop (B.append bs'')
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
(text, extra) = codecDecode codec bs
|
|
Packit |
4b2029 |
bs = front bs'
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
-- Since 0.3.0
|
|
Packit |
4b2029 |
data TextException = DecodeException Codec Word8
|
|
Packit |
4b2029 |
| EncodeException Codec Char
|
|
Packit |
4b2029 |
| LengthExceeded Int
|
|
Packit |
4b2029 |
| TextException Exc.SomeException
|
|
Packit |
4b2029 |
| NewDecodeException !T.Text !Int !B.ByteString
|
|
Packit |
4b2029 |
deriving Typeable
|
|
Packit |
4b2029 |
instance Show TextException where
|
|
Packit |
4b2029 |
show (DecodeException codec w) = concat
|
|
Packit |
4b2029 |
[ "Error decoding legacy Data.Conduit.Text codec "
|
|
Packit |
4b2029 |
, show codec
|
|
Packit |
4b2029 |
, " when parsing byte: "
|
|
Packit |
4b2029 |
, show w
|
|
Packit |
4b2029 |
]
|
|
Packit |
4b2029 |
show (EncodeException codec c) = concat
|
|
Packit |
4b2029 |
[ "Error encoding legacy Data.Conduit.Text codec "
|
|
Packit |
4b2029 |
, show codec
|
|
Packit |
4b2029 |
, " when parsing char: "
|
|
Packit |
4b2029 |
, show c
|
|
Packit |
4b2029 |
]
|
|
Packit |
4b2029 |
show (LengthExceeded i) = "Data.Conduit.Text.linesBounded: line too long: " ++ show i
|
|
Packit |
4b2029 |
show (TextException se) = "Data.Conduit.Text.TextException: " ++ show se
|
|
Packit |
4b2029 |
show (NewDecodeException codec consumed next) = concat
|
|
Packit |
4b2029 |
[ "Data.Conduit.Text.decode: Error decoding stream of "
|
|
Packit |
4b2029 |
, T.unpack codec
|
|
Packit |
4b2029 |
, " bytes. Error encountered in stream at offset "
|
|
Packit |
4b2029 |
, show consumed
|
|
Packit |
4b2029 |
, ". Encountered at byte sequence "
|
|
Packit |
4b2029 |
, show next
|
|
Packit |
4b2029 |
]
|
|
Packit |
4b2029 |
instance Exc.Exception TextException
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
-- Since 0.3.0
|
|
Packit |
4b2029 |
utf8 :: Codec
|
|
Packit |
4b2029 |
utf8 = NewCodec (T.pack "UTF-8") TE.encodeUtf8 Data.Streaming.Text.decodeUtf8
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
-- Since 0.3.0
|
|
Packit |
4b2029 |
utf16_le :: Codec
|
|
Packit |
4b2029 |
utf16_le = NewCodec (T.pack "UTF-16-LE") TE.encodeUtf16LE decodeUtf16LE
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
-- Since 0.3.0
|
|
Packit |
4b2029 |
utf16_be :: Codec
|
|
Packit |
4b2029 |
utf16_be = NewCodec (T.pack "UTF-16-BE") TE.encodeUtf16BE decodeUtf16BE
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
-- Since 0.3.0
|
|
Packit |
4b2029 |
utf32_le :: Codec
|
|
Packit |
4b2029 |
utf32_le = NewCodec (T.pack "UTF-32-LE") TE.encodeUtf32LE decodeUtf32LE
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
-- Since 0.3.0
|
|
Packit |
4b2029 |
utf32_be :: Codec
|
|
Packit |
4b2029 |
utf32_be = NewCodec (T.pack "UTF-32-BE") TE.encodeUtf32BE decodeUtf32BE
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
-- Since 0.3.0
|
|
Packit |
4b2029 |
ascii :: Codec
|
|
Packit |
4b2029 |
ascii = Codec name enc dec where
|
|
Packit |
4b2029 |
name = T.pack "ASCII"
|
|
Packit |
4b2029 |
enc text = (bytes, extra) where
|
|
Packit |
4b2029 |
(safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
|
|
Packit |
4b2029 |
bytes = B8.pack (T.unpack safe)
|
|
Packit |
4b2029 |
extra = if T.null unsafe
|
|
Packit |
4b2029 |
then Nothing
|
|
Packit |
4b2029 |
else Just (EncodeException ascii (T.head unsafe), unsafe)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
dec bytes = (text, extra) where
|
|
Packit |
4b2029 |
(safe, unsafe) = B.span (<= 0x7F) bytes
|
|
Packit |
4b2029 |
text = T.pack (B8.unpack safe)
|
|
Packit |
4b2029 |
extra = if B.null unsafe
|
|
Packit |
4b2029 |
then Right B.empty
|
|
Packit |
4b2029 |
else Left (DecodeException ascii (B.head unsafe), unsafe)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
-- Since 0.3.0
|
|
Packit |
4b2029 |
iso8859_1 :: Codec
|
|
Packit |
4b2029 |
iso8859_1 = Codec name enc dec where
|
|
Packit |
4b2029 |
name = T.pack "ISO-8859-1"
|
|
Packit |
4b2029 |
enc text = (bytes, extra) where
|
|
Packit |
4b2029 |
(safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
|
|
Packit |
4b2029 |
bytes = B8.pack (T.unpack safe)
|
|
Packit |
4b2029 |
extra = if T.null unsafe
|
|
Packit |
4b2029 |
then Nothing
|
|
Packit |
4b2029 |
else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 1.0.8
|
|
Packit |
4b2029 |
takeWhile :: Monad m
|
|
Packit |
4b2029 |
=> (Char -> Bool)
|
|
Packit |
4b2029 |
-> Conduit T.Text m T.Text
|
|
Packit |
4b2029 |
takeWhile p =
|
|
Packit |
4b2029 |
loop
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
loop = await >>= maybe (return ()) go
|
|
Packit |
4b2029 |
go t =
|
|
Packit |
4b2029 |
case T.span p t of
|
|
Packit |
4b2029 |
(x, y)
|
|
Packit |
4b2029 |
| T.null y -> yield x >> loop
|
|
Packit |
4b2029 |
| otherwise -> yield x >> leftover y
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 1.0.8
|
|
Packit |
4b2029 |
dropWhile :: Monad m
|
|
Packit |
4b2029 |
=> (Char -> Bool)
|
|
Packit |
4b2029 |
-> Consumer T.Text m ()
|
|
Packit |
4b2029 |
dropWhile p =
|
|
Packit |
4b2029 |
loop
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
loop = await >>= maybe (return ()) go
|
|
Packit |
4b2029 |
go t
|
|
Packit |
4b2029 |
| T.null x = loop
|
|
Packit |
4b2029 |
| otherwise = leftover x
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
x = T.dropWhile p t
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 1.0.8
|
|
Packit |
4b2029 |
take :: Monad m => Int -> Conduit T.Text m T.Text
|
|
Packit |
4b2029 |
take =
|
|
Packit |
4b2029 |
loop
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
loop i = await >>= maybe (return ()) (go i)
|
|
Packit |
4b2029 |
go i t
|
|
Packit |
4b2029 |
| diff == 0 = yield t
|
|
Packit |
4b2029 |
| diff < 0 =
|
|
Packit |
4b2029 |
let (x, y) = T.splitAt i t
|
|
Packit |
4b2029 |
in yield x >> leftover y
|
|
Packit |
4b2029 |
| otherwise = yield t >> loop diff
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
diff = i - T.length t
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 1.0.8
|
|
Packit |
4b2029 |
drop :: Monad m => Int -> Consumer T.Text m ()
|
|
Packit |
4b2029 |
drop =
|
|
Packit |
4b2029 |
loop
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
loop i = await >>= maybe (return ()) (go i)
|
|
Packit |
4b2029 |
go i t
|
|
Packit |
4b2029 |
| diff == 0 = return ()
|
|
Packit |
4b2029 |
| diff < 0 = leftover $ T.drop i t
|
|
Packit |
4b2029 |
| otherwise = loop diff
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
diff = i - T.length t
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 1.0.8
|
|
Packit |
4b2029 |
foldLines :: Monad m
|
|
Packit |
4b2029 |
=> (a -> ConduitM T.Text o m a)
|
|
Packit |
4b2029 |
-> a
|
|
Packit |
4b2029 |
-> ConduitM T.Text o m a
|
|
Packit |
4b2029 |
foldLines f =
|
|
Packit |
4b2029 |
start
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
start a = CL.peek >>= maybe (return a) (const $ loop $ f a)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
loop consumer = do
|
|
Packit |
4b2029 |
a <- takeWhile (/= '\n') =$= do
|
|
Packit |
4b2029 |
a <- CL.map (T.filter (/= '\r')) =$= consumer
|
|
Packit |
4b2029 |
CL.sinkNull
|
|
Packit |
4b2029 |
return a
|
|
Packit |
4b2029 |
drop 1
|
|
Packit |
4b2029 |
start a
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 1.0.8
|
|
Packit |
4b2029 |
withLine :: Monad m
|
|
Packit |
4b2029 |
=> Sink T.Text m a
|
|
Packit |
4b2029 |
-> Consumer T.Text m (Maybe a)
|
|
Packit |
4b2029 |
withLine consumer = toConsumer $ do
|
|
Packit |
4b2029 |
mx <- CL.peek
|
|
Packit |
4b2029 |
case mx of
|
|
Packit |
4b2029 |
Nothing -> return Nothing
|
|
Packit |
4b2029 |
Just _ -> do
|
|
Packit |
4b2029 |
x <- takeWhile (/= '\n') =$ do
|
|
Packit |
4b2029 |
x <- CL.map (T.filter (/= '\r')) =$ consumer
|
|
Packit |
4b2029 |
CL.sinkNull
|
|
Packit |
4b2029 |
return x
|
|
Packit |
4b2029 |
drop 1
|
|
Packit |
4b2029 |
return $ Just x
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Decode a stream of UTF8-encoded bytes into a stream of text, throwing an
|
|
Packit |
4b2029 |
-- exception on invalid input.
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 1.0.15
|
|
Packit |
4b2029 |
decodeUtf8 :: MonadThrow m => Conduit B.ByteString m T.Text
|
|
Packit |
4b2029 |
decodeUtf8 = decode utf8
|
|
Packit |
4b2029 |
{- no meaningful performance advantage
|
|
Packit |
4b2029 |
CI.ConduitM (loop 0 decodeUtf8)
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
loop consumed dec =
|
|
Packit |
4b2029 |
CI.NeedInput go finish
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
finish () =
|
|
Packit |
4b2029 |
case dec B.empty of
|
|
Packit |
4b2029 |
DecodeResultSuccess _ _ -> return ()
|
|
Packit |
4b2029 |
DecodeResultFailure t rest -> onFailure B.empty t rest
|
|
Packit |
4b2029 |
{-# INLINE finish #-}
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
go bs | B.null bs = CI.NeedInput go finish
|
|
Packit |
4b2029 |
go bs =
|
|
Packit |
4b2029 |
case dec bs of
|
|
Packit |
4b2029 |
DecodeResultSuccess t dec' -> do
|
|
Packit |
4b2029 |
let consumed' = consumed + B.length bs
|
|
Packit |
4b2029 |
next' = loop consumed' dec'
|
|
Packit |
4b2029 |
next
|
|
Packit |
4b2029 |
| T.null t = next'
|
|
Packit |
4b2029 |
| otherwise = CI.HaveOutput next' (return ()) t
|
|
Packit |
4b2029 |
in consumed' `seq` next
|
|
Packit |
4b2029 |
DecodeResultFailure t rest -> onFailure bs t rest
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
onFailure bs t rest = do
|
|
Packit |
4b2029 |
unless (T.null t) (CI.yield t)
|
|
Packit |
4b2029 |
unless (B.null rest) (CI.leftover rest)
|
|
Packit |
4b2029 |
let consumed' = consumed + B.length bs - B.length rest
|
|
Packit |
4b2029 |
monadThrow $ NewDecodeException (T.pack "UTF-8") consumed' (B.take 4 rest)
|
|
Packit |
4b2029 |
{-# INLINE onFailure #-}
|
|
Packit |
4b2029 |
-}
|
|
Packit |
4b2029 |
{-# INLINE decodeUtf8 #-}
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Encode a stream of text into a stream of bytes.
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 1.0.15
|
|
Packit |
4b2029 |
encodeUtf8 :: Monad m => Conduit T.Text m B.ByteString
|
|
Packit |
4b2029 |
encodeUtf8 = CL.map TE.encodeUtf8
|
|
Packit |
4b2029 |
{-# INLINE encodeUtf8 #-}
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Automatically determine which UTF variant is being used. This function
|
|
Packit |
4b2029 |
-- checks for BOMs, removing them as necessary. It defaults to assuming UTF-8.
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Since 1.1.9
|
|
Packit |
4b2029 |
detectUtf :: MonadThrow m => Conduit B.ByteString m T.Text
|
|
Packit |
4b2029 |
detectUtf =
|
|
Packit |
4b2029 |
go id
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
go front = await >>= maybe (close front) (push front)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
push front bs'
|
|
Packit |
4b2029 |
| B.length bs < 4 = go $ B.append bs
|
|
Packit |
4b2029 |
| otherwise = leftDecode bs
|
|
Packit |
4b2029 |
where bs = front bs'
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
close front = leftDecode $ front B.empty
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
leftDecode bs = leftover bsOut >> decode codec
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
bsOut = B.append (B.drop toDrop x) y
|
|
Packit |
4b2029 |
(x, y) = B.splitAt 4 bs
|
|
Packit |
4b2029 |
(toDrop, codec) =
|
|
Packit |
4b2029 |
case B.unpack x of
|
|
Packit |
4b2029 |
[0x00, 0x00, 0xFE, 0xFF] -> (4, utf32_be)
|
|
Packit |
4b2029 |
[0xFF, 0xFE, 0x00, 0x00] -> (4, utf32_le)
|
|
Packit |
4b2029 |
0xFE : 0xFF: _ -> (2, utf16_be)
|
|
Packit |
4b2029 |
0xFF : 0xFE: _ -> (2, utf16_le)
|
|
Packit |
4b2029 |
0xEF : 0xBB: 0xBF : _ -> (3, utf8)
|
|
Packit |
4b2029 |
_ -> (0, utf8) -- Assuming UTF-8
|
|
Packit |
4b2029 |
{-# INLINE detectUtf #-}
|