|
Packit |
4b2029 |
{-# LANGUAGE FlexibleContexts #-}
|
|
Packit |
4b2029 |
{-# LANGUAGE RankNTypes #-}
|
|
Packit |
4b2029 |
-- | Streaming compression and decompression using conduits.
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Parts of this code were taken from zlib-enum and adapted for conduits.
|
|
Packit |
4b2029 |
module Data.Conduit.Zlib (
|
|
Packit |
4b2029 |
-- * Conduits
|
|
Packit |
4b2029 |
compress, decompress, gzip, ungzip,
|
|
Packit |
4b2029 |
-- * Flushing
|
|
Packit |
4b2029 |
compressFlush, decompressFlush,
|
|
Packit |
4b2029 |
-- * Decompression combinators
|
|
Packit |
4b2029 |
multiple,
|
|
Packit |
4b2029 |
-- * Re-exported from zlib-bindings
|
|
Packit |
4b2029 |
WindowBits (..), defaultWindowBits
|
|
Packit |
4b2029 |
) where
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
import Data.Streaming.Zlib
|
|
Packit |
4b2029 |
import Data.Conduit
|
|
Packit |
4b2029 |
import Data.ByteString (ByteString)
|
|
Packit |
4b2029 |
import qualified Data.ByteString as S
|
|
Packit |
4b2029 |
import Control.Monad (unless, liftM)
|
|
Packit |
4b2029 |
import Control.Monad.Trans.Class (lift, MonadTrans)
|
|
Packit |
4b2029 |
import Control.Monad.Primitive (PrimMonad, unsafePrimToPrim)
|
|
Packit |
4b2029 |
import Control.Monad.Base (MonadBase, liftBase)
|
|
Packit |
4b2029 |
import Control.Monad.Trans.Resource (MonadThrow, monadThrow)
|
|
Packit |
4b2029 |
import Data.Function (fix)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Gzip compression with default parameters.
|
|
Packit |
4b2029 |
gzip :: (MonadThrow m, MonadBase base m, PrimMonad base) => Conduit ByteString m ByteString
|
|
Packit |
4b2029 |
gzip = compress 1 (WindowBits 31)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Gzip decompression with default parameters.
|
|
Packit |
4b2029 |
ungzip :: (MonadBase base m, PrimMonad base, MonadThrow m) => Conduit ByteString m ByteString
|
|
Packit |
4b2029 |
ungzip = decompress (WindowBits 31)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
unsafeLiftIO :: (MonadBase base m, PrimMonad base, MonadThrow m) => IO a -> m a
|
|
Packit |
4b2029 |
unsafeLiftIO = liftBase . unsafePrimToPrim
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
-- Decompress (inflate) a stream of 'ByteString's. For example:
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- > sourceFile "test.z" $= decompress defaultWindowBits $$ sinkFile "test"
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
decompress
|
|
Packit |
4b2029 |
:: (MonadBase base m, PrimMonad base, MonadThrow m)
|
|
Packit |
4b2029 |
=> WindowBits -- ^ Zlib parameter (see the zlib-bindings package as well as the zlib C library)
|
|
Packit |
4b2029 |
-> Conduit ByteString m ByteString
|
|
Packit |
4b2029 |
decompress =
|
|
Packit |
4b2029 |
helperDecompress (liftM (fmap Chunk) await) yield' leftover
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
yield' Flush = return ()
|
|
Packit |
4b2029 |
yield' (Chunk bs) = yield bs
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Same as 'decompress', but allows you to explicitly flush the stream.
|
|
Packit |
4b2029 |
decompressFlush
|
|
Packit |
4b2029 |
:: (MonadBase base m, PrimMonad base, MonadThrow m)
|
|
Packit |
4b2029 |
=> WindowBits -- ^ Zlib parameter (see the zlib-bindings package as well as the zlib C library)
|
|
Packit |
4b2029 |
-> Conduit (Flush ByteString) m (Flush ByteString)
|
|
Packit |
4b2029 |
decompressFlush = helperDecompress await yield (leftover . Chunk)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
helperDecompress :: (Monad (t m), MonadBase base m, PrimMonad base, MonadThrow m, MonadTrans t)
|
|
Packit |
4b2029 |
=> t m (Maybe (Flush ByteString))
|
|
Packit |
4b2029 |
-> (Flush ByteString -> t m ())
|
|
Packit |
4b2029 |
-> (ByteString -> t m ())
|
|
Packit |
4b2029 |
-> WindowBits
|
|
Packit |
4b2029 |
-> t m ()
|
|
Packit |
4b2029 |
helperDecompress await' yield' leftover' config = do
|
|
Packit |
4b2029 |
-- Initialize the stateful inflater, which will be used below
|
|
Packit |
4b2029 |
-- This inflater is never exposed outside of this function
|
|
Packit |
4b2029 |
inf <- lift $ unsafeLiftIO $ initInflate config
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- Some helper functions used by the main feeder loop below
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
let -- Flush any remaining inflated bytes downstream
|
|
Packit |
4b2029 |
flush = do
|
|
Packit |
4b2029 |
chunk <- lift $ unsafeLiftIO $ flushInflate inf
|
|
Packit |
4b2029 |
unless (S.null chunk) $ yield' $ Chunk chunk
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- Get any input which is unused by the inflater
|
|
Packit |
4b2029 |
getUnused = lift $ unsafeLiftIO $ getUnusedInflate inf
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- If there is any unused data, return it as leftovers to the stream
|
|
Packit |
4b2029 |
unused = do
|
|
Packit |
4b2029 |
rem' <- getUnused
|
|
Packit |
4b2029 |
unless (S.null rem') $ leftover' rem'
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- Main loop: feed data from upstream into the inflater
|
|
Packit |
4b2029 |
fix $ \feeder -> do
|
|
Packit |
4b2029 |
mnext <- await'
|
|
Packit |
4b2029 |
case mnext of
|
|
Packit |
4b2029 |
-- No more data is available from upstream
|
|
Packit |
4b2029 |
Nothing -> do
|
|
Packit |
4b2029 |
-- Flush any remaining uncompressed data
|
|
Packit |
4b2029 |
flush
|
|
Packit |
4b2029 |
-- Return the rest of the unconsumed data as leftovers
|
|
Packit |
4b2029 |
unused
|
|
Packit |
4b2029 |
-- Another chunk of compressed data arrived
|
|
Packit |
4b2029 |
Just (Chunk x) -> do
|
|
Packit |
4b2029 |
-- Feed the compressed data into the inflater, returning a
|
|
Packit |
4b2029 |
-- "popper" which will return chunks of decompressed data
|
|
Packit |
4b2029 |
popper <- lift $ unsafeLiftIO $ feedInflate inf x
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- Loop over the popper grabbing decompressed chunks and
|
|
Packit |
4b2029 |
-- yielding them downstream
|
|
Packit |
4b2029 |
fix $ \pop -> do
|
|
Packit |
4b2029 |
mbs <- lift $ unsafeLiftIO popper
|
|
Packit |
4b2029 |
case mbs of
|
|
Packit |
4b2029 |
-- No more data from this popper
|
|
Packit |
4b2029 |
PRDone -> do
|
|
Packit |
4b2029 |
rem' <- getUnused
|
|
Packit |
4b2029 |
if S.null rem'
|
|
Packit |
4b2029 |
-- No data was unused by the inflater, so let's
|
|
Packit |
4b2029 |
-- fill it up again and get more data out of it
|
|
Packit |
4b2029 |
then feeder
|
|
Packit |
4b2029 |
-- In this case, there is some unconsumed data,
|
|
Packit |
4b2029 |
-- meaning the compressed stream is complete.
|
|
Packit |
4b2029 |
-- At this point, we need to stop feeding,
|
|
Packit |
4b2029 |
-- return the unconsumed data as leftovers, and
|
|
Packit |
4b2029 |
-- flush any remaining content (which should be
|
|
Packit |
4b2029 |
-- nothing)
|
|
Packit |
4b2029 |
else do
|
|
Packit |
4b2029 |
flush
|
|
Packit |
4b2029 |
leftover' rem'
|
|
Packit |
4b2029 |
-- Another chunk available, yield it downstream and
|
|
Packit |
4b2029 |
-- loop again
|
|
Packit |
4b2029 |
PRNext bs -> do
|
|
Packit |
4b2029 |
yield' (Chunk bs)
|
|
Packit |
4b2029 |
pop
|
|
Packit |
4b2029 |
-- An error occurred inside zlib, throw it
|
|
Packit |
4b2029 |
PRError e -> lift $ monadThrow e
|
|
Packit |
4b2029 |
-- We've been asked to flush the stream
|
|
Packit |
4b2029 |
Just Flush -> do
|
|
Packit |
4b2029 |
-- Get any uncompressed data waiting for us
|
|
Packit |
4b2029 |
flush
|
|
Packit |
4b2029 |
-- Put a Flush in the stream
|
|
Packit |
4b2029 |
yield' Flush
|
|
Packit |
4b2029 |
-- Feed in more data
|
|
Packit |
4b2029 |
feeder
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- |
|
|
Packit |
4b2029 |
-- Compress (deflate) a stream of 'ByteString's. The 'WindowBits' also control
|
|
Packit |
4b2029 |
-- the format (zlib vs. gzip).
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
compress
|
|
Packit |
4b2029 |
:: (MonadBase base m, PrimMonad base, MonadThrow m)
|
|
Packit |
4b2029 |
=> Int -- ^ Compression level
|
|
Packit |
4b2029 |
-> WindowBits -- ^ Zlib parameter (see the zlib-bindings package as well as the zlib C library)
|
|
Packit |
4b2029 |
-> Conduit ByteString m ByteString
|
|
Packit |
4b2029 |
compress =
|
|
Packit |
4b2029 |
helperCompress (liftM (fmap Chunk) await) yield'
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
yield' Flush = return ()
|
|
Packit |
4b2029 |
yield' (Chunk bs) = yield bs
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | Same as 'compress', but allows you to explicitly flush the stream.
|
|
Packit |
4b2029 |
compressFlush
|
|
Packit |
4b2029 |
:: (MonadBase base m, PrimMonad base, MonadThrow m)
|
|
Packit |
4b2029 |
=> Int -- ^ Compression level
|
|
Packit |
4b2029 |
-> WindowBits -- ^ Zlib parameter (see the zlib-bindings package as well as the zlib C library)
|
|
Packit |
4b2029 |
-> Conduit (Flush ByteString) m (Flush ByteString)
|
|
Packit |
4b2029 |
compressFlush = helperCompress await yield
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
helperCompress :: (Monad (t m), MonadBase base m, PrimMonad base, MonadThrow m, MonadTrans t)
|
|
Packit |
4b2029 |
=> t m (Maybe (Flush ByteString))
|
|
Packit |
4b2029 |
-> (Flush ByteString -> t m ())
|
|
Packit |
4b2029 |
-> Int
|
|
Packit |
4b2029 |
-> WindowBits
|
|
Packit |
4b2029 |
-> t m ()
|
|
Packit |
4b2029 |
helperCompress await' yield' level config =
|
|
Packit |
4b2029 |
await' >>= maybe (return ()) start
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
start input = do
|
|
Packit |
4b2029 |
def <- lift $ unsafeLiftIO $ initDeflate level config
|
|
Packit |
4b2029 |
push def input
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
continue def = await' >>= maybe (close def) (push def)
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
goPopper popper = do
|
|
Packit |
4b2029 |
mbs <- lift $ unsafeLiftIO popper
|
|
Packit |
4b2029 |
case mbs of
|
|
Packit |
4b2029 |
PRDone -> return ()
|
|
Packit |
4b2029 |
PRNext bs -> yield' (Chunk bs) >> goPopper popper
|
|
Packit |
4b2029 |
PRError e -> lift $ monadThrow e
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
push def (Chunk x) = do
|
|
Packit |
4b2029 |
popper <- lift $ unsafeLiftIO $ feedDeflate def x
|
|
Packit |
4b2029 |
goPopper popper
|
|
Packit |
4b2029 |
continue def
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
push def Flush = do
|
|
Packit |
4b2029 |
mchunk <- lift $ unsafeLiftIO $ flushDeflate def
|
|
Packit |
4b2029 |
case mchunk of
|
|
Packit |
4b2029 |
PRDone -> return ()
|
|
Packit |
4b2029 |
PRNext x -> yield' $ Chunk x
|
|
Packit |
4b2029 |
PRError e -> lift $ monadThrow e
|
|
Packit |
4b2029 |
yield' Flush
|
|
Packit |
4b2029 |
continue def
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
close def = do
|
|
Packit |
4b2029 |
mchunk <- lift $ unsafeLiftIO $ finishDeflate def
|
|
Packit |
4b2029 |
case mchunk of
|
|
Packit |
4b2029 |
PRDone -> return ()
|
|
Packit |
4b2029 |
PRNext chunk -> yield' (Chunk chunk) >> close def
|
|
Packit |
4b2029 |
PRError e -> lift $ monadThrow e
|
|
Packit |
4b2029 |
|
|
Packit |
4b2029 |
-- | The standard 'decompress' and 'ungzip' functions will only decompress a
|
|
Packit |
4b2029 |
-- single compressed entity from the stream. This combinator will exhaust the
|
|
Packit |
4b2029 |
-- stream completely of all individual compressed entities. This is useful for
|
|
Packit |
4b2029 |
-- cases where you have a concatenated archive, e.g. @cat file1.gz file2.gz >
|
|
Packit |
4b2029 |
-- combined.gz@.
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- Usage:
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- > sourceFile "combined.gz" $$ multiple ungzip =$ consume
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- This combinator will not fail on an empty stream. If you want to ensure that
|
|
Packit |
4b2029 |
-- at least one compressed entity in the stream exists, consider a usage such
|
|
Packit |
4b2029 |
-- as:
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- > sourceFile "combined.gz" $$ (ungzip >> multiple ungzip) =$ consume
|
|
Packit |
4b2029 |
--
|
|
Packit |
4b2029 |
-- @since 1.1.10
|
|
Packit |
4b2029 |
multiple :: Monad m
|
|
Packit |
4b2029 |
=> Conduit ByteString m a
|
|
Packit |
4b2029 |
-> Conduit ByteString m a
|
|
Packit |
4b2029 |
multiple inner =
|
|
Packit |
4b2029 |
loop
|
|
Packit |
4b2029 |
where
|
|
Packit |
4b2029 |
loop = do
|
|
Packit |
4b2029 |
mbs <- await
|
|
Packit |
4b2029 |
case mbs of
|
|
Packit |
4b2029 |
Nothing -> return ()
|
|
Packit |
4b2029 |
Just bs
|
|
Packit |
4b2029 |
| S.null bs -> loop
|
|
Packit |
4b2029 |
| otherwise -> do
|
|
Packit |
4b2029 |
leftover bs
|
|
Packit |
4b2029 |
inner
|
|
Packit |
4b2029 |
loop
|