Blame Data/Conduit/Zlib.hs

Packit 4b2029
{-# LANGUAGE FlexibleContexts #-}
Packit 4b2029
{-# LANGUAGE RankNTypes #-}
Packit 4b2029
-- | Streaming compression and decompression using conduits.
Packit 4b2029
--
Packit 4b2029
-- Parts of this code were taken from zlib-enum and adapted for conduits.
Packit 4b2029
module Data.Conduit.Zlib (
Packit 4b2029
    -- * Conduits
Packit 4b2029
    compress, decompress, gzip, ungzip,
Packit 4b2029
    -- * Flushing
Packit 4b2029
    compressFlush, decompressFlush,
Packit 4b2029
    -- * Decompression combinators
Packit 4b2029
    multiple,
Packit 4b2029
    -- * Re-exported from zlib-bindings
Packit 4b2029
    WindowBits (..), defaultWindowBits
Packit 4b2029
) where
Packit 4b2029
Packit 4b2029
import Data.Streaming.Zlib
Packit 4b2029
import Data.Conduit
Packit 4b2029
import Data.ByteString (ByteString)
Packit 4b2029
import qualified Data.ByteString as S
Packit 4b2029
import Control.Monad (unless, liftM)
Packit 4b2029
import Control.Monad.Trans.Class (lift, MonadTrans)
Packit 4b2029
import Control.Monad.Primitive (PrimMonad, unsafePrimToPrim)
Packit 4b2029
import Control.Monad.Base (MonadBase, liftBase)
Packit 4b2029
import Control.Monad.Trans.Resource (MonadThrow, monadThrow)
Packit 4b2029
import Data.Function (fix)
Packit 4b2029
Packit 4b2029
-- | Gzip compression with default parameters.
Packit 4b2029
gzip :: (MonadThrow m, MonadBase base m, PrimMonad base) => Conduit ByteString m ByteString
Packit 4b2029
gzip = compress 1 (WindowBits 31)
Packit 4b2029
Packit 4b2029
-- | Gzip decompression with default parameters.
Packit 4b2029
ungzip :: (MonadBase base m, PrimMonad base, MonadThrow m) => Conduit ByteString m ByteString
Packit 4b2029
ungzip = decompress (WindowBits 31)
Packit 4b2029
Packit 4b2029
unsafeLiftIO :: (MonadBase base m, PrimMonad base, MonadThrow m) => IO a -> m a
Packit 4b2029
unsafeLiftIO = liftBase . unsafePrimToPrim
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
-- Decompress (inflate) a stream of 'ByteString's. For example:
Packit 4b2029
--
Packit 4b2029
-- >    sourceFile "test.z" $= decompress defaultWindowBits $$ sinkFile "test"
Packit 4b2029
Packit 4b2029
decompress
Packit 4b2029
    :: (MonadBase base m, PrimMonad base, MonadThrow m)
Packit 4b2029
    => WindowBits -- ^ Zlib parameter (see the zlib-bindings package as well as the zlib C library)
Packit 4b2029
    -> Conduit ByteString m ByteString
Packit 4b2029
decompress =
Packit 4b2029
    helperDecompress (liftM (fmap Chunk) await) yield' leftover
Packit 4b2029
  where
Packit 4b2029
    yield' Flush = return ()
Packit 4b2029
    yield' (Chunk bs) = yield bs
Packit 4b2029
Packit 4b2029
-- | Same as 'decompress', but allows you to explicitly flush the stream.
Packit 4b2029
decompressFlush
Packit 4b2029
    :: (MonadBase base m, PrimMonad base, MonadThrow m)
Packit 4b2029
    => WindowBits -- ^ Zlib parameter (see the zlib-bindings package as well as the zlib C library)
Packit 4b2029
    -> Conduit (Flush ByteString) m (Flush ByteString)
Packit 4b2029
decompressFlush = helperDecompress await yield (leftover . Chunk)
Packit 4b2029
Packit 4b2029
helperDecompress :: (Monad (t m), MonadBase base m, PrimMonad base, MonadThrow m, MonadTrans t)
Packit 4b2029
                 => t m (Maybe (Flush ByteString))
Packit 4b2029
                 -> (Flush ByteString -> t m ())
Packit 4b2029
                 -> (ByteString -> t m ())
Packit 4b2029
                 -> WindowBits
Packit 4b2029
                 -> t m ()
Packit 4b2029
helperDecompress await' yield' leftover' config = do
Packit 4b2029
    -- Initialize the stateful inflater, which will be used below
Packit 4b2029
    -- This inflater is never exposed outside of this function
Packit 4b2029
    inf <- lift $ unsafeLiftIO $ initInflate config
Packit 4b2029
Packit 4b2029
    -- Some helper functions used by the main feeder loop below
Packit 4b2029
Packit 4b2029
    let -- Flush any remaining inflated bytes downstream
Packit 4b2029
        flush = do
Packit 4b2029
            chunk <- lift $ unsafeLiftIO $ flushInflate inf
Packit 4b2029
            unless (S.null chunk) $ yield' $ Chunk chunk
Packit 4b2029
Packit 4b2029
        -- Get any input which is unused by the inflater
Packit 4b2029
        getUnused = lift $ unsafeLiftIO $ getUnusedInflate inf
Packit 4b2029
Packit 4b2029
        -- If there is any unused data, return it as leftovers to the stream
Packit 4b2029
        unused = do
Packit 4b2029
            rem' <- getUnused
Packit 4b2029
            unless (S.null rem') $ leftover' rem'
Packit 4b2029
Packit 4b2029
    -- Main loop: feed data from upstream into the inflater
Packit 4b2029
    fix $ \feeder -> do
Packit 4b2029
        mnext <- await'
Packit 4b2029
        case mnext of
Packit 4b2029
            -- No more data is available from upstream
Packit 4b2029
            Nothing -> do
Packit 4b2029
                -- Flush any remaining uncompressed data
Packit 4b2029
                flush
Packit 4b2029
                -- Return the rest of the unconsumed data as leftovers
Packit 4b2029
                unused
Packit 4b2029
            -- Another chunk of compressed data arrived
Packit 4b2029
            Just (Chunk x) -> do
Packit 4b2029
                -- Feed the compressed data into the inflater, returning a
Packit 4b2029
                -- "popper" which will return chunks of decompressed data
Packit 4b2029
                popper <- lift $ unsafeLiftIO $ feedInflate inf x
Packit 4b2029
Packit 4b2029
                -- Loop over the popper grabbing decompressed chunks and
Packit 4b2029
                -- yielding them downstream
Packit 4b2029
                fix $ \pop -> do
Packit 4b2029
                    mbs <- lift $ unsafeLiftIO popper
Packit 4b2029
                    case mbs of
Packit 4b2029
                        -- No more data from this popper
Packit 4b2029
                        PRDone -> do
Packit 4b2029
                            rem' <- getUnused
Packit 4b2029
                            if S.null rem'
Packit 4b2029
                                -- No data was unused by the inflater, so let's
Packit 4b2029
                                -- fill it up again and get more data out of it
Packit 4b2029
                                then feeder
Packit 4b2029
                                -- In this case, there is some unconsumed data,
Packit 4b2029
                                -- meaning the compressed stream is complete.
Packit 4b2029
                                -- At this point, we need to stop feeding,
Packit 4b2029
                                -- return the unconsumed data as leftovers, and
Packit 4b2029
                                -- flush any remaining content (which should be
Packit 4b2029
                                -- nothing)
Packit 4b2029
                                else do
Packit 4b2029
                                    flush
Packit 4b2029
                                    leftover' rem'
Packit 4b2029
                        -- Another chunk available, yield it downstream and
Packit 4b2029
                        -- loop again
Packit 4b2029
                        PRNext bs -> do
Packit 4b2029
                            yield' (Chunk bs)
Packit 4b2029
                            pop
Packit 4b2029
                        -- An error occurred inside zlib, throw it
Packit 4b2029
                        PRError e -> lift $ monadThrow e
Packit 4b2029
            -- We've been asked to flush the stream
Packit 4b2029
            Just Flush -> do
Packit 4b2029
                -- Get any uncompressed data waiting for us
Packit 4b2029
                flush
Packit 4b2029
                -- Put a Flush in the stream
Packit 4b2029
                yield' Flush
Packit 4b2029
                -- Feed in more data
Packit 4b2029
                feeder
Packit 4b2029
Packit 4b2029
-- |
Packit 4b2029
-- Compress (deflate) a stream of 'ByteString's. The 'WindowBits' also control
Packit 4b2029
-- the format (zlib vs. gzip).
Packit 4b2029
Packit 4b2029
compress
Packit 4b2029
    :: (MonadBase base m, PrimMonad base, MonadThrow m)
Packit 4b2029
    => Int         -- ^ Compression level
Packit 4b2029
    -> WindowBits  -- ^ Zlib parameter (see the zlib-bindings package as well as the zlib C library)
Packit 4b2029
    -> Conduit ByteString m ByteString
Packit 4b2029
compress =
Packit 4b2029
    helperCompress (liftM (fmap Chunk) await) yield'
Packit 4b2029
  where
Packit 4b2029
    yield' Flush = return ()
Packit 4b2029
    yield' (Chunk bs) = yield bs
Packit 4b2029
Packit 4b2029
-- | Same as 'compress', but allows you to explicitly flush the stream.
Packit 4b2029
compressFlush
Packit 4b2029
    :: (MonadBase base m, PrimMonad base, MonadThrow m)
Packit 4b2029
    => Int         -- ^ Compression level
Packit 4b2029
    -> WindowBits  -- ^ Zlib parameter (see the zlib-bindings package as well as the zlib C library)
Packit 4b2029
    -> Conduit (Flush ByteString) m (Flush ByteString)
Packit 4b2029
compressFlush = helperCompress await yield
Packit 4b2029
Packit 4b2029
helperCompress :: (Monad (t m), MonadBase base m, PrimMonad base, MonadThrow m, MonadTrans t)
Packit 4b2029
               => t m (Maybe (Flush ByteString))
Packit 4b2029
               -> (Flush ByteString -> t m ())
Packit 4b2029
               -> Int
Packit 4b2029
               -> WindowBits
Packit 4b2029
               -> t m ()
Packit 4b2029
helperCompress await' yield' level config =
Packit 4b2029
    await' >>= maybe (return ()) start
Packit 4b2029
  where
Packit 4b2029
    start input = do
Packit 4b2029
        def <- lift $ unsafeLiftIO $ initDeflate level config
Packit 4b2029
        push def input
Packit 4b2029
Packit 4b2029
    continue def = await' >>= maybe (close def) (push def)
Packit 4b2029
Packit 4b2029
    goPopper popper = do
Packit 4b2029
        mbs <- lift $ unsafeLiftIO popper
Packit 4b2029
        case mbs of
Packit 4b2029
            PRDone -> return ()
Packit 4b2029
            PRNext bs -> yield' (Chunk bs) >> goPopper popper
Packit 4b2029
            PRError e -> lift $ monadThrow e
Packit 4b2029
Packit 4b2029
    push def (Chunk x) = do
Packit 4b2029
        popper <- lift $ unsafeLiftIO $ feedDeflate def x
Packit 4b2029
        goPopper popper
Packit 4b2029
        continue def
Packit 4b2029
Packit 4b2029
    push def Flush = do
Packit 4b2029
        mchunk <- lift $ unsafeLiftIO $ flushDeflate def
Packit 4b2029
        case mchunk of
Packit 4b2029
            PRDone -> return ()
Packit 4b2029
            PRNext x -> yield' $ Chunk x
Packit 4b2029
            PRError e -> lift $ monadThrow e
Packit 4b2029
        yield' Flush
Packit 4b2029
        continue def
Packit 4b2029
Packit 4b2029
    close def = do
Packit 4b2029
        mchunk <- lift $ unsafeLiftIO $ finishDeflate def
Packit 4b2029
        case mchunk of
Packit 4b2029
            PRDone -> return ()
Packit 4b2029
            PRNext chunk -> yield' (Chunk chunk) >> close def
Packit 4b2029
            PRError e -> lift $ monadThrow e
Packit 4b2029
Packit 4b2029
-- | The standard 'decompress' and 'ungzip' functions will only decompress a
Packit 4b2029
-- single compressed entity from the stream. This combinator will exhaust the
Packit 4b2029
-- stream completely of all individual compressed entities. This is useful for
Packit 4b2029
-- cases where you have a concatenated archive, e.g. @cat file1.gz file2.gz >
Packit 4b2029
-- combined.gz@.
Packit 4b2029
--
Packit 4b2029
-- Usage:
Packit 4b2029
--
Packit 4b2029
-- > sourceFile "combined.gz" $$ multiple ungzip =$ consume
Packit 4b2029
--
Packit 4b2029
-- This combinator will not fail on an empty stream. If you want to ensure that
Packit 4b2029
-- at least one compressed entity in the stream exists, consider a usage such
Packit 4b2029
-- as:
Packit 4b2029
--
Packit 4b2029
-- > sourceFile "combined.gz" $$ (ungzip >> multiple ungzip) =$ consume
Packit 4b2029
--
Packit 4b2029
-- @since 1.1.10
Packit 4b2029
multiple :: Monad m
Packit 4b2029
         => Conduit ByteString m a
Packit 4b2029
         -> Conduit ByteString m a
Packit 4b2029
multiple inner =
Packit 4b2029
    loop
Packit 4b2029
  where
Packit 4b2029
    loop = do
Packit 4b2029
        mbs <- await
Packit 4b2029
        case mbs of
Packit 4b2029
            Nothing -> return ()
Packit 4b2029
            Just bs
Packit 4b2029
                | S.null bs -> loop
Packit 4b2029
                | otherwise -> do
Packit 4b2029
                    leftover bs
Packit 4b2029
                    inner
Packit 4b2029
                    loop