diff --git a/Codec/Archive/Tar.hs b/Codec/Archive/Tar.hs
new file mode 100644
index 0000000..20e6658
--- /dev/null
+++ b/Codec/Archive/Tar.hs
@@ -0,0 +1,277 @@
+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Codec.Archive.Tar
+-- Copyright   :  (c) 2007 Bjorn Bringert,
+--                    2008 Andrea Vezzosi,
+--                    2008-2012 Duncan Coutts
+-- License     :  BSD3
+--
+-- Maintainer  :  duncan@community.haskell.org
+-- Portability :  portable
+--
+-- Reading, writing and manipulating \"@.tar@\" archive files.
+--
+-- This module uses common names and so is designed to be imported qualified:
+--
+-- > import qualified Codec.Archive.Tar as Tar
+--
+-----------------------------------------------------------------------------
+module Codec.Archive.Tar (
+
+  -- | Tar archive files are used to store a collection of other files in a
+  -- single file. They consists of a sequence of entries. Each entry describes
+  -- a file or directory (or some other special kind of file). The entry stores
+  -- a little bit of meta-data, in particular the file or directory name.
+  --
+  -- Unlike some other archive formats, a tar file contains no index. The
+  -- information about each entry is stored next to the entry. Because of this,
+  -- tar files are almost always processed linearly rather than in a
+  -- random-access fashion.
+  --
+  -- The functions in this package are designed for working on tar files
+  -- linearly and lazily. This makes it possible to do many operations in
+  -- constant space rather than having to load the entire archive into memory.
+  --
+  -- It can read and write standard POSIX tar files and also the GNU and old
+  -- Unix V7 tar formats. The convenience functions that are provided in the
+  -- "Codec.Archive.Tar.Entry" module for creating archive entries are
+  -- primarily designed for standard portable archives. If you need to
+  -- construct GNU format archives or exactly preserve file ownership and
+  -- permissions then you will need to write some extra helper functions.
+  --
+  -- This module contains just the simple high level operations without
+  -- exposing the all the details of tar files. If you need to inspect tar
+  -- entries in more detail or construct them directly then you also need
+  -- the module "Codec.Archive.Tar.Entry".
+
+  -- * High level \"all in one\" operations
+  create,
+  extract,
+  append,
+
+  -- * Notes
+  -- ** Compressed tar archives
+  -- | Tar files are commonly used in conjunction with gzip compression, as in
+  -- \"@.tar.gz@\" or \"@.tar.bz2@\" files. This module does not directly
+  -- handle compressed tar files however they can be handled easily by
+  -- composing functions from this module and the modules
+  -- @Codec.Compression.GZip@ or @Codec.Compression.BZip@
+  -- (see @zlib@ or @bzlib@ packages).
+  --
+  -- Creating a compressed \"@.tar.gz@\" file is just a minor variation on the
+  -- 'create' function, but where throw compression into the pipeline:
+  --
+  -- > BS.writeFile tar . GZip.compress . Tar.write =<< Tar.pack base dir
+  --
+  -- Similarly, extracting a compressed \"@.tar.gz@\" is just a minor variation
+  -- on the 'extract' function where we use decompression in the pipeline:
+  --
+  -- > Tar.unpack dir . Tar.read . GZip.decompress =<< BS.readFile tar
+  --
+
+  -- ** Security
+  -- | This is pretty important. A maliciously constructed tar archives could
+  -- contain entries that specify bad file names. It could specify absolute
+  -- file names like \"@\/etc\/passwd@\" or relative files outside of the
+  -- archive like \"..\/..\/..\/something\". This security problem is commonly
+  -- called a \"directory traversal vulnerability\". Historically, such
+  -- vulnerabilities have been common in packages handling tar archives.
+  --
+  -- The 'extract' and 'unpack' functions check for bad file names. See the
+  -- 'checkSecurity' function for more details. If you need to do any custom
+  -- unpacking then you should use this.
+
+  -- ** Tarbombs
+  -- | A \"tarbomb\" is a @.tar@ file where not all entries are in a
+  -- subdirectory but instead files extract into the top level directory. The
+  -- 'extract' function does not check for these however if you want to do
+  -- that you can use the 'checkTarbomb' function like so:
+  --
+  -- > Tar.unpack dir . Tar.checkTarbomb expectedDir
+  -- >                . Tar.read =<< BS.readFile tar
+  --
+  -- In this case extraction will fail if any file is outside of @expectedDir@.
+
+  -- * Converting between internal and external representation
+  -- | Note, you cannot expect @write . read@ to give exactly the same output
+  -- as input. You can expect the information to be preserved exactly however.
+  -- This is because 'read' accepts common format variations while 'write'
+  -- produces the standard format.
+  read,
+  write,
+
+  -- * Packing and unpacking files to\/from internal representation
+  -- | These functions are for packing and unpacking portable archives. They
+  -- are not suitable in cases where it is important to preserve file ownership
+  -- and permissions or to archive special files like named pipes and Unix
+  -- device files.
+  pack,
+  unpack,
+
+  -- * Types
+  -- ** Tar entry type
+  -- | This module provides only very simple and limited read-only access to
+  -- the 'Entry' type. If you need access to the details or if you need to
+  -- construct your own entries then also import "Codec.Archive.Tar.Entry".
+  Entry,
+  entryPath,
+  entryContent,
+  EntryContent(..),
+
+  -- ** Sequences of tar entries
+  Entries(..),
+  mapEntries,
+  mapEntriesNoFail,
+  foldEntries,
+  foldlEntries,
+  unfoldEntries,
+
+  -- * Error handling
+  -- | Reading tar files can fail if the data does not match the tar file
+  -- format correctly.
+  --
+  -- The style of error handling by returning structured errors. The pure
+  -- functions in the library do not throw exceptions, they return the errors
+  -- as data. The IO actions in the library can throw exceptions, in particular
+  -- the 'unpack' action does this. All the error types used are an instance of
+  -- the standard 'Exception' class so it is possible to 'throw' and 'catch'
+  -- them.
+
+  -- ** Errors from reading tar files
+  FormatError(..),
+
+#ifdef TESTS
+  prop_write_read_ustar,
+  prop_write_read_gnu,
+  prop_write_read_v7,
+#endif
+  ) where
+
+import Codec.Archive.Tar.Types
+
+import Codec.Archive.Tar.Read
+import Codec.Archive.Tar.Write
+
+import Codec.Archive.Tar.Pack
+import Codec.Archive.Tar.Unpack
+import Codec.Archive.Tar.Index (hSeekEndEntryOffset)
+
+import Codec.Archive.Tar.Check
+
+import Control.Exception (Exception, throw, catch)
+import qualified Data.ByteString.Lazy as BS
+import System.IO (withFile, IOMode(..))
+import Prelude hiding (read)
+
+-- | Create a new @\".tar\"@ file from a directory of files.
+--
+-- It is equivalent to calling the standard @tar@ program like so:
+--
+-- @$ tar -f tarball.tar -C base -c dir@
+--
+-- This assumes a directory @.\/base\/dir@ with files inside, eg
+-- @.\/base\/dir\/foo.txt@. The file names inside the resulting tar file will be
+-- relative to @dir@, eg @dir\/foo.txt@.
+--
+-- This is a high level \"all in one\" operation. Since you may need variations
+-- on this function it is instructive to see how it is written. It is just:
+--
+-- > BS.writeFile tar . Tar.write =<< Tar.pack base paths
+--
+-- Notes:
+--
+-- The files and directories must not change during this operation or the
+-- result is not well defined.
+--
+-- The intention of this function is to create tarballs that are portable
+-- between systems. It is /not/ suitable for doing file system backups because
+-- file ownership and permissions are not fully preserved. File ownership is
+-- not preserved at all. File permissions are set to simple portable values:
+--
+-- * @rw-r--r--@ for normal files
+--
+-- * @rwxr-xr-x@ for executable files
+--
+-- * @rwxr-xr-x@ for directories
+--
+create :: FilePath   -- ^ Path of the \".tar\" file to write.
+       -> FilePath   -- ^ Base directory
+       -> [FilePath] -- ^ Files and directories to archive, relative to base dir
+       -> IO ()
+create tar base paths = BS.writeFile tar . write =<< pack base paths
+
+-- | Extract all the files contained in a @\".tar\"@ file.
+--
+-- It is equivalent to calling the standard @tar@ program like so:
+--
+-- @$ tar -x -f tarball.tar -C dir@
+--
+-- So for example if the @tarball.tar@ file contains @foo\/bar.txt@ then this
+-- will extract it to @dir\/foo\/bar.txt@.
+--
+-- This is a high level \"all in one\" operation. Since you may need variations
+-- on this function it is instructive to see how it is written. It is just:
+--
+-- > Tar.unpack dir . Tar.read =<< BS.readFile tar
+--
+-- Notes:
+--
+-- Extracting can fail for a number of reasons. The tarball may be incorrectly
+-- formatted. There may be IO or permission errors. In such cases an exception
+-- will be thrown and extraction will not continue.
+--
+-- Since the extraction may fail part way through it is not atomic. For this
+-- reason you may want to extract into an empty directory and, if the
+-- extraction fails, recursively delete the directory.
+--
+-- Security: only files inside the target directory will be written. Tarballs
+-- containing entries that point outside of the tarball (either absolute paths
+-- or relative paths) will be caught and an exception will be thrown.
+--
+extract :: FilePath -- ^ Destination directory
+        -> FilePath -- ^ Tarball
+        -> IO ()
+extract dir tar = unpack dir . read =<< BS.readFile tar
+
+-- | Append new entries to a @\".tar\"@ file from a directory of files.
+--
+-- This is much like 'create', except that all the entries are added to the
+-- end of an existing tar file. Or if the file does not already exists then
+-- it behaves the same as 'create'.
+--
+append :: FilePath   -- ^ Path of the \".tar\" file to write.
+       -> FilePath   -- ^ Base directory
+       -> [FilePath] -- ^ Files and directories to archive, relative to base dir
+       -> IO ()
+append tar base paths =
+    withFile tar ReadWriteMode $ \hnd -> do
+      _ <- hSeekEndEntryOffset hnd Nothing
+      BS.hPut hnd . write =<< pack base paths
+
+-------------------------
+-- Correctness properties
+--
+
+#ifdef TESTS
+
+prop_write_read_ustar :: [Entry] -> Bool
+prop_write_read_ustar entries =
+    foldr Next Done entries' == read (write entries')
+  where
+    entries' = [ e { entryFormat = UstarFormat } | e <- entries ]
+
+prop_write_read_gnu :: [Entry] -> Bool
+prop_write_read_gnu entries =
+    foldr Next Done entries' == read (write entries')
+  where
+    entries' = [ e { entryFormat = GnuFormat } | e <- entries ]
+
+prop_write_read_v7 :: [Entry] -> Bool
+prop_write_read_v7 entries =
+    foldr Next Done entries' == read (write entries')
+  where
+    entries' = [ limitToV7FormatCompat e { entryFormat = V7Format }
+               | e <- entries ]
+
+#endif
diff --git a/Codec/Archive/Tar/Check.hs b/Codec/Archive/Tar/Check.hs
new file mode 100644
index 0000000..27724f2
--- /dev/null
+++ b/Codec/Archive/Tar/Check.hs
@@ -0,0 +1,247 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Codec.Archive.Tar
+-- Copyright   :  (c) 2008-2012 Duncan Coutts
+--                    2011 Max Bolingbroke
+-- License     :  BSD3
+--
+-- Maintainer  :  duncan@community.haskell.org
+-- Portability :  portable
+--
+-- Perform various checks on tar file entries.
+--
+-----------------------------------------------------------------------------
+module Codec.Archive.Tar.Check (
+
+  -- * Security
+  checkSecurity,
+  FileNameError(..),
+
+  -- * Tarbombs
+  checkTarbomb,
+  TarBombError(..),
+
+  -- * Portability
+  checkPortability,
+  PortabilityError(..),
+  PortabilityPlatform,
+  ) where
+
+import Codec.Archive.Tar.Types
+
+import Data.Typeable (Typeable)
+import Control.Exception (Exception)
+import Control.Monad (MonadPlus(mplus))
+import qualified System.FilePath as FilePath.Native
+         ( splitDirectories, isAbsolute, isValid )
+
+import qualified System.FilePath.Windows as FilePath.Windows
+import qualified System.FilePath.Posix   as FilePath.Posix
+
+
+--------------------------
+-- Security
+--
+
+-- | This function checks a sequence of tar entries for file name security
+-- problems. It checks that:
+--
+-- * file paths are not absolute
+--
+-- * file paths do not contain any path components that are \"@..@\"
+--
+-- * file names are valid
+--
+-- These checks are from the perspective of the current OS. That means we check
+-- for \"@C:\blah@\" files on Windows and \"\/blah\" files on Unix. For archive
+-- entry types 'HardLink' and 'SymbolicLink' the same checks are done for the
+-- link target. A failure in any entry terminates the sequence of entries with
+-- an error.
+--
+checkSecurity :: Entries e -> Entries (Either e FileNameError)
+checkSecurity = checkEntries checkEntrySecurity
+
+checkEntrySecurity :: Entry -> Maybe FileNameError
+checkEntrySecurity entry = case entryContent entry of
+    HardLink     link -> check (entryPath entry)
+                 `mplus` check (fromLinkTarget link)
+    SymbolicLink link -> check (entryPath entry)
+                 `mplus` check (fromLinkTarget link)
+    _                 -> check (entryPath entry)
+
+  where
+    check name
+      | FilePath.Native.isAbsolute name
+      = Just $ AbsoluteFileName name
+
+      | not (FilePath.Native.isValid name)
+      = Just $ InvalidFileName name
+
+      | any (=="..") (FilePath.Native.splitDirectories name)
+      = Just $ InvalidFileName name
+
+      | otherwise = Nothing
+
+-- | Errors arising from tar file names being in some way invalid or dangerous
+data FileNameError
+  = InvalidFileName FilePath
+  | AbsoluteFileName FilePath
+  deriving (Typeable)
+
+instance Show FileNameError where
+  show = showFileNameError Nothing
+
+instance Exception FileNameError
+
+showFileNameError :: Maybe PortabilityPlatform -> FileNameError -> String
+showFileNameError mb_plat err = case err of
+    InvalidFileName  path -> "Invalid"  ++ plat ++ " file name in tar archive: " ++ show path
+    AbsoluteFileName path -> "Absolute" ++ plat ++ " file name in tar archive: " ++ show path
+  where plat = maybe "" (' ':) mb_plat
+
+
+--------------------------
+-- Tarbombs
+--
+
+-- | This function checks a sequence of tar entries for being a \"tar bomb\".
+-- This means that the tar file does not follow the standard convention that
+-- all entries are within a single subdirectory, e.g. a file \"foo.tar\" would
+-- usually have all entries within the \"foo/\" subdirectory.
+--
+-- Given the expected subdirectory, this function checks all entries are within
+-- that subdirectroy.
+--
+-- Note: This check must be used in conjunction with 'checkSecurity'
+-- (or 'checkPortability').
+--
+checkTarbomb :: FilePath -> Entries e -> Entries (Either e TarBombError)
+checkTarbomb expectedTopDir = checkEntries (checkEntryTarbomb expectedTopDir)
+
+checkEntryTarbomb :: FilePath -> Entry -> Maybe TarBombError
+checkEntryTarbomb _ entry | nonFilesystemEntry = Nothing
+  where
+    -- Ignore some special entries we will not unpack anyway
+    nonFilesystemEntry =
+      case entryContent entry of
+        OtherEntryType 'g' _ _ -> True --PAX global header
+        OtherEntryType 'x' _ _ -> True --PAX individual header
+        _                      -> False
+
+checkEntryTarbomb expectedTopDir entry =
+  case FilePath.Native.splitDirectories (entryPath entry) of
+    (topDir:_) | topDir == expectedTopDir -> Nothing
+    _ -> Just $ TarBombError expectedTopDir
+
+-- | An error that occurs if a tar file is a \"tar bomb\" that would extract
+-- files outside of the intended directory.
+data TarBombError = TarBombError FilePath
+                  deriving (Typeable)
+
+instance Exception TarBombError
+
+instance Show TarBombError where
+  show (TarBombError expectedTopDir)
+    = "File in tar archive is not in the expected directory " ++ show expectedTopDir
+
+
+--------------------------
+-- Portability
+--
+
+-- | This function checks a sequence of tar entries for a number of portability
+-- issues. It will complain if:
+--
+-- * The old \"Unix V7\" or \"gnu\" formats are used. For maximum portability
+--   only the POSIX standard \"ustar\" format should be used.
+--
+-- * A non-portable entry type is used. Only ordinary files, hard links,
+--   symlinks and directories are portable. Device files, pipes and others are
+--   not portable between all common operating systems.
+--
+-- * Non-ASCII characters are used in file names. There is no agreed portable
+--   convention for Unicode or other extended character sets in file names in
+--   tar archives.
+--
+-- * File names that would not be portable to both Unix and Windows. This check
+--   includes characters that are valid in both systems and the \'/\' vs \'\\\'
+--   directory separator conventions.
+--
+checkPortability :: Entries e -> Entries (Either e PortabilityError)
+checkPortability = checkEntries checkEntryPortability
+
+checkEntryPortability :: Entry -> Maybe PortabilityError
+checkEntryPortability entry
+  | entryFormat entry `elem` [V7Format, GnuFormat]
+  = Just $ NonPortableFormat (entryFormat entry)
+
+  | not (portableFileType (entryContent entry))
+  = Just NonPortableFileType
+
+  | not (all portableChar posixPath)
+  = Just $ NonPortableEntryNameChar posixPath
+
+  | not (FilePath.Posix.isValid posixPath)
+  = Just $ NonPortableFileName "unix"    (InvalidFileName posixPath)
+  | not (FilePath.Windows.isValid windowsPath)
+  = Just $ NonPortableFileName "windows" (InvalidFileName windowsPath)
+
+  | FilePath.Posix.isAbsolute posixPath
+  = Just $ NonPortableFileName "unix"    (AbsoluteFileName posixPath)
+  | FilePath.Windows.isAbsolute windowsPath
+  = Just $ NonPortableFileName "windows" (AbsoluteFileName windowsPath)
+
+  | any (=="..") (FilePath.Posix.splitDirectories posixPath)
+  = Just $ NonPortableFileName "unix"    (InvalidFileName posixPath)
+  | any (=="..") (FilePath.Windows.splitDirectories windowsPath)
+  = Just $ NonPortableFileName "windows" (InvalidFileName windowsPath)
+
+  | otherwise = Nothing
+
+  where
+    tarPath     = entryTarPath entry
+    posixPath   = fromTarPathToPosixPath   tarPath
+    windowsPath = fromTarPathToWindowsPath tarPath
+
+    portableFileType ftype = case ftype of
+      NormalFile   {} -> True
+      HardLink     {} -> True
+      SymbolicLink {} -> True
+      Directory       -> True
+      _               -> False
+
+    portableChar c = c <= '\127'
+
+-- | Portability problems in a tar archive
+data PortabilityError
+  = NonPortableFormat Format
+  | NonPortableFileType
+  | NonPortableEntryNameChar FilePath
+  | NonPortableFileName PortabilityPlatform FileNameError
+  deriving (Typeable)
+
+-- | The name of a platform that portability issues arise from
+type PortabilityPlatform = String
+
+instance Exception PortabilityError
+
+instance Show PortabilityError where
+  show (NonPortableFormat format) = "Archive is in the " ++ fmt ++ " format"
+    where fmt = case format of V7Format    -> "old Unix V7 tar"
+                               UstarFormat -> "ustar" -- I never generate this but a user might
+                               GnuFormat   -> "GNU tar"
+  show NonPortableFileType        = "Non-portable file type in archive"
+  show (NonPortableEntryNameChar posixPath)
+    = "Non-portable character in archive entry name: " ++ show posixPath
+  show (NonPortableFileName platform err)
+    = showFileNameError (Just platform) err
+
+
+--------------------------
+-- Utils
+--
+
+checkEntries :: (Entry -> Maybe e') -> Entries e -> Entries (Either e e')
+checkEntries checkEntry =
+  mapEntries (\entry -> maybe (Right entry) Left (checkEntry entry))
diff --git a/Codec/Archive/Tar/Entry.hs b/Codec/Archive/Tar/Entry.hs
new file mode 100644
index 0000000..3f19021
--- /dev/null
+++ b/Codec/Archive/Tar/Entry.hs
@@ -0,0 +1,80 @@
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Codec.Archive.Tar.Entry
+-- Copyright   :  (c) 2007 Bjorn Bringert,
+--                    2008 Andrea Vezzosi,
+--                    2008-2009 Duncan Coutts
+-- License     :  BSD3
+--
+-- Maintainer  :  duncan@community.haskell.org
+-- Portability :  portable
+--
+-- Types and functions to manipulate tar entries.
+--
+-- While the "Codec.Archive.Tar" module provides only the simple high level
+-- API, this module provides full access to the details of tar entries. This
+-- lets you inspect all the meta-data, construct entries and handle error cases
+-- more precisely.
+--
+-- This module uses common names and so is designed to be imported qualified:
+--
+-- > import qualified Codec.Archive.Tar       as Tar
+-- > import qualified Codec.Archive.Tar.Entry as Tar
+--
+-----------------------------------------------------------------------------
+module Codec.Archive.Tar.Entry (
+
+  -- * Tar entry and associated types
+  Entry(..),
+  --TODO: should be the following with the Entry constructor not exported,
+  --      but haddock cannot document that properly
+  --      see http://trac.haskell.org/haddock/ticket/3
+  --Entry(filePath, fileMode, ownerId, groupId, fileSize, modTime,
+  --      fileType, linkTarget, headerExt, fileContent),
+  entryPath,
+  EntryContent(..),
+  Ownership(..),
+
+  FileSize,
+  Permissions,
+  EpochTime,
+  DevMajor,
+  DevMinor,
+  TypeCode,
+  Format(..),
+
+  -- * Constructing simple entry values
+  simpleEntry,
+  fileEntry,
+  directoryEntry,
+
+  -- * Standard file permissions
+  -- | For maximum portability when constructing archives use only these file
+  -- permissions.
+  ordinaryFilePermissions,
+  executableFilePermissions,
+  directoryPermissions,
+
+  -- * Constructing entries from disk files
+  packFileEntry,
+  packDirectoryEntry,
+  getDirectoryContentsRecursive,
+
+  -- * TarPath type
+  TarPath,
+  toTarPath,
+  fromTarPath,
+  fromTarPathToPosixPath,
+  fromTarPathToWindowsPath,
+
+  -- * LinkTarget type
+  LinkTarget,
+  toLinkTarget,
+  fromLinkTarget,
+  fromLinkTargetToPosixPath,
+  fromLinkTargetToWindowsPath,
+
+  ) where
+
+import Codec.Archive.Tar.Types
+import Codec.Archive.Tar.Pack
diff --git a/Codec/Archive/Tar/Index.hs b/Codec/Archive/Tar/Index.hs
new file mode 100644
index 0000000..df44c7a
--- /dev/null
+++ b/Codec/Archive/Tar/Index.hs
@@ -0,0 +1,825 @@
+{-# LANGUAGE CPP, BangPatterns, PatternGuards #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Codec.Archive.Tar.Index
+-- Copyright   :  (c) 2010-2015 Duncan Coutts
+-- License     :  BSD3
+--
+-- Maintainer  :  duncan@community.haskell.org
+-- Portability :  portable
+--
+-- Random access to the content of a @.tar@ archive.
+--
+-- This module uses common names and so is designed to be imported qualified:
+--
+-- > import qualified Codec.Archive.Tar.Index as TarIndex
+--
+-----------------------------------------------------------------------------
+module Codec.Archive.Tar.Index (
+
+    -- | The @tar@ format does not contain an index of files within the
+    -- archive. Normally, @tar@ file have to be processed linearly. It is
+    -- sometimes useful however to be able to get random access to files
+    -- within the archive.
+    --
+    -- This module provides an index of a @tar@ file. A linear pass of the
+    -- @tar@ file is needed to 'build' the 'TarIndex', but thereafter you can
+    -- 'lookup' paths in the @tar@ file, and then use 'hReadEntry' to
+    -- seek to the right part of the file and read the entry.
+    --
+    -- An index cannot be used to lookup 'Directory' entries in a tar file;
+    -- instead, you will get 'TarDir' entry listing all the entries in the
+    -- directory.
+
+    -- * Index type
+    TarIndex,
+
+    -- * Index lookup
+    lookup,
+    TarIndexEntry(..),
+    toList,
+
+    -- ** I\/O operations
+    TarEntryOffset,
+    hReadEntry,
+    hReadEntryHeader,
+
+    -- * Index construction
+    build,
+    -- ** Incremental construction
+    -- $incremental-construction
+    IndexBuilder,
+    empty,
+    addNextEntry,
+    skipNextEntry,
+    finalise,
+    unfinalise,
+
+    -- * Serialising indexes
+    serialise,
+    deserialise,
+
+    -- * Lower level operations with offsets and I\/O on tar files
+    hReadEntryHeaderOrEof,
+    hSeekEntryOffset,
+    hSeekEntryContentOffset,
+    hSeekEndEntryOffset,
+    nextEntryOffset,
+    indexEndEntryOffset,
+    indexNextEntryOffset,
+
+    -- * Deprecated aliases
+    emptyIndex,
+    finaliseIndex,
+
+#ifdef TESTS
+    prop_lookup,
+    prop_toList,
+    prop_valid,
+    prop_serialise_deserialise,
+    prop_serialiseSize,
+    prop_index_matches_tar,
+    prop_finalise_unfinalise,
+#endif
+  ) where
+
+import Data.Typeable (Typeable)
+
+import Codec.Archive.Tar.Types as Tar
+import Codec.Archive.Tar.Read  as Tar
+import qualified Codec.Archive.Tar.Index.StringTable as StringTable
+import Codec.Archive.Tar.Index.StringTable (StringTable, StringTableBuilder)
+import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie
+import Codec.Archive.Tar.Index.IntTrie (IntTrie, IntTrieBuilder)
+
+import qualified System.FilePath.Posix as FilePath
+import Data.Monoid (Monoid(..))
+#if (MIN_VERSION_base(4,5,0))
+import Data.Monoid ((<>))
+#endif
+import Data.Word
+import Data.Int
+import Data.Bits
+import qualified Data.Array.Unboxed as A
+import Prelude hiding (lookup)
+import System.IO
+import Control.Exception (assert, throwIO)
+import Control.DeepSeq
+
+import qualified Data.ByteString        as BS
+import qualified Data.ByteString.Char8  as BS.Char8
+import qualified Data.ByteString.Lazy   as LBS
+import qualified Data.ByteString.Unsafe as BS
+#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
+import Data.ByteString.Builder          as BS
+import Data.ByteString.Builder.Extra    as BS (toLazyByteStringWith,
+                                               untrimmedStrategy)
+#else
+import Data.ByteString.Lazy.Builder     as BS
+import Data.ByteString.Lazy.Builder.Extras as BS (toLazyByteStringWith,
+                                                  untrimmedStrategy)
+#endif
+
+#ifdef TESTS
+import qualified Prelude
+import Test.QuickCheck
+import Test.QuickCheck.Property (ioProperty)
+import Control.Applicative ((<$>), (<*>))
+import Control.Monad (unless)
+import Data.List (nub, sort, sortBy, stripPrefix, isPrefixOf)
+import Data.Maybe
+import Data.Function (on)
+import Control.Exception (SomeException, try)
+import Codec.Archive.Tar.Write          as Tar
+import qualified Data.ByteString.Handle as HBS
+#endif
+
+
+-- | An index of the entries in a tar file.
+--
+-- This index type is designed to be quite compact and suitable to store either
+-- on disk or in memory.
+--
+data TarIndex = TarIndex
+
+  -- As an example of how the mapping works, consider these example files:
+  --   "foo/bar.hs" at offset 0
+  --   "foo/baz.hs" at offset 1024
+  --
+  -- We split the paths into components and enumerate them.
+  --   { "foo" -> TokenId 0, "bar.hs" -> TokenId 1,  "baz.hs" -> TokenId 2 }
+  --
+  -- We convert paths into sequences of 'TokenId's, i.e.
+  --   "foo/bar.hs" becomes [PathComponentId 0, PathComponentId 1]
+  --   "foo/baz.hs" becomes [PathComponentId 0, PathComponentId 2]
+  --
+  -- We use a trie mapping sequences of 'PathComponentId's to the entry offset:
+  --  { [PathComponentId 0, PathComponentId 1] -> offset 0
+  --  , [PathComponentId 0, PathComponentId 2] -> offset 1024 }
+
+  -- The mapping of filepath components as strings to ids.
+  {-# UNPACK #-} !(StringTable PathComponentId)
+
+  -- Mapping of sequences of filepath component ids to tar entry offsets.
+  {-# UNPACK #-} !(IntTrie PathComponentId TarEntryOffset)
+
+  -- The offset immediatly after the last entry, where we would append any
+  -- additional entries.
+  {-# UNPACK #-} !TarEntryOffset
+
+  deriving (Eq, Show, Typeable)
+
+instance NFData TarIndex where
+  rnf (TarIndex _ _ _) = () -- fully strict by construction
+
+-- | The result of 'lookup' in a 'TarIndex'. It can either be a file directly,
+-- or a directory entry containing further entries (and all subdirectories
+-- recursively). Note that the subtrees are constructed lazily, so it's
+-- cheaper if you don't look at them.
+--
+data TarIndexEntry = TarFileEntry {-# UNPACK #-} !TarEntryOffset
+                   | TarDir [(FilePath, TarIndexEntry)]
+  deriving (Show, Typeable)
+
+
+newtype PathComponentId = PathComponentId Int
+  deriving (Eq, Ord, Enum, Show, Typeable)
+
+-- | An offset within a tar file. Use 'hReadEntry', 'hReadEntryHeader' or
+-- 'hSeekEntryOffset'.
+--
+-- This is actually a tar \"record\" number, not a byte offset.
+--
+type TarEntryOffset = Word32
+
+
+-- | Look up a given filepath in the 'TarIndex'. It may return a 'TarFileEntry'
+-- containing the 'TarEntryOffset' of the file within the tar file, or if
+-- the filepath identifies a directory then it returns a 'TarDir' containing
+-- the list of files within that directory.
+--
+-- Given the 'TarEntryOffset' you can then use one of the I\/O operations:
+--
+-- * 'hReadEntry' to read the whole entry;
+--
+-- * 'hReadEntryHeader' to read just the file metadata (e.g. its length);
+--
+lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry
+lookup (TarIndex pathTable pathTrie _) path = do
+    fpath  <- toComponentIds pathTable path
+    tentry <- IntTrie.lookup pathTrie fpath
+    return (mkIndexEntry tentry)
+  where
+    mkIndexEntry (IntTrie.Entry offset)        = TarFileEntry offset
+    mkIndexEntry (IntTrie.Completions entries) =
+      TarDir [ (fromComponentId pathTable key, mkIndexEntry entry)
+             | (key, entry) <- entries ]
+
+
+toComponentIds :: StringTable PathComponentId -> FilePath -> Maybe [PathComponentId]
+toComponentIds table =
+    lookupComponents []
+  . filter (/= BS.Char8.singleton '.')
+  . splitDirectories
+  . BS.Char8.pack
+  where
+    lookupComponents cs' []     = Just (reverse cs')
+    lookupComponents cs' (c:cs) = case StringTable.lookup table c of
+      Nothing  -> Nothing
+      Just cid -> lookupComponents (cid:cs') cs
+
+fromComponentId :: StringTable PathComponentId -> PathComponentId -> FilePath
+fromComponentId table = BS.Char8.unpack . StringTable.index table
+
+-- | All the files in the index with their corresponding 'TarEntryOffset's.
+--
+-- Note that the files are in no special order. If you intend to read all or
+-- most files then is is recommended to sort by the 'TarEntryOffset'.
+--
+toList :: TarIndex -> [(FilePath, TarEntryOffset)]
+toList (TarIndex pathTable pathTrie _) =
+    [ (path, off)
+    | (cids, off) <- IntTrie.toList pathTrie
+    , let path = FilePath.joinPath (map (fromComponentId pathTable) cids) ]
+
+
+-- | Build a 'TarIndex' from a sequence of tar 'Entries'. The 'Entries' are
+-- assumed to start at offset @0@ within a file.
+--
+build :: Entries e -> Either e TarIndex
+build = go empty
+  where
+    go !builder (Next e es) = go (addNextEntry e builder) es
+    go !builder  Done       = Right $! finalise builder
+    go !_       (Fail err)  = Left err
+
+
+-- $incremental-construction
+-- If you need more control than 'build' then you can construct the index
+-- in an acumulator style using the 'IndexBuilder' and operations.
+--
+-- Start with 'empty' and use 'addNextEntry' (or 'skipNextEntry') for
+-- each 'Entry' in the tar file in order. Every entry must added or skipped in
+-- order, otherwise the resulting 'TarIndex' will report the wrong
+-- 'TarEntryOffset's. At the end use 'finalise' to get the 'TarIndex'.
+--
+-- For example, 'build' is simply:
+--
+-- > build = go empty
+-- >   where
+-- >     go !builder (Next e es) = go (addNextEntry e builder) es
+-- >     go !builder  Done       = Right $! finalise builder
+-- >     go !_       (Fail err)  = Left err
+
+
+-- | The intermediate type used for incremental construction of a 'TarIndex'.
+--
+data IndexBuilder
+   = IndexBuilder !(StringTableBuilder PathComponentId)
+                  !(IntTrieBuilder PathComponentId TarEntryOffset)
+   {-# UNPACK #-} !TarEntryOffset
+  deriving (Eq, Show)
+
+instance NFData IndexBuilder where
+  rnf (IndexBuilder _ _ _) = () -- fully strict by construction
+
+-- | The initial empty 'IndexBuilder'.
+--
+empty :: IndexBuilder
+empty = IndexBuilder StringTable.empty IntTrie.empty 0
+
+emptyIndex :: IndexBuilder
+emptyIndex = empty
+{-# DEPRECATED emptyIndex "Use TarIndex.empty" #-}
+
+-- | Add the next 'Entry' into the 'IndexBuilder'.
+--
+addNextEntry :: Entry -> IndexBuilder -> IndexBuilder
+addNextEntry entry (IndexBuilder stbl itrie nextOffset) =
+    IndexBuilder stbl' itrie'
+                 (nextEntryOffset entry nextOffset)
+  where
+    !entrypath    = splitTarPath (entryTarPath entry)
+    (stbl', cids) = StringTable.inserts entrypath stbl
+    itrie'        = IntTrie.insert cids nextOffset itrie
+
+-- | Use this function if you want to skip some entries and not add them to the
+-- final 'TarIndex'.
+--
+skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder
+skipNextEntry entry (IndexBuilder stbl itrie nextOffset) =
+    IndexBuilder stbl itrie (nextEntryOffset entry nextOffset)
+
+-- | Finish accumulating 'Entry' information and build the compact 'TarIndex'
+-- lookup structure.
+--
+finalise :: IndexBuilder -> TarIndex
+finalise (IndexBuilder stbl itrie finalOffset) =
+    TarIndex pathTable pathTrie finalOffset
+  where
+    pathTable = StringTable.finalise stbl
+    pathTrie  = IntTrie.finalise itrie
+
+finaliseIndex :: IndexBuilder -> TarIndex
+finaliseIndex = finalise
+{-# DEPRECATED finaliseIndex "Use TarIndex.finalise" #-}
+
+-- | This is the offset immediately following the entry most recently added
+-- to the 'IndexBuilder'. You might use this if you need to know the offsets
+-- but don't want to use the 'TarIndex' lookup structure.
+-- Use with 'hSeekEntryOffset'. See also 'nextEntryOffset'.
+--
+indexNextEntryOffset :: IndexBuilder -> TarEntryOffset
+indexNextEntryOffset (IndexBuilder _ _ off) = off
+
+-- | This is the offset immediately following the last entry in the tar file.
+-- This can be useful to append further entries into the tar file.
+-- Use with 'hSeekEntryOffset', or just use 'hSeekEndEntryOffset' directly.
+--
+indexEndEntryOffset :: TarIndex -> TarEntryOffset
+indexEndEntryOffset (TarIndex _ _ off) = off
+
+-- | Calculate the 'TarEntryOffset' of the next entry, given the size and
+-- offset of the current entry.
+--
+-- This is much like using 'skipNextEntry' and 'indexNextEntryOffset', but without
+-- using an 'IndexBuilder'.
+--
+nextEntryOffset :: Entry -> TarEntryOffset -> TarEntryOffset
+nextEntryOffset entry offset =
+    offset
+  + 1
+  + case entryContent entry of
+      NormalFile     _   size -> blocks size
+      OtherEntryType _ _ size -> blocks size
+      _                       -> 0
+  where
+    -- NOTE: to avoid underflow, do the (fromIntegral :: Int64 -> Word32) last
+    blocks :: Int64 -> TarEntryOffset
+    blocks size = fromIntegral (1 + (size - 1) `div` 512)
+
+type FilePathBS = BS.ByteString
+
+splitTarPath :: TarPath -> [FilePathBS]
+splitTarPath (TarPath name prefix) =
+    splitDirectories prefix ++ splitDirectories name
+
+splitDirectories :: FilePathBS -> [FilePathBS]
+splitDirectories bs =
+    case BS.Char8.split '/' bs of
+      c:cs | BS.null c -> BS.Char8.singleton '/' : filter (not . BS.null) cs
+      cs               ->                          filter (not . BS.null) cs
+
+
+-------------------------
+-- Resume building an existing index
+--
+
+-- | Resume building an existing index
+--
+-- A 'TarIndex' is optimized for a highly compact and efficient in-memory
+-- representation. This, however, makes it read-only. If you have an existing
+-- 'TarIndex' for a large file, and want to add to it, you can translate the
+-- 'TarIndex' back to an 'IndexBuilder'. Be aware that this is a relatively
+-- costly operation (linear in the size of the 'TarIndex'), though still
+-- faster than starting again from scratch.
+--
+-- This is the left inverse to 'finalise' (modulo ordering).
+--
+unfinalise :: TarIndex -> IndexBuilder
+unfinalise (TarIndex pathTable pathTrie finalOffset) =
+    IndexBuilder (StringTable.unfinalise pathTable)
+                 (IntTrie.unfinalise pathTrie)
+                 finalOffset
+
+
+-------------------------
+-- I/O operations
+--
+
+-- | Reads an entire 'Entry' at the given 'TarEntryOffset' in the tar file.
+-- The 'Handle' must be open for reading and be seekable.
+--
+-- This reads the whole entry into memory strictly, not incrementally. For more
+-- control, use 'hReadEntryHeader' and then read the entry content manually.
+--
+hReadEntry :: Handle -> TarEntryOffset -> IO Entry
+hReadEntry hnd off = do
+    entry <- hReadEntryHeader hnd off
+    case entryContent entry of
+      NormalFile       _ size -> do body <- LBS.hGet hnd (fromIntegral size)
+                                    return entry {
+                                      entryContent = NormalFile body size
+                                    }
+      OtherEntryType c _ size -> do body <- LBS.hGet hnd (fromIntegral size)
+                                    return entry {
+                                      entryContent = OtherEntryType c body size
+                                    }
+      _                       -> return entry
+
+-- | Read the header for a 'Entry' at the given 'TarEntryOffset' in the tar
+-- file. The 'entryContent' will contain the correct metadata but an empty file
+-- content. The 'Handle' must be open for reading and be seekable.
+--
+-- The 'Handle' position is advanced to the beginning of the entry content (if
+-- any). You must check the 'entryContent' to see if the entry is of type
+-- 'NormalFile'. If it is, the 'NormalFile' gives the content length and you
+-- are free to read this much data from the 'Handle'.
+--
+-- > entry <- Tar.hReadEntryHeader hnd
+-- > case Tar.entryContent entry of
+-- >   Tar.NormalFile _ size -> do content <- BS.hGet hnd size
+-- >                               ...
+--
+-- Of course you don't have to read it all in one go (as 'hReadEntry' does),
+-- you can use any appropriate method to read it incrementally.
+--
+-- In addition to I\/O errors, this can throw a 'FormatError' if the offset is
+-- wrong, or if the file is not valid tar format.
+--
+-- There is also the lower level operation 'hSeekEntryOffset'.
+--
+hReadEntryHeader :: Handle -> TarEntryOffset -> IO Entry
+hReadEntryHeader hnd blockOff = do
+    hSeekEntryOffset hnd blockOff
+    header <- LBS.hGet hnd 512
+    case Tar.read header of
+      Tar.Next entry _ -> return entry
+      Tar.Fail e       -> throwIO e
+      Tar.Done         -> fail "hReadEntryHeader: impossible"
+
+-- | Set the 'Handle' position to the position corresponding to the given
+-- 'TarEntryOffset'.
+--
+-- This position is where the entry metadata can be read. If you already know
+-- the entry has a body (and perhaps know it's length), you may wish to seek to
+-- the body content directly using 'hSeekEntryContentOffset'.
+--
+hSeekEntryOffset :: Handle -> TarEntryOffset -> IO ()
+hSeekEntryOffset hnd blockOff =
+    hSeek hnd AbsoluteSeek (fromIntegral blockOff * 512)
+
+-- | Set the 'Handle' position to the entry content position corresponding to
+-- the given 'TarEntryOffset'.
+--
+-- This position is where the entry content can be read using ordinary I\/O
+-- operations (though you have to know in advance how big the entry content
+-- is). This is /only valid/ if you /already know/ the entry has a body (i.e.
+-- is a normal file).
+--
+hSeekEntryContentOffset :: Handle -> TarEntryOffset -> IO ()
+hSeekEntryContentOffset hnd blockOff =
+    hSeekEntryOffset hnd (blockOff + 1)
+
+-- | This is a low level variant on 'hReadEntryHeader', that can be used to
+-- iterate through a tar file, entry by entry.
+--
+-- It has a few differences compared to 'hReadEntryHeader':
+--
+-- * It returns an indication when the end of the tar file is reached.
+--
+-- * It /does not/ move the 'Handle' position to the beginning of the entry
+--   content.
+--
+-- * It returns the 'TarEntryOffset' of the next entry.
+--
+-- After this action, the 'Handle' position is not in any useful place. If
+-- you want to skip to the next entry, take the 'TarEntryOffset' returned and
+-- use 'hReadEntryHeaderOrEof' again. Or if having inspected the 'Entry'
+-- header you want to read the entry content (if it has one) then use
+-- 'hSeekEntryContentOffset' on the original input 'TarEntryOffset'.
+--
+hReadEntryHeaderOrEof :: Handle -> TarEntryOffset
+                      -> IO (Maybe (Entry, TarEntryOffset))
+hReadEntryHeaderOrEof hnd blockOff = do
+    hSeekEntryOffset hnd blockOff
+    header <- LBS.hGet hnd 1024
+    case Tar.read header of
+      Tar.Next entry _ -> let !blockOff' = nextEntryOffset entry blockOff
+                           in return (Just (entry, blockOff'))
+      Tar.Done         -> return Nothing
+      Tar.Fail e       -> throwIO e
+
+-- | Seek to the end of a tar file, to the position where new entries can
+-- be appended, and return that 'TarEntryOffset'.
+--
+-- If you have a valid 'TarIndex' for this tar file then you should supply it
+-- because it allows seeking directly to the correct location.
+--
+-- If you do not have an index, then this becomes an expensive linear
+-- operation because we have to read each tar entry header from the beginning
+-- to find the location immediately after the last entry (this is because tar
+-- files have a variable length trailer and we cannot reliably find that by
+-- starting at the end). In this mode, it will fail with an exception if the
+-- file is not in fact in the tar format.
+--
+hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO TarEntryOffset
+hSeekEndEntryOffset hnd (Just index) = do
+    let offset = indexEndEntryOffset index
+    hSeekEntryOffset hnd offset
+    return offset
+
+hSeekEndEntryOffset hnd Nothing = do
+    size <- hFileSize hnd
+    if size == 0
+      then return 0
+      else seekToEnd 0
+  where
+    seekToEnd offset = do
+      mbe <- hReadEntryHeaderOrEof hnd offset
+      case mbe of
+        Nothing -> do hSeekEntryOffset hnd offset
+                      return offset
+        Just (_, offset') -> seekToEnd offset'
+
+-------------------------
+-- (de)serialisation
+--
+
+-- | The 'TarIndex' is compact in memory, and it has a similarly compact
+-- external representation.
+--
+serialise :: TarIndex -> BS.ByteString
+serialise = toStrict . serialiseLBS
+
+-- we keep this version around just so we can check we got the size right.
+serialiseLBS :: TarIndex -> LBS.ByteString
+serialiseLBS index =
+    BS.toLazyByteStringWith
+      (BS.untrimmedStrategy (serialiseSize index) 512) LBS.empty
+      (serialiseBuilder index)
+
+serialiseSize :: TarIndex -> Int
+serialiseSize (TarIndex stringTable intTrie _) =
+    StringTable.serialiseSize stringTable
+  + IntTrie.serialiseSize intTrie
+  + 8
+
+serialiseBuilder :: TarIndex -> BS.Builder
+serialiseBuilder (TarIndex stringTable intTrie finalOffset) =
+     BS.word32BE 2 -- format version
+  <> BS.word32BE finalOffset
+  <> StringTable.serialise stringTable
+  <> IntTrie.serialise intTrie
+
+-- | Read the external representation back into a 'TarIndex'.
+--
+deserialise :: BS.ByteString -> Maybe (TarIndex, BS.ByteString)
+deserialise bs
+  | BS.length bs < 8
+  = Nothing
+
+  | let ver = readWord32BE bs 0
+  , ver == 1
+  = do let !finalOffset = readWord32BE bs 4
+       (stringTable, bs')  <- StringTable.deserialiseV1 (BS.drop 8 bs)
+       (intTrie,     bs'') <- IntTrie.deserialise bs'
+       return (TarIndex stringTable intTrie finalOffset, bs'')
+
+  | let ver = readWord32BE bs 0
+  , ver == 2
+  = do let !finalOffset = readWord32BE bs 4
+       (stringTable, bs')  <- StringTable.deserialiseV2 (BS.drop 8 bs)
+       (intTrie,     bs'') <- IntTrie.deserialise bs'
+       return (TarIndex stringTable intTrie finalOffset, bs'')
+
+  | otherwise = Nothing
+
+readWord32BE :: BS.ByteString -> Int -> Word32
+readWord32BE bs i =
+    assert (i >= 0 && i+3 <= BS.length bs - 1) $
+    fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
+  + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
+  + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
+  + fromIntegral (BS.unsafeIndex bs (i + 3))
+
+
+-------------------------
+-- Test properties
+--
+
+#ifdef TESTS
+
+-- Not quite the properties of a finite mapping because we also have lookups
+-- that result in completions.
+
+prop_lookup :: ValidPaths -> NonEmptyFilePath -> Bool
+prop_lookup (ValidPaths paths) (NonEmptyFilePath p) =
+  case (lookup index p, Prelude.lookup p paths) of
+    (Nothing,                    Nothing)          -> True
+    (Just (TarFileEntry offset), Just (_,offset')) -> offset == offset'
+    (Just (TarDir entries),      Nothing)          -> sort (nub (map fst entries))
+                                                   == sort (nub completions)
+    _                                              -> False
+  where
+    index       = construct paths
+    completions = [ head (FilePath.splitDirectories completion)
+                  | (path,_) <- paths
+                  , completion <- maybeToList $ stripPrefix (p ++ "/") path ]
+
+prop_toList :: ValidPaths -> Bool
+prop_toList (ValidPaths paths) =
+    sort (toList index)
+ == sort [ (path, off) | (path, (_sz, off)) <- paths ]
+  where
+    index = construct paths
+
+prop_valid :: ValidPaths -> Bool
+prop_valid (ValidPaths paths)
+  | not $ StringTable.prop_valid   pathbits = error "TarIndex: bad string table"
+  | not $ IntTrie.prop_lookup      intpaths = error "TarIndex: bad int trie"
+  | not $ IntTrie.prop_completions intpaths = error "TarIndex: bad int trie"
+  | not $ prop'                             = error "TarIndex: bad prop"
+  | otherwise                               = True
+
+  where
+    index@(TarIndex pathTable _ _) = construct paths
+
+    pathbits = concatMap (map BS.Char8.pack . FilePath.splitDirectories . fst)
+                         paths
+    intpaths = [ (cids, offset)
+               | (path, (_size, offset)) <- paths
+               , let Just cids = toComponentIds pathTable path ]
+    prop' = flip all paths $ \(file, (_size, offset)) ->
+      case lookup index file of
+        Just (TarFileEntry offset') -> offset' == offset
+        _                           -> False
+
+prop_serialise_deserialise :: ValidPaths -> Bool
+prop_serialise_deserialise (ValidPaths paths) =
+    Just (index, BS.empty) == (deserialise . serialise) index
+  where
+    index = construct paths
+
+prop_serialiseSize :: ValidPaths -> Bool
+prop_serialiseSize (ValidPaths paths) =
+    case (LBS.toChunks . serialiseLBS) index of
+      [c1] -> BS.length c1 == serialiseSize index
+      _    -> False
+  where
+    index = construct paths
+
+newtype NonEmptyFilePath = NonEmptyFilePath FilePath deriving Show
+
+instance Arbitrary NonEmptyFilePath where
+  arbitrary = NonEmptyFilePath . FilePath.joinPath
+                <$> listOf1 (elements ["a", "b", "c", "d"])
+
+newtype ValidPaths = ValidPaths [(FilePath, (Int64, TarEntryOffset))] deriving Show
+
+instance Arbitrary ValidPaths where
+  arbitrary = do
+      paths <- makeNoPrefix <$> listOf arbitraryPath
+      sizes <- vectorOf (length paths) (getNonNegative <$> arbitrary)
+      let offsets = scanl (\o sz -> o + 1 + blocks sz) 0 sizes
+      return (ValidPaths (zip paths (zip sizes offsets)))
+    where
+      arbitraryPath   = FilePath.joinPath
+                         <$> listOf1 (elements ["a", "b", "c", "d"])
+      makeNoPrefix [] = []
+      makeNoPrefix (k:ks)
+        | all (not . isPrefixOfOther k) ks
+                     = k : makeNoPrefix ks
+        | otherwise  =     makeNoPrefix ks
+
+      isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a
+
+      blocks :: Int64 -> TarEntryOffset
+      blocks size = fromIntegral (1 + ((size - 1) `div` 512))
+
+-- Helper for bulk construction.
+construct :: [(FilePath, (Int64, TarEntryOffset))] -> TarIndex
+construct =
+    either (\_ -> undefined) id
+  . build
+  . foldr (\(path, (size, _off)) es -> Next (testEntry path size) es) Done
+
+example0 :: Entries ()
+example0 =
+         testEntry "foo-1.0/foo-1.0.cabal" 1500 -- at block 0
+  `Next` testEntry "foo-1.0/LICENSE"       2000 -- at block 4
+  `Next` testEntry "foo-1.0/Data/Foo.hs"   1000 -- at block 9
+  `Next` Done
+
+example1 :: Entries ()
+example1 =
+  Next (testEntry "./" 1500) Done <> example0
+
+testEntry :: FilePath -> Int64 -> Entry
+testEntry name size = simpleEntry path (NormalFile mempty size)
+  where
+    Right path = toTarPath False name
+
+-- | Simple tar archive containing regular files only
+data SimpleTarArchive = SimpleTarArchive {
+    simpleTarEntries :: Tar.Entries ()
+  , simpleTarRaw     :: [(FilePath, LBS.ByteString)]
+  , simpleTarBS      :: LBS.ByteString
+  }
+
+instance Show SimpleTarArchive where
+  show = show . simpleTarRaw
+
+prop_index_matches_tar :: SimpleTarArchive -> Property
+prop_index_matches_tar sta =
+    ioProperty (try go >>= either (\e -> throwIO (e :: SomeException))
+                                  (\_ -> return True))
+  where
+    go :: IO ()
+    go = do
+      h <- HBS.readHandle True (simpleTarBS sta)
+      goEntries h 0 (simpleTarEntries sta)
+
+    goEntries :: Handle -> TarEntryOffset -> Tar.Entries () -> IO ()
+    goEntries _ _ Tar.Done =
+      return ()
+    goEntries _ _ (Tar.Fail _) =
+      throwIO (userError "Fail entry in SimpleTarArchive")
+    goEntries h offset (Tar.Next e es) = do
+      goEntry h offset e
+      goEntries h (nextEntryOffset e offset) es
+
+    goEntry :: Handle -> TarEntryOffset -> Tar.Entry -> IO ()
+    goEntry h offset e = do
+      e' <- hReadEntry h offset
+      case (Tar.entryContent e, Tar.entryContent e') of
+        (Tar.NormalFile bs sz, Tar.NormalFile bs' sz') ->
+          unless (sz == sz' && bs == bs') $
+            throwIO $ userError "Entry mismatch"
+        _otherwise ->
+          throwIO $ userError "unexpected entry types"
+
+instance Arbitrary SimpleTarArchive where
+  arbitrary = do
+      numEntries <- sized $ \n -> choose (0, n)
+      rawEntries <- mkRaw numEntries
+      let entries = mkList rawEntries
+      return SimpleTarArchive {
+          simpleTarEntries = mkEntries entries
+        , simpleTarRaw     = rawEntries
+        , simpleTarBS      = Tar.write entries
+        }
+    where
+      mkRaw :: Int -> Gen [(FilePath, LBS.ByteString)]
+      mkRaw 0 = return []
+      mkRaw n = do
+         -- Pick a size around 0, 1, or 2 block boundaries
+         sz <- sized $ \n -> elements (take n fileSizes)
+         bs <- LBS.pack `fmap` vectorOf sz arbitrary
+         es <- mkRaw (n - 1)
+         return $ ("file" ++ show n, bs) : es
+
+      mkList :: [(FilePath, LBS.ByteString)] -> [Tar.Entry]
+      mkList []            = []
+      mkList ((fp, bs):es) = entry : mkList es
+        where
+          Right path = toTarPath False fp
+          entry   = simpleEntry path content
+          content = NormalFile bs (LBS.length bs)
+
+      mkEntries :: [Tar.Entry] -> Tar.Entries ()
+      mkEntries []     = Tar.Done
+      mkEntries (e:es) = Tar.Next e (mkEntries es)
+
+      -- Sizes around 0, 1, and 2 block boundaries
+      fileSizes :: [Int]
+      fileSizes = [
+                           0 ,    1 ,    2
+        ,  510 ,  511 ,  512 ,  513 ,  514
+        , 1022 , 1023 , 1024 , 1025 , 1026
+        ]
+
+-- | 'IndexBuilder' constructed from a 'SimpleIndex'
+newtype SimpleIndexBuilder = SimpleIndexBuilder IndexBuilder
+  deriving Show
+
+instance Arbitrary SimpleIndexBuilder where
+  arbitrary = SimpleIndexBuilder . build' . simpleTarEntries <$> arbitrary
+    where
+      -- like 'build', but don't finalize
+      build' :: Show e => Entries e -> IndexBuilder
+      build' = go empty
+        where
+          go !builder (Next e es) = go (addNextEntry e builder) es
+          go !builder  Done       = builder
+          go !_       (Fail err)  = error (show err)
+
+prop_finalise_unfinalise :: SimpleIndexBuilder -> Bool
+prop_finalise_unfinalise (SimpleIndexBuilder index) =
+    unfinalise (finalise index) == index
+
+#endif
+
+toStrict :: LBS.ByteString -> BS.ByteString
+#if MIN_VERSION_bytestring(0,10,0)
+toStrict = LBS.toStrict
+#else
+toStrict = BS.concat . LBS.toChunks
+#endif
+
+#if !(MIN_VERSION_base(4,5,0))
+(<>) :: Monoid m => m -> m -> m
+(<>) = mappend
+#endif
diff --git a/Codec/Archive/Tar/Index/IntTrie.hs b/Codec/Archive/Tar/Index/IntTrie.hs
new file mode 100644
index 0000000..4e8fe8f
--- /dev/null
+++ b/Codec/Archive/Tar/Index/IntTrie.hs
@@ -0,0 +1,607 @@
+{-# LANGUAGE CPP, BangPatterns, PatternGuards #-}
+{-# LANGUAGE DeriveDataTypeable, ScopedTypeVariables #-}
+
+module Codec.Archive.Tar.Index.IntTrie (
+
+  IntTrie,
+  construct,
+  toList,
+
+  IntTrieBuilder,
+  empty,
+  insert,
+  finalise,
+  unfinalise,
+
+  lookup,
+  TrieLookup(..),
+
+  serialise,
+  serialiseSize,
+  deserialise,
+
+#ifdef TESTS
+  test1, test2, test3,
+  ValidPaths(..),
+  prop_lookup,
+  prop_completions,
+  prop_lookup_mono,
+  prop_completions_mono,
+  prop_construct_toList,
+  prop_finalise_unfinalise,
+  prop_serialise_deserialise,
+  prop_serialiseSize,
+#endif
+ ) where
+
+import Prelude hiding (lookup)
+
+import Data.Typeable (Typeable)
+
+import qualified Data.Array.Unboxed as A
+import Data.Array.IArray  ((!))
+import qualified Data.Bits as Bits
+import Data.Word (Word32)
+import Data.Bits
+import Data.Monoid (Monoid(..))
+#if (MIN_VERSION_base(4,5,0))
+import Data.Monoid ((<>))
+#endif
+import qualified Data.ByteString        as BS
+import qualified Data.ByteString.Lazy   as LBS
+import qualified Data.ByteString.Unsafe as BS
+#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
+import Data.ByteString.Builder          as BS
+#else
+import Data.ByteString.Lazy.Builder     as BS
+#endif
+import Control.Exception (assert)
+#if MIN_VERSION_containers(0,5,0)
+import qualified Data.Map.Strict        as Map
+import qualified Data.IntMap.Strict     as IntMap
+import Data.IntMap.Strict (IntMap)
+#else
+import qualified Data.Map               as Map
+import qualified Data.IntMap            as IntMap
+import Data.IntMap (IntMap)
+#endif
+
+import Data.List hiding (lookup, insert)
+import Data.Function (on)
+
+#ifdef TESTS
+import Test.QuickCheck
+import Control.Applicative ((<$>), (<*>))
+#endif
+
+
+-- | A compact mapping from sequences of nats to nats.
+--
+-- NOTE: The tries in this module have values /only/ at the leaves (which
+-- correspond to files), they do not have values at the branch points (which
+-- correspond to directories).
+newtype IntTrie k v = IntTrie (A.UArray Word32 Word32)
+    deriving (Eq, Show, Typeable)
+
+
+-- Compact, read-only implementation of a trie. It's intended for use with file
+-- paths, but we do that via string ids.
+
+#ifdef TESTS
+-- Example mapping:
+--
+example0 :: [(FilePath, Int)]
+example0 =
+  [("foo-1.0/foo-1.0.cabal", 512)   -- tar block 1
+  ,("foo-1.0/LICENSE",       2048)  -- tar block 4
+  ,("foo-1.0/Data/Foo.hs",   4096)] -- tar block 8
+
+-- After converting path components to integers this becomes:
+--
+example1 :: [([Word32], Word32)]
+example1 =
+  [([1,2],   512)
+  ,([1,3],   2048)
+  ,([1,4,5], 4096)]
+
+-- As a trie this looks like:
+
+--  [ (1, *) ]
+--        |
+--        [ (2, 512), (3, 1024), (4, *) ]
+--                                   |
+--                                   [ (5, 4096) ]
+
+-- We use an intermediate trie representation
+
+mktrie :: [(Int, TrieNode k v)] -> IntTrieBuilder k v
+mkleaf :: (Enum k, Enum v) => k -> v                  -> (Int, TrieNode k v)
+mknode ::  Enum k          => k -> IntTrieBuilder k v -> (Int, TrieNode k v)
+
+mktrie = IntTrieBuilder . IntMap.fromList
+mkleaf k v = (fromEnum k, TrieLeaf (enumToWord32 v))
+mknode k t = (fromEnum k, TrieNode t) 
+
+example2 :: IntTrieBuilder Word32 Word32
+example2 = mktrie [ mknode 1 t1 ]
+  where
+    t1   = mktrie [ mkleaf 2 512, mkleaf 3 2048, mknode 4 t2 ]
+    t2   = mktrie [ mkleaf 5 4096 ]
+
+
+example2' :: IntTrieBuilder Word32 Word32
+example2' = mktrie [ mknode 0 t1 ]
+  where
+    t1   = mktrie [ mknode 3 t2 ]
+    t2   = mktrie [ mknode 1 t3, mknode 2 t4 ]
+    t3   = mktrie [ mkleaf 4 10608 ]
+    t4   = mktrie [ mkleaf 4 10612 ]
+{-
+0: [1,N0,3]
+
+  3: [1,N3,6]
+
+   6: [2,N1,N2,11,12]
+
+     11: [1,4,10608]
+     14: [1,4,10612]
+-}
+
+example2'' :: IntTrieBuilder Word32 Word32
+example2'' = mktrie [ mknode 1 t1, mknode 2 t2 ]
+  where
+    t1   = mktrie [ mkleaf 4 10608 ]
+    t2   = mktrie [ mkleaf 4 10612 ]
+
+example2''' :: IntTrieBuilder Word32 Word32
+example2''' = mktrie [ mknode 0 t3 ]
+  where
+    t3  = mktrie [ mknode 4 t8, mknode 6 t11 ]
+    t8  = mktrie [ mknode 1 t14 ]
+    t11 = mktrie [ mkleaf 5 10605 ]
+    t14 = mktrie [ mknode 2 t19, mknode 3 t22 ]
+    t19 = mktrie [ mkleaf 7 10608 ]
+    t22 = mktrie [ mkleaf 7 10612 ]
+{-
+ 0: [1,N0,3]
+ 3: [2,N4,N6,8,11]
+ 8: [1,N1,11]
+11: [1,5,10605]
+14: [2,N2,N3,16,19]
+19: [1,7,10608]
+22: [1,7,10612]
+-}
+
+-- We convert from the 'Paths' to the 'IntTrieBuilder' using 'inserts':
+--
+test1 = example2 == inserts example1 empty
+#endif
+
+-- Each node has a size and a sequence of keys followed by an equal length
+-- sequnce of corresponding entries. Since we're going to flatten this into
+-- a single array then we will need to replace the trie structure with pointers
+-- represented as array offsets.
+
+-- Each node is a pair of arrays, one of keys and one of Either value pointer.
+-- We need to distinguish values from internal pointers. We use a tag bit:
+--
+tagLeaf, tagNode, untag :: Word32 -> Word32
+tagLeaf = id
+tagNode = flip Bits.setBit   31
+untag   = flip Bits.clearBit 31
+
+isNode :: Word32 -> Bool
+isNode = flip Bits.testBit 31
+
+-- So the overall array form of the above trie is:
+--
+-- offset:   0   1    2    3   4  5  6    7    8     9     10  11  12
+-- array:  [ 1 | N1 | 3 ][ 3 | 2, 3, N4 | 512, 2048, 10 ][ 1 | 5 | 4096 ]
+--                     \__/                           \___/
+
+#ifdef TESTS
+example3 :: [Word32]
+example3 =
+ [1, tagNode 1,
+     3,
+  3, tagLeaf 2, tagLeaf 3, tagNode 4,
+     512,       2048,      10,
+  1, tagLeaf 5,
+     4096
+ ]
+
+-- We get the array form by using flattenTrie:
+
+test2 = example3 == flattenTrie example2
+
+example4 :: IntTrie Int Int
+example4 = IntTrie (mkArray example3)
+
+mkArray :: [Word32] -> A.UArray Word32 Word32
+mkArray xs = A.listArray (0, fromIntegral (length xs) - 1) xs
+
+test3 = case lookup example4 [1] of
+          Just (Completions [(2,_),(3,_),(4,_)]) -> True
+          _                          -> False
+
+test1, test2, test3 :: Bool
+#endif
+
+-------------------------------------
+-- Decoding the trie array form
+--
+
+completionsFrom :: (Enum k, Enum v) => IntTrie k v -> Word32 -> Completions k v
+completionsFrom trie@(IntTrie arr) nodeOff =
+    [ (word32ToEnum (untag key), next)
+    | keyOff <- [keysStart..keysEnd]
+    , let key   = arr ! keyOff
+          entry = arr ! (keyOff + nodeSize)
+          next | isNode key = Completions (completionsFrom trie entry)
+               | otherwise  = Entry (word32ToEnum entry)
+    ]
+  where
+    nodeSize  = arr ! nodeOff
+    keysStart = nodeOff + 1
+    keysEnd   = nodeOff + nodeSize
+
+-- | Convert the trie to a list
+--
+-- This is the left inverse to 'construct' (modulo ordering).
+toList :: forall k v. (Enum k, Enum v) => IntTrie k v -> [([k], v)]
+toList = concatMap (aux []) . (`completionsFrom` 0)
+  where
+    aux :: [k] -> (k, TrieLookup k v) -> [([k], v)]
+    aux ks (k, Entry v)        = [(reverse (k:ks), v)]
+    aux ks (k, Completions cs) = concatMap (aux (k:ks)) cs
+
+-------------------------------------
+-- Toplevel trie array construction
+--
+
+-- So constructing the 'IntTrie' as a whole is just a matter of stringing
+-- together all the bits
+
+-- | Build an 'IntTrie' from a bunch of (key, value) pairs, where the keys
+-- are sequences.
+--
+construct :: (Enum k, Enum v) => [([k], v)] -> IntTrie k v
+construct = finalise . flip inserts empty
+
+
+---------------------------------
+-- Looking up in the trie array
+--
+
+data TrieLookup  k v = Entry !v | Completions (Completions k v) deriving Show
+type Completions k v = [(k, TrieLookup k v)]
+
+lookup :: forall k v. (Enum k, Enum v) => IntTrie k v -> [k] -> Maybe (TrieLookup k v)
+lookup trie@(IntTrie arr) = go 0
+  where
+    go :: Word32 -> [k] -> Maybe (TrieLookup k v)
+    go nodeOff []     = Just (completions nodeOff)
+    go nodeOff (k:ks) = case search nodeOff (tagLeaf k') of
+      Just entryOff
+        | null ks   -> Just (entry entryOff)
+        | otherwise -> Nothing
+      Nothing       -> case search nodeOff (tagNode k') of
+        Nothing       -> Nothing
+        Just entryOff -> go (arr ! entryOff) ks
+      where
+        k' = enumToWord32 k
+
+    entry       entryOff = Entry (word32ToEnum (arr ! entryOff))
+    completions nodeOff  = Completions (completionsFrom trie nodeOff)
+
+    search :: Word32 -> Word32 -> Maybe Word32
+    search nodeOff key = fmap (+nodeSize) (bsearch keysStart keysEnd key)
+      where
+        nodeSize  = arr ! nodeOff
+        keysStart = nodeOff + 1
+        keysEnd   = nodeOff + nodeSize
+
+    bsearch :: Word32 -> Word32 -> Word32 -> Maybe Word32
+    bsearch a b key
+      | a > b     = Nothing
+      | otherwise = case compare key (arr ! mid) of
+          LT -> bsearch a (mid-1) key
+          EQ -> Just mid
+          GT -> bsearch (mid+1) b key
+      where mid = (a + b) `div` 2
+
+
+enumToWord32 :: Enum n => n -> Word32
+enumToWord32 = fromIntegral . fromEnum
+
+word32ToEnum :: Enum n => Word32 -> n
+word32ToEnum = toEnum . fromIntegral
+
+
+-------------------------
+-- Building Tries
+--
+
+newtype IntTrieBuilder k v = IntTrieBuilder (IntMap (TrieNode k v))
+  deriving (Show, Eq)
+
+data TrieNode k v = TrieLeaf {-# UNPACK #-} !Word32
+                  | TrieNode !(IntTrieBuilder k v)
+  deriving (Show, Eq)
+
+empty :: IntTrieBuilder k v
+empty = IntTrieBuilder IntMap.empty
+
+insert :: (Enum k, Enum v) => [k] -> v
+       -> IntTrieBuilder k v -> IntTrieBuilder k v
+insert []    _v t = t
+insert (k:ks) v t = insertTrie (fromEnum k) (map fromEnum ks) (enumToWord32 v) t
+
+insertTrie :: Int -> [Int] -> Word32
+           -> IntTrieBuilder k v -> IntTrieBuilder k v
+insertTrie k ks v (IntTrieBuilder t) =
+    IntTrieBuilder $
+      IntMap.alter (\t' -> Just $! maybe (freshTrieNode  ks v)
+                                         (insertTrieNode ks v) t')
+                   k t
+
+insertTrieNode :: [Int] -> Word32 -> TrieNode k v -> TrieNode k v
+insertTrieNode []     v  _           = TrieLeaf v
+insertTrieNode (k:ks) v (TrieLeaf _) = TrieNode (freshTrie  k ks v)
+insertTrieNode (k:ks) v (TrieNode t) = TrieNode (insertTrie k ks v t)
+
+freshTrie :: Int -> [Int] -> Word32 -> IntTrieBuilder k v
+freshTrie k []      v =
+  IntTrieBuilder (IntMap.singleton k (TrieLeaf v))
+freshTrie k (k':ks) v =
+  IntTrieBuilder (IntMap.singleton k (TrieNode (freshTrie k' ks v)))
+
+freshTrieNode :: [Int] -> Word32 -> TrieNode k v
+freshTrieNode []     v = TrieLeaf v
+freshTrieNode (k:ks) v = TrieNode (freshTrie k ks v)
+
+inserts :: (Enum k, Enum v) => [([k], v)]
+        -> IntTrieBuilder k v -> IntTrieBuilder k v
+inserts kvs t = foldl' (\t' (ks, v) -> insert ks v t') t kvs
+
+finalise :: IntTrieBuilder k v -> IntTrie k v
+finalise trie =
+    IntTrie $
+      A.listArray (0, fromIntegral (flatTrieLength trie) - 1)
+                  (flattenTrie trie)
+
+unfinalise :: (Enum k, Enum v) => IntTrie k v -> IntTrieBuilder k v
+unfinalise trie =
+    go (completionsFrom trie 0)
+  where
+    go kns =
+      IntTrieBuilder $
+        IntMap.fromList
+          [ (fromEnum k, t)
+          | (k, n) <- kns
+          , let t = case n of
+                      Entry       v    -> TrieLeaf (enumToWord32 v)
+                      Completions kns' -> TrieNode (go kns')
+          ]
+
+---------------------------------
+-- Flattening Tries
+--
+
+type Offset = Int
+
+flatTrieLength :: IntTrieBuilder k v -> Int
+flatTrieLength (IntTrieBuilder tns) =
+    1
+  + 2 * IntMap.size tns
+  + sum [ flatTrieLength n | TrieNode n <- IntMap.elems tns ]
+
+-- This is a breadth-first traversal. We keep a list of the tries that we are
+-- to write out next. Each of these have an offset allocated to them at the
+-- time we put them into the list. We keep a running offset so we know where
+-- to allocate next.
+--
+flattenTrie :: IntTrieBuilder k v -> [Word32]
+flattenTrie trie = go (queue [trie]) (size trie)
+  where
+    size (IntTrieBuilder tns) = 1 + 2 * IntMap.size tns
+
+    go :: Q (IntTrieBuilder k v) -> Offset -> [Word32]
+    go todo !offset =
+      case dequeue todo of
+        Nothing                   -> []
+        Just (IntTrieBuilder tnodes, tries) ->
+            flat ++ go tries' offset'
+          where
+            !count = IntMap.size tnodes
+            flat   = fromIntegral count
+                   : Map.keys  keysValues
+                  ++ Map.elems keysValues
+            (!offset', !keysValues, !tries') =
+#if MIN_VERSION_containers(0,4,2)
+              IntMap.foldlWithKey' accumNodes
+                                   (offset, Map.empty, tries)
+                                   tnodes
+#else
+              foldl' (\a (k,v) -> accumNodes a k v)
+                     (offset, Map.empty, tries)
+                     (IntMap.toList tnodes)
+#endif
+
+    accumNodes :: (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v))
+               -> Int -> TrieNode k v
+               -> (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v))
+    accumNodes (!off, !kvs, !tries) !k (TrieLeaf v) =
+        (off, kvs', tries)
+      where
+        kvs' = Map.insert (tagLeaf (int2Word32 k)) v kvs
+
+    accumNodes (!off, !kvs, !tries) !k (TrieNode t) =
+        (off + size t, kvs', tries')
+      where
+        kvs'   = Map.insert (tagNode (int2Word32 k)) (int2Word32 off) kvs
+        tries' = enqueue tries t
+
+data Q a = Q [a] [a]
+
+queue :: [a] -> Q a
+queue xs = Q xs []
+
+enqueue :: Q a -> a -> Q a
+enqueue (Q front  back) x = Q front (x : back)
+
+dequeue :: Q a -> Maybe (a, Q a)
+dequeue (Q (x:xs) back)    = Just (x, Q xs back)
+dequeue (Q []     back)    = case reverse back of
+                               x:xs -> Just (x, Q xs [])
+                               []   -> Nothing
+
+int2Word32 :: Int -> Word32
+int2Word32 = fromIntegral
+
+
+-------------------------
+-- (de)serialisation
+--
+
+serialise :: IntTrie k v -> BS.Builder
+serialise (IntTrie arr) =
+    let (_, !ixEnd) = A.bounds arr in
+    BS.word32BE (ixEnd+1)
+ <> foldr (\n r -> BS.word32BE n <> r) mempty (A.elems arr)
+
+serialiseSize :: IntTrie k v -> Int
+serialiseSize (IntTrie arr) =
+    let (_, ixEnd) = A.bounds arr in
+    4
+  + 4 * (fromIntegral ixEnd + 1)
+
+deserialise :: BS.ByteString -> Maybe (IntTrie k v, BS.ByteString)
+deserialise bs
+  | BS.length bs >= 4
+  , let lenArr   = readWord32BE bs 0
+        lenTotal = 4 + 4 * fromIntegral lenArr
+  , BS.length bs >= 4 + 4 * fromIntegral lenArr
+  , let !arr = A.array (0, lenArr-1)
+                      [ (i, readWord32BE bs off)
+                      | (i, off) <- zip [0..lenArr-1] [4,8 .. lenTotal - 4] ]
+        !bs' = BS.drop lenTotal bs
+  = Just (IntTrie arr, bs')
+
+  | otherwise
+  = Nothing
+
+readWord32BE :: BS.ByteString -> Int -> Word32
+readWord32BE bs i =
+    assert (i >= 0 && i+3 <= BS.length bs - 1) $
+    fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
+  + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
+  + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
+  + fromIntegral (BS.unsafeIndex bs (i + 3))
+
+
+-------------------------
+-- Correctness property
+--
+
+#ifdef TESTS
+
+prop_lookup :: (Ord k, Enum k, Eq v, Enum v, Show k, Show v)
+            => [([k], v)] -> Bool
+prop_lookup paths =
+  flip all paths $ \(key, value) ->
+    case lookup trie key of
+      Just (Entry value') | value' == value -> True
+      Just (Entry value')   -> error $ "IntTrie: " ++ show (key, value, value')
+      Nothing               -> error $ "IntTrie: didn't find " ++ show key
+      Just (Completions xs) -> error $ "IntTrie: " ++ show xs
+
+  where
+    trie = construct paths
+
+prop_completions :: forall k v. (Ord k, Enum k, Eq v, Enum v) => [([k], v)] -> Bool
+prop_completions paths =
+    inserts paths empty 
+ == convertCompletions (completionsFrom (construct paths) 0)
+  where
+    convertCompletions :: Ord k => Completions k v -> IntTrieBuilder k v
+    convertCompletions kls =
+      IntTrieBuilder $
+        IntMap.fromList
+          [ case l of
+              Entry v          -> mkleaf k v
+              Completions kls' -> mknode k (convertCompletions kls')
+          | (k, l) <- sortBy (compare `on` fst) kls ]
+
+
+prop_lookup_mono :: ValidPaths -> Bool
+prop_lookup_mono (ValidPaths paths) = prop_lookup paths
+
+prop_completions_mono :: ValidPaths -> Bool
+prop_completions_mono (ValidPaths paths) = prop_completions paths
+
+prop_construct_toList :: ValidPaths -> Bool
+prop_construct_toList (ValidPaths paths) =
+       sortBy (compare `on` fst) (toList (construct paths))
+    == sortBy (compare `on` fst) paths
+
+prop_finalise_unfinalise :: ValidPaths -> Bool
+prop_finalise_unfinalise (ValidPaths paths) =
+    builder == unfinalise (finalise builder)
+  where
+    builder :: IntTrieBuilder Char Char
+    builder = inserts paths empty
+
+prop_serialise_deserialise :: ValidPaths -> Bool
+prop_serialise_deserialise (ValidPaths paths) =
+    Just (trie, BS.empty) == (deserialise
+                            . toStrict . BS.toLazyByteString
+                            . serialise) trie
+  where
+    trie :: IntTrie Char Char
+    trie = construct paths
+
+prop_serialiseSize :: ValidPaths -> Bool
+prop_serialiseSize (ValidPaths paths) =
+    (fromIntegral . LBS.length . BS.toLazyByteString . serialise) trie
+ == serialiseSize trie
+  where
+    trie :: IntTrie Char Char
+    trie = construct paths
+
+newtype ValidPaths = ValidPaths [([Char], Char)] deriving Show
+
+instance Arbitrary ValidPaths where
+  arbitrary =
+      ValidPaths . makeNoPrefix <$> listOf ((,) <$> listOf1 arbitrary <*> arbitrary)
+    where
+      makeNoPrefix [] = []
+      makeNoPrefix ((k,v):kvs)
+        | all (\(k', _) -> not (isPrefixOfOther k k')) kvs
+                     = (k,v) : makeNoPrefix kvs
+        | otherwise  =         makeNoPrefix kvs
+
+  shrink (ValidPaths kvs) =
+      map ValidPaths . filter noPrefix . filter nonEmpty . shrink $ kvs
+    where
+      noPrefix []           = True
+      noPrefix ((k,_):kvs') = all (\(k', _) -> not (isPrefixOfOther k k')) kvs'
+                           && noPrefix kvs'
+      nonEmpty = all (not . null . fst)
+
+isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a
+
+toStrict :: LBS.ByteString -> BS.ByteString
+#if MIN_VERSION_bytestring(0,10,0)
+toStrict = LBS.toStrict
+#else
+toStrict = BS.concat . LBS.toChunks
+#endif
+
+#endif
+
+#if !(MIN_VERSION_base(4,5,0))
+(<>) :: Monoid m => m -> m -> m
+(<>) = mappend
+#endif
+
diff --git a/Codec/Archive/Tar/Index/StringTable.hs b/Codec/Archive/Tar/Index/StringTable.hs
new file mode 100644
index 0000000..688bd80
--- /dev/null
+++ b/Codec/Archive/Tar/Index/StringTable.hs
@@ -0,0 +1,324 @@
+{-# LANGUAGE CPP, BangPatterns, PatternGuards, DeriveDataTypeable #-}
+
+module Codec.Archive.Tar.Index.StringTable (
+
+    StringTable,
+    lookup,
+    index,
+    construct,
+
+    StringTableBuilder,
+    empty,
+    insert,
+    inserts,
+    finalise,
+    unfinalise,
+
+    serialise,
+    serialiseSize,
+    deserialiseV1,
+    deserialiseV2,
+
+#ifdef TESTS
+    prop_valid,
+    prop_sorted,
+    prop_finalise_unfinalise,
+    prop_serialise_deserialise,
+    prop_serialiseSize,
+#endif
+ ) where
+
+import Data.Typeable (Typeable)
+
+import Prelude   hiding (lookup, id)
+import Data.List hiding (lookup, insert)
+import Data.Function (on)
+import Data.Word (Word32)
+import Data.Int  (Int32)
+import Data.Bits
+import Data.Monoid (Monoid(..))
+#if (MIN_VERSION_base(4,5,0))
+import Data.Monoid ((<>))
+#endif
+import Control.Exception (assert)
+
+import qualified Data.Array.Unboxed as A
+import           Data.Array.Unboxed ((!))
+#if MIN_VERSION_containers(0,5,0)
+import qualified Data.Map.Strict        as Map
+import           Data.Map.Strict (Map)
+#else
+import qualified Data.Map               as Map
+import           Data.Map (Map)
+#endif
+import qualified Data.ByteString        as BS
+import qualified Data.ByteString.Unsafe as BS
+import qualified Data.ByteString.Lazy   as LBS
+#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
+import Data.ByteString.Builder          as BS
+import Data.ByteString.Builder.Extra    as BS (byteStringCopy)
+#else
+import Data.ByteString.Lazy.Builder     as BS
+import Data.ByteString.Lazy.Builder.Extras as BS (byteStringCopy)
+#endif
+
+
+-- | An effecient mapping from strings to a dense set of integers.
+--
+data StringTable id = StringTable
+         {-# UNPACK #-} !BS.ByteString           -- all strings concatenated
+         {-# UNPACK #-} !(A.UArray Int32 Word32) -- string offset table
+         {-# UNPACK #-} !(A.UArray Int32 Int32)  -- string index to id table
+         {-# UNPACK #-} !(A.UArray Int32 Int32)  -- string id to index table
+  deriving (Show, Typeable)
+
+instance (Eq id, Enum id) => Eq (StringTable id) where
+  tbl1 == tbl2 = unfinalise tbl1 == unfinalise tbl2
+
+-- | Look up a string in the token table. If the string is present, return
+-- its corresponding index.
+--
+lookup :: Enum id => StringTable id -> BS.ByteString -> Maybe id
+lookup (StringTable bs offsets ids _ixs) str =
+    binarySearch 0 (topBound-1) str
+  where
+    (0, topBound) = A.bounds offsets
+
+    binarySearch !a !b !key
+      | a > b     = Nothing
+      | otherwise = case compare key (index' bs offsets mid) of
+          LT -> binarySearch a (mid-1) key
+          EQ -> Just $! toEnum (fromIntegral (ids ! mid))
+          GT -> binarySearch (mid+1) b key
+      where mid = (a + b) `div` 2
+
+index' :: BS.ByteString -> A.UArray Int32 Word32 -> Int32 -> BS.ByteString
+index' bs offsets i = BS.unsafeTake len . BS.unsafeDrop start $ bs
+  where
+    start, end, len :: Int
+    start = fromIntegral (offsets ! i)
+    end   = fromIntegral (offsets ! (i+1))
+    len   = end - start
+
+
+-- | Given the index of a string in the table, return the string.
+--
+index :: Enum id => StringTable id -> id -> BS.ByteString
+index (StringTable bs offsets _ids ixs) =
+    index' bs offsets . (ixs !) . fromIntegral . fromEnum
+
+
+-- | Given a list of strings, construct a 'StringTable' mapping those strings
+-- to a dense set of integers. Also return the ids for all the strings used
+-- in the construction.
+--
+construct :: Enum id => [BS.ByteString] -> StringTable id
+construct = finalise . foldl' (\tbl s -> fst (insert s tbl)) empty
+
+
+data StringTableBuilder id = StringTableBuilder
+                                              !(Map BS.ByteString id)
+                               {-# UNPACK #-} !Word32
+  deriving (Eq, Show, Typeable)
+
+empty :: StringTableBuilder id
+empty = StringTableBuilder Map.empty 0
+
+insert :: Enum id => BS.ByteString -> StringTableBuilder id -> (StringTableBuilder id, id)
+insert str builder@(StringTableBuilder smap nextid) =
+    case Map.lookup str smap of
+      Just id -> (builder, id)
+      Nothing -> let !id   = toEnum (fromIntegral nextid)
+                     !smap' = Map.insert str id smap
+                   in (StringTableBuilder smap' (nextid+1), id)
+
+inserts :: Enum id => [BS.ByteString] -> StringTableBuilder id -> (StringTableBuilder id, [id])
+inserts bss builder = mapAccumL (flip insert) builder bss
+
+finalise :: Enum id => StringTableBuilder id -> StringTable id
+finalise (StringTableBuilder smap _) =
+    (StringTable strs offsets ids ixs)
+  where
+    strs    = BS.concat (Map.keys smap)
+    offsets = A.listArray (0, fromIntegral (Map.size smap))
+            . scanl (\off str -> off + fromIntegral (BS.length str)) 0
+            $ Map.keys smap
+    ids     = A.listArray (0, fromIntegral (Map.size smap) - 1)
+            . map (fromIntegral . fromEnum)
+            $ Map.elems smap
+    ixs     = A.array (A.bounds ids) [ (id,ix) | (ix,id) <- A.assocs ids ]
+
+unfinalise :: Enum id => StringTable id -> StringTableBuilder id
+unfinalise (StringTable strs offsets ids _) =
+    StringTableBuilder smap nextid
+  where
+    smap   = Map.fromAscList
+               [ (index' strs offsets ix, toEnum (fromIntegral (ids ! ix)))
+               | ix <- [0..h] ]
+    (0,h)  = A.bounds ids
+    nextid = fromIntegral (h+1)
+
+
+-------------------------
+-- (de)serialisation
+--
+
+serialise :: StringTable id -> BS.Builder
+serialise (StringTable strs offs ids ixs) =
+      let (_, !ixEnd) = A.bounds offs in
+
+      BS.word32BE (fromIntegral (BS.length strs))
+   <> BS.word32BE (fromIntegral ixEnd + 1)
+   <> BS.byteStringCopy strs
+   <> foldr (\n r -> BS.word32BE n <> r) mempty (A.elems offs)
+   <> foldr (\n r -> BS.int32BE  n <> r) mempty (A.elems ids)
+   <> foldr (\n r -> BS.int32BE  n <> r) mempty (A.elems ixs)
+
+serialiseSize :: StringTable id -> Int
+serialiseSize (StringTable strs offs _ids _ixs) =
+    let (_, !ixEnd) = A.bounds offs
+     in 4 * 2
+      + BS.length strs
+      + 4 * (fromIntegral ixEnd + 1)
+      + 8 *  fromIntegral ixEnd
+
+deserialiseV1 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString)
+deserialiseV1 bs
+  | BS.length bs >= 8
+  , let lenStrs = fromIntegral (readWord32BE bs 0)
+        lenArr  = fromIntegral (readWord32BE bs 4)
+        lenTotal= 8 + lenStrs + 4 * lenArr
+  , BS.length bs >= lenTotal
+  , let strs = BS.take lenStrs (BS.drop 8 bs)
+        arr  = A.array (0, fromIntegral lenArr - 1)
+                       [ (i, readWord32BE bs off)
+                       | (i, off) <- zip [0 .. fromIntegral lenArr - 1]
+                                         [offArrS,offArrS+4 .. offArrE]
+                       ]
+        ids  = A.array (0, fromIntegral lenArr - 1)
+                       [ (i,i) | i <- [0 .. fromIntegral lenArr - 1] ]
+        ixs  = ids -- two identity mappings
+        offArrS = 8 + lenStrs
+        offArrE = offArrS + 4 * lenArr - 1
+        !stringTable = StringTable strs arr ids ixs
+        !bs'         = BS.drop lenTotal bs
+  = Just (stringTable, bs')
+
+  | otherwise
+  = Nothing
+
+deserialiseV2 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString)
+deserialiseV2 bs
+  | BS.length bs >= 8
+  , let lenStrs = fromIntegral (readWord32BE bs 0)
+        lenArr  = fromIntegral (readWord32BE bs 4)
+        lenTotal= 8                   -- the two length prefixes
+                + lenStrs
+                + 4 * lenArr
+                +(4 * (lenArr - 1)) * 2 -- offsets array is 1 longer
+  , BS.length bs >= lenTotal
+  , let strs = BS.take lenStrs (BS.drop 8 bs)
+        offs = A.listArray (0, fromIntegral lenArr - 1)
+                           [ readWord32BE bs off
+                           | off <- offsets offsOff ]
+        -- the second two arrays are 1 shorter
+        ids  = A.listArray (0, fromIntegral lenArr - 2)
+                           [ readInt32BE bs off
+                           | off <- offsets idsOff ]
+        ixs  = A.listArray (0, fromIntegral lenArr - 2)
+                           [ readInt32BE bs off
+                           | off <- offsets ixsOff ]
+        offsOff = 8 + lenStrs
+        idsOff  = offsOff + 4 * lenArr
+        ixsOff  = idsOff  + 4 * (lenArr-1)
+        offsets from = [from,from+4 .. from + 4 * (lenArr - 1)]
+        !stringTable = StringTable strs offs ids ixs
+        !bs'         = BS.drop lenTotal bs
+  = Just (stringTable, bs')
+
+  | otherwise
+  = Nothing
+
+readInt32BE :: BS.ByteString -> Int -> Int32
+readInt32BE bs i = fromIntegral (readWord32BE bs i)
+
+readWord32BE :: BS.ByteString -> Int -> Word32
+readWord32BE bs i =
+    assert (i >= 0 && i+3 <= BS.length bs - 1) $
+    fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
+  + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
+  + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
+  + fromIntegral (BS.unsafeIndex bs (i + 3))
+
+#ifdef TESTS
+
+prop_valid :: [BS.ByteString] -> Bool
+prop_valid strs =
+     all lookupIndex (enumStrings tbl)
+  && all indexLookup (enumIds tbl)
+
+  where
+    tbl :: StringTable Int
+    tbl = construct strs
+
+    lookupIndex str = index tbl ident == str
+      where Just ident = lookup tbl str
+
+    indexLookup ident = lookup tbl str == Just ident
+      where str       = index tbl ident
+
+-- this is important so we can use Map.fromAscList
+prop_sorted :: [BS.ByteString] -> Bool
+prop_sorted strings =
+    isSorted [ index' strs offsets ix
+             | ix <- A.range (A.bounds ids) ]
+  where
+    _tbl :: StringTable Int
+    _tbl@(StringTable strs offsets ids _ixs) = construct strings
+    isSorted xs = and (zipWith (<) xs (tail xs))
+
+prop_finalise_unfinalise :: [BS.ByteString] -> Bool
+prop_finalise_unfinalise strs =
+    builder == unfinalise (finalise builder)
+  where
+    builder :: StringTableBuilder Int
+    builder = foldl' (\tbl s -> fst (insert s tbl)) empty strs
+
+prop_serialise_deserialise :: [BS.ByteString] -> Bool
+prop_serialise_deserialise strs =
+    Just (strtable, BS.empty) == (deserialiseV2
+                                . toStrict . BS.toLazyByteString
+                                . serialise) strtable
+  where
+    strtable :: StringTable Int
+    strtable = construct strs
+
+prop_serialiseSize :: [BS.ByteString] -> Bool
+prop_serialiseSize strs =
+    (fromIntegral . LBS.length . BS.toLazyByteString . serialise) strtable
+ == serialiseSize strtable
+  where
+    strtable :: StringTable Int
+    strtable = construct strs
+
+enumStrings :: Enum id => StringTable id -> [BS.ByteString]
+enumStrings (StringTable bs offsets _ _) = map (index' bs offsets) [0..h-1]
+  where (0,h) = A.bounds offsets
+
+enumIds :: Enum id => StringTable id -> [id]
+enumIds (StringTable _ offsets _ _) = [toEnum 0 .. toEnum (fromIntegral (h-1))]
+  where (0,h) = A.bounds offsets
+
+toStrict :: LBS.ByteString -> BS.ByteString
+#if MIN_VERSION_bytestring(0,10,0)
+toStrict = LBS.toStrict
+#else
+toStrict = BS.concat . LBS.toChunks
+#endif
+
+#endif
+
+#if !(MIN_VERSION_base(4,5,0))
+(<>) :: Monoid m => m -> m -> m
+(<>) = mappend
+#endif
diff --git a/Codec/Archive/Tar/Pack.hs b/Codec/Archive/Tar/Pack.hs
new file mode 100644
index 0000000..2057203
--- /dev/null
+++ b/Codec/Archive/Tar/Pack.hs
@@ -0,0 +1,189 @@
+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Codec.Archive.Tar
+-- Copyright   :  (c) 2007 Bjorn Bringert,
+--                    2008 Andrea Vezzosi,
+--                    2008-2009, 2012, 2016 Duncan Coutts
+-- License     :  BSD3
+--
+-- Maintainer  :  duncan@community.haskell.org
+-- Portability :  portable
+--
+-----------------------------------------------------------------------------
+module Codec.Archive.Tar.Pack (
+    pack,
+    packFileEntry,
+    packDirectoryEntry,
+
+    getDirectoryContentsRecursive,
+  ) where
+
+import Codec.Archive.Tar.Types
+
+import qualified Data.ByteString.Lazy as BS
+import System.FilePath
+         ( (</>) )
+import qualified System.FilePath as FilePath.Native
+         ( addTrailingPathSeparator, hasTrailingPathSeparator )
+import System.Directory
+         ( getDirectoryContents, doesDirectoryExist, getModificationTime
+         , Permissions(..), getPermissions )
+#if MIN_VERSION_directory(1,2,0)
+-- The directory package switched to the new time package
+import Data.Time.Clock
+         ( UTCTime )
+import Data.Time.Clock.POSIX
+         ( utcTimeToPOSIXSeconds )
+#else
+import System.Time
+         ( ClockTime(..) )
+#endif
+import System.IO
+         ( IOMode(ReadMode), openBinaryFile, hFileSize )
+import System.IO.Unsafe (unsafeInterleaveIO)
+
+-- | Creates a tar archive from a list of directory or files. Any directories
+-- specified will have their contents included recursively. Paths in the
+-- archive will be relative to the given base directory.
+--
+-- This is a portable implementation of packing suitable for portable archives.
+-- In particular it only constructs 'NormalFile' and 'Directory' entries. Hard
+-- links and symbolic links are treated like ordinary files. It cannot be used
+-- to pack directories containing recursive symbolic links. Special files like
+-- FIFOs (named pipes), sockets or device files will also cause problems.
+--
+-- An exception will be thrown for any file names that are too long to
+-- represent as a 'TarPath'.
+--
+-- * This function returns results lazily. Subdirectories are scanned
+-- and files are read one by one as the list of entries is consumed.
+--
+pack :: FilePath   -- ^ Base directory
+     -> [FilePath] -- ^ Files and directories to pack, relative to the base dir
+     -> IO [Entry]
+pack baseDir paths0 = preparePaths baseDir paths0 >>= packPaths baseDir
+
+preparePaths :: FilePath -> [FilePath] -> IO [FilePath]
+preparePaths baseDir paths =
+  fmap concat $ interleave
+    [ do isDir  <- doesDirectoryExist (baseDir </> path)
+         if isDir
+           then do entries <- getDirectoryContentsRecursive (baseDir </> path)
+                   let entries' = map (path </>) entries
+                       dir = FilePath.Native.addTrailingPathSeparator path
+                   if null path then return entries'
+                                else return (dir : entries')
+           else return [path]
+    | path <- paths ]
+
+packPaths :: FilePath -> [FilePath] -> IO [Entry]
+packPaths baseDir paths =
+  interleave
+    [ do tarpath <- either fail return (toTarPath isDir relpath)
+         if isDir then packDirectoryEntry filepath tarpath
+                  else packFileEntry      filepath tarpath
+    | relpath <- paths
+    , let isDir    = FilePath.Native.hasTrailingPathSeparator filepath
+          filepath = baseDir </> relpath ]
+
+interleave :: [IO a] -> IO [a]
+interleave = unsafeInterleaveIO . go
+  where
+    go []     = return []
+    go (x:xs) = do
+      x'  <- x
+      xs' <- interleave xs
+      return (x':xs')
+
+-- | Construct a tar 'Entry' based on a local file.
+--
+-- This sets the entry size, the data contained in the file and the file's
+-- modification time. If the file is executable then that information is also
+-- preserved. File ownership and detailed permissions are not preserved.
+--
+-- * The file contents is read lazily.
+--
+packFileEntry :: FilePath -- ^ Full path to find the file on the local disk
+              -> TarPath  -- ^ Path to use for the tar Entry in the archive
+              -> IO Entry
+packFileEntry filepath tarpath = do
+  mtime   <- getModTime filepath
+  perms   <- getPermissions filepath
+  file    <- openBinaryFile filepath ReadMode
+  size    <- hFileSize file
+  content <- BS.hGetContents file
+  return (simpleEntry tarpath (NormalFile content (fromIntegral size))) {
+    entryPermissions = if executable perms then executableFilePermissions
+                                           else ordinaryFilePermissions,
+    entryTime = mtime
+  }
+
+-- | Construct a tar 'Entry' based on a local directory (but not its contents).
+--
+-- The only attribute of the directory that is used is its modification time.
+-- Directory ownership and detailed permissions are not preserved.
+--
+packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk
+                   -> TarPath  -- ^ Path to use for the tar Entry in the archive
+                   -> IO Entry
+packDirectoryEntry filepath tarpath = do
+  mtime   <- getModTime filepath
+  return (directoryEntry tarpath) {
+    entryTime = mtime
+  }
+
+-- | This is a utility function, much like 'getDirectoryContents'. The
+-- difference is that it includes the contents of subdirectories.
+--
+-- The paths returned are all relative to the top directory. Directory paths
+-- are distinguishable by having a trailing path separator
+-- (see 'FilePath.Native.hasTrailingPathSeparator').
+--
+-- All directories are listed before the files that they contain. Amongst the
+-- contents of a directory, subdirectories are listed after normal files. The
+-- overall result is that files within a directory will be together in a single
+-- contiguous group. This tends to improve file layout and IO performance when
+-- creating or extracting tar archives.
+--
+-- * This function returns results lazily. Subdirectories are not scanned
+-- until the files entries in the parent directory have been consumed.
+--
+getDirectoryContentsRecursive :: FilePath -> IO [FilePath]
+getDirectoryContentsRecursive dir0 =
+  fmap tail (recurseDirectories dir0 [""])
+
+recurseDirectories :: FilePath -> [FilePath] -> IO [FilePath]
+recurseDirectories _    []         = return []
+recurseDirectories base (dir:dirs) = unsafeInterleaveIO $ do
+  (files, dirs') <- collect [] [] =<< getDirectoryContents (base </> dir)
+
+  files' <- recurseDirectories base (dirs' ++ dirs)
+  return (dir : files ++ files')
+
+  where
+    collect files dirs' []              = return (reverse files, reverse dirs')
+    collect files dirs' (entry:entries) | ignore entry
+                                        = collect files dirs' entries
+    collect files dirs' (entry:entries) = do
+      let dirEntry  = dir </> entry
+          dirEntry' = FilePath.Native.addTrailingPathSeparator dirEntry
+      isDirectory <- doesDirectoryExist (base </> dirEntry)
+      if isDirectory
+        then collect files (dirEntry':dirs') entries
+        else collect (dirEntry:files) dirs' entries
+
+    ignore ['.']      = True
+    ignore ['.', '.'] = True
+    ignore _          = False
+
+getModTime :: FilePath -> IO EpochTime
+getModTime path = do
+#if MIN_VERSION_directory(1,2,0)
+  -- The directory package switched to the new time package
+  t <- getModificationTime path
+  return . floor . utcTimeToPOSIXSeconds $ t
+#else
+  (TOD s _) <- getModificationTime path
+  return $! fromIntegral s
+#endif
diff --git a/Codec/Archive/Tar/Read.hs b/Codec/Archive/Tar/Read.hs
new file mode 100644
index 0000000..1b7667a
--- /dev/null
+++ b/Codec/Archive/Tar/Read.hs
@@ -0,0 +1,273 @@
+{-# LANGUAGE CPP, DeriveDataTypeable, BangPatterns #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Codec.Archive.Tar.Read
+-- Copyright   :  (c) 2007 Bjorn Bringert,
+--                    2008 Andrea Vezzosi,
+--                    2008-2009 Duncan Coutts,
+--                    2011 Max Bolingbroke
+-- License     :  BSD3
+--
+-- Maintainer  :  duncan@community.haskell.org
+-- Portability :  portable
+--
+-----------------------------------------------------------------------------
+module Codec.Archive.Tar.Read (read, FormatError(..)) where
+
+import Codec.Archive.Tar.Types
+
+import Data.Char     (ord)
+import Data.Int      (Int64)
+import Data.Bits     (Bits(shiftL))
+import Control.Exception (Exception(..))
+import Data.Typeable (Typeable)
+import Control.Applicative
+import Control.Monad
+import Control.DeepSeq
+
+import qualified Data.ByteString        as BS
+import qualified Data.ByteString.Char8  as BS.Char8
+import qualified Data.ByteString.Unsafe as BS
+import qualified Data.ByteString.Lazy   as LBS
+
+import Prelude hiding (read)
+
+#if !MIN_VERSION_bytestring(0,10,0)
+import Data.Monoid (Monoid(..))
+import qualified Data.ByteString.Lazy.Internal as LBS
+#endif
+
+-- | Errors that can be encountered when parsing a Tar archive.
+data FormatError
+  = TruncatedArchive
+  | ShortTrailer
+  | BadTrailer
+  | TrailingJunk
+  | ChecksumIncorrect
+  | NotTarFormat
+  | UnrecognisedTarFormat
+  | HeaderBadNumericEncoding
+#if MIN_VERSION_base(4,8,0)
+  deriving (Eq, Show, Typeable)
+
+instance Exception FormatError where
+  displayException TruncatedArchive         = "truncated tar archive"
+  displayException ShortTrailer             = "short tar trailer"
+  displayException BadTrailer               = "bad tar trailer"
+  displayException TrailingJunk             = "tar file has trailing junk"
+  displayException ChecksumIncorrect        = "tar checksum error"
+  displayException NotTarFormat             = "data is not in tar format"
+  displayException UnrecognisedTarFormat    = "tar entry not in a recognised format"
+  displayException HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)"
+#else
+  deriving (Eq, Typeable)
+
+instance Show FormatError where
+  show TruncatedArchive         = "truncated tar archive"
+  show ShortTrailer             = "short tar trailer"
+  show BadTrailer               = "bad tar trailer"
+  show TrailingJunk             = "tar file has trailing junk"
+  show ChecksumIncorrect        = "tar checksum error"
+  show NotTarFormat             = "data is not in tar format"
+  show UnrecognisedTarFormat    = "tar entry not in a recognised format"
+  show HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)"
+
+instance Exception FormatError
+#endif
+
+instance NFData    FormatError where
+  rnf !_ = () -- enumerations are fully strict by construction
+
+-- | Convert a data stream in the tar file format into an internal data
+-- structure. Decoding errors are reported by the 'Fail' constructor of the
+-- 'Entries' type.
+--
+-- * The conversion is done lazily.
+--
+read :: LBS.ByteString -> Entries FormatError
+read = unfoldEntries getEntry
+
+getEntry :: LBS.ByteString -> Either FormatError (Maybe (Entry, LBS.ByteString))
+getEntry bs
+  | BS.length header < 512 = Left TruncatedArchive
+
+  -- Tar files end with at least two blocks of all '0'. Checking this serves
+  -- two purposes. It checks the format but also forces the tail of the data
+  -- which is necessary to close the file if it came from a lazily read file.
+  | LBS.head bs == 0 = case LBS.splitAt 1024 bs of
+      (end, trailing)
+        | LBS.length end /= 1024        -> Left ShortTrailer
+        | not (LBS.all (== 0) end)      -> Left BadTrailer
+        | not (LBS.all (== 0) trailing) -> Left TrailingJunk
+        | otherwise                     -> Right Nothing
+
+  | otherwise  = partial $ do
+
+  case (chksum_, format_) of
+    (Ok chksum, _   ) | correctChecksum header chksum -> return ()
+    (Ok _,      Ok _) -> Error ChecksumIncorrect
+    _                 -> Error NotTarFormat
+
+  -- These fields are partial, have to check them
+  format   <- format_;   mode     <- mode_;
+  uid      <- uid_;      gid      <- gid_;
+  size     <- size_;     mtime    <- mtime_;
+  devmajor <- devmajor_; devminor <- devminor_;
+
+  let content = LBS.take size (LBS.drop 512 bs)
+      padding = (512 - size) `mod` 512
+      bs'     = LBS.drop (512 + size + padding) bs
+
+      entry = Entry {
+        entryTarPath     = TarPath name prefix,
+        entryContent     = case typecode of
+                   '\0' -> NormalFile      content size
+                   '0'  -> NormalFile      content size
+                   '1'  -> HardLink        (LinkTarget linkname)
+                   '2'  -> SymbolicLink    (LinkTarget linkname)
+                   _ | format == V7Format
+                        -> OtherEntryType  typecode content size
+                   '3'  -> CharacterDevice devmajor devminor
+                   '4'  -> BlockDevice     devmajor devminor
+                   '5'  -> Directory
+                   '6'  -> NamedPipe
+                   '7'  -> NormalFile      content size
+                   _    -> OtherEntryType  typecode content size,
+        entryPermissions = mode,
+        entryOwnership   = Ownership (BS.Char8.unpack uname)
+                                     (BS.Char8.unpack gname) uid gid,
+        entryTime        = mtime,
+        entryFormat      = format
+    }
+
+  return (Just (entry, bs'))
+
+  where
+#if MIN_VERSION_bytestring(0,10,0)
+   header = LBS.toStrict (LBS.take 512 bs)
+#else
+   header = toStrict (LBS.take 512 bs)
+   toStrict = LBS.foldrChunks mappend mempty
+#endif
+
+   name       = getString   0 100 header
+   mode_      = getOct    100   8 header
+   uid_       = getOct    108   8 header
+   gid_       = getOct    116   8 header
+   size_      = getOct    124  12 header
+   mtime_     = getOct    136  12 header
+   chksum_    = getOct    148   8 header
+   typecode   = getByte   156     header
+   linkname   = getString 157 100 header
+   magic      = getChars  257   8 header
+   uname      = getString 265  32 header
+   gname      = getString 297  32 header
+   devmajor_  = getOct    329   8 header
+   devminor_  = getOct    337   8 header
+   prefix     = getString 345 155 header
+-- trailing   = getBytes  500  12 header
+
+   format_
+     | magic == ustarMagic = return UstarFormat
+     | magic == gnuMagic   = return GnuFormat
+     | magic == v7Magic    = return V7Format
+     | otherwise           = Error UnrecognisedTarFormat
+
+v7Magic, ustarMagic, gnuMagic :: BS.ByteString
+v7Magic    = BS.Char8.pack "\0\0\0\0\0\0\0\0"
+ustarMagic = BS.Char8.pack "ustar\NUL00"
+gnuMagic   = BS.Char8.pack "ustar  \NUL"
+
+correctChecksum :: BS.ByteString -> Int -> Bool
+correctChecksum header checksum = checksum == checksum'
+  where
+    -- sum of all 512 bytes in the header block,
+    -- treating each byte as an 8-bit unsigned value
+    sumchars  = BS.foldl' (\x y -> x + fromIntegral y) 0
+    -- treating the 8 bytes of chksum as blank characters.
+    checksum' = sumchars (BS.take 148 header)
+              + 256 -- 256 = sumchars (BS.Char8.replicate 8 ' ')
+              + sumchars (BS.drop 156 header)
+
+-- * TAR format primitive input
+
+{-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Partial FormatError Int   #-}
+{-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Partial FormatError Int64 #-}
+getOct :: (Integral a, Bits a) => Int -> Int -> BS.ByteString -> Partial FormatError a
+getOct off len = parseOct
+               . BS.Char8.takeWhile (\c -> c /= '\NUL' && c /= ' ')
+               . BS.Char8.dropWhile (== ' ')
+               . getBytes off len
+  where
+    parseOct s | BS.null s = return 0
+    -- As a star extension, octal fields can hold a base-256 value if the high
+    -- bit of the initial character is set. The initial character can be:
+    --   0x80 ==> trailing characters hold a positive base-256 value
+    --   0xFF ==> trailing characters hold a negative base-256 value
+    --
+    -- In both cases, there won't be a trailing NUL/space.
+    --
+    -- GNU tar seems to contain a half-implementation of code that deals with
+    -- extra bits in the first character, but I don't think it works and the
+    -- docs I can find on star seem to suggest that these will always be 0,
+    -- which is what I will assume.
+    parseOct s | BS.head s == 128 = return (readBytes (BS.tail s))
+               | BS.head s == 255 = return (negate (readBytes (BS.tail s)))
+    parseOct s  = case readOct s of
+      Just x  -> return x
+      Nothing -> Error HeaderBadNumericEncoding
+
+    readBytes :: (Integral a, Bits a) => BS.ByteString -> a
+    readBytes = BS.foldl' (\acc x -> acc `shiftL` 8 + fromIntegral x) 0
+
+getBytes :: Int -> Int -> BS.ByteString -> BS.ByteString
+getBytes off len = BS.take len . BS.drop off
+
+getByte :: Int -> BS.ByteString -> Char
+getByte off bs = BS.Char8.index bs off
+
+getChars :: Int -> Int -> BS.ByteString -> BS.ByteString
+getChars off len = getBytes off len
+
+getString :: Int -> Int -> BS.ByteString -> BS.ByteString
+getString off len = BS.copy . BS.Char8.takeWhile (/='\0') . getBytes off len
+
+-- These days we'd just use Either, but in older versions of base there was no
+-- Monad instance for Either, it was in mtl with an anoying Error constraint.
+--
+data Partial e a = Error e | Ok a
+
+partial :: Partial e a -> Either e a
+partial (Error msg) = Left msg
+partial (Ok x)      = Right x
+
+instance Functor (Partial e) where
+    fmap = liftM
+
+instance Applicative (Partial e) where
+    pure  = Ok
+    (<*>) = ap
+
+instance Monad (Partial e) where
+    return        = pure
+    Error m >>= _ = Error m
+    Ok    x >>= k = k x
+    fail          = error "fail @(Partial e)"
+
+{-# SPECIALISE readOct :: BS.ByteString -> Maybe Int   #-}
+{-# SPECIALISE readOct :: BS.ByteString -> Maybe Int64 #-}
+readOct :: Integral n => BS.ByteString -> Maybe n
+readOct bs0 = case go 0 0 bs0 of
+                -1 -> Nothing
+                n  -> Just n
+  where
+    go :: Integral n => Int -> n -> BS.ByteString -> n
+    go !i !n !bs
+      | BS.null bs = if i == 0 then -1 else n
+      | otherwise  =
+          case BS.unsafeHead bs of
+            w | w >= 0x30
+             && w <= 0x39 -> go (i+1)
+                                (n * 8 + (fromIntegral w - 0x30))
+                                (BS.unsafeTail bs)
+              | otherwise -> -1
diff --git a/Codec/Archive/Tar/Types.hs b/Codec/Archive/Tar/Types.hs
new file mode 100644
index 0000000..0689be0
--- /dev/null
+++ b/Codec/Archive/Tar/Types.hs
@@ -0,0 +1,697 @@
+{-# LANGUAGE CPP, GeneralizedNewtypeDeriving, BangPatterns #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Codec.Archive.Tar.Types
+-- Copyright   :  (c) 2007 Bjorn Bringert,
+--                    2008 Andrea Vezzosi,
+--                    2008-2009 Duncan Coutts
+--                    2011 Max Bolingbroke
+-- License     :  BSD3
+--
+-- Maintainer  :  duncan@community.haskell.org
+-- Portability :  portable
+--
+-- Types to represent the content of @.tar@ archives.
+--
+-----------------------------------------------------------------------------
+module Codec.Archive.Tar.Types (
+
+  Entry(..),
+  entryPath,
+  EntryContent(..),
+  FileSize,
+  Permissions,
+  Ownership(..),
+  EpochTime,
+  TypeCode,
+  DevMajor,
+  DevMinor,
+  Format(..),
+
+  simpleEntry,
+  fileEntry,
+  directoryEntry,
+
+  ordinaryFilePermissions,
+  executableFilePermissions,
+  directoryPermissions,
+
+  TarPath(..),
+  toTarPath,
+  fromTarPath,
+  fromTarPathToPosixPath,
+  fromTarPathToWindowsPath,
+
+  LinkTarget(..),
+  toLinkTarget,
+  fromLinkTarget,
+  fromLinkTargetToPosixPath,
+  fromLinkTargetToWindowsPath,
+
+  Entries(..),
+  mapEntries,
+  mapEntriesNoFail,
+  foldEntries,
+  foldlEntries,
+  unfoldEntries,
+
+#ifdef TESTS
+  limitToV7FormatCompat
+#endif
+  ) where
+
+import Data.Int      (Int64)
+import Data.Monoid   (Monoid(..))
+import qualified Data.ByteString       as BS
+import qualified Data.ByteString.Char8 as BS.Char8
+import qualified Data.ByteString.Lazy  as LBS
+import Control.DeepSeq
+
+import qualified System.FilePath as FilePath.Native
+         ( joinPath, splitDirectories, addTrailingPathSeparator )
+import qualified System.FilePath.Posix as FilePath.Posix
+         ( joinPath, splitPath, splitDirectories, hasTrailingPathSeparator
+         , addTrailingPathSeparator )
+import qualified System.FilePath.Windows as FilePath.Windows
+         ( joinPath, addTrailingPathSeparator )
+import System.Posix.Types
+         ( FileMode )
+
+#ifdef TESTS
+import Test.QuickCheck
+import Control.Applicative ((<$>), pure, (<*>))
+#endif
+
+
+type FileSize  = Int64
+-- | The number of seconds since the UNIX epoch
+type EpochTime = Int64
+type DevMajor  = Int
+type DevMinor  = Int
+type TypeCode  = Char
+type Permissions = FileMode
+
+-- | Tar archive entry.
+--
+data Entry = Entry {
+
+    -- | The path of the file or directory within the archive. This is in a
+    -- tar-specific form. Use 'entryPath' to get a native 'FilePath'.
+    entryTarPath :: {-# UNPACK #-} !TarPath,
+
+    -- | The real content of the entry. For 'NormalFile' this includes the
+    -- file data. An entry usually contains a 'NormalFile' or a 'Directory'.
+    entryContent :: !EntryContent,
+
+    -- | File permissions (Unix style file mode).
+    entryPermissions :: {-# UNPACK #-} !Permissions,
+
+    -- | The user and group to which this file belongs.
+    entryOwnership :: {-# UNPACK #-} !Ownership,
+
+    -- | The time the file was last modified.
+    entryTime :: {-# UNPACK #-} !EpochTime,
+
+    -- | The tar format the archive is using.
+    entryFormat :: !Format
+  }
+  deriving (Eq, Show)
+
+-- | Native 'FilePath' of the file or directory within the archive.
+--
+entryPath :: Entry -> FilePath
+entryPath = fromTarPath . entryTarPath
+
+-- | The content of a tar archive entry, which depends on the type of entry.
+--
+-- Portable archives should contain only 'NormalFile' and 'Directory'.
+--
+data EntryContent = NormalFile      LBS.ByteString {-# UNPACK #-} !FileSize
+                  | Directory
+                  | SymbolicLink    !LinkTarget
+                  | HardLink        !LinkTarget
+                  | CharacterDevice {-# UNPACK #-} !DevMajor
+                                    {-# UNPACK #-} !DevMinor
+                  | BlockDevice     {-# UNPACK #-} !DevMajor
+                                    {-# UNPACK #-} !DevMinor
+                  | NamedPipe
+                  | OtherEntryType  {-# UNPACK #-} !TypeCode LBS.ByteString
+                                    {-# UNPACK #-} !FileSize
+    deriving (Eq, Ord, Show)
+
+data Ownership = Ownership {
+    -- | The owner user name. Should be set to @\"\"@ if unknown.
+    ownerName :: String,
+
+    -- | The owner group name. Should be set to @\"\"@ if unknown.
+    groupName :: String,
+
+    -- | Numeric owner user id. Should be set to @0@ if unknown.
+    ownerId :: {-# UNPACK #-} !Int,
+
+    -- | Numeric owner group id. Should be set to @0@ if unknown.
+    groupId :: {-# UNPACK #-} !Int
+  }
+    deriving (Eq, Ord, Show)
+
+-- | There have been a number of extensions to the tar file format over the
+-- years. They all share the basic entry fields and put more meta-data in
+-- different extended headers.
+--
+data Format =
+
+     -- | This is the classic Unix V7 tar format. It does not support owner and
+     -- group names, just numeric Ids. It also does not support device numbers.
+     V7Format
+
+     -- | The \"USTAR\" format is an extension of the classic V7 format. It was
+     -- later standardised by POSIX. It has some restrictions but is the most
+     -- portable format.
+     --
+   | UstarFormat
+
+     -- | The GNU tar implementation also extends the classic V7 format, though
+     -- in a slightly different way from the USTAR format. In general for new
+     -- archives the standard USTAR/POSIX should be used.
+     --
+   | GnuFormat
+  deriving (Eq, Ord, Show)
+
+instance NFData Entry where
+  rnf (Entry _ c _ _ _ _) = rnf c
+
+instance NFData EntryContent where
+  rnf x = case x of
+      NormalFile       c _  -> rnflbs c
+      OtherEntryType _ c _  -> rnflbs c
+      _                     -> seq x ()
+    where
+#if MIN_VERSION_bytestring(0,10,0)
+      rnflbs = rnf
+#else
+      rnflbs = foldr (\ !_bs r -> r) () . LBS.toChunks
+#endif
+
+instance NFData Ownership where
+  rnf (Ownership o g _ _) = rnf o `seq` rnf g
+
+-- | @rw-r--r--@ for normal files
+ordinaryFilePermissions :: Permissions
+ordinaryFilePermissions   = 0o0644
+
+-- | @rwxr-xr-x@ for executable files
+executableFilePermissions :: Permissions
+executableFilePermissions = 0o0755
+
+-- | @rwxr-xr-x@ for directories
+directoryPermissions :: Permissions
+directoryPermissions  = 0o0755
+
+-- | An 'Entry' with all default values except for the file name and type. It
+-- uses the portable USTAR/POSIX format (see 'UstarHeader').
+--
+-- You can use this as a basis and override specific fields, eg:
+--
+-- > (emptyEntry name HardLink) { linkTarget = target }
+--
+simpleEntry :: TarPath -> EntryContent -> Entry
+simpleEntry tarpath content = Entry {
+    entryTarPath     = tarpath,
+    entryContent     = content,
+    entryPermissions = case content of
+                         Directory -> directoryPermissions
+                         _         -> ordinaryFilePermissions,
+    entryOwnership   = Ownership "" "" 0 0,
+    entryTime        = 0,
+    entryFormat      = UstarFormat
+  }
+
+-- | A tar 'Entry' for a file.
+--
+-- Entry  fields such as file permissions and ownership have default values.
+--
+-- You can use this as a basis and override specific fields. For example if you
+-- need an executable file you could use:
+--
+-- > (fileEntry name content) { fileMode = executableFileMode }
+--
+fileEntry :: TarPath -> LBS.ByteString -> Entry
+fileEntry name fileContent =
+  simpleEntry name (NormalFile fileContent (LBS.length fileContent))
+
+-- | A tar 'Entry' for a directory.
+--
+-- Entry fields such as file permissions and ownership have default values.
+--
+directoryEntry :: TarPath -> Entry
+directoryEntry name = simpleEntry name Directory
+
+--
+-- * Tar paths
+--
+
+-- | The classic tar format allowed just 100 characters for the file name. The
+-- USTAR format extended this with an extra 155 characters, however it uses a
+-- complex method of splitting the name between the two sections.
+--
+-- Instead of just putting any overflow into the extended area, it uses the
+-- extended area as a prefix. The aggravating insane bit however is that the
+-- prefix (if any) must only contain a directory prefix. That is the split
+-- between the two areas must be on a directory separator boundary. So there is
+-- no simple calculation to work out if a file name is too long. Instead we
+-- have to try to find a valid split that makes the name fit in the two areas.
+--
+-- The rationale presumably was to make it a bit more compatible with old tar
+-- programs that only understand the classic format. A classic tar would be
+-- able to extract the file name and possibly some dir prefix, but not the
+-- full dir prefix. So the files would end up in the wrong place, but that's
+-- probably better than ending up with the wrong names too.
+--
+-- So it's understandable but rather annoying.
+--
+-- * Tar paths use Posix format (ie @\'/\'@ directory separators), irrespective
+--   of the local path conventions.
+--
+-- * The directory separator between the prefix and name is /not/ stored.
+--
+data TarPath = TarPath {-# UNPACK #-} !BS.ByteString -- path name, 100 characters max.
+                       {-# UNPACK #-} !BS.ByteString -- path prefix, 155 characters max.
+  deriving (Eq, Ord)
+
+instance NFData TarPath where
+  rnf (TarPath _ _) = () -- fully strict by construction
+
+instance Show TarPath where
+  show = show . fromTarPath
+
+-- | Convert a 'TarPath' to a native 'FilePath'.
+--
+-- The native 'FilePath' will use the native directory separator but it is not
+-- otherwise checked for validity or sanity. In particular:
+--
+-- * The tar path may be invalid as a native path, eg the file name @\"nul\"@
+--   is not valid on Windows.
+--
+-- * The tar path may be an absolute path or may contain @\"..\"@ components.
+--   For security reasons this should not usually be allowed, but it is your
+--   responsibility to check for these conditions (eg using 'checkSecurity').
+--
+fromTarPath :: TarPath -> FilePath
+fromTarPath (TarPath namebs prefixbs) = adjustDirectory $
+  FilePath.Native.joinPath $ FilePath.Posix.splitDirectories prefix
+                          ++ FilePath.Posix.splitDirectories name
+  where
+    name   = BS.Char8.unpack namebs
+    prefix = BS.Char8.unpack prefixbs
+    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
+                    = FilePath.Native.addTrailingPathSeparator
+                    | otherwise = id
+
+-- | Convert a 'TarPath' to a Unix\/Posix 'FilePath'.
+--
+-- The difference compared to 'fromTarPath' is that it always returns a Unix
+-- style path irrespective of the current operating system.
+--
+-- This is useful to check how a 'TarPath' would be interpreted on a specific
+-- operating system, eg to perform portability checks.
+--
+fromTarPathToPosixPath :: TarPath -> FilePath
+fromTarPathToPosixPath (TarPath namebs prefixbs) = adjustDirectory $
+  FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories prefix
+                         ++ FilePath.Posix.splitDirectories name
+  where
+    name   = BS.Char8.unpack namebs
+    prefix = BS.Char8.unpack prefixbs
+    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
+                    = FilePath.Posix.addTrailingPathSeparator
+                    | otherwise = id
+
+-- | Convert a 'TarPath' to a Windows 'FilePath'.
+--
+-- The only difference compared to 'fromTarPath' is that it always returns a
+-- Windows style path irrespective of the current operating system.
+--
+-- This is useful to check how a 'TarPath' would be interpreted on a specific
+-- operating system, eg to perform portability checks.
+--
+fromTarPathToWindowsPath :: TarPath -> FilePath
+fromTarPathToWindowsPath (TarPath namebs prefixbs) = adjustDirectory $
+  FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories prefix
+                           ++ FilePath.Posix.splitDirectories name
+  where
+    name   = BS.Char8.unpack namebs
+    prefix = BS.Char8.unpack prefixbs
+    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
+                    = FilePath.Windows.addTrailingPathSeparator
+                    | otherwise = id
+
+-- | Convert a native 'FilePath' to a 'TarPath'.
+--
+-- The conversion may fail if the 'FilePath' is too long. See 'TarPath' for a
+-- description of the problem with splitting long 'FilePath's.
+--
+toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for
+                  -- directories a 'TarPath' must always use a trailing @\/@.
+          -> FilePath -> Either String TarPath
+toTarPath isDir = splitLongPath
+                . addTrailingSep
+                . FilePath.Posix.joinPath
+                . FilePath.Native.splitDirectories
+  where
+    addTrailingSep | isDir     = FilePath.Posix.addTrailingPathSeparator
+                   | otherwise = id
+
+-- | Take a sanitised path, split on directory separators and try to pack it
+-- into the 155 + 100 tar file name format.
+--
+-- The strategy is this: take the name-directory components in reverse order
+-- and try to fit as many components into the 100 long name area as possible.
+-- If all the remaining components fit in the 155 name area then we win.
+--
+splitLongPath :: FilePath -> Either String TarPath
+splitLongPath path =
+  case packName nameMax (reverse (FilePath.Posix.splitPath path)) of
+    Left err                 -> Left err
+    Right (name, [])         -> Right $! TarPath (BS.Char8.pack name)
+                                                  BS.empty
+    Right (name, first:rest) -> case packName prefixMax remainder of
+      Left err               -> Left err
+      Right (_     , (_:_))  -> Left "File name too long (cannot split)"
+      Right (prefix, [])     -> Right $! TarPath (BS.Char8.pack name)
+                                                 (BS.Char8.pack prefix)
+      where
+        -- drop the '/' between the name and prefix:
+        remainder = init first : rest
+
+  where
+    nameMax, prefixMax :: Int
+    nameMax   = 100
+    prefixMax = 155
+
+    packName _      []     = Left "File name empty"
+    packName maxLen (c:cs)
+      | n > maxLen         = Left "File name too long"
+      | otherwise          = Right (packName' maxLen n [c] cs)
+      where n = length c
+
+    packName' maxLen n ok (c:cs)
+      | n' <= maxLen             = packName' maxLen n' (c:ok) cs
+                                     where n' = n + length c
+    packName' _      _ ok    cs  = (FilePath.Posix.joinPath ok, cs)
+
+-- | The tar format allows just 100 ASCII characters for the 'SymbolicLink' and
+-- 'HardLink' entry types.
+--
+newtype LinkTarget = LinkTarget BS.ByteString
+  deriving (Eq, Ord, Show)
+
+instance NFData LinkTarget where
+#if MIN_VERSION_bytestring(0,10,0)
+    rnf (LinkTarget bs) = rnf bs
+#else
+    rnf (LinkTarget !_bs) = ()
+#endif
+
+-- | Convert a native 'FilePath' to a tar 'LinkTarget'. This may fail if the
+-- string is longer than 100 characters or if it contains non-portable
+-- characters.
+--
+toLinkTarget   :: FilePath -> Maybe LinkTarget
+toLinkTarget path | length path <= 100 = Just $! LinkTarget (BS.Char8.pack path)
+                  | otherwise          = Nothing
+
+-- | Convert a tar 'LinkTarget' to a native 'FilePath'.
+--
+fromLinkTarget :: LinkTarget -> FilePath
+fromLinkTarget (LinkTarget pathbs) = adjustDirectory $
+  FilePath.Native.joinPath $ FilePath.Posix.splitDirectories path
+  where
+    path = BS.Char8.unpack pathbs
+    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
+                    = FilePath.Native.addTrailingPathSeparator
+                    | otherwise = id
+
+-- | Convert a tar 'LinkTarget' to a Unix/Posix 'FilePath'.
+--
+fromLinkTargetToPosixPath :: LinkTarget -> FilePath
+fromLinkTargetToPosixPath (LinkTarget pathbs) = adjustDirectory $
+  FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories path
+  where
+    path = BS.Char8.unpack pathbs
+    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
+                    = FilePath.Native.addTrailingPathSeparator
+                    | otherwise = id
+
+-- | Convert a tar 'LinkTarget' to a Windows 'FilePath'.
+--
+fromLinkTargetToWindowsPath :: LinkTarget -> FilePath
+fromLinkTargetToWindowsPath (LinkTarget pathbs) = adjustDirectory $
+  FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories path
+  where
+    path = BS.Char8.unpack pathbs
+    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
+                    = FilePath.Windows.addTrailingPathSeparator
+                    | otherwise = id
+
+--
+-- * Entries type
+--
+
+-- | A tar archive is a sequence of entries.
+--
+-- The point of this type as opposed to just using a list is that it makes the
+-- failure case explicit. We need this because the sequence of entries we get
+-- from reading a tarball can include errors.
+--
+-- It is a concrete data type so you can manipulate it directly but it is often
+-- clearer to use the provided functions for mapping, folding and unfolding.
+--
+-- Converting from a list can be done with just @foldr Next Done@. Converting
+-- back into a list can be done with 'foldEntries' however in that case you
+-- must be prepared to handle the 'Fail' case inherent in the 'Entries' type.
+--
+-- The 'Monoid' instance lets you concatenate archives or append entries to an
+-- archive.
+--
+data Entries e = Next Entry (Entries e)
+               | Done
+               | Fail e
+  deriving (Eq, Show)
+
+infixr 5 `Next`
+
+-- | This is like the standard 'unfoldr' function on lists, but for 'Entries'.
+-- It includes failure as an extra possibility that the stepper function may
+-- return.
+--
+-- It can be used to generate 'Entries' from some other type. For example it is
+-- used internally to lazily unfold entries from a 'LBS.ByteString'.
+--
+unfoldEntries :: (a -> Either e (Maybe (Entry, a))) -> a -> Entries e
+unfoldEntries f = unfold
+  where
+    unfold x = case f x of
+      Left err             -> Fail err
+      Right Nothing        -> Done
+      Right (Just (e, x')) -> Next e (unfold x')
+
+-- | This is like the standard 'foldr' function on lists, but for 'Entries'.
+-- Compared to 'foldr' it takes an extra function to account for the
+-- possibility of failure.
+--
+-- This is used to consume a sequence of entries. For example it could be used
+-- to scan a tarball for problems or to collect an index of the contents.
+--
+foldEntries :: (Entry -> a -> a) -> a -> (e -> a) -> Entries e -> a
+foldEntries next done fail' = fold
+  where
+    fold (Next e es) = next e (fold es)
+    fold Done        = done
+    fold (Fail err)  = fail' err
+
+-- | A 'foldl'-like function on Entries. It either returns the final
+-- accumulator result, or the failure along with the intermediate accumulator
+-- value.
+--
+foldlEntries :: (a -> Entry -> a) -> a -> Entries e -> Either (e, a) a
+foldlEntries f z = go z
+  where
+    go !acc (Next e es) = go (f acc e) es
+    go !acc  Done       = Right acc
+    go !acc (Fail err)  = Left (err, acc)
+
+-- | This is like the standard 'map' function on lists, but for 'Entries'. It
+-- includes failure as a extra possible outcome of the mapping function.
+--
+-- If your mapping function cannot fail it may be more convenient to use
+-- 'mapEntriesNoFail'
+mapEntries :: (Entry -> Either e' Entry) -> Entries e -> Entries (Either e e')
+mapEntries f =
+  foldEntries (\entry rest -> either (Fail . Right) (flip Next rest) (f entry)) Done (Fail . Left)
+
+-- | Like 'mapEntries' but the mapping function itself cannot fail.
+--
+mapEntriesNoFail :: (Entry -> Entry) -> Entries e -> Entries e
+mapEntriesNoFail f =
+  foldEntries (\entry -> Next (f entry)) Done Fail
+
+instance Monoid (Entries e) where
+  mempty      = Done
+  mappend a b = foldEntries Next b Fail a
+
+instance Functor Entries where
+  fmap f = foldEntries Next Done (Fail . f)
+
+instance NFData e => NFData (Entries e) where
+  rnf (Next e es) = rnf e `seq` rnf es
+  rnf  Done       = ()
+  rnf (Fail e)    = rnf e
+
+
+-------------------------
+-- QuickCheck instances
+--
+
+#ifdef TESTS
+
+instance Arbitrary Entry where
+  arbitrary = Entry <$> arbitrary <*> arbitrary <*> arbitraryPermissions
+                    <*> arbitrary <*> arbitraryEpochTime <*> arbitrary
+    where
+      arbitraryPermissions :: Gen Permissions
+      arbitraryPermissions = fromIntegral <$> (arbitraryOctal 7 :: Gen Int)
+
+      arbitraryEpochTime :: Gen EpochTime
+      arbitraryEpochTime = fromIntegral <$> (arbitraryOctal 11 :: Gen Int)
+
+  shrink (Entry path content perms author time format) =
+      [ Entry path' content' perms author' time' format 
+      | (path', content', author', time') <-
+         shrink (path, content, author, time) ]
+   ++ [ Entry path content perms' author time format
+      | perms' <- shrinkIntegral perms ]
+
+instance Arbitrary TarPath where
+  arbitrary = either error id
+            . toTarPath False
+            . FilePath.Posix.joinPath
+          <$> listOf1ToN (255 `div` 5)
+                         (elements (map (replicate 4) "abcd"))
+
+  shrink = map (either error id . toTarPath False)
+         . map FilePath.Posix.joinPath
+         . filter (not . null)
+         . shrinkList shrinkNothing
+         . FilePath.Posix.splitPath
+         . fromTarPathToPosixPath
+
+instance Arbitrary LinkTarget where
+  arbitrary = maybe (error "link target too large") id
+            . toLinkTarget
+            . FilePath.Native.joinPath
+          <$> listOf1ToN (100 `div` 5)
+                         (elements (map (replicate 4) "abcd"))
+
+  shrink = map (maybe (error "link target too large") id . toLinkTarget)
+         . map FilePath.Posix.joinPath
+         . filter (not . null)
+         . shrinkList shrinkNothing
+         . FilePath.Posix.splitPath
+         . fromLinkTargetToPosixPath
+
+
+listOf1ToN :: Int -> Gen a -> Gen [a]
+listOf1ToN n g = sized $ \sz -> do
+    n <- choose (1, min n (max 1 sz))
+    vectorOf n g
+
+listOf0ToN :: Int -> Gen a -> Gen [a]
+listOf0ToN n g = sized $ \sz -> do
+    n <- choose (0, min n sz)
+    vectorOf n g
+
+instance Arbitrary EntryContent where
+  arbitrary =
+    frequency
+      [ (16, do bs <- arbitrary;
+                return (NormalFile bs (LBS.length bs)))
+      , (2, pure Directory)
+      , (1, SymbolicLink    <$> arbitrary)
+      , (1, HardLink        <$> arbitrary)
+      , (1, CharacterDevice <$> arbitraryOctal 7 <*> arbitraryOctal 7)
+      , (1, BlockDevice     <$> arbitraryOctal 7 <*> arbitraryOctal 7)
+      , (1, pure NamedPipe)
+      , (1, do c  <- elements (['A'..'Z']++['a'..'z'])
+               bs <- arbitrary;
+               return (OtherEntryType c bs (LBS.length bs)))
+      ]
+
+  shrink (NormalFile bs _)   = [ NormalFile bs' (LBS.length bs') 
+                               | bs' <- shrink bs ]
+  shrink  Directory          = []
+  shrink (SymbolicLink link) = [ SymbolicLink link' | link' <- shrink link ]
+  shrink (HardLink     link) = [ HardLink     link' | link' <- shrink link ]
+  shrink (CharacterDevice ma mi) = [ CharacterDevice ma' mi'
+                                   | (ma', mi') <- shrink (ma, mi) ]
+  shrink (BlockDevice     ma mi) = [ BlockDevice ma' mi'
+                                   | (ma', mi') <- shrink (ma, mi) ]
+  shrink  NamedPipe              = []
+  shrink (OtherEntryType c bs _) = [ OtherEntryType c bs' (LBS.length bs') 
+                                   | bs' <- shrink bs ]
+
+instance Arbitrary LBS.ByteString where
+  arbitrary = fmap LBS.pack arbitrary
+  shrink    = map LBS.pack . shrink . LBS.unpack
+
+instance Arbitrary BS.ByteString where
+  arbitrary = fmap BS.pack arbitrary
+  shrink    = map BS.pack . shrink . BS.unpack
+
+instance Arbitrary Ownership where
+  arbitrary = Ownership <$> name <*> name
+                        <*> idno <*> idno
+    where
+      name = listOf0ToN 32 (arbitrary `suchThat` (/= '\0'))
+      idno = arbitraryOctal 7
+
+  shrink (Ownership oname gname oid gid) =
+    [ Ownership oname' gname' oid' gid'
+    | (oname', gname', oid', gid') <- shrink (oname, gname, oid, gid) ]
+
+instance Arbitrary Format where
+  arbitrary = elements [V7Format, UstarFormat, GnuFormat]
+
+
+--arbitraryOctal :: (Integral n, Random n) => Int -> Gen n
+arbitraryOctal n =
+    oneof [ pure 0
+          , choose (0, upperBound)
+          , pure upperBound
+          ]
+  where
+    upperBound = 8^n-1
+
+-- For QC tests it's useful to have a way to limit the info to that which can
+-- be expressed in the old V7 format
+limitToV7FormatCompat :: Entry -> Entry
+limitToV7FormatCompat entry@Entry { entryFormat = V7Format } =
+    entry {
+      entryContent = case entryContent entry of
+        CharacterDevice _ _ -> OtherEntryType  '3' LBS.empty 0
+        BlockDevice     _ _ -> OtherEntryType  '4' LBS.empty 0
+        Directory           -> OtherEntryType  '5' LBS.empty 0
+        NamedPipe           -> OtherEntryType  '6' LBS.empty 0
+        other               -> other,
+
+      entryOwnership = (entryOwnership entry) {
+        groupName = "",
+        ownerName = ""
+      },
+
+      entryTarPath = let TarPath name _prefix = entryTarPath entry
+                      in TarPath name BS.empty
+    }
+limitToV7FormatCompat entry = entry
+
+#endif
+
diff --git a/Codec/Archive/Tar/Unpack.hs b/Codec/Archive/Tar/Unpack.hs
new file mode 100644
index 0000000..38bda1b
--- /dev/null
+++ b/Codec/Archive/Tar/Unpack.hs
@@ -0,0 +1,124 @@
+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Codec.Archive.Tar
+-- Copyright   :  (c) 2007 Bjorn Bringert,
+--                    2008 Andrea Vezzosi,
+--                    2008-2009, 2012, 2016 Duncan Coutts
+-- License     :  BSD3
+--
+-- Maintainer  :  duncan@community.haskell.org
+-- Portability :  portable
+--
+-----------------------------------------------------------------------------
+module Codec.Archive.Tar.Unpack (
+  unpack,
+  ) where
+
+import Codec.Archive.Tar.Types
+import Codec.Archive.Tar.Check
+
+import qualified Data.ByteString.Lazy as BS
+import System.FilePath
+         ( (</>) )
+import qualified System.FilePath as FilePath.Native
+         ( takeDirectory )
+import System.Directory
+         ( createDirectoryIfMissing, copyFile )
+import Control.Exception
+         ( Exception, throwIO )
+#if MIN_VERSION_directory(1,2,3)
+import System.Directory
+         ( setModificationTime )
+import Data.Time.Clock.POSIX
+         ( posixSecondsToUTCTime )
+import Control.Exception as Exception
+         ( catch )
+import System.IO.Error
+         ( isPermissionError )
+#endif
+
+
+-- | Create local files and directories based on the entries of a tar archive.
+--
+-- This is a portable implementation of unpacking suitable for portable
+-- archives. It handles 'NormalFile' and 'Directory' entries and has simulated
+-- support for 'SymbolicLink' and 'HardLink' entries. Links are implemented by
+-- copying the target file. This therefore works on Windows as well as Unix.
+-- All other entry types are ignored, that is they are not unpacked and no
+-- exception is raised.
+--
+-- If the 'Entries' ends in an error then it is raised an an exception. Any
+-- files or directories that have been unpacked before the error was
+-- encountered will not be deleted. For this reason you may want to unpack
+-- into an empty directory so that you can easily clean up if unpacking fails
+-- part-way.
+--
+-- On its own, this function only checks for security (using 'checkSecurity').
+-- You can do other checks by applying checking functions to the 'Entries' that
+-- you pass to this function. For example:
+--
+-- > unpack dir (checkTarbomb expectedDir entries)
+--
+-- If you care about the priority of the reported errors then you may want to
+-- use 'checkSecurity' before 'checkTarbomb' or other checks.
+--
+unpack :: Exception e => FilePath -> Entries e -> IO ()
+unpack baseDir entries = unpackEntries [] (checkSecurity entries)
+                     >>= emulateLinks
+
+  where
+    -- We're relying here on 'checkSecurity' to make sure we're not scribbling
+    -- files all over the place.
+
+    unpackEntries _     (Fail err)      = either throwIO throwIO err
+    unpackEntries links Done            = return links
+    unpackEntries links (Next entry es) = case entryContent entry of
+      NormalFile file _ -> extractFile path file mtime
+                        >> unpackEntries links es
+      Directory         -> extractDir path mtime
+                        >> unpackEntries links es
+      HardLink     link -> (unpackEntries $! saveLink path link links) es
+      SymbolicLink link -> (unpackEntries $! saveLink path link links) es
+      _                 -> unpackEntries links es --ignore other file types
+      where
+        path  = entryPath entry
+        mtime = entryTime entry
+
+    extractFile path content mtime = do
+      -- Note that tar archives do not make sure each directory is created
+      -- before files they contain, indeed we may have to create several
+      -- levels of directory.
+      createDirectoryIfMissing True absDir
+      BS.writeFile absPath content
+      setModTime absPath mtime
+      where
+        absDir  = baseDir </> FilePath.Native.takeDirectory path
+        absPath = baseDir </> path
+
+    extractDir path mtime = do
+      createDirectoryIfMissing True absPath
+      setModTime absPath mtime
+      where
+        absPath = baseDir </> path
+
+    saveLink path link links = seq (length path)
+                             $ seq (length link')
+                             $ (path, link'):links
+      where link' = fromLinkTarget link
+
+    emulateLinks = mapM_ $ \(relPath, relLinkTarget) ->
+      let absPath   = baseDir </> relPath
+          absTarget = FilePath.Native.takeDirectory absPath </> relLinkTarget
+       in copyFile absTarget absPath
+
+setModTime :: FilePath -> EpochTime -> IO ()
+#if MIN_VERSION_directory(1,2,3)
+-- functionality only supported as of directory-1.2.3.x
+setModTime path t =
+    setModificationTime path (posixSecondsToUTCTime (fromIntegral t))
+      `Exception.catch` \e ->
+        if isPermissionError e then return () else throwIO e
+#else
+setModTime _path _t = return ()
+#endif
diff --git a/Codec/Archive/Tar/Write.hs b/Codec/Archive/Tar/Write.hs
new file mode 100644
index 0000000..e522539
--- /dev/null
+++ b/Codec/Archive/Tar/Write.hs
@@ -0,0 +1,143 @@
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Codec.Archive.Tar.Write
+-- Copyright   :  (c) 2007 Bjorn Bringert,
+--                    2008 Andrea Vezzosi,
+--                    2008-2009 Duncan Coutts
+-- License     :  BSD3
+--
+-- Maintainer  :  duncan@community.haskell.org
+-- Portability :  portable
+--
+-----------------------------------------------------------------------------
+module Codec.Archive.Tar.Write (write) where
+
+import Codec.Archive.Tar.Types
+
+import Data.Char     (ord)
+import Data.List     (foldl')
+import Data.Monoid   (mempty)
+import Numeric       (showOct)
+
+import qualified Data.ByteString             as BS
+import qualified Data.ByteString.Char8       as BS.Char8
+import qualified Data.ByteString.Lazy        as LBS
+import qualified Data.ByteString.Lazy.Char8  as LBS.Char8
+
+
+-- | Create the external representation of a tar archive by serialising a list
+-- of tar entries.
+--
+-- * The conversion is done lazily.
+--
+write :: [Entry] -> LBS.ByteString
+write es = LBS.concat $ map putEntry es ++ [LBS.replicate (512*2) 0]
+
+putEntry :: Entry -> LBS.ByteString
+putEntry entry = case entryContent entry of
+  NormalFile       content size -> LBS.concat [ header, content, padding size ]
+  OtherEntryType _ content size -> LBS.concat [ header, content, padding size ]
+  _                             -> header
+  where
+    header       = putHeader entry
+    padding size = LBS.replicate paddingSize 0
+      where paddingSize = fromIntegral (negate size `mod` 512)
+
+putHeader :: Entry -> LBS.ByteString
+putHeader entry =
+     LBS.Char8.pack
+   $ take 148 block
+  ++ putOct 7 checksum
+  ++ ' ' : drop 156 block
+--  ++ putOct 8 checksum
+--  ++ drop 156 block
+  where
+    block    = putHeaderNoChkSum entry
+    checksum = foldl' (\x y -> x + ord y) 0 block
+
+putHeaderNoChkSum :: Entry -> String
+putHeaderNoChkSum Entry {
+    entryTarPath     = TarPath name prefix,
+    entryContent     = content,
+    entryPermissions = permissions,
+    entryOwnership   = ownership,
+    entryTime        = modTime,
+    entryFormat      = format
+  } =
+
+  concat
+    [ putBString 100 $ name
+    , putOct       8 $ permissions
+    , putOct       8 $ ownerId ownership
+    , putOct       8 $ groupId ownership
+    , putOct      12 $ contentSize
+    , putOct      12 $ modTime
+    , fill         8 $ ' ' -- dummy checksum
+    , putChar8       $ typeCode
+    , putBString 100 $ linkTarget
+    ] ++
+  case format of
+  V7Format    ->
+      fill 255 '\NUL'
+  UstarFormat -> concat
+    [ putBString   8 $ ustarMagic
+    , putString   32 $ ownerName ownership
+    , putString   32 $ groupName ownership
+    , putOct       8 $ deviceMajor
+    , putOct       8 $ deviceMinor
+    , putBString 155 $ prefix
+    , fill        12 $ '\NUL'
+    ]
+  GnuFormat -> concat
+    [ putBString   8 $ gnuMagic
+    , putString   32 $ ownerName ownership
+    , putString   32 $ groupName ownership
+    , putGnuDev    8 $ deviceMajor
+    , putGnuDev    8 $ deviceMinor
+    , putBString 155 $ prefix
+    , fill        12 $ '\NUL'
+    ]
+  where
+    (typeCode, contentSize, linkTarget,
+     deviceMajor, deviceMinor) = case content of
+       NormalFile      _ size            -> ('0' , size, mempty, 0,     0)
+       Directory                         -> ('5' , 0,    mempty, 0,     0)
+       SymbolicLink    (LinkTarget link) -> ('2' , 0,    link,   0,     0)
+       HardLink        (LinkTarget link) -> ('1' , 0,    link,   0,     0)
+       CharacterDevice major minor       -> ('3' , 0,    mempty, major, minor)
+       BlockDevice     major minor       -> ('4' , 0,    mempty, major, minor)
+       NamedPipe                         -> ('6' , 0,    mempty, 0,     0)
+       OtherEntryType  code _ size       -> (code, size, mempty, 0,     0)
+
+    putGnuDev w n = case content of
+      CharacterDevice _ _ -> putOct w n
+      BlockDevice     _ _ -> putOct w n
+      _                   -> replicate w '\NUL'
+
+ustarMagic, gnuMagic :: BS.ByteString
+ustarMagic = BS.Char8.pack "ustar\NUL00"
+gnuMagic   = BS.Char8.pack "ustar  \NUL"
+
+-- * TAR format primitive output
+
+type FieldWidth = Int
+
+putBString :: FieldWidth -> BS.ByteString -> String
+putBString n s = BS.Char8.unpack (BS.take n s) ++ fill (n - BS.length s) '\NUL'
+
+putString :: FieldWidth -> String -> String
+putString n s = take n s ++ fill (n - length s) '\NUL'
+
+--TODO: check integer widths, eg for large file sizes
+putOct :: (Integral a, Show a) => FieldWidth -> a -> String
+putOct n x =
+  let octStr = take (n-1) $ showOct x ""
+   in fill (n - length octStr - 1) '0'
+   ++ octStr
+   ++ putChar8 '\NUL'
+
+putChar8 :: Char -> String
+putChar8 c = [c]
+
+fill :: FieldWidth -> Char -> String
+fill n c = replicate n c
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..3a7b8df
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2007      Björn Bringert,
+              2008-2015 Duncan Coutts,
+              2011      Max Bolingbroke
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, 
+  this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright 
+  notice, this list of conditions and the following disclaimer in the 
+  documentation and/or other materials provided with the distribution.
+- Neither the names of the copyright owners nor the names of the 
+  contributors may be used to endorse or promote products derived 
+  from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Setup.lhs b/Setup.lhs
new file mode 100644
index 0000000..37d1ded
--- /dev/null
+++ b/Setup.lhs
@@ -0,0 +1,4 @@
+> import Distribution.Simple
+
+> main :: IO ()
+> main = defaultMain
diff --git a/bench/Main.hs b/bench/Main.hs
new file mode 100644
index 0000000..c136b64
--- /dev/null
+++ b/bench/Main.hs
@@ -0,0 +1,45 @@
+module Main where
+
+import qualified Codec.Archive.Tar       as Tar
+import qualified Codec.Archive.Tar.Index as TarIndex
+
+import qualified Data.ByteString.Lazy    as BS
+import Control.Exception
+
+import Criterion
+import Criterion.Main
+
+main = defaultMain benchmarks
+
+benchmarks :: [Benchmark]
+benchmarks =
+  [ env loadTarFile $ \tarfile ->
+      bench "read" (nf Tar.read tarfile)
+
+  , env loadTarEntriesList $ \entries ->
+      bench "write" (nf Tar.write entries)
+
+  , env loadTarEntries $ \entries ->
+      bench "index build" (nf TarIndex.build entries)
+
+  , env loadTarIndex $ \entries ->
+      bench "index rebuild" (nf (TarIndex.finalise . TarIndex.unfinalise) entries)
+  ]
+
+loadTarFile :: IO BS.ByteString
+loadTarFile =
+    BS.readFile "01-index.tar"
+
+loadTarEntries :: IO (Tar.Entries Tar.FormatError)
+loadTarEntries =
+    fmap Tar.read loadTarFile
+
+loadTarEntriesList :: IO [Tar.Entry]
+loadTarEntriesList =
+    fmap (Tar.foldEntries (:) [] throw) loadTarEntries
+
+loadTarIndex :: IO TarIndex.TarIndex
+loadTarIndex =
+    fmap (either throw id . TarIndex.build)
+         loadTarEntries
+
diff --git a/changelog.md b/changelog.md
new file mode 100644
index 0000000..320806d
--- /dev/null
+++ b/changelog.md
@@ -0,0 +1,73 @@
+0.5.0.3 Duncan Coutts <duncan@community.haskell.org> May 2016
+
+  * Fix tarbomb logic to ignore special PAX entries. Was breaking many
+    valid tarballs. https://github.com/haskell/cabal/issues/3390
+
+0.5.0.2 Duncan Coutts <duncan@community.haskell.org> April 2016
+
+  * Fix compatability when using ghc-7.4.x and directory >= 1.2.3
+
+0.5.0.1 Duncan Coutts <duncan@community.haskell.org> January 2016
+
+  * Fix compatability with directory-1.2.3+
+
+0.5.0.0 Duncan Coutts <duncan@community.haskell.org> January 2016
+
+  * Work with old version of bytestring (using bytestring-builder package).
+  * Builds with GHC 6.10 -- 8.0.
+  * Change type of Index.serialise to be simply strict bytestring.
+  * Preserve file timestamps on unpack (with directory-1.2.3+)
+
+0.4.5.0 Duncan Coutts <duncan@community.haskell.org> January 2016
+
+  * Revert accidental minor API change in 0.4.x series (the type of the
+    owner and group name strings). The 0.4.3.0 and 0.4.4.0 releases
+    contained the accidental API change.
+  * Add a handy foldlEntries function
+
+0.4.4.0 Duncan Coutts <duncan@community.haskell.org> January 2016
+
+  * Build and warning fixes for GHC 7.10 and 8.0
+  * New Index module function `toList` to get all index entries
+
+0.4.3.0 Duncan Coutts <duncan@community.haskell.org> January 2016
+
+  * New Index function `unfinalise` to extend existing index
+  * 9x  faster reading
+  * 9x  faster index construction
+  * 24x faster index extension
+  * More compact entry types, using ByteStrings
+  * More Eq and Show instances
+  * Greater QC test coverage
+  * Fix minor bug in reading non-standard v7 format entries
+
+0.4.2.2 Edsko de Vries <edsko@well-typed.com> October 2015
+
+  * Fix bug in Index
+
+0.4.2.1 Duncan Coutts <duncan@community.haskell.org> July 2015
+
+  * Fix tests for the Index modules (the code was right)
+
+0.4.2.0 Duncan Coutts <duncan@community.haskell.org> July 2015
+
+  * New Index module for random access to tar file contents
+  * New lower level tar file I/O actions
+  * New tarball file 'append' action
+
+0.4.1.0 Duncan Coutts <duncan@community.haskell.org> January 2015
+
+  * Build with GHC 7.10
+  * Switch from old-time to time package
+  * Added more instance for Entries type
+
+0.4.0.1 Duncan Coutts <duncan@community.haskell.org> October 2012
+
+  * fixes to work with directory 1.2
+  * More Eq/Ord instances
+
+0.4.0.0 Duncan Coutts <duncan@community.haskell.org> February 2012
+
+  * More explicit error types and error handling
+  * Support star base-256 number format
+  * Improved API documentation
diff --git a/tar.cabal b/tar.cabal
new file mode 100644
index 0000000..d227c8c
--- /dev/null
+++ b/tar.cabal
@@ -0,0 +1,128 @@
+name:            tar
+version:         0.5.0.3
+license:         BSD3
+license-file:    LICENSE
+author:          Duncan Coutts <duncan@community.haskell.org>
+                 Bjorn Bringert <bjorn@bringert.net>
+maintainer:      Duncan Coutts <duncan@community.haskell.org>
+bug-reports:     https://github.com/haskell/tar/issues
+copyright:       2007 Bjorn Bringert <bjorn@bringert.net>
+                 2008-2016 Duncan Coutts <duncan@community.haskell.org>
+category:        Codec
+synopsis:        Reading, writing and manipulating ".tar" archive files.
+description:     This library is for working with \"@.tar@\" archive files. It
+                 can read and write a range of common variations of archive
+                 format including V7, POSIX USTAR and GNU formats.
+                 .
+                 It provides support for packing and unpacking portable
+                 archives. This makes it suitable for distribution but not
+                 backup because details like file ownership and exact
+                 permissions are not preserved.
+                 .
+                 It also provides features for random access to archive
+                 content using an index.
+build-type:      Simple
+cabal-version:   >=1.8
+extra-source-files: changelog.md
+tested-with:     GHC==6.10.4, GHC==6.12.3, GHC==7.0.4, GHC==7.2.2, GHC==7.4.2,
+                 GHC==7.6.3,  GHC==7.8.4, GHC==7.10.2, GHC==8.1
+
+source-repository head
+  type: git
+  location: https://github.com/haskell/tar.git
+
+flag old-time
+  default: False
+
+flag old-bytestring
+  default: False
+
+library
+  build-depends: base == 4.*,
+                 filepath,
+                 directory,
+                 array,
+                 containers >= 0.2,
+                 deepseq >= 1.1 && < 1.5
+  if flag(old-time)
+    build-depends: directory < 1.2, old-time
+  else
+    build-depends: directory >= 1.2, time
+
+  if flag(old-bytestring)
+    build-depends: bytestring-builder, bytestring >= 0.9 && <0.10
+  else
+    build-depends: bytestring >= 0.10
+
+  exposed-modules:
+    Codec.Archive.Tar
+    Codec.Archive.Tar.Entry
+    Codec.Archive.Tar.Check
+    Codec.Archive.Tar.Index
+
+  other-modules:
+    Codec.Archive.Tar.Types
+    Codec.Archive.Tar.Read
+    Codec.Archive.Tar.Write
+    Codec.Archive.Tar.Pack
+    Codec.Archive.Tar.Unpack
+    Codec.Archive.Tar.Index.StringTable
+    Codec.Archive.Tar.Index.IntTrie
+
+  other-extensions:
+    CPP, BangPatterns,
+    DeriveDataTypeable, ScopedTypeVariables
+
+  ghc-options: -Wall -fno-warn-unused-imports
+
+test-suite properties
+  type:          exitcode-stdio-1.0
+  build-depends: base,
+                 filepath,
+                 array,
+                 containers,
+                 deepseq,
+                 bytestring-handle,
+                 QuickCheck == 2.*,
+                 tasty            >= 0.10 && <0.12,
+                 tasty-quickcheck == 0.8.*
+
+  if flag(old-time)
+    build-depends: directory < 1.2, old-time
+  else
+    build-depends: directory >= 1.2, time
+
+  if flag(old-bytestring)
+    build-depends: bytestring-builder, bytestring >= 0.9 && <0.10
+  else
+    build-depends: bytestring >= 0.10
+
+  hs-source-dirs: . test
+
+  main-is: test/Properties.hs
+  cpp-options: -DTESTS
+
+  other-modules:
+    Codec.Archive.Tar.Index
+    Codec.Archive.Tar.Index.StringTable
+    Codec.Archive.Tar.Index.IntTrie
+
+  other-extensions:
+    CPP, BangPatterns,
+    DeriveDataTypeable, ScopedTypeVariables
+
+  ghc-options: -fno-ignore-asserts
+
+benchmark bench
+  type:          exitcode-stdio-1.0
+  hs-source-dirs: . bench
+  main-is:       bench/Main.hs
+  build-depends: base,
+                 bytestring,
+                 filepath, directory,
+                 array,
+                 containers,
+                 deepseq,
+                 old-time, time,
+                 criterion >= 1.0
+
diff --git a/test/Properties.hs b/test/Properties.hs
new file mode 100644
index 0000000..7546a83
--- /dev/null
+++ b/test/Properties.hs
@@ -0,0 +1,54 @@
+module Main where
+
+import qualified Codec.Archive.Tar.Index as Index
+import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie
+import qualified Codec.Archive.Tar.Index.StringTable as StringTable
+import qualified Codec.Archive.Tar       as Tar
+
+import qualified Data.ByteString as BS
+
+import Test.Tasty
+import Test.Tasty.QuickCheck
+
+main :: IO ()
+main =
+  defaultMain $
+    testGroup "tar tests" [
+
+      testGroup "write/read" [
+        testProperty "ustar format" Tar.prop_write_read_ustar,
+        testProperty "gnu format"   Tar.prop_write_read_gnu,
+        testProperty "v7 format"    Tar.prop_write_read_v7
+      ]
+
+    , testGroup "string table" [
+        testProperty "construction" StringTable.prop_valid,
+        testProperty "sorted"       StringTable.prop_sorted,
+        testProperty "serialise"    StringTable.prop_serialise_deserialise,
+        testProperty "size"         StringTable.prop_serialiseSize,
+        testProperty "unfinalise"   StringTable.prop_finalise_unfinalise
+      ]
+
+    , testGroup "int trie" [
+        testProperty "unit 1"      IntTrie.test1,
+        testProperty "unit 2"      IntTrie.test2,
+        testProperty "unit 3"      IntTrie.test3,
+        testProperty "lookups"     IntTrie.prop_lookup_mono,
+        testProperty "completions" IntTrie.prop_completions_mono,
+        testProperty "toList"      IntTrie.prop_construct_toList,
+        testProperty "serialise"   IntTrie.prop_serialise_deserialise,
+        testProperty "size"        IntTrie.prop_serialiseSize,
+        testProperty "unfinalise"  IntTrie.prop_finalise_unfinalise
+      ]
+
+    , testGroup "index" [
+        testProperty "lookup"      Index.prop_lookup,
+        testProperty "valid"       Index.prop_valid,
+        testProperty "toList"      Index.prop_toList,
+        testProperty "serialise"   Index.prop_serialise_deserialise,
+        testProperty "size"        Index.prop_serialiseSize,
+        testProperty "matches tar" Index.prop_index_matches_tar,
+        testProperty "unfinalise"  Index.prop_finalise_unfinalise
+      ]
+    ]
+