Blob Blame History Raw
{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving, MagicHash,
    Rank2Types, UnboxedTuples #-}
#ifdef GENERICS
{-# LANGUAGE DeriveGeneric, ScopedTypeVariables #-}
#endif

-- | QuickCheck tests for the 'Data.Hashable' module.  We test
-- functions by comparing the C and Haskell implementations.

module Properties (properties) where

import Data.Hashable (Hashable, hash, hashByteArray, hashPtr,
         Hashed, hashed, unhashed, hashWithSalt)
import Data.Hashable.Lifted (hashWithSalt1)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Data.List (nub)
import Control.Monad (ap, liftM)
import System.IO.Unsafe (unsafePerformIO)
import Foreign.Marshal.Array (withArray)
import GHC.Base (ByteArray#, Int(..), newByteArray#, unsafeCoerce#,
                 writeWord8Array#)
import GHC.ST (ST(..), runST)
import GHC.Word (Word8(..))
import Test.QuickCheck hiding ((.&.))
import Test.Framework (Test, testGroup)
import Test.Framework.Providers.QuickCheck2 (testProperty)
#ifdef GENERICS
import GHC.Generics
#endif

#if MIN_VERSION_bytestring(0,10,4)
import qualified Data.ByteString.Short as BS
#endif

------------------------------------------------------------------------
-- * Properties

instance Arbitrary T.Text where
    arbitrary = T.pack `fmap` arbitrary

instance Arbitrary TL.Text where
    arbitrary = TL.pack `fmap` arbitrary

instance Arbitrary B.ByteString where
    arbitrary   = B.pack `fmap` arbitrary

instance Arbitrary BL.ByteString where
    arbitrary   = sized $ \n -> resize (round (sqrt (toEnum n :: Double)))
                  ((BL.fromChunks . map (B.pack . nonEmpty)) `fmap` arbitrary)
      where nonEmpty (NonEmpty a) = a

#if MIN_VERSION_bytestring(0,10,4)
instance Arbitrary BS.ShortByteString where
    arbitrary   = BS.pack `fmap` arbitrary
#endif

-- | Validate the implementation by comparing the C and Haskell
-- versions.
pHash :: [Word8] -> Bool
pHash xs = unsafePerformIO $ withArray xs $ \ p ->
    (hashByteArray (fromList xs) 0 len ==) `fmap` hashPtr p len
  where len = length xs

-- | Content equality implies hash equality.
pText :: T.Text -> T.Text -> Bool
pText a b = if (a == b) then (hash a == hash b) else True

-- | Content equality implies hash equality.
pTextLazy :: TL.Text -> TL.Text -> Bool
pTextLazy a b = if (a == b) then (hash a == hash b) else True

-- | A small positive integer.
newtype ChunkSize = ChunkSize { unCS :: Int }
    deriving (Eq, Ord, Num, Integral, Real, Enum)

instance Show ChunkSize where show = show . unCS

instance Arbitrary ChunkSize where
    arbitrary = (ChunkSize . (`mod` maxChunkSize)) `fmap`
                (arbitrary `suchThat` ((/=0) . (`mod` maxChunkSize)))
        where maxChunkSize = 16

-- | Ensure that the rechunk function causes a rechunked string to
-- still match its original form.
pTextRechunk :: T.Text -> NonEmptyList ChunkSize -> Bool
pTextRechunk t cs = TL.fromStrict t == rechunkText t cs

-- | Lazy strings must hash to the same value no matter how they are
-- chunked.
pTextLazyRechunked :: T.Text
                   -> NonEmptyList ChunkSize -> NonEmptyList ChunkSize -> Bool
pTextLazyRechunked t cs0 cs1 =
    hash (rechunkText t cs0) == hash (rechunkText t cs1)

-- | Break up a string into chunks of different sizes.
rechunkText :: T.Text -> NonEmptyList ChunkSize -> TL.Text
rechunkText t0 (NonEmpty cs0) = TL.fromChunks . go t0 . cycle $ cs0
  where
    go t _ | T.null t = []
    go t (c:cs)       = a : go b cs
      where (a,b)     = T.splitAt (unCS c) t
    go _ []           = error "Properties.rechunk - The 'impossible' happened!"

#if MIN_VERSION_bytestring(0,10,4)
-- | Content equality implies hash equality.
pBSShort :: BS.ShortByteString -> BS.ShortByteString -> Bool
pBSShort a b = if (a == b) then (hash a == hash b) else True
#endif

-- | Content equality implies hash equality.
pBS :: B.ByteString -> B.ByteString -> Bool
pBS a b = if (a == b) then (hash a == hash b) else True

-- | Content equality implies hash equality.
pBSLazy :: BL.ByteString -> BL.ByteString -> Bool
pBSLazy a b = if (a == b) then (hash a == hash b) else True

-- | Break up a string into chunks of different sizes.
rechunkBS :: B.ByteString -> NonEmptyList ChunkSize -> BL.ByteString
rechunkBS t0 (NonEmpty cs0) = BL.fromChunks . go t0 . cycle $ cs0
  where
    go t _ | B.null t = []
    go t (c:cs)       = a : go b cs
      where (a,b)     = B.splitAt (unCS c) t
    go _ []           = error "Properties.rechunkBS - The 'impossible' happened!"

-- | Ensure that the rechunk function causes a rechunked string to
-- still match its original form.
pBSRechunk :: B.ByteString -> NonEmptyList ChunkSize -> Bool
pBSRechunk t cs = fromStrict t == rechunkBS t cs

-- | Lazy bytestrings must hash to the same value no matter how they
-- are chunked.
pBSLazyRechunked :: B.ByteString
                 -> NonEmptyList ChunkSize -> NonEmptyList ChunkSize -> Bool
pBSLazyRechunked t cs1 cs2 = hash (rechunkBS t cs1) == hash (rechunkBS t cs2)

-- This wrapper is required by 'runST'.
data ByteArray = BA { unBA :: ByteArray# }

-- | Create a 'ByteArray#' from a list of 'Word8' values.
fromList :: [Word8] -> ByteArray#
fromList xs0 = unBA (runST $ ST $ \ s1# ->
    case newByteArray# len# s1# of
        (# s2#, marr# #) -> case go s2# 0 marr# xs0 of
            s3# -> (# s3#, BA (unsafeCoerce# marr#) #))
  where
    !(I# len#) = length xs0
    go s# _         _     []           = s#
    go s# i@(I# i#) marr# ((W8# x):xs) =
        case writeWord8Array# marr# i# x s# of
            s2# -> go s2# (i + 1) marr# xs

-- Generics

#ifdef GENERICS

data Product2 a b = Product2 a b
                    deriving (Generic)

instance (Arbitrary a, Arbitrary b) => Arbitrary (Product2 a b) where
    arbitrary = Product2 `liftM` arbitrary `ap` arbitrary

instance (Hashable a, Hashable b) => Hashable (Product2 a b)

data Product3 a b c = Product3 a b c
                    deriving (Generic)

instance (Arbitrary a, Arbitrary b, Arbitrary c) =>
    Arbitrary (Product3 a b c) where
    arbitrary = Product3 `liftM` arbitrary `ap` arbitrary `ap` arbitrary

instance (Hashable a, Hashable b, Hashable c) => Hashable (Product3 a b c)

-- Hashes of all product types of the same shapes should be the same.

pProduct2 :: Int -> String -> Bool
pProduct2 x y = hash (x, y) == hash (Product2 x y)

pProduct3 :: Double -> Maybe Bool -> (Int, String) -> Bool
pProduct3 x y z = hash (x, y, z) == hash (Product3 x y z)

data Sum2 a b = S2a a | S2b b
                deriving (Eq, Ord, Show, Generic)

instance (Hashable a, Hashable b) => Hashable (Sum2 a b)

data Sum3 a b c = S3a a | S3b b | S3c c
                  deriving (Eq, Ord, Show, Generic)

instance (Hashable a, Hashable b, Hashable c) => Hashable (Sum3 a b c)

-- Hashes of the same parameter, but with different sum constructors,
-- should differ. (They might legitimately collide, but that's
-- vanishingly unlikely.)

pSum2_differ :: Int -> Bool
pSum2_differ x = nub hs == hs
  where hs = [ hash (S2a x :: Sum2 Int Int)
             , hash (S2b x :: Sum2 Int Int) ]

pSum3_differ :: Int -> Bool
pSum3_differ x = nub hs == hs
  where hs = [ hash (S3a x :: Sum3 Int Int Int)
             , hash (S3b x :: Sum3 Int Int Int)
             , hash (S3c x :: Sum3 Int Int Int) ]

#endif

instance (Arbitrary a, Hashable a) => Arbitrary (Hashed a) where
  arbitrary = fmap hashed arbitrary
  shrink xs = map hashed $ shrink $ unhashed xs

pLiftedHashed :: Int -> Hashed (Either Int String) -> Bool
pLiftedHashed s h = hashWithSalt s h == hashWithSalt1 s h

properties :: [Test]
properties =
    [ testProperty "bernstein" pHash
    , testGroup "text"
      [ testProperty "text/strict" pText
      , testProperty "text/lazy" pTextLazy
      , testProperty "text/rechunk" pTextRechunk
      , testProperty "text/rechunked" pTextLazyRechunked
      ]
    , testGroup "bytestring"
      [ testProperty "bytestring/strict" pBS
      , testProperty "bytestring/lazy" pBSLazy
#if MIN_VERSION_bytestring(0,10,4)
      , testProperty "bytestring/short" pBSShort
#endif
      , testProperty "bytestring/rechunk" pBSRechunk
      , testProperty "bytestring/rechunked" pBSLazyRechunked
      ]
#ifdef GENERICS
    , testGroup "generics"
      [
      -- Note: "product2" and "product3" have been temporarily
      -- disabled until we have added a 'hash' method to the GHashable
      -- class. Until then (a,b) hashes to a different value than (a
      -- :*: b). While this is not incorrect, it would be nicer if
      -- they didn't. testProperty "product2" pProduct2 , testProperty
      -- "product3" pProduct3
        testProperty "sum2_differ" pSum2_differ
      , testProperty "sum3_differ" pSum3_differ
      ]
#endif
    , testGroup "lifted law"
      [ testProperty "Hashed" pLiftedHashed
      ]
    ]

------------------------------------------------------------------------
-- Utilities

fromStrict :: B.ByteString -> BL.ByteString
#if MIN_VERSION_bytestring(0,10,0)
fromStrict = BL.fromStrict
#else
fromStrict b = BL.fromChunks [b]
#endif