diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..988e09c --- /dev/null +++ b/CHANGES @@ -0,0 +1,97 @@ +* 0.2.3.2 (15 May 2017) + + - Move to github. + +* 0.2.3.1 (2 May 2016) + + - Fix a test case which was causing occasional spurious test suite + failures due to too many discarded tests. Thanks to Doug + Beardsley and Peter Simons for reporting the issue. + +* 0.2.3 (12 January 2016) + + - New function 'divvy' (with associated tests) from Tim Washington. + +* 0.2.2r2 (7 Jan 2016) + + - allow base-4.9 + +* 0.2.2r1 (12 Dec 2014) + + - allow base-4.8 + +* 0.2.2 (14 April 2013) + + - Add 'dropInnerBlanks' combinator for dropping blank chunks between + consecutive delimiters while still keeping the delimiters separate. + +* 0.2.1.3 (28 March 2013) + + - bump upper bound to allow base-4.7 + +* 0.2.1.2 (28 January 2013) + + - Patch from Daniel Wagner to make splitting lazier when using + keepDelimsR. Previously nothing was output until encountering a + delimiter; now it can start outputting a Text chunk before + reaching a delimiter. + +* 0.2.1.1 (24 September 2012) + + - Update this CHANGES file with details from the past two releases. + +* 0.2.1.0 (24 September 2012) + + - Go back to generic definition of 'build' (reverses change + introduced in 0.1.4.3), for simplicity and Haskell2010 compliance. + +* 0.2.0.0 (21 August 2012) + + - test with GHC 7.6.1 and bump base dependency to allow base-4.6 + + - deprecate synonyms: sepBy, sepByOneOf, unintercalate, chunk + + - rename splitEvery to chunksOf + + - unify Delimiter definition, and get rid of GADTs extension + +* 0.1.4.3 (7 June 2012) + + - Import 'build' function from GHC.Exts instead of defining it by + hand, which can lead to some speedups (since GHC has special + rewriting rules for the version in GHC.Exts). Of course this ties + it to GHC; if you want to build split under some other compiler, + let me know and I can add some CPP directives to define 'build' + conditionally. + + - Remove unnecessary Rank2Types extension. + +* 0.1.4.2 (21 December 2011) + + - Bump version upper bound for base and Test with GHC 7.4.1rc1 + +* 0.1.4.1 (3 August 2011) + + - Bump version upper bound for base and test with GHC 7.2.0rc1. + +* 0.1.4 + + - Add 'splitPlacesBlanks' function from Daniel Wagner; like + 'splitPlaces' but pads the output with blank lists to match the + length of the input list of places to split. + +* 0.1.3 + + - Add 'chop' list-processing function. + +* 0.1.2.3 + + - Now builds with GHC 7 + +* 0.1.2.2 + + - Fix typo in documentation (davidL) + + - Lazier implementation of splitInternal from Jan Christiansen. + Performance on large lists with not very many split points is now + greatly improved. \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..648167b --- /dev/null +++ b/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2008 Brent Yorgey, Louis Wasserman + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. Neither the name of the author nor the names of other contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. diff --git a/README b/README new file mode 100644 index 0000000..0b01d4a --- /dev/null +++ b/README @@ -0,0 +1,32 @@ + +Data.List.Split provides a wide range of strategies and a unified +combinator framework for splitting lists with respect to some sort of +delimiter. + +------------------------------------------------------------------------ + +Dependencies: + + There are no dependencies other than the base package. + Data.List.Split has been tested with versions of GHC from 6.8.3 up + through 8.0.1. It is completely Haskell2010 (probably also + Haskell98) compliant, so it probably builds with other compilers as + well. + + The Properties.hs file depends on QuickCheck >= 2.4, but you don't + need it in order to build the library itself, only to run the tests. + +Build with Cabal: + + cabal install + +Building Haddock documentation (recommended): + + cabal haddock + + Once the documentation has been built, you can access it by + pointing your browser to dist/doc/html/split/index.html. + +Running the tests: + + cabal configure --enable-tests && cabal build && cabal test diff --git a/Setup.lhs b/Setup.lhs new file mode 100644 index 0000000..5bde0de --- /dev/null +++ b/Setup.lhs @@ -0,0 +1,3 @@ +#!/usr/bin/env runhaskell +> import Distribution.Simple +> main = defaultMain diff --git a/split.cabal b/split.cabal new file mode 100644 index 0000000..3f94973 --- /dev/null +++ b/split.cabal @@ -0,0 +1,57 @@ +Name: split +Version: 0.2.3.2 +Stability: stable + +Description: A collection of various methods for splitting + lists into parts, akin to the \"split\" function + found in several mainstream languages. Here is + its tale: + . + Once upon a time the standard "Data.List" module + held no function for splitting a list into parts + according to a delimiter. Many a brave + lambda-knight strove to add such a function, but + their striving was in vain, for Lo, the Supreme + Council fell to bickering amongst themselves what + was to be the essential nature of the One True + Function which could cleave a list in twain (or + thrain, or any required number of parts). + . + And thus came to pass the split package, + comprising divers functions for splitting a list + asunder, each according to its nature. And the + Supreme Council had no longer any grounds for + argument, for the favored method of each was + contained therein. + . + To get started, see the "Data.List.Split" module. +Synopsis: Combinator library for splitting lists. +License: BSD3 +License-file: LICENSE +Copyright: (c) Brent Yorgey, Louis Wasserman 2008-2012 +Extra-source-files: README, test/Properties.hs, CHANGES +Author: Brent Yorgey +Maintainer: byorgey@gmail.com +Category: List +Build-type: Simple +Cabal-Version: >= 1.10 +Tested-with: GHC ==7.0.4, GHC ==7.2.1, GHC ==7.4.*, GHC ==7.6.1, GHC ==7.8.3, GHC==7.10.3, GHC==8.0.1 +Bug-reports: https://github.com/byorgey/split/issues + +Test-suite split-tests + type: exitcode-stdio-1.0 + main-is: Properties.hs + build-depends: base, QuickCheck >= 2.4, split + default-language: Haskell2010 + Hs-source-dirs: test + +Source-repository head + type: git + location: http://github.com/byorgey/split.git + +Library + ghc-options: -Wall + build-depends: base <4.11 + exposed-modules: Data.List.Split, Data.List.Split.Internals + default-language: Haskell2010 + Hs-source-dirs: src diff --git a/src/Data/List/Split.hs b/src/Data/List/Split.hs new file mode 100644 index 0000000..e2fc6af --- /dev/null +++ b/src/Data/List/Split.hs @@ -0,0 +1,163 @@ +{-# OPTIONS_HADDOCK prune #-} + +----------------------------------------------------------------------------- +-- | +-- Module : Data.List.Split +-- Copyright : (c) Brent Yorgey, Louis Wasserman 2008-2012 +-- License : BSD-style (see LICENSE) +-- Maintainer : Brent Yorgey +-- Stability : stable +-- Portability : Haskell 2010 +-- +-- The "Data.List.Split" module contains a wide range of strategies +-- for splitting lists with respect to some sort of delimiter, mostly +-- implemented through a unified combinator interface. The goal is to +-- be flexible yet simple. See below for usage, examples, and +-- detailed documentation of all exported functions. If you want to +-- learn about the implementation, see "Data.List.Split.Internals". +-- +-- A git repository containing the source (including a module with +-- over 40 QuickCheck properties) can be found at +-- . +-- +----------------------------------------------------------------------------- +module Data.List.Split ( + + -- * Getting started + -- $started + + -- * Convenience functions + -- $conv + + splitOn + , splitOneOf + , splitWhen + , endBy + , endByOneOf + + , wordsBy + , linesBy + + -- * Other splitting methods + -- $other + , chunksOf + , splitPlaces + , splitPlacesBlanks + , chop + , divvy + + -- * Splitting combinators + -- $comb + + , Splitter + , defaultSplitter + , split + + -- ** Basic strategies + -- $basic + + , oneOf + , onSublist + , whenElt + + -- ** Strategy transformers + -- $transform + + , dropDelims + , keepDelimsL + , keepDelimsR + , condense + , dropInitBlank + , dropFinalBlank + , dropInnerBlanks + + -- ** Derived combinators + -- $derived + + , dropBlanks + , startsWith + , startsWithOneOf + , endsWith + , endsWithOneOf + + -- The following synonyms are deprecated, but + -- still exported for now. No documentation is + -- generated for them via the 'OPTIONS_HADDOCK + -- prune' pragma. + + , sepBy + , sepByOneOf + , unintercalate + , splitEvery + , chunk + + ) where + +import Data.List.Split.Internals + +-- $started +-- To get started, you should take a look at the functions 'splitOn', +-- 'splitOneOf', 'splitWhen', 'endBy', 'chunksOf', 'splitPlaces', +-- and other functions listed in the next two sections. These +-- functions implement various common splitting operations, and one of +-- them will probably do the job 90\% of the time. For example: +-- +-- > > splitOn "x" "axbxc" +-- > ["a","b","c"] +-- > +-- > > splitOn "x" "axbxcx" +-- > ["a","b","c",""] +-- > +-- > > endBy ";" "foo;bar;baz;" +-- > ["foo","bar","baz"] +-- > +-- > > splitWhen (<0) [1,3,-4,5,7,-9,0,2] +-- > [[1,3],[5,7],[0,2]] +-- > +-- > > splitOneOf ";.," "foo,bar;baz.glurk" +-- > ["foo","bar","baz","glurk"] +-- > +-- > > chunksOf 3 ['a'..'z'] +-- > ["abc","def","ghi","jkl","mno","pqr","stu","vwx","yz"] +-- +-- If you want more flexibility, however, you can use the combinator +-- library in terms of which these functions are defined. For more +-- information, see the section labeled \"Splitting Combinators\". +-- +-- The goal of this library is to be flexible yet simple. It does not +-- implement any particularly sophisticated list-splitting methods, +-- nor is it tuned for speed. If you find yourself wanting something +-- more complicated or optimized, it probably means you should use a +-- real parsing or regular expression library. + +-- $conv +-- These functions implement some common splitting strategies. Note +-- that all of the functions in this section drop delimiters from the +-- final output, since that is a more common use case. If you wish to +-- keep the delimiters somehow, see the \"Splitting Combinators\" +-- section. + +-- $other +-- Other useful splitting methods which are not implemented using the +-- combinator framework. + +-- $comb +-- The core of the library is the 'Splitter' type, which represents a +-- particular list-splitting strategy. All of the combinators revolve +-- around constructing or transforming 'Splitter' objects; once a +-- suitable 'Splitter' has been created, it can be run with the +-- 'split' function. For example: +-- +-- > > split (dropBlanks . condense $ whenElt (<0)) [1,2,4,-5,-6,4,9,-19,-30] +-- > [[1,2,4],[-5,-6],[4,9],[-19,-30]] + +-- $basic +-- All these basic strategies have the same parameters as the +-- 'defaultSplitter' except for the delimiter. + +-- $transform +-- Functions for altering splitting strategy parameters. + +-- $derived +-- Combinators which can be defined in terms of other combinators, but +-- are provided for convenience. diff --git a/src/Data/List/Split/Internals.hs b/src/Data/List/Split/Internals.hs new file mode 100644 index 0000000..e83f548 --- /dev/null +++ b/src/Data/List/Split/Internals.hs @@ -0,0 +1,614 @@ +{-# OPTIONS_HADDOCK prune #-} + +----------------------------------------------------------------------------- +-- | +-- Module : Data.List.Split.Internals +-- Copyright : (c) Brent Yorgey, Louis Wasserman 2008-2012 +-- License : BSD-style (see LICENSE) +-- Maintainer : Brent Yorgey +-- Stability : stable +-- Portability : Haskell 2010 +-- +-- Implementation module for "Data.List.Split", a combinator library +-- for splitting lists. See the "Data.List.Split" documentation for +-- more description and examples. +-- +----------------------------------------------------------------------------- + +module Data.List.Split.Internals where + +import Data.List (genericSplitAt) + +-- * Types and utilities + +-- | A splitting strategy. +data Splitter a = Splitter { delimiter :: Delimiter a + -- ^ What delimiter to split on + , delimPolicy :: DelimPolicy + -- ^ What to do with delimiters (drop + -- from output, keep as separate + -- elements in output, or merge with + -- previous or following chunks) + , condensePolicy :: CondensePolicy + -- ^ What to do with multiple + -- consecutive delimiters + , initBlankPolicy :: EndPolicy + -- ^ Drop an initial blank? + , finalBlankPolicy :: EndPolicy + -- ^ Drop a final blank? + } + +-- | The default splitting strategy: keep delimiters in the output +-- as separate chunks, don't condense multiple consecutive +-- delimiters into one, keep initial and final blank chunks. +-- Default delimiter is the constantly false predicate. +-- +-- Note that 'defaultSplitter' should normally not be used; use +-- 'oneOf', 'onSublist', or 'whenElt' instead, which are the same as +-- the 'defaultSplitter' with just the delimiter overridden. +-- +-- The 'defaultSplitter' strategy with any delimiter gives a +-- maximally information-preserving splitting strategy, in the sense +-- that (a) taking the 'concat' of the output yields the original +-- list, and (b) given only the output list, we can reconstruct a +-- 'Splitter' which would produce the same output list again given +-- the original input list. This default strategy can be overridden +-- to allow discarding various sorts of information. +defaultSplitter :: Splitter a +defaultSplitter = Splitter { delimiter = Delimiter [const False] + , delimPolicy = Keep + , condensePolicy = KeepBlankFields + , initBlankPolicy = KeepBlank + , finalBlankPolicy = KeepBlank + } + +-- | A delimiter is a list of predicates on elements, matched by some +-- contiguous subsequence of a list. +newtype Delimiter a = Delimiter [a -> Bool] + +-- | Try to match a delimiter at the start of a list, either failing +-- or decomposing the list into the portion which matched the delimiter +-- and the remainder. +matchDelim :: Delimiter a -> [a] -> Maybe ([a],[a]) +matchDelim (Delimiter []) xs = Just ([],xs) +matchDelim (Delimiter _) [] = Nothing +matchDelim (Delimiter (p:ps)) (x:xs) + | p x = matchDelim (Delimiter ps) xs >>= \(h,t) -> Just (x:h,t) + | otherwise = Nothing + +-- | What to do with delimiters? +data DelimPolicy = Drop -- ^ Drop delimiters from the output. + | Keep -- ^ Keep delimiters as separate chunks + -- of the output. + | KeepLeft -- ^ Keep delimiters in the output, + -- prepending them to the following + -- chunk. + | KeepRight -- ^ Keep delimiters in the output, + -- appending them to the previous chunk. + deriving (Eq, Show) + +-- | What to do with multiple consecutive delimiters? +data CondensePolicy = Condense -- ^ Condense into a single delimiter. + | DropBlankFields -- ^ Keep consecutive + -- delimiters separate, but + -- don't insert blank chunks in + -- between them. + | KeepBlankFields -- ^ Insert blank chunks + -- between consecutive + -- delimiters. + deriving (Eq, Show) + +-- | What to do with a blank chunk at either end of the list +-- (/i.e./ when the list begins or ends with a delimiter). +data EndPolicy = DropBlank | KeepBlank + deriving (Eq, Show) + +-- | Tag chunks as delimiters or text. +data Chunk a = Delim [a] | Text [a] + deriving (Show, Eq) + +-- | Internal representation of a split list that tracks which pieces +-- are delimiters and which aren't. +type SplitList a = [Chunk a] + +-- | Untag a 'Chunk'. +fromElem :: Chunk a -> [a] +fromElem (Text as) = as +fromElem (Delim as) = as + +-- | Test whether a 'Chunk' is a delimiter. +isDelim :: Chunk a -> Bool +isDelim (Delim _) = True +isDelim _ = False + +-- | Test whether a 'Chunk' is text. +isText :: Chunk a -> Bool +isText (Text _) = True +isText _ = False + +-- * Implementation + +-- | Given a delimiter to use, split a list into an internal +-- representation with chunks tagged as delimiters or text. This +-- transformation is lossless; in particular, +-- +-- @ +-- 'concatMap' 'fromElem' ('splitInternal' d l) == l. +-- @ +splitInternal :: Delimiter a -> [a] -> SplitList a +splitInternal _ [] = [] +splitInternal d xxs + | null xs = toSplitList match + | otherwise = Text xs : toSplitList match + where + (xs,match) = breakDelim d xxs + + toSplitList Nothing = [] + toSplitList (Just ([],r:rs)) = Delim [] : Text [r] : splitInternal d rs + toSplitList (Just (delim,rest)) = Delim delim : splitInternal d rest + +breakDelim :: Delimiter a -> [a] -> ([a],Maybe ([a],[a])) +breakDelim (Delimiter []) xs = ([],Just ([],xs)) +breakDelim _ [] = ([],Nothing) +breakDelim d xxs@(x:xs) = + case matchDelim d xxs of + Nothing -> let (ys,match) = breakDelim d xs in (x:ys,match) + Just match -> ([], Just match) + +-- | Given a split list in the internal tagged representation, produce +-- a new internal tagged representation corresponding to the final +-- output, according to the strategy defined by the given +-- 'Splitter'. +postProcess :: Splitter a -> SplitList a -> SplitList a +postProcess s = dropFinal (finalBlankPolicy s) + . dropInitial (initBlankPolicy s) + . doMerge (delimPolicy s) + . doDrop (delimPolicy s) + . insertBlanks (condensePolicy s) + . doCondense (condensePolicy s) + +-- | Drop delimiters if the 'DelimPolicy' is 'Drop'. +doDrop :: DelimPolicy -> SplitList a -> SplitList a +doDrop Drop l = [ c | c@(Text _) <- l ] +doDrop _ l = l + +-- | Condense multiple consecutive delimiters into one if the +-- 'CondensePolicy' is 'Condense'. +doCondense :: CondensePolicy -> SplitList a -> SplitList a +doCondense Condense ls = condense' ls + where condense' [] = [] + condense' (c@(Text _) : l) = c : condense' l + condense' l = (Delim $ concatMap fromElem ds) : condense' rest + where (ds,rest) = span isDelim l +doCondense _ ls = ls + +-- | Insert blank chunks between any remaining consecutive delimiters +-- (unless the condense policy is 'DropBlankFields'), and at the +-- beginning or end if the first or last element is a delimiter. +insertBlanks :: CondensePolicy -> SplitList a -> SplitList a +insertBlanks _ [] = [Text []] +insertBlanks cp (d@(Delim _) : l) = Text [] : insertBlanks' cp (d:l) +insertBlanks cp l = insertBlanks' cp l + +-- | Insert blank chunks between consecutive delimiters. +insertBlanks' :: CondensePolicy -> SplitList a -> SplitList a +insertBlanks' _ [] = [] +insertBlanks' cp@DropBlankFields (d1@(Delim _) : d2@(Delim _) : l) + = d1 : insertBlanks' cp (d2:l) +insertBlanks' cp (d1@(Delim _) : d2@(Delim _) : l) + = d1 : Text [] : insertBlanks' cp (d2:l) +insertBlanks' _ [d@(Delim _)] = [d, Text []] +insertBlanks' cp (c : l) = c : insertBlanks' cp l + +-- | Merge delimiters into adjacent chunks according to the 'DelimPolicy'. +doMerge :: DelimPolicy -> SplitList a -> SplitList a +doMerge KeepLeft = mergeLeft +doMerge KeepRight = mergeRight +doMerge _ = id + +-- | Merge delimiters with adjacent chunks to the right (yes, that's +-- not a typo: the delimiters should end up on the left of the +-- chunks, so they are merged with chunks to their right). +mergeLeft :: SplitList a -> SplitList a +mergeLeft [] = [] +mergeLeft ((Delim d) : (Text c) : l) = Text (d++c) : mergeLeft l +mergeLeft (c : l) = c : mergeLeft l + +-- | Merge delimiters with adjacent chunks to the left. +mergeRight :: SplitList a -> SplitList a +mergeRight [] = [] +-- below fanciness is with the goal of laziness: we want to start returning +-- stuff before we've necessarily discovered a delimiter, in case we're +-- processing some infinite list with no delimiter +mergeRight ((Text c) : l) = Text (c++d) : mergeRight lTail + where (d, lTail) = case l of + Delim d' : l' -> (d', l') + _ -> ([], l) +mergeRight (c : l) = c : mergeRight l + +-- | Drop an initial blank chunk according to the given 'EndPolicy'. +dropInitial :: EndPolicy -> SplitList a -> SplitList a +dropInitial DropBlank (Text [] : l) = l +dropInitial _ l = l + +-- | Drop a final blank chunk according to the given 'EndPolicy'. +dropFinal :: EndPolicy -> SplitList a -> SplitList a +dropFinal _ [] = [] +dropFinal DropBlank l = dropFinal' l + where dropFinal' [] = [] + dropFinal' [Text []] = [] + dropFinal' (x:xs) = x:dropFinal' xs +dropFinal _ l = l + +-- * Combinators + +-- | Split a list according to the given splitting strategy. This is +-- how to \"run\" a 'Splitter' that has been built using the other +-- combinators. +split :: Splitter a -> [a] -> [[a]] +split s = map fromElem . postProcess s . splitInternal (delimiter s) + +-- ** Basic strategies +-- +-- $ All these basic strategies have the same parameters as the +-- 'defaultSplitter' except for the delimiters. + +-- | A splitting strategy that splits on any one of the given +-- elements. For example: +-- +-- > split (oneOf "xyz") "aazbxyzcxd" == ["aa","z","b","x","","y","","z","c","x","d"] +oneOf :: Eq a => [a] -> Splitter a +oneOf elts = defaultSplitter { delimiter = Delimiter [(`elem` elts)] } + +-- | A splitting strategy that splits on the given list, when it is +-- encountered as an exact subsequence. For example: +-- +-- > split (onSublist "xyz") "aazbxyzcxd" == ["aazb","xyz","cxd"] +-- +-- Note that splitting on the empty list is a special case, which +-- splits just before every element of the list being split. For example: +-- +-- > split (onSublist "") "abc" == ["","","a","","b","","c"] +-- > split (dropDelims . dropBlanks $ onSublist "") "abc" == ["a","b","c"] +-- +-- However, if you want to break a list into singleton elements like +-- this, you are better off using @'chunksOf' 1@, or better yet, +-- @'map' (:[])@. +onSublist :: Eq a => [a] -> Splitter a +onSublist lst = defaultSplitter { delimiter = Delimiter (map (==) lst) } + +-- | A splitting strategy that splits on any elements that satisfy the +-- given predicate. For example: +-- +-- > split (whenElt (<0)) [2,4,-3,6,-9,1] == [[2,4],[-3],[6],[-9],[1]] +whenElt :: (a -> Bool) -> Splitter a +whenElt p = defaultSplitter { delimiter = Delimiter [p] } + +-- ** Strategy transformers + +-- | Drop delimiters from the output (the default is to keep +-- them). For example, +-- +-- > split (oneOf ":") "a:b:c" == ["a", ":", "b", ":", "c"] +-- > split (dropDelims $ oneOf ":") "a:b:c" == ["a", "b", "c"] +dropDelims :: Splitter a -> Splitter a +dropDelims s = s { delimPolicy = Drop } + +-- | Keep delimiters in the output by prepending them to adjacent +-- chunks. For example: +-- +-- > split (keepDelimsL $ oneOf "xyz") "aazbxyzcxd" == ["aa","zb","x","y","zc","xd"] +keepDelimsL :: Splitter a -> Splitter a +keepDelimsL s = s { delimPolicy = KeepLeft } + +-- | Keep delimiters in the output by appending them to adjacent +-- chunks. For example: +-- +-- > split (keepDelimsR $ oneOf "xyz") "aazbxyzcxd" == ["aaz","bx","y","z","cx","d"] +keepDelimsR :: Splitter a -> Splitter a +keepDelimsR s = s { delimPolicy = KeepRight } + +-- | Condense multiple consecutive delimiters into one. For example: +-- +-- > split (condense $ oneOf "xyz") "aazbxyzcxd" == ["aa","z","b","xyz","c","x","d"] +-- > split (dropDelims $ oneOf "xyz") "aazbxyzcxd" == ["aa","b","","","c","d"] +-- > split (condense . dropDelims $ oneOf "xyz") "aazbxyzcxd" == ["aa","b","c","d"] +condense :: Splitter a -> Splitter a +condense s = s { condensePolicy = Condense } + +-- | Don't generate a blank chunk if there is a delimiter at the +-- beginning. For example: +-- +-- > split (oneOf ":") ":a:b" == ["",":","a",":","b"] +-- > split (dropInitBlank $ oneOf ":") ":a:b" == [":","a",":","b"] +dropInitBlank :: Splitter a -> Splitter a +dropInitBlank s = s { initBlankPolicy = DropBlank } + +-- | Don't generate a blank chunk if there is a delimiter at the end. +-- For example: +-- +-- > split (oneOf ":") "a:b:" == ["a",":","b",":",""] +-- > split (dropFinalBlank $ oneOf ":") "a:b:" == ["a",":","b",":"] +dropFinalBlank :: Splitter a -> Splitter a +dropFinalBlank s = s { finalBlankPolicy = DropBlank } + +-- | Don't generate blank chunks between consecutive delimiters. +-- For example: +-- +-- > split (oneOf ":") "::b:::a" == ["",":","",":","b",":","",":","",":","a"] +-- > split (dropInnerBlanks $ oneOf ":") "::b:::a" == ["", ":",":","b",":",":",":","a"] +dropInnerBlanks :: Splitter a -> Splitter a +dropInnerBlanks s = s { condensePolicy = DropBlankFields } + +-- ** Derived combinators + +-- | Drop all blank chunks from the output, and condense consecutive +-- delimiters into one. Equivalent to @'dropInitBlank' +-- . 'dropFinalBlank' . 'condense'@. For example: +-- +-- > split (oneOf ":") "::b:::a" == ["",":","",":","b",":","",":","",":","a"] +-- > split (dropBlanks $ oneOf ":") "::b:::a" == ["::","b",":::","a"] +dropBlanks :: Splitter a -> Splitter a +dropBlanks = dropInitBlank . dropFinalBlank . condense + +-- | Make a strategy that splits a list into chunks that all start +-- with the given subsequence (except possibly the first). +-- Equivalent to @'dropInitBlank' . 'keepDelimsL' . 'onSublist'@. +-- For example: +-- +-- > split (startsWith "app") "applyapplicativeapplaudapproachapple" == ["apply","applicative","applaud","approach","apple"] +startsWith :: Eq a => [a] -> Splitter a +startsWith = dropInitBlank . keepDelimsL . onSublist + +-- | Make a strategy that splits a list into chunks that all start +-- with one of the given elements (except possibly the first). +-- Equivalent to @'dropInitBlank' . 'keepDelimsL' . 'oneOf'@. For +-- example: +-- +-- > split (startsWithOneOf ['A'..'Z']) "ACamelCaseIdentifier" == ["A","Camel","Case","Identifier"] +startsWithOneOf :: Eq a => [a] -> Splitter a +startsWithOneOf = dropInitBlank . keepDelimsL . oneOf + +-- | Make a strategy that splits a list into chunks that all end with +-- the given subsequence, except possibly the last. Equivalent to +-- @'dropFinalBlank' . 'keepDelimsR' . 'onSublist'@. For example: +-- +-- > split (endsWith "ly") "happilyslowlygnarlylily" == ["happily","slowly","gnarly","lily"] +endsWith :: Eq a => [a] -> Splitter a +endsWith = dropFinalBlank . keepDelimsR . onSublist + +-- | Make a strategy that splits a list into chunks that all end with +-- one of the given elements, except possibly the last. Equivalent +-- to @'dropFinalBlank' . 'keepDelimsR' . 'oneOf'@. For example: +-- +-- > split (condense $ endsWithOneOf ".,?! ") "Hi, there! How are you?" == ["Hi, ","there! ","How ","are ","you?"] +endsWithOneOf :: Eq a => [a] -> Splitter a +endsWithOneOf = dropFinalBlank . keepDelimsR . oneOf + +-- ** Convenience functions +-- +-- These functions implement some common splitting strategies. Note +-- that all of the functions in this section drop delimiters from +-- the final output, since that is a more common use case even +-- though it is not the default. + +-- | Split on any of the given elements. Equivalent to @'split' +-- . 'dropDelims' . 'oneOf'@. For example: +-- +-- > splitOneOf ";.," "foo,bar;baz.glurk" == ["foo","bar","baz","glurk"] +splitOneOf :: Eq a => [a] -> [a] -> [[a]] +splitOneOf = split . dropDelims . oneOf + +-- | Split on the given sublist. Equivalent to @'split' +-- . 'dropDelims' . 'onSublist'@. For example: +-- +-- > splitOn ".." "a..b...c....d.." == ["a","b",".c","","d",""] +-- +-- In some parsing combinator frameworks this is also known as +-- @sepBy@. +-- +-- Note that this is the right inverse of the 'Data.List.intercalate' function +-- from "Data.List", that is, +-- +-- > intercalate x . splitOn x === id +-- +-- @'splitOn' x . 'Data.List.intercalate' x@ is the identity on +-- certain lists, but it is tricky to state the precise conditions +-- under which this holds. (For example, it is not enough to say +-- that @x@ does not occur in any elements of the input list. +-- Working out why is left as an exercise for the reader.) +splitOn :: Eq a => [a] -> [a] -> [[a]] +splitOn = split . dropDelims . onSublist + +-- | Split on elements satisfying the given predicate. Equivalent to +-- @'split' . 'dropDelims' . 'whenElt'@. For example: +-- +-- > splitWhen (<0) [1,3,-4,5,7,-9,0,2] == [[1,3],[5,7],[0,2]] +splitWhen :: (a -> Bool) -> [a] -> [[a]] +splitWhen = split . dropDelims . whenElt + +{-# DEPRECATED sepBy "Use splitOn." #-} +sepBy :: Eq a => [a] -> [a] -> [[a]] +sepBy = splitOn + +{-# DEPRECATED sepByOneOf "Use splitOneOf." #-} +sepByOneOf :: Eq a => [a] -> [a] -> [[a]] +sepByOneOf = splitOneOf + +-- | Split into chunks terminated by the given subsequence. +-- Equivalent to @'split' . 'dropFinalBlank' . 'dropDelims' +-- . 'onSublist'@. For example: +-- +-- > endBy ";" "foo;bar;baz;" == ["foo","bar","baz"] +-- +-- Note also that the 'lines' function from "Data.List" is equivalent +-- to @'endBy' \"\\n\"@. +endBy :: Eq a => [a] -> [a] -> [[a]] +endBy = split . dropFinalBlank . dropDelims . onSublist + +-- | Split into chunks terminated by one of the given elements. +-- Equivalent to @'split' . 'dropFinalBlank' . 'dropDelims' +-- . 'oneOf'@. For example: +-- +-- > endByOneOf ";," "foo;bar,baz;" == ["foo","bar","baz"] +endByOneOf :: Eq a => [a] -> [a] -> [[a]] +endByOneOf = split . dropFinalBlank . dropDelims . oneOf + +{-# DEPRECATED unintercalate "Use splitOn." #-} +unintercalate :: Eq a => [a] -> [a] -> [[a]] +unintercalate = splitOn + +-- | Split into \"words\", with word boundaries indicated by the given +-- predicate. Satisfies @'Data.List.words' === wordsBy +-- 'Data.Char.isSpace'@; equivalent to @'split' . 'dropBlanks' +-- . 'dropDelims' . 'whenElt'@. For example: +-- +-- > wordsBy (=='x') "dogxxxcatxbirdxx" == ["dog","cat","bird"] +wordsBy :: (a -> Bool) -> [a] -> [[a]] +wordsBy = split . dropBlanks . dropDelims . whenElt + +-- | Split into \"lines\", with line boundaries indicated by the given +-- predicate. Satisfies @'lines' === linesBy (=='\n')@; equivalent to +-- @'split' . 'dropFinalBlank' . 'dropDelims' . 'whenElt'@. For example: +-- +-- > linesBy (=='x') "dogxxxcatxbirdxx" == ["dog","","","cat","bird",""] +linesBy :: (a -> Bool) -> [a] -> [[a]] +linesBy = split . dropFinalBlank . dropDelims . whenElt + +-- * Other splitting methods + +-- | Standard build function, specialized to building lists. +-- +-- Usually build is given the rank-2 type +-- +-- > build :: (forall b. (a -> b -> b) -> b -> b) -> [a] +-- +-- but since we only use it when @(b ~ [a])@, we give it the more +-- restricted type signature in order to avoid needing a +-- non-Haskell2010 extension. +-- +-- Note that the 0.1.4.3 release of this package did away with a +-- custom @build@ implementation in favor of importing one from +-- "GHC.Exts", which was (reportedly) faster for some applications. +-- However, in the interest of simplicity and complete Haskell2010 +-- compliance as @split@ is being included in the Haskel Platform, +-- version 0.2.1.0 has gone back to defining @build@ manually. This +-- is in line with @split@'s design philosophy of having efficiency +-- as a non-goal. +build :: ((a -> [a] -> [a]) -> [a] -> [a]) -> [a] +build g = g (:) [] + +-- | @'chunksOf' n@ splits a list into length-n pieces. The last +-- piece will be shorter if @n@ does not evenly divide the length of +-- the list. If @n <= 0@, @'chunksOf' n l@ returns an infinite list +-- of empty lists. For example: +-- +-- Note that @'chunksOf' n []@ is @[]@, not @[[]]@. This is +-- intentional, and is consistent with a recursive definition of +-- 'chunksOf'; it satisfies the property that +-- +-- @chunksOf n xs ++ chunksOf n ys == chunksOf n (xs ++ ys)@ +-- +-- whenever @n@ evenly divides the length of @xs@. +chunksOf :: Int -> [e] -> [[e]] +chunksOf i ls = map (take i) (build (splitter ls)) where + splitter :: [e] -> ([e] -> a -> a) -> a -> a + splitter [] _ n = n + splitter l c n = l `c` splitter (drop i l) c n + +{-# DEPRECATED chunk "Use chunksOf." #-} +chunk :: Int -> [e] -> [[e]] +chunk = chunksOf + +{-# DEPRECATED splitEvery "Use chunksOf." #-} +splitEvery :: Int -> [e] -> [[e]] +splitEvery = chunksOf + +-- | Split a list into chunks of the given lengths. For example: +-- +-- > splitPlaces [2,3,4] [1..20] == [[1,2],[3,4,5],[6,7,8,9]] +-- > splitPlaces [4,9] [1..10] == [[1,2,3,4],[5,6,7,8,9,10]] +-- > splitPlaces [4,9,3] [1..10] == [[1,2,3,4],[5,6,7,8,9,10]] +-- +-- If the input list is longer than the total of the given lengths, +-- then the remaining elements are dropped. If the list is shorter +-- than the total of the given lengths, then the result may contain +-- fewer chunks than requested, and the last chunk may be shorter +-- than requested. +splitPlaces :: Integral a => [a] -> [e] -> [[e]] +splitPlaces is ys = build (splitPlacer is ys) where + splitPlacer :: Integral i => [i] -> [b] -> ([b] -> t -> t) -> t -> t + splitPlacer [] _ _ n = n + splitPlacer _ [] _ n = n + splitPlacer (l:ls) xs c n = let (x1, x2) = genericSplitAt l xs + in x1 `c` splitPlacer ls x2 c n + +-- | Split a list into chunks of the given lengths. Unlike +-- 'splitPlaces', the output list will always be the same length as +-- the first input argument. If the input list is longer than the +-- total of the given lengths, then the remaining elements are +-- dropped. If the list is shorter than the total of the given +-- lengths, then the last several chunks will be shorter than +-- requested or empty. For example: +-- +-- > splitPlacesBlanks [2,3,4] [1..20] == [[1,2],[3,4,5],[6,7,8,9]] +-- > splitPlacesBlanks [4,9] [1..10] == [[1,2,3,4],[5,6,7,8,9,10]] +-- > splitPlacesBlanks [4,9,3] [1..10] == [[1,2,3,4],[5,6,7,8,9,10],[]] +-- +-- Notice the empty list in the output of the third example, which +-- differs from the behavior of 'splitPlaces'. +splitPlacesBlanks :: Integral a => [a] -> [e] -> [[e]] +splitPlacesBlanks is ys = build (splitPlacer is ys) where + splitPlacer :: Integral i => [i] -> [b] -> ([b] -> t -> t) -> t -> t + splitPlacer [] _ _ n = n + splitPlacer (l:ls) xs c n = let (x1, x2) = genericSplitAt l xs + in x1 `c` splitPlacer ls x2 c n + +-- | A useful recursion pattern for processing a list to produce a new +-- list, often used for \"chopping\" up the input list. Typically +-- chop is called with some function that will consume an initial +-- prefix of the list and produce a value and the rest of the list. +-- +-- For example, many common Prelude functions can be implemented in +-- terms of @chop@: +-- +-- > group :: (Eq a) => [a] -> [[a]] +-- > group = chop (\ xs@(x:_) -> span (==x) xs) +-- > +-- > words :: String -> [String] +-- > words = filter (not . null) . chop (span (not . isSpace) . dropWhile isSpace) + +chop :: ([a] -> (b, [a])) -> [a] -> [b] +chop _ [] = [] +chop f as = b : chop f as' + where (b, as') = f as + +-- | Divides up an input list into a set of sublists, according to 'n' and 'm' +-- input specifications you provide. Each sublist will have 'n' items, and the +-- start of each sublist will be offset by 'm' items from the previous one. +-- +-- > divvy 5 5 [1..20] == [[1,2,3,4,5],[6,7,8,9,10],[11,12,13,14,15],[16,17,18,19,20]] +-- +-- In the case where a source list's trailing elements do no fill an entire +-- sublist, those trailing elements will be dropped. +-- +-- > divvy 5 2 [1..10] == [[1,2,3,4,5],[3,4,5,6,7],[5,6,7,8,9]] +-- +-- As an example, you can generate a moving average over a list of prices: +-- +-- > type Prices = [Float] +-- > type AveragePrices = [Float] +-- > +-- > average :: [Float] -> Float +-- > average xs = sum xs / (fromIntegral $ length xs) +-- > +-- > simpleMovingAverage :: Prices -> AveragePrices +-- > simpleMovingAverage priceList = +-- > map average divvyedPrices +-- > where divvyedPrices = divvy 20 1 priceList + +divvy :: Int -> Int -> [a] -> [[a]] +divvy _ _ [] = [] +divvy n m lst = filter (\ws -> (n == length ws)) choppedl + where choppedl = chop (\xs -> (take n xs , drop m xs)) lst + diff --git a/test/Properties.hs b/test/Properties.hs new file mode 100644 index 0000000..69a3141 --- /dev/null +++ b/test/Properties.hs @@ -0,0 +1,404 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE StandaloneDeriving #-} +module Main where + +import Data.List.Split.Internals +import Test.QuickCheck +import Test.QuickCheck.Function + +import Control.Monad +import System.Environment +import Text.Printf + +import Data.Char +import Data.Functor +import Data.List (genericTake, group, intercalate, + isInfixOf, isPrefixOf, isSuffixOf, + tails) +import Data.Maybe (isJust) + +newtype Elt = Elt { unElt :: Char } + deriving (Eq) + +instance Show Elt where + show (Elt c) = show c + +instance Arbitrary Elt where + arbitrary = elements (map Elt "abcde") + +instance CoArbitrary Elt where + coarbitrary = coarbitrary . ord . unElt + +instance Function Elt where + function = functionMap unElt Elt + +deriving instance Show (Splitter Elt) + +instance Show (Delimiter Elt) where + show (Delimiter ps) = show (map function ps) + +instance (Arbitrary a, CoArbitrary a, Function a) => Arbitrary (Delimiter a) where + arbitrary = (Delimiter . map apply) <$> arbitrary + +instance Arbitrary a => Arbitrary (Chunk a) where + arbitrary = oneof [ liftM Text (listOf arbitrary) + , liftM Delim (listOf arbitrary) + ] + +instance Arbitrary DelimPolicy where + arbitrary = elements [Drop, Keep, KeepLeft, KeepRight] + +instance Arbitrary CondensePolicy where + arbitrary = elements [Condense, KeepBlankFields] + +instance Arbitrary EndPolicy where + arbitrary = elements [DropBlank, KeepBlank] + +instance (Arbitrary a, CoArbitrary a, Function a) => Arbitrary (Splitter a) where + arbitrary = liftM5 Splitter arbitrary arbitrary arbitrary arbitrary arbitrary + +type Delim a = [Fun a Bool] + +unDelim :: Delim a -> Delimiter a +unDelim = Delimiter . map apply + +main :: IO () +main = do + results <- mapM (\(s,t) -> printf "%-40s" s >> t) tests + when (not . all isSuccess $ results) $ fail "Not all tests passed!" + where + isSuccess (Success{}) = True + isSuccess _ = False + qc x = quickCheckWithResult (stdArgs { maxSuccess = 200 }) x + tests = [ ("default/id", qc prop_default_id) + , ("match/decompose", qc prop_match_decompose) + , ("match/yields delim", qc prop_match_yields_delim) + , ("splitInternal/lossless", qc prop_splitInternal_lossless) + , ("splitInternal/yields delims", qc prop_splitInternal_yields_delims) + , ("splitInternal/text", qc prop_splitInternal_text_not_delims) + , ("doCondense/no consec delims", qc prop_doCondense_no_consec_delims) + , ("insBlanks/no consec delims", qc prop_insBlanks_no_consec_delims) + , ("insBlanks/fl not delims", qc prop_insBlanks_fl_not_delim) + , ("mergeL/no delims", qc prop_mergeL_no_delims) + , ("mergeR/no delims", qc prop_mergeR_no_delims) + , ("oneOf", qc prop_oneOf) + , ("oneOf/not text", qc prop_oneOf_not_text) + , ("onSublist", qc prop_onSublist) + , ("onSublist/not text", qc prop_onSublist_not_text) + , ("whenElt", qc prop_whenElt) + , ("whenElt/not text", qc prop_whenElt_not_text) + , ("process/dropDelims", qc prop_dropDelims) + , ("process/keepDelimsL no delims", qc prop_keepDelimsL_no_delims) + , ("process/keepDelimsR no delims", qc prop_keepDelimsR_no_delims) + , ("process/keepDelimsL match", qc prop_keepDelimsL_match) + , ("process/keepDelimsR match", qc prop_keepDelimsR_match) + , ("condense/no consec delims", qc prop_condense_no_consec_delims) + , ("condense/all delims", qc prop_condense_all_delims) + , ("dropInitBlank", qc prop_dropInitBlank) + , ("dropFinalBlank", qc prop_dropFinalBlank) + , ("dropBlanks", qc prop_dropBlanks) + , ("startsWith", qc prop_startsWith) + , ("startsWithOneOf", qc prop_startsWithOneOf) + , ("endsWith", qc prop_endsWith) + , ("endsWithOneOf", qc prop_endsWithOneOf) + , ("splitOn/right inv", qc prop_splitOn_right_inv) + , ("splitOn/idem", qc prop_splitOn_intercalate_idem) + , ("splitOn/empty delim", qc prop_splitOn_empty_delim) + , ("split/empty delim", qc prop_split_empty_delim_drop) + , ("chunksOf/lengths", qc prop_chunksOf_all_n) + , ("chunksOf/last <= n", qc prop_chunksOf_last_less_n) + , ("chunksOf/preserve", qc prop_chunksOf_preserve) + , ("splitPlaces/lengths", qc prop_splitPlaces_lengths) + , ("splitPlaces/last <= n", qc prop_splitPlaces_last_less_n) + , ("splitPlaces/preserve", qc prop_splitPlaces_preserve) + , ("splitPlaces/chunksOf", qc prop_splitPlaces_chunksOf) + , ("splitPlacesB/length", qc prop_splitPlacesB_length) + , ("splitPlacesB/last <= n", qc prop_splitPlacesB_last_less_n) + , ("splitPlacesB/preserve", qc prop_splitPlacesB_preserve) + , ("lines", qc prop_lines) + , ("wordsBy/words", qc prop_wordsBy_words) + , ("linesBy/lines", qc prop_linesBy_lines) + , ("chop/group", qc prop_chop_group) + , ("chop/words", qc prop_chop_words) + , ("divvy/evenly", qc prop_divvy_evenly) + , ("divvy/discard_remainder", qc prop_divvy_discard_remainder) + , ("divvy/outputlists_allsame_length", qc prop_divvy_outputlists_allsame_length) + , ("divvy/output_are_sublists", qc prop_divvy_output_are_sublists) + , ("divvy/heads", qc prop_divvy_heads) + ] + +prop_default_id :: [Elt] -> Bool +prop_default_id l = split defaultSplitter l == [l] + +prop_match_decompose :: Delim Elt -> [Elt] -> Bool +prop_match_decompose d l = maybe True ((==l) . uncurry (++)) $ matchDelim (unDelim d) l + +isDelimMatch :: Delim Elt -> [Elt] -> Bool +isDelimMatch d l = matchDelim (unDelim d) l == Just (l,[]) + +prop_match_yields_delim :: Delim Elt -> [Elt] -> Bool +prop_match_yields_delim d l = + case matchDelim (unDelim d) l of + Nothing -> True + Just (del,rest) -> isDelimMatch d del + +prop_splitInternal_lossless :: Delim Elt -> [Elt] -> Bool +prop_splitInternal_lossless d l = concatMap fromElem (splitInternal (unDelim d) l) == l + +prop_splitInternal_yields_delims :: Delim Elt -> [Elt] -> Bool +prop_splitInternal_yields_delims d l = + all (isDelimMatch d) $ [ del | (Delim del) <- splitInternal d' l ] + where d' = unDelim d + +prop_splitInternal_text_not_delims :: Delim Elt -> [Elt] -> Bool +prop_splitInternal_text_not_delims d l = + all (not . isDelimMatch d) $ [ ch | (Text ch) <- splitInternal d' l ] + where d' = unDelim d + +noConsecDelims :: SplitList Elt -> Bool +noConsecDelims [] = True +noConsecDelims [x] = True +noConsecDelims (Delim _ : Delim _ : _) = False +noConsecDelims (_ : xs) = noConsecDelims xs + +prop_doCondense_no_consec_delims :: SplitList Elt -> Bool +prop_doCondense_no_consec_delims l = noConsecDelims $ doCondense Condense l + +prop_insBlanks_no_consec_delims :: SplitList Elt -> Bool +prop_insBlanks_no_consec_delims l = noConsecDelims $ insertBlanks Condense l + +prop_insBlanks_fl_not_delim :: SplitList Elt -> Bool +prop_insBlanks_fl_not_delim l = + case insertBlanks Condense l of + [] -> True + xs -> (not . isDelim $ head xs) && (not . isDelim $ last xs) + +prop_mergeL_no_delims :: SplitList Elt -> Bool +prop_mergeL_no_delims = all (not . isDelim) . mergeLeft . insertBlanks Condense + +prop_mergeR_no_delims :: SplitList Elt -> Bool +prop_mergeR_no_delims = all (not . isDelim) . mergeRight . insertBlanks Condense + +getDelims :: Splitter Elt -> [Elt] -> [[Elt]] +getDelims s l = [ d | Delim d <- splitInternal (delimiter s) l ] + +getTexts :: Splitter Elt -> [Elt] -> [[Elt]] +getTexts s l = [ c | Text c <- splitInternal (delimiter s) l ] + +prop_oneOf :: [Elt] -> [Elt] -> Bool +prop_oneOf elts l = all ((==1) . length) ds && all ((`elem` elts) . head) ds + where ds = getDelims (oneOf elts) l + +prop_oneOf_not_text :: [Elt] -> [Elt] -> Bool +prop_oneOf_not_text elts l = all (not . (`elem` elts)) (concat cs) + where cs = getTexts (oneOf elts) l + +prop_onSublist :: [Elt] -> [Elt] -> Bool +prop_onSublist sub l = all (==sub) $ getDelims (onSublist sub) l + +prop_onSublist_not_text :: [Elt] -> [Elt] -> Property +prop_onSublist_not_text sub l = + (not . null $ sub) ==> + all (not . isInfixOf sub) $ getTexts (onSublist sub) l + +prop_whenElt :: (Fun Elt Bool) -> [Elt] -> Bool +prop_whenElt (Fun _ p) l = all ((==1) . length) ds && all (p . head) ds + where ds = getDelims (whenElt p) l + +prop_whenElt_not_text :: (Fun Elt Bool) -> [Elt] -> Bool +prop_whenElt_not_text (Fun _ p) l = all (not . p) (concat cs) + where cs = getTexts (whenElt p) l + +process :: Splitter Elt -> [Elt] -> SplitList Elt +process s = postProcess s . splitInternal (delimiter s) + +prop_dropDelims :: Splitter Elt -> [Elt] -> Bool +prop_dropDelims s l = all (not . isDelim) (process (dropDelims s) l) + +prop_keepDelimsL_no_delims :: Splitter Elt -> [Elt] -> Bool +prop_keepDelimsL_no_delims s l = all (not . isDelim) (process (keepDelimsL s) l) + +prop_keepDelimsL_match :: Splitter Elt -> NonEmptyList Elt -> Bool +prop_keepDelimsL_match s (NonEmpty l) = + all (isJust . matchDelim (delimiter s)) [ c | Text c <- tail p ] + where p = process (keepDelimsL s) l + +prop_keepDelimsR_no_delims :: Splitter Elt -> [Elt] -> Bool +prop_keepDelimsR_no_delims s l = all (not . isDelim) (process (keepDelimsR s) l) + +prop_keepDelimsR_match :: Splitter Elt -> NonEmptyList Elt -> Bool +prop_keepDelimsR_match s (NonEmpty l) = + all (any (isJust . matchDelim (delimiter s)) . tails) + [ c | Text c <- init p ] + where p = process (keepDelimsR s) l + +prop_condense_no_consec_delims :: Splitter Elt -> [Elt] -> Bool +prop_condense_no_consec_delims s l = noConsecDelims $ process (condense s) l + +prop_condense_all_delims :: Splitter Elt -> [Elt] -> Bool +prop_condense_all_delims s l = all allDelims p + where p = [ d | Delim d <- process (condense s) l ] + allDelims t = all isDelim (splitInternal (delimiter s) t) + +prop_dropInitBlank :: Splitter Elt -> NonEmptyList Elt -> Bool +prop_dropInitBlank s (NonEmpty l) = head p /= Text [] + where p = process (dropInitBlank $ s { delimPolicy = Keep } ) l + +prop_dropFinalBlank :: Splitter Elt -> NonEmptyList Elt -> Bool +prop_dropFinalBlank s (NonEmpty l) = last p /= Text [] + where p = process (dropFinalBlank $ s { delimPolicy = Keep } ) l + +prop_dropBlanks :: Splitter Elt -> [Elt] -> Bool +prop_dropBlanks s = null . filter (== (Text [])) . process (dropBlanks s) + +prop_startsWith :: [Elt] -> NonEmptyList Elt -> Bool +prop_startsWith s (NonEmpty l) = all (s `isPrefixOf`) (tail $ split (startsWith s) l) + +prop_startsWithOneOf :: [Elt] -> NonEmptyList Elt -> Bool +prop_startsWithOneOf elts (NonEmpty l) = all ((`elem` elts) . head) (tail $ split (startsWithOneOf elts) l) + +prop_endsWith :: [Elt] -> NonEmptyList Elt -> Bool +prop_endsWith s (NonEmpty l) = all (s `isSuffixOf`) (init $ split (endsWith s) l) + +prop_endsWithOneOf :: [Elt] -> NonEmptyList Elt -> Bool +prop_endsWithOneOf elts (NonEmpty l) = all ((`elem` elts) . last) (init $ split (endsWithOneOf elts) l) + +prop_splitOn_right_inv :: [Elt] -> [Elt] -> Bool +prop_splitOn_right_inv x l = intercalate x (splitOn x l) == l + +{- This property fails: for example, + + splitOn "dd" (intercalate "dd" ["d",""]) == ["","d"] + + so it's not enough just to say that the delimiter is not an infix of + any elements of l! + + +prop_splitOn_left_inv :: [Elt] -> NonEmptyList [Elt] -> Property +prop_splitOn_left_inv x (NonEmpty ls) = not (any (x `isInfixOf`) ls) ==> + splitOn x (intercalate x ls) == ls +-} + +-- Note, the below property is in fact logically entailed by +-- prop_splitOn_right_inv, but we keep it here just for kicks. +prop_splitOn_intercalate_idem :: [Elt] -> [[Elt]] -> Bool +prop_splitOn_intercalate_idem x ls = f (f ls) == f ls + where f = splitOn x . intercalate x + +prop_splitOn_empty_delim :: [Elt] -> Bool +prop_splitOn_empty_delim ls = splitOn [] ls == [] : map (:[]) ls + +prop_split_empty_delim_drop :: [Elt] -> Bool +prop_split_empty_delim_drop ls + = split (dropDelims . dropBlanks $ onSublist []) ls == map (:[]) ls + +prop_chunksOf_all_n :: Positive Int -> NonEmptyList Elt -> Bool +prop_chunksOf_all_n (Positive n) (NonEmpty l) = all ((==n) . length) (init $ chunksOf n l) + +prop_chunksOf_last_less_n :: Positive Int -> NonEmptyList Elt -> Bool +prop_chunksOf_last_less_n (Positive n) (NonEmpty l) = (<=n) . length . last $ chunksOf n l + +prop_chunksOf_preserve :: Positive Int -> [Elt] -> Bool +prop_chunksOf_preserve (Positive n) l = concat (chunksOf n l) == l + +prop_splitPlaces_lengths :: [NonNegative Int] -> [Elt] -> Bool +prop_splitPlaces_lengths ps = and . mInit . zipWith (==) ps' . map length . splitPlaces ps' + where ps' = map unNN ps + +prop_splitPlaces_last_less_n :: NonEmptyList (NonNegative Int) -> NonEmptyList Elt -> Bool +prop_splitPlaces_last_less_n (NonEmpty ps) (NonEmpty l) = (head $ drop (length l' - 1) ps') >= length (last l') + where l' = splitPlaces ps' l + ps' = map unNN ps + +prop_splitPlaces_preserve :: [NonNegative Integer] -> [Elt] -> Bool +prop_splitPlaces_preserve ps l = concat (splitPlaces ps' l) == genericTake (sum ps') l + where ps' = map unNN ps + +prop_splitPlaces_chunksOf :: Positive Int -> [Elt] -> Bool +prop_splitPlaces_chunksOf (Positive n) l = splitPlaces (repeat n) l == chunksOf n l + +prop_splitPlacesB_length :: [NonNegative Int] -> [Elt] -> Bool +prop_splitPlacesB_length ps xs = length ps' == length (splitPlacesBlanks ps' xs) + where ps' = map unNN ps + +prop_splitPlacesB_last_less_n :: NonEmptyList (NonNegative Int) -> NonEmptyList Elt -> Bool +prop_splitPlacesB_last_less_n (NonEmpty ps) (NonEmpty l) = (head $ drop (length l' - 1) ps') >= length (last l') + where l' = splitPlacesBlanks ps' l + ps' = map unNN ps + +prop_splitPlacesB_preserve :: [NonNegative Integer] -> [Elt] -> Bool +prop_splitPlacesB_preserve ps l = concat (splitPlacesBlanks ps' l) == genericTake (sum ps') l + where ps' = map unNN ps + +unNN :: NonNegative a -> a +unNN (NonNegative x) = x + +mInit :: [a] -> [a] +mInit [] = [] +mInit [x] = [] +mInit (x:xs) = x : init xs + +newtype EltWS = EltWS { unEltWS :: Char } + deriving (Eq, Show) + +instance Arbitrary EltWS where + arbitrary = elements (map EltWS "abcde \n") + +prop_lines :: [EltWS] -> Bool +prop_lines s = lines s' == endBy "\n" s' + where s' = map unEltWS s + +prop_wordsBy_words :: [EltWS] -> Bool +prop_wordsBy_words s = words s' == wordsBy isSpace s' + where s' = map unEltWS s + +prop_linesBy_lines :: [EltWS] -> Bool +prop_linesBy_lines s = lines s' == linesBy (=='\n') s' + where s' = map unEltWS s + +prop_chop_group :: [Elt] -> Bool +prop_chop_group s = chop (\xs@(x:_) -> span (==x) xs) s == group s + +prop_chop_words :: [EltWS] -> Bool +prop_chop_words s = words s' == (filter (not . null) . chop (span (not . isSpace) . dropWhile isSpace) $ s') + where s' = map unEltWS s + +prop_divvy_evenly :: [Elt] -> Positive Int -> Bool +prop_divvy_evenly elems (Positive n) = concat (divvy n n elems') == elems' + where + -- Chop off the smallest possible tail of elems to make the length + -- evenly divisible by n. This property used to have a + -- precondition (length elemens `mod` n == 0), but that led to too + -- many discarded test cases and occasional test suite failures. + elems' = take ((length elems `div` n) * n) elems + +prop_divvy_discard_remainder :: [Elt] -> Positive Int -> Bool +prop_divvy_discard_remainder elems (Positive n) = + concat (divvy n n elems) == (reverse . drop (length elems `mod` n) . reverse $ elems) + +prop_divvy_outputlists_allsame_length :: [Elt] -> Positive Int -> Positive Int -> Bool +prop_divvy_outputlists_allsame_length elems (Positive n) (Positive m) = allSame xs + where + allSame :: [Int] -> Bool + allSame [] = True + allSame zs = and $ map (== head zs) (tail zs) + xs = map length (divvy n m elems) + +prop_divvy_output_are_sublists :: [Elt] -> Positive Int -> Positive Int -> Bool +prop_divvy_output_are_sublists elems (Positive n) (Positive m) = and $ map (\x -> isInfixOf x elems) xs + where xs = divvy n m elems + +takeEvery :: Int -> [a] -> [a] +takeEvery _ [] = [] +takeEvery n lst = (map head . chunksOf n) $ lst + +initNth :: Int -> [a] -> [a] +initNth _ [] = [] +initNth n lst = (reverse . drop n . reverse) $ lst + +prop_divvy_heads :: [Elt] -> Positive Int -> Positive Int -> Bool +prop_divvy_heads [] _ _ = True +prop_divvy_heads elems (Positive n) (Positive m) = hds1 == hds2 + where hds1 = takeEvery m (initNth (n - 1) elems) + hds2 = map head $ divvy n m elems +