{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
-- ------------------------------------------------------------
module Main
where
import Control.Arrow
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.String (IsString (..))
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import System.Exit (ExitCode (..),
exitWith)
import Text.Regex.XMLSchema.Generic
import Text.Regex.XMLSchema.Generic.Regex
import Text.Regex.XMLSchema.Generic.StringLike
import Test.HUnit
-- ------------------------------------------------------------
newtype Test' a = Test' {unTest' :: Test}
type BS = B.ByteString
type BL = BL.ByteString
type Text = T.Text
type TextL = TL.Text
-- ------------------------------------------------------------
parseTestsStdLatin1 :: forall s . StringLike s => Test' s
parseTestsStdLatin1 = parseTestsStd' testsLatin1
parseTestsStdUnicode :: forall s . StringLike s => Test' s
parseTestsStdUnicode = parseTestsStd' testsUnicode
parseTestsStd' :: forall s . StringLike s => [(String, String)] -> Test' s
parseTestsStd' tests
= Test' $
TestLabel "standard XML parse tests" $
TestList $
map parseTest $ tests
where
parseTest (re0, rep)
= TestCase $
assertEqual (show re ++ " must be parsed as " ++ show rep)
rep
(show . parseRegexExt $ re)
where
re :: s
re = fromString re0
testsLatin1 :: [(String, String)]
testsLatin1
= [ ("", "()")
, (".", ".")
, (".*", "(.*)")
, ("(())", "()")
, ("(a*)*", "(a*)")
, ("(a*)+", "(a*)")
, ("(a+)*", "(a*)")
, ("(a+)+", "(a+)")
, ("(a?){2,}", "(a*)")
, ("((a?){2,}){0,}", "(a*)")
, ("((a?){2,}){3,}", "(a*)")
, ("(a{0,}){2,}", "(a*)")
, ("(a{2,}){3,}", "(a{6,})")
, ("[9-0]", "{empty char range}")
, ("[0-9]", "[0-9]")
, ("[0-99-0]", "[0-9]")
, ("[abc]", "[a-c]")
, ("[abc-[b]]", "[ac]" )
, ("a|b|c|d", "[a-d]" )
, ("(a|b)|c", "[a-c]" )
, ("a|(b|c)", "[a-c]" )
, ("abc", "(a(bc))" ) -- seq is right ass
, ("({1}a+)", "({1}(a+))" ) -- extension: labeled subexpressions
, ("({1}({2}({3}a)))", "({1}({2}({3}a)))" )
]
testsUnicode :: [(String, String)]
testsUnicode
= [ ("[\0-\1114111]", "\\a")
, ("[\0-\1114111]|[0-9]", "\\a")
, ("[\0-\1114110]", "[�-]" )
]
parseTestsExtLatin1 :: forall s . StringLike s => Test' s
parseTestsExtLatin1 = parseTestsExt' testsExtLatin1
parseTestsExtUnicode :: forall s . StringLike s => Test' s
parseTestsExtUnicode = parseTestsExt' testsExtUnicode
parseTestsExt' :: forall s . StringLike s => [(String, String)] -> Test' s
parseTestsExt' tests
= Test' $
TestLabel "extended parse tests" $
TestList $
map parseTest $ tests
where
parseTest (re0, rep)
= TestCase $
assertEqual (show re ++ " must be parsed as " ++ show rep)
rep
(show . parseRegexExt $ re)
where
re :: s
re = fromString re0
testsExtLatin1 :: [(String, String)]
testsExtLatin1
= [ ("", "()")
, (".", ".")
, (".*", "(.*)")
, ("\\a", "\\a")
, ("\\A", "\\A")
, ("(())", "()")
, ("(a*)*", "(a*)")
, ("(a*)+", "(a*)")
, ("(a+)*", "(a*)")
, ("(a+)+", "(a+)")
, ("(a?){2,}", "(a*)")
, ("((a?){2,}){0,}", "(a*)")
, ("((a?){2,}){3,}", "(a*)")
, ("(a{0,}){2,}", "(a*)")
, ("(a{2,}){3,}", "(a{6,})")
, ("[9-0]", "{empty char range}")
, ("[0-9]", "[0-9]")
, ("[0-99-0]", "[0-9]")
, ("[abc]", "[a-c]")
, ("[abc-[b]]", "[ac]" )
, ("a|b|c|d", "[a-d]" )
, ("(a|b)|c", "[a-c]" )
, ("a|(b|c)", "[a-c]" )
, ("abc", "(a(bc))" ) -- seq is right ass
, ("a*{^}b*", "((a*){^}(b*))" ) -- extension: exor
, ("a*{^}b*{^}c*", "((a*){^}((b*){^}(c*)))")
, ("a*{&}b*", "((a*){&}(b*))" ) -- extension: intersection
, ("a*{&}b*{&}c*", "((a*){&}((b*){&}(c*)))")
, ("a*{\\}b*", "((a*){\\}(b*))" ) -- extension: set difference
, ("a*{\\}b*{\\}c*", "(((a*){\\}(b*)){\\}(c*))" )
, ("(a|b)*{\\}(.*aa.*)", "(([ab]*){\\}((.*)(a(a(.*)))))" )
, ("({1}a+)", "({1}(a+))" ) -- extension: labeled subexpressions
, ("({1}({2}({3}a)))", "({1}({2}({3}a)))" )
, ("({1}do){|}({2}[a-z]+)", "(({1}(do)){|}({2}([a-z]+)))" ) -- deterministic choice of submatches
, ("a{:}b{:}c", "(a{:}(b{:}c))" ) -- interleave
]
testsExtUnicode :: [(String, String)]
testsExtUnicode
= [ ("[\0-\1114111]", "\\a")
, ("[\0-\1114111]|[0-9]", "\\a")
, ("[\0-\1114110]", "[�-]" )
, ("[abc-[b]]", "[ac]" )
, ("a|b|c|d", "[a-d]" )
, ("(a|b)|c", "[a-c]" )
, ("a|(b|c)", "[a-c]" )
, ("abc", "(a(bc))" ) -- seq is right ass
, ("a*{^}b*", "((a*){^}(b*))" ) -- extension: exor
, ("a*{^}b*{^}c*", "((a*){^}((b*){^}(c*)))")
, ("a*{&}b*", "((a*){&}(b*))" ) -- extension: intersection
, ("a*{&}b*{&}c*", "((a*){&}((b*){&}(c*)))")
, ("a*{\\}b*", "((a*){\\}(b*))" ) -- extension: set difference
, ("a*{\\}b*{\\}c*", "(((a*){\\}(b*)){\\}(c*))" )
, ("(a|b)*{\\}(.*aa.*)", "(([ab]*){\\}((.*)(a(a(.*)))))" )
, ("({1}a+)", "({1}(a+))" ) -- extension: labeled subexpressions
, ("({1}({2}({3}a)))", "({1}({2}({3}a)))" )
, ("({1}do){|}({2}[a-z]+)", "(({1}(do)){|}({2}([a-z]+)))" ) -- deterministic choice of submatches
, ("a{:}b{:}c", "(a{:}(b{:}c))" ) -- interleave
]
simpleMatchTests :: forall a . StringLike a => Test' a
simpleMatchTests
= Test' $
TestLabel "simple match tests" $
TestList $
concatMap matchTest $ testsMatch
where
matchTest :: (String, [String], [String]) -> [Test]
matchTest (re0, ok, er)
= map (matchOK re . fromString) ok
++
map (matchErr re . fromString) er
where
re :: a
re = fromString re0
matchOK :: a -> a -> Test
matchOK re xs
= TestCase $ assertBool (show xs ++ " must match " ++ show re) (matchExt re xs)
matchErr re xs
= TestCase $ assertBool (show xs ++ " must not match " ++ show re) (not (matchExt re xs))
testsMatch :: [(String, [String], [String])]
testsMatch
= [ ( ""
, [""]
, ["a"]
)
, ( "a"
, ["a"]
, ["", "b", "ab"]
)
, ( "()"
, [""]
, ["a"]
)
, ( "ab"
, ["ab"]
, ["", "b", "abc"]
)
, ( "."
, [".","a","\0","\1114111"]
, ["\n","\r","",".."]
)
, ( "\\a"
, [".","a","\n","\r","\0","\1114111"]
, ["",".."]
)
, ( "\\A"
, ["",".","a","\n","\r","\0","\1114111",".."]
, []
)
, ( "a*"
, ["", "a", "aa"]
, ["b", "ab", "aab"]
)
, ( "a+"
, ["a", "aa", "aaa"]
, ["", "b", "ab"]
)
, ( "a?"
, ["", "a"]
, ["b", "ab"]
)
, ( "a{2}"
, ["aa"]
, ["", "a", "aaa"]
)
, ( "a{2,}"
, ["aa","aaa"]
, ["", "a", "aaab"]
)
, ( "a{2,4}"
, ["aa", "aaa", "aaaa"]
, ["", "a", "aaaaa", "ab"]
)
, ( "a|b"
, ["a", "b"]
, ["", "c", "ab", "abc"]
)
, ( "[0-9]"
, ["0", "5", "9"]
, ["", "a", "00"]
)
, ( "[^0-9]"
, ["a"]
, ["", "0", "9", "00"])
, ( "\32"
, [" "]
, []
)
, ( "[\0-\1114111]"
, ["\0","\1114111","a"]
, ["","aaa"]
)
, ( "[^\0-\1114111]"
, []
, ["","aaa","\0","\1114111","a"]
)
, ( ".*a.*|.*b.*|.*c.*"
, ["a", "abc", "acdc"]
, ["", "dddd"]
)
, ( ".*a.*{&}.*b.*{&}.*c.*"
, ["abc", "abcd", "abcabcd"]
, ["", "a", "bc", "acdc", "dddd"]
)
, ( ".*a.*{&}.*b.*{&}.*c.*{&}.{3}" -- all permutations of "abc"
, ["abc", "acb", "bac", "bca", "cab", "cba"]
, ["", "a", "bc", "acd", "aaaa", "aba"]
)
, ( ".*a.*{&}.*b.*{&}.*c.*" -- all words containing at least 1 a, 1 b and 1 c
, ["abc", "acb", "bac", "bca", "cab", "cba", "abcd", "abcabc"]
, ["", "a", "bc", "acd", "aaaa"]
)
, ( ".*a.*{^}.*b.*" -- all words containing at least 1 a or 1 b but not both a's and b's
, ["a", "b", "ac", "bc", "aaaa", "bbb", "aacc", "ccbb", "acdc"]
, ["", "ab", "abc", "dddd"]
)
, ( "/[*](.*{\\}(.*[*]/.*))[*]/" -- single line C comment of form /*...*/, but without any */ in the comment body
-- this is the way to specify none greedy expessions
-- if multi-line comment are required, substitute .* by \A, so newlines are allowed
, ["/**/","/***/","/*x*/","/*///*/"]
, ["", "/", "/*", "/*/", "/**/*/", "/*xxx*/xxx*/"]
)
, ( "a{:}b{:}c"
, ["abc", "acb", "bac", "bca", "cab", "cba"]
, ["", "a", "ab", "abcc", "abca", "aba"]
)
]
-- ------------------------------------------------------------
simpleSplitTests :: forall a . StringLike a => Test' a
simpleSplitTests
= Test' $
TestLabel "simple split tests" $
TestList $
map splitTest $ testsSplit
where
splitTest (re0, inp0, tok0, rest0)
= TestCase $
assertEqual
("split " ++ show re ++ " " ++ show inp0 ++ " = " ++ show (tok, rest))
(tok, rest)
(split re (fromString inp0))
where
re, tok, rest :: a
re = fromString re0
tok = fromString tok0
rest = fromString rest0
testsSplit :: [(String, String, String, String)]
testsSplit
= [ ("", "a", "", "a" )
, ("a*b", "abc", "ab", "c" )
, ("a*", "bc", "", "bc" )
, ("a+", "bc", "", "bc" )
, ("[", "bc", "", "bc" )
, ("a{2}", "aaa", "aa", "a" )
, ("a{2,}", "aaa", "aaa", "" )
, ("a|b", "ab", "a", "b" )
, ("a|b*", "bbba", "bbb", "a" )
, ("abc", "abcd", "abc", "d" )
]
-- ------------------------------------------------------------
simpleTokenTests :: forall a . StringLike a => Test' a
simpleTokenTests
= Test' $
TestLabel "simple token tests" $
TestList $
map tokenTest $ testsToken
where
tokenTest (re0, inp0, toks0)
= TestCase $
assertEqual
("tokenize " ++ show re ++ " " ++ show inp ++ " = " ++ show toks)
toks
(tokenize re inp)
where
re, inp :: a
re = fromString re0
inp = fromString inp0
toks :: [a]
toks = map fromString toks0
testsToken :: [(String, String, [String])]
testsToken
= [ ("", "", [] )
, ("a", "aba", ["a", "a"] )
, ("a", "b", [] )
, ("a", "ba", ["a"] )
, ("a*", "a", ["a"] )
, ("a*", "ba", ["","a"] )
, ("a*", "aba", ["a", "a"] )
, ("a*", "abba", ["a", "", "a"] )
, ("a+", "abba", ["a", "a"] )
, ("a*b", "abba", ["ab", "b"] )
, (".*", "a\n\nb", ["a", "", "b"] )
, (".*", "a\n\nb\n", ["a", "", "b"] )
, ("\\w+", "a\n\nb\n", ["a", "b"] )
, ("\\w|ab", "aaa\n\nabc\n", ["a", "a", "a", "ab", "c"] )
, ("\\w|ab", "aaa abc", ["a", "a", "a", "ab", "c"] )
]
-- ------------------------------------------------------------
genericTest :: (forall a . StringLike a => Test' a) -> Test
genericTest t
= TestList $
[ TestLabel "Test with 'String'" $ unTest' (t :: Test' String)
, TestLabel "Test with 'Text'" $ unTest' (t :: Test' Text)
, TestLabel "Test with 'Text.Lazy'" $ unTest' (t :: Test' TextL)
, TestLabel "Test with 'ByteString'" $ unTest' (t :: Test' BS)
, TestLabel "Test with 'ByteString.Lazy'" $ unTest' (t :: Test' BL)
]
unicodeTest :: (forall a . StringLike a => Test' a) -> Test
unicodeTest t
= TestList $
[ TestLabel "Test with 'String'" $ unTest' (t :: Test' String)
, TestLabel "Test with 'Text'" $ unTest' (t :: Test' Text)
, TestLabel "Test with 'Text.Lazy'" $ unTest' (t :: Test' TextL)
]
allTests :: Test
allTests
= TestList
[ genericTest parseTestsStdLatin1
, unicodeTest parseTestsStdUnicode
, genericTest parseTestsExtLatin1
, unicodeTest parseTestsExtUnicode
, genericTest simpleMatchTests
, genericTest simpleSplitTests
, genericTest simpleTokenTests
]
main :: IO ()
main
= do
c <- runTestTT allTests
putStrLn $ show c
let errs = errors c
fails = failures c
exitWith (codeGet errs fails)
codeGet :: Int -> Int -> ExitCode
codeGet errs fails
| fails > 0 = ExitFailure 2
| errs > 0 = ExitFailure 1
| otherwise = ExitSuccess
-- ------------------------------------------------------------
deltaTrc :: StringLike s => s -> GenRegex s -> [(s, GenRegex s)]
deltaTrc s@(uncons -> Just (c, cs)) re
= (s, re)
:
( if isZero re'
then [(emptyS,re')]
else deltaTrc cs re'
)
where
re' = delta1 c s re
deltaTrc _ re = [(emptyS, re)]
matchTrc :: StringLike s => s -> s -> (Bool, [(s, GenRegex s)])
matchTrc re s = (nullable . snd . last $ res, res)
where
res = deltaTrc s (parseRegex re)
trcMatch :: StringLike s => s -> s -> IO()
trcMatch re = putStrLn . showTrc . matchTrc re
where
showTrc =
( (show >>> (++ "\n"))
***
(concatMap ( ( (toString >>> (++ "\t"))
***
(show >>> (++"\n"))
)
>>> uncurry (++)
)
)
)
>>>
uncurry (flip (++))
-- ------------------------------------------------------------