{-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} -- ------------------------------------------------------------ module Main where import Control.Arrow import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as BL import Data.String (IsString (..)) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import System.Exit (ExitCode (..), exitWith) import Text.Regex.XMLSchema.Generic import Text.Regex.XMLSchema.Generic.Regex import Text.Regex.XMLSchema.Generic.StringLike import Test.HUnit -- ------------------------------------------------------------ newtype Test' a = Test' {unTest' :: Test} type BS = B.ByteString type BL = BL.ByteString type Text = T.Text type TextL = TL.Text -- ------------------------------------------------------------ parseTestsStdLatin1 :: forall s . StringLike s => Test' s parseTestsStdLatin1 = parseTestsStd' testsLatin1 parseTestsStdUnicode :: forall s . StringLike s => Test' s parseTestsStdUnicode = parseTestsStd' testsUnicode parseTestsStd' :: forall s . StringLike s => [(String, String)] -> Test' s parseTestsStd' tests = Test' $ TestLabel "standard XML parse tests" $ TestList $ map parseTest $ tests where parseTest (re0, rep) = TestCase $ assertEqual (show re ++ " must be parsed as " ++ show rep) rep (show . parseRegexExt $ re) where re :: s re = fromString re0 testsLatin1 :: [(String, String)] testsLatin1 = [ ("", "()") , (".", ".") , (".*", "(.*)") , ("(())", "()") , ("(a*)*", "(a*)") , ("(a*)+", "(a*)") , ("(a+)*", "(a*)") , ("(a+)+", "(a+)") , ("(a?){2,}", "(a*)") , ("((a?){2,}){0,}", "(a*)") , ("((a?){2,}){3,}", "(a*)") , ("(a{0,}){2,}", "(a*)") , ("(a{2,}){3,}", "(a{6,})") , ("[9-0]", "{empty char range}") , ("[0-9]", "[0-9]") , ("[0-99-0]", "[0-9]") , ("[abc]", "[a-c]") , ("[abc-[b]]", "[ac]" ) , ("a|b|c|d", "[a-d]" ) , ("(a|b)|c", "[a-c]" ) , ("a|(b|c)", "[a-c]" ) , ("abc", "(a(bc))" ) -- seq is right ass , ("({1}a+)", "({1}(a+))" ) -- extension: labeled subexpressions , ("({1}({2}({3}a)))", "({1}({2}({3}a)))" ) ] testsUnicode :: [(String, String)] testsUnicode = [ ("[\0-\1114111]", "\\a") , ("[\0-\1114111]|[0-9]", "\\a") , ("[\0-\1114110]", "[�-􏿾]" ) ] parseTestsExtLatin1 :: forall s . StringLike s => Test' s parseTestsExtLatin1 = parseTestsExt' testsExtLatin1 parseTestsExtUnicode :: forall s . StringLike s => Test' s parseTestsExtUnicode = parseTestsExt' testsExtUnicode parseTestsExt' :: forall s . StringLike s => [(String, String)] -> Test' s parseTestsExt' tests = Test' $ TestLabel "extended parse tests" $ TestList $ map parseTest $ tests where parseTest (re0, rep) = TestCase $ assertEqual (show re ++ " must be parsed as " ++ show rep) rep (show . parseRegexExt $ re) where re :: s re = fromString re0 testsExtLatin1 :: [(String, String)] testsExtLatin1 = [ ("", "()") , (".", ".") , (".*", "(.*)") , ("\\a", "\\a") , ("\\A", "\\A") , ("(())", "()") , ("(a*)*", "(a*)") , ("(a*)+", "(a*)") , ("(a+)*", "(a*)") , ("(a+)+", "(a+)") , ("(a?){2,}", "(a*)") , ("((a?){2,}){0,}", "(a*)") , ("((a?){2,}){3,}", "(a*)") , ("(a{0,}){2,}", "(a*)") , ("(a{2,}){3,}", "(a{6,})") , ("[9-0]", "{empty char range}") , ("[0-9]", "[0-9]") , ("[0-99-0]", "[0-9]") , ("[abc]", "[a-c]") , ("[abc-[b]]", "[ac]" ) , ("a|b|c|d", "[a-d]" ) , ("(a|b)|c", "[a-c]" ) , ("a|(b|c)", "[a-c]" ) , ("abc", "(a(bc))" ) -- seq is right ass , ("a*{^}b*", "((a*){^}(b*))" ) -- extension: exor , ("a*{^}b*{^}c*", "((a*){^}((b*){^}(c*)))") , ("a*{&}b*", "((a*){&}(b*))" ) -- extension: intersection , ("a*{&}b*{&}c*", "((a*){&}((b*){&}(c*)))") , ("a*{\\}b*", "((a*){\\}(b*))" ) -- extension: set difference , ("a*{\\}b*{\\}c*", "(((a*){\\}(b*)){\\}(c*))" ) , ("(a|b)*{\\}(.*aa.*)", "(([ab]*){\\}((.*)(a(a(.*)))))" ) , ("({1}a+)", "({1}(a+))" ) -- extension: labeled subexpressions , ("({1}({2}({3}a)))", "({1}({2}({3}a)))" ) , ("({1}do){|}({2}[a-z]+)", "(({1}(do)){|}({2}([a-z]+)))" ) -- deterministic choice of submatches , ("a{:}b{:}c", "(a{:}(b{:}c))" ) -- interleave ] testsExtUnicode :: [(String, String)] testsExtUnicode = [ ("[\0-\1114111]", "\\a") , ("[\0-\1114111]|[0-9]", "\\a") , ("[\0-\1114110]", "[�-􏿾]" ) , ("[abc-[b]]", "[ac]" ) , ("a|b|c|d", "[a-d]" ) , ("(a|b)|c", "[a-c]" ) , ("a|(b|c)", "[a-c]" ) , ("abc", "(a(bc))" ) -- seq is right ass , ("a*{^}b*", "((a*){^}(b*))" ) -- extension: exor , ("a*{^}b*{^}c*", "((a*){^}((b*){^}(c*)))") , ("a*{&}b*", "((a*){&}(b*))" ) -- extension: intersection , ("a*{&}b*{&}c*", "((a*){&}((b*){&}(c*)))") , ("a*{\\}b*", "((a*){\\}(b*))" ) -- extension: set difference , ("a*{\\}b*{\\}c*", "(((a*){\\}(b*)){\\}(c*))" ) , ("(a|b)*{\\}(.*aa.*)", "(([ab]*){\\}((.*)(a(a(.*)))))" ) , ("({1}a+)", "({1}(a+))" ) -- extension: labeled subexpressions , ("({1}({2}({3}a)))", "({1}({2}({3}a)))" ) , ("({1}do){|}({2}[a-z]+)", "(({1}(do)){|}({2}([a-z]+)))" ) -- deterministic choice of submatches , ("a{:}b{:}c", "(a{:}(b{:}c))" ) -- interleave ] simpleMatchTests :: forall a . StringLike a => Test' a simpleMatchTests = Test' $ TestLabel "simple match tests" $ TestList $ concatMap matchTest $ testsMatch where matchTest :: (String, [String], [String]) -> [Test] matchTest (re0, ok, er) = map (matchOK re . fromString) ok ++ map (matchErr re . fromString) er where re :: a re = fromString re0 matchOK :: a -> a -> Test matchOK re xs = TestCase $ assertBool (show xs ++ " must match " ++ show re) (matchExt re xs) matchErr re xs = TestCase $ assertBool (show xs ++ " must not match " ++ show re) (not (matchExt re xs)) testsMatch :: [(String, [String], [String])] testsMatch = [ ( "" , [""] , ["a"] ) , ( "a" , ["a"] , ["", "b", "ab"] ) , ( "()" , [""] , ["a"] ) , ( "ab" , ["ab"] , ["", "b", "abc"] ) , ( "." , [".","a","\0","\1114111"] , ["\n","\r","",".."] ) , ( "\\a" , [".","a","\n","\r","\0","\1114111"] , ["",".."] ) , ( "\\A" , ["",".","a","\n","\r","\0","\1114111",".."] , [] ) , ( "a*" , ["", "a", "aa"] , ["b", "ab", "aab"] ) , ( "a+" , ["a", "aa", "aaa"] , ["", "b", "ab"] ) , ( "a?" , ["", "a"] , ["b", "ab"] ) , ( "a{2}" , ["aa"] , ["", "a", "aaa"] ) , ( "a{2,}" , ["aa","aaa"] , ["", "a", "aaab"] ) , ( "a{2,4}" , ["aa", "aaa", "aaaa"] , ["", "a", "aaaaa", "ab"] ) , ( "a|b" , ["a", "b"] , ["", "c", "ab", "abc"] ) , ( "[0-9]" , ["0", "5", "9"] , ["", "a", "00"] ) , ( "[^0-9]" , ["a"] , ["", "0", "9", "00"]) , ( "\32" , [" "] , [] ) , ( "[\0-\1114111]" , ["\0","\1114111","a"] , ["","aaa"] ) , ( "[^\0-\1114111]" , [] , ["","aaa","\0","\1114111","a"] ) , ( ".*a.*|.*b.*|.*c.*" , ["a", "abc", "acdc"] , ["", "dddd"] ) , ( ".*a.*{&}.*b.*{&}.*c.*" , ["abc", "abcd", "abcabcd"] , ["", "a", "bc", "acdc", "dddd"] ) , ( ".*a.*{&}.*b.*{&}.*c.*{&}.{3}" -- all permutations of "abc" , ["abc", "acb", "bac", "bca", "cab", "cba"] , ["", "a", "bc", "acd", "aaaa", "aba"] ) , ( ".*a.*{&}.*b.*{&}.*c.*" -- all words containing at least 1 a, 1 b and 1 c , ["abc", "acb", "bac", "bca", "cab", "cba", "abcd", "abcabc"] , ["", "a", "bc", "acd", "aaaa"] ) , ( ".*a.*{^}.*b.*" -- all words containing at least 1 a or 1 b but not both a's and b's , ["a", "b", "ac", "bc", "aaaa", "bbb", "aacc", "ccbb", "acdc"] , ["", "ab", "abc", "dddd"] ) , ( "/[*](.*{\\}(.*[*]/.*))[*]/" -- single line C comment of form /*...*/, but without any */ in the comment body -- this is the way to specify none greedy expessions -- if multi-line comment are required, substitute .* by \A, so newlines are allowed , ["/**/","/***/","/*x*/","/*///*/"] , ["", "/", "/*", "/*/", "/**/*/", "/*xxx*/xxx*/"] ) , ( "a{:}b{:}c" , ["abc", "acb", "bac", "bca", "cab", "cba"] , ["", "a", "ab", "abcc", "abca", "aba"] ) ] -- ------------------------------------------------------------ simpleSplitTests :: forall a . StringLike a => Test' a simpleSplitTests = Test' $ TestLabel "simple split tests" $ TestList $ map splitTest $ testsSplit where splitTest (re0, inp0, tok0, rest0) = TestCase $ assertEqual ("split " ++ show re ++ " " ++ show inp0 ++ " = " ++ show (tok, rest)) (tok, rest) (split re (fromString inp0)) where re, tok, rest :: a re = fromString re0 tok = fromString tok0 rest = fromString rest0 testsSplit :: [(String, String, String, String)] testsSplit = [ ("", "a", "", "a" ) , ("a*b", "abc", "ab", "c" ) , ("a*", "bc", "", "bc" ) , ("a+", "bc", "", "bc" ) , ("[", "bc", "", "bc" ) , ("a{2}", "aaa", "aa", "a" ) , ("a{2,}", "aaa", "aaa", "" ) , ("a|b", "ab", "a", "b" ) , ("a|b*", "bbba", "bbb", "a" ) , ("abc", "abcd", "abc", "d" ) ] -- ------------------------------------------------------------ simpleTokenTests :: forall a . StringLike a => Test' a simpleTokenTests = Test' $ TestLabel "simple token tests" $ TestList $ map tokenTest $ testsToken where tokenTest (re0, inp0, toks0) = TestCase $ assertEqual ("tokenize " ++ show re ++ " " ++ show inp ++ " = " ++ show toks) toks (tokenize re inp) where re, inp :: a re = fromString re0 inp = fromString inp0 toks :: [a] toks = map fromString toks0 testsToken :: [(String, String, [String])] testsToken = [ ("", "", [] ) , ("a", "aba", ["a", "a"] ) , ("a", "b", [] ) , ("a", "ba", ["a"] ) , ("a*", "a", ["a"] ) , ("a*", "ba", ["","a"] ) , ("a*", "aba", ["a", "a"] ) , ("a*", "abba", ["a", "", "a"] ) , ("a+", "abba", ["a", "a"] ) , ("a*b", "abba", ["ab", "b"] ) , (".*", "a\n\nb", ["a", "", "b"] ) , (".*", "a\n\nb\n", ["a", "", "b"] ) , ("\\w+", "a\n\nb\n", ["a", "b"] ) , ("\\w|ab", "aaa\n\nabc\n", ["a", "a", "a", "ab", "c"] ) , ("\\w|ab", "aaa abc", ["a", "a", "a", "ab", "c"] ) ] -- ------------------------------------------------------------ genericTest :: (forall a . StringLike a => Test' a) -> Test genericTest t = TestList $ [ TestLabel "Test with 'String'" $ unTest' (t :: Test' String) , TestLabel "Test with 'Text'" $ unTest' (t :: Test' Text) , TestLabel "Test with 'Text.Lazy'" $ unTest' (t :: Test' TextL) , TestLabel "Test with 'ByteString'" $ unTest' (t :: Test' BS) , TestLabel "Test with 'ByteString.Lazy'" $ unTest' (t :: Test' BL) ] unicodeTest :: (forall a . StringLike a => Test' a) -> Test unicodeTest t = TestList $ [ TestLabel "Test with 'String'" $ unTest' (t :: Test' String) , TestLabel "Test with 'Text'" $ unTest' (t :: Test' Text) , TestLabel "Test with 'Text.Lazy'" $ unTest' (t :: Test' TextL) ] allTests :: Test allTests = TestList [ genericTest parseTestsStdLatin1 , unicodeTest parseTestsStdUnicode , genericTest parseTestsExtLatin1 , unicodeTest parseTestsExtUnicode , genericTest simpleMatchTests , genericTest simpleSplitTests , genericTest simpleTokenTests ] main :: IO () main = do c <- runTestTT allTests putStrLn $ show c let errs = errors c fails = failures c exitWith (codeGet errs fails) codeGet :: Int -> Int -> ExitCode codeGet errs fails | fails > 0 = ExitFailure 2 | errs > 0 = ExitFailure 1 | otherwise = ExitSuccess -- ------------------------------------------------------------ deltaTrc :: StringLike s => s -> GenRegex s -> [(s, GenRegex s)] deltaTrc s@(uncons -> Just (c, cs)) re = (s, re) : ( if isZero re' then [(emptyS,re')] else deltaTrc cs re' ) where re' = delta1 c s re deltaTrc _ re = [(emptyS, re)] matchTrc :: StringLike s => s -> s -> (Bool, [(s, GenRegex s)]) matchTrc re s = (nullable . snd . last $ res, res) where res = deltaTrc s (parseRegex re) trcMatch :: StringLike s => s -> s -> IO() trcMatch re = putStrLn . showTrc . matchTrc re where showTrc = ( (show >>> (++ "\n")) *** (concatMap ( ( (toString >>> (++ "\t")) *** (show >>> (++"\n")) ) >>> uncurry (++) ) ) ) >>> uncurry (flip (++)) -- ------------------------------------------------------------