|
Packit |
5b08af |
{-# LANGUAGE BangPatterns #-}
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
{- |
|
|
Packit |
5b08af |
Module     : Text.Regex.XMLSchema.Generic.Matching
|
|
Packit |
5b08af |
Copyright : Copyright (C) 2014- Uwe Schmidt
|
|
Packit |
5b08af |
License : MIT
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
Maintainer : Uwe Schmidt <uwe@fh-wedel.de>
|
|
Packit |
5b08af |
Stability : stable
|
|
Packit |
5b08af |
Portability: portable
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
Convenient functions for W3C XML Schema Regular Expression Matcher.
|
|
Packit |
5b08af |
For internals see 'Text.Regex.XMLSchema.Generic.Regex'
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
Grammar can be found under <http://www.w3.org/TR/xmlschema11-2/#regexs>
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-}
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
module Text.Regex.XMLSchema.Generic.Matching
|
|
Packit |
5b08af |
( grep
|
|
Packit |
5b08af |
, grepExt
|
|
Packit |
5b08af |
, grepRE
|
|
Packit |
5b08af |
, grepREwithLineNum
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
, match
|
|
Packit |
5b08af |
, matchExt
|
|
Packit |
5b08af |
, matchSubex
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
, sed
|
|
Packit |
5b08af |
, sedExt
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
, split
|
|
Packit |
5b08af |
, splitExt
|
|
Packit |
5b08af |
, splitSubex
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
, tokenize
|
|
Packit |
5b08af |
, tokenizeExt
|
|
Packit |
5b08af |
, tokenize'
|
|
Packit |
5b08af |
, tokenizeExt'
|
|
Packit |
5b08af |
, tokenizeSubex
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
, matchRE
|
|
Packit |
5b08af |
, matchSubexRE
|
|
Packit |
5b08af |
, sedRE
|
|
Packit |
5b08af |
, splitRE
|
|
Packit |
5b08af |
, splitSubexRE
|
|
Packit |
5b08af |
, tokenizeRE
|
|
Packit |
5b08af |
, tokenizeRE'
|
|
Packit |
5b08af |
, tokenizeSubexRE
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
where
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
import Control.Arrow
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
import Data.Maybe
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
import Text.Regex.XMLSchema.Generic.Regex
|
|
Packit |
5b08af |
import Text.Regex.XMLSchema.Generic.RegexParser
|
|
Packit |
5b08af |
import Text.Regex.XMLSchema.Generic.StringLike
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
{-
|
|
Packit |
5b08af |
import Debug.Trace (traceShow)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
trc :: Show a => String -> a -> a
|
|
Packit |
5b08af |
trc msg x = traceShow (msg, x) x
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- -}
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Split a string by taking the longest prefix matching a regular expression.
--
-- @Nothing@ is returned in case there is no matching prefix,
-- else the pair of matched prefix and remaining input is returned.
--
-- The prefix is read from the first entry of the submatch list delivered
-- by 'splitWithRegex'. That entry is selected by a pattern bind in the
-- @Maybe@ monad, so an unexpectedly empty submatch list results in
-- @Nothing@ instead of a runtime error raised by @head@.

splitRE         :: StringLike s => GenRegex s -> s -> Maybe (s, s)
splitRE re input
    = do
      ((_, prefix) : _, rest) <- splitWithRegex re input
      return (prefix, rest)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Convenience wrapper around 'splitRE': the regular expression is given
-- as a string and parsed with 'parseRegex'.
--
-- When nothing can be split off, the pair of the empty string and the
-- unchanged input is returned.
--
-- examples:
--
-- > split "a*b" "abc" = ("ab","c")
-- > split "a*"  "bc"  = ("", "bc")        -- "a*" matches ""
-- > split "a+"  "bc"  = ("", "bc")        -- "a+" does not match, no split
-- > split "["   "abc" = ("", "abc")       -- "[" syntax error, no split

split           :: StringLike s => s -> s -> (s, s)
split re input
    = split' parseRegex re input
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Same as 'split', but the regular expression is parsed with
-- 'parseRegexExt', i.e. with the extended syntax.

splitExt        :: StringLike s => s -> s -> (s, s)
splitExt re input
    = split' parseRegexExt re input
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- Shared worker of 'split' and 'splitExt': the regular expression is parsed
-- with the given parser and the input is split with 'splitRE'. Without a
-- matching prefix the empty string and the whole input are returned.

split'          :: StringLike s => (s -> GenRegex s) -> s -> s -> (s, s)
split' parseRe re input
    = case splitRE (parseRe re) input of
        Just parts -> parts
        Nothing    -> (emptyS, input)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Split a string by removing the longest prefix matching a regular expression
-- and then return the list of subexpressions found in the matching part.
--
-- @Nothing@ is returned in case of no matching prefix,
-- else the list of pairs of labels and submatches and the
-- rest is returned.
--
-- The first entry delivered by 'splitWithRegex' is skipped. Of the remaining
-- entries only those carrying a label are kept, so an entry without a label
-- is dropped instead of raising a @fromJust@ error.

splitSubexRE    :: StringLike s => GenRegex s -> s -> Maybe ([(s, s)], s)
splitSubexRE re input
    = do
      (sms, rest) <- splitWithRegex re input
      return ([ (label, sub) | (Just label, sub) <- drop 1 sms ], rest)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Convenience wrapper around 'splitSubexRE': the regular expression is
-- given as a string and parsed with 'parseRegexExt' (extended syntax).
--
-- When no prefix matches, the empty list and the unchanged input are returned.
--
-- examples:
--
-- > splitSubex "({1}a*)b"  "abc" = ([("1","a")],"c")
-- > splitSubex "({2}a*)"   "bc"  = ([("2","")], "bc")
-- > splitSubex "({1}a|b)+" "abc" = ([("1","a"),("1","b")],"c")        -- subex 1 matches 2 times
-- >
-- > splitSubex ".*({x}a*)" "aa"  = ([("x",""),("x","a"),("x","aa")],"")
-- >                                                                -- nondeterminism: 3 matches for a*
-- >
-- > splitSubex "({1}do)|({2}[a-z]+)" "do you know"
-- >                                = ([("1","do"),("2","do")]," you know")
-- >                                                                -- nondeterminism: 2 matches for do
-- >
-- > splitSubex "({1}do){|}({2}[a-z]+)" "do you know"
-- >                                = ([("1","do")]," you know")
-- >                                                                -- no nondeterminism with {|}: 1. match for do
-- >
-- > splitSubex "({1}a+)"   "bcd" = ([], "bcd")                     -- no match
-- > splitSubex "["         "abc" = ([], "abc")                     -- syntax error

splitSubex      :: StringLike s => s -> s -> ([(s, s)], s)
splitSubex re inp
    = fromMaybe ([], inp) (splitSubexRE (parseRegexExt re) inp)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | The function that does the real work for 'tokenize'.
--
-- The input is scanned from left to right. At every position
-- 'splitWithRegexCS' is asked for a matching prefix:
--
-- * no prefix matches: one char is discarded and scanning continues
--
-- * only the empty prefix matches: the empty token is emitted, one char
--   is discarded and scanning continues
--
-- * a non-empty prefix matches: it is emitted as token, and the directly
--   following token is searched with a variant of the regular expression
--   built by @mkDiff re mkUnit@, because it must not be empty
--
-- The token is taken from the first entry of the submatch list by pattern
-- matching. A result without any entry is treated like \"no match\" instead
-- of raising an error from @head@.

tokenizeRE      :: StringLike s => GenRegex s -> s -> [s]
tokenizeRE re
    = scan re
    where
    starts      = firstChars re             -- computed once, shared by both regex variants
    nonEmptyRe  = mkDiff re mkUnit          -- used directly after a real token

    -- scan :: StringLike s => GenRegex s -> s -> [s]
    scan curRe inp
        | nullS inp = []
        | otherwise
            = case splitWithRegexCS curRe starts inp of
                Just ((_, tok) : _, rest)
                    | nullS tok -> tok : scan re (dropS 1 rest)     -- only the empty prefix matches
                                                                    -- discard one char and try again
                    | otherwise -> tok : scan nonEmptyRe rest       -- real token found, next token must not be empty
                _ -> scan re (dropS 1 inp)                          -- re does not match any prefix
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Split a string into tokens (words) by giving a regular expression
-- which all tokens must match.
--
-- Convenience wrapper around 'tokenizeRE': the regular expression is given
-- as a string and parsed with 'parseRegex'.
--
-- This can be used for simple tokenizers.
-- It is recommended to use regular expressions where the empty word does not match.
-- Else there will appear a lot of probably useless empty tokens in the output.
-- All non-matching chars are discarded. The result is a plain list, so a
-- syntax error in the regular expression is not reported through the result type.
--
-- examples:
--
-- > tokenize "a"   "aabba"      = ["a","a","a"]
-- > tokenize "a*"  "aaaba"      = ["aaa","a"]
-- > tokenize "a*"  "bbb"        = ["","",""]
-- > tokenize "a+"  "bbb"        = []
-- >
-- > tokenize "a*b" ""           = []
-- > tokenize "a*b" "abc"        = ["ab"]
-- > tokenize "a*b" "abaab ab"   = ["ab","aab","ab"]
-- >
-- > tokenize "[a-z]{2,}|[0-9]{2,}|[0-9]+[.][0-9]+" "ab123 456.7abc"
-- >                                = ["ab","123","456.7","abc"]
-- >
-- > tokenize "[a-z]*|[0-9]{2,}|[0-9]+[.][0-9]+" "cab123 456.7abc"
-- >                                = ["cab","123","456.7","abc"]
-- >
-- > tokenize "[^ \t\n\r]*" "abc def\t\n\rxyz"
-- >                                = ["abc","def","xyz"]
-- >
-- > tokenize ".*" "\nabc\n123\n\nxyz\n"
-- >                                = ["","abc","123","","xyz"]
-- >
-- > tokenize ".*"               = lines
-- >
-- > tokenize "[^ \t\n\r]*"      = words

tokenize        :: StringLike s => s -> s -> [s]
tokenize re
    = tokenizeRE (parseRegex re)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Same as 'tokenize', but the regular expression is parsed with
-- 'parseRegexExt', i.e. with the extended syntax.

tokenizeExt     :: StringLike s => s -> s -> [s]
tokenizeExt re
    = tokenizeRE (parseRegexExt re)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Split a string into tokens and delimiters by giving a regular expression
-- which all tokens must match.
--
-- This is a generalisation of 'tokenizeRE'.
-- The non-matching char sequences are marked with @Left@, the matching ones
-- are marked with @Right@. The result is a plain list, so a syntax error in
-- the regular expression is not reported through the result type.
--
-- The following law holds:
--
-- > concat . map (either id id) . tokenizeRE' re == id
--
-- The scanner carries the pending unmatched sequence as a pair: the input
-- as it was at the start of that sequence and the number of chars skipped
-- since then. The sequence is emitted as @Left@ in front of the next token
-- or at the end of the input; nothing is emitted when the count is 0.
--
-- The token is taken from the first entry of the submatch list by pattern
-- matching. A result without any entry is treated like \"no match\" instead
-- of raising an error from @head@.

tokenizeRE'     :: StringLike s => GenRegex s -> s -> [Either s s]
tokenizeRE' re inp0
    = scan re (inp0, 0) inp0
    where
    starts      = firstChars re             -- computed once, shared by both regex variants
    nonEmptyRe  = mkDiff re mkUnit          -- used directly after a real token

    -- scan :: StringLike s => GenRegex s -> (s, Int) -> s -> [Either s s]
    -- the counter is kept strict, it is incremented once per skipped char
    scan curRe (gapStart, !gapLen) inp
        | nullS inp = flushGap []
        | otherwise
            = case splitWithRegexCS curRe starts inp of
                Just ((_, tok) : _, rest)
                    | nullS tok -> emit tok                             -- only the empty prefix matches
                                   $ scan re (rest, 1) (dropS 1 rest)   -- discard one char, it starts the next gap
                    | otherwise -> emit tok
                                   $ scan nonEmptyRe (rest, 0) rest     -- real token found, next token must not be empty
                _ -> scan re (gapStart, gapLen + 1) (dropS 1 inp)       -- re does not match any prefix
        where
        -- prepend the pending unmatched chars, if there are any
        flushGap
            | gapLen == 0 = id
            | otherwise   = (Left (takeS gapLen gapStart) :)

        -- prepend the pending unmatched chars followed by the token
        emit t
            = flushGap . (Right t :)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Convenience wrapper around 'tokenizeRE'': the regular expression is
-- given as a string and parsed with 'parseRegex'.
--
-- When the regular expression parses as Zero, @[Left input]@ is returned, that means no tokens are found

tokenize'       :: StringLike s => s -> s -> [Either s s]
tokenize' re
    = tokenizeRE' (parseRegex re)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Same as 'tokenize'', but the regular expression is parsed with
-- 'parseRegexExt', i.e. with the extended syntax.

tokenizeExt'    :: StringLike s => s -> s -> [Either s s]
tokenizeExt' re
    = tokenizeRE' (parseRegexExt re)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Split a string into tokens (pairs of labels and words) by giving a regular expression
-- containing labeled subexpressions.
--
-- This function should not be called with regular expressions
-- without any labeled subexpressions. This does not make sense, because the result list
-- will always be empty.
--
-- Result is the list of matching subexpressions.
-- This can be used for simple tokenizers.
-- The pairs in the result list contain the labels and the matching substrings.
-- All non-matching chars are discarded. The result is a plain list, so a
-- syntax error in the regular expression is not reported through the result type.
--
-- Scanning works like in 'tokenizeRE': when nothing or only the empty
-- prefix matches, one char is discarded. After a non-empty match the directly
-- following match is searched with @mkDiff re mkUnit@, so it must not be empty.
--
-- The first entry of the submatch list is the token used for this decision,
-- the remaining entries are the subexpression matches. Both are taken by
-- pattern matching: a result without any entry is treated like \"no match\"
-- and an entry without a label is dropped, instead of raising errors from
-- @head@, @tail@ or @fromJust@.

tokenizeSubexRE :: StringLike s => GenRegex s -> s -> [(s, s)]
tokenizeSubexRE re
    = scan re
    where
    starts      = firstChars re             -- computed once, shared by both regex variants
    nonEmptyRe  = mkDiff re mkUnit          -- used directly after a real token

    -- scan :: StringLike s => GenRegex s -> s -> [(s, s)]
    scan curRe inp
        | nullS inp = []
        | otherwise
            = case splitWithRegexCS curRe starts inp of
                Just ((_, tok) : subs, rest)
                    | nullS tok -> labeled subs ++ scan re (dropS 1 rest)   -- only the empty prefix matches
                    | otherwise -> labeled subs ++ scan nonEmptyRe rest     -- token found, tokenize the rest
                _ -> scan re (dropS 1 inp)                                  -- re does not match any prefix

    -- keep the labeled submatches only
    labeled subs
        = [ (label, sub) | (Just label, sub) <- subs ]
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Convenience wrapper around 'tokenizeSubexRE': the regular expression is
-- given as a string and parsed with 'parseRegexExt' (extended syntax).
--
-- examples:
--
-- > tokenizeSubex "({name}[a-z]+)|({num}[0-9]{2,})|({real}[0-9]+[.][0-9]+)"
-- >                 "cab123 456.7abc"
-- >                                  = [("name","cab")
-- >                                    ,("num","123")
-- >                                    ,("real","456.7")
-- >                                    ,("name","abc")]
-- >
-- > tokenizeSubex "({real}({n}[0-9]+)([.]({f}[0-9]+))?)"
-- >                 "12.34"          = [("real","12.34")
-- >                                    ,("n","12")
-- >                                    ,("f","34")]
-- >
-- > tokenizeSubex "({real}({n}[0-9]+)([.]({f}[0-9]+))?)"
-- >                  "12 34"         = [("real","12"),("n","12")
-- >                                    ,("real","34"),("n","34")]
-- >
-- > tokenizeSubex "({real}({n}[0-9]+)(([.]({f}[0-9]+))|({f})))"
-- >                  "12 34.56"      = [("real","12"),("n","12"),("f","")
-- >                                    ,("real","34.56"),("n","34"),("f","56")]

tokenizeSubex   :: StringLike s => s -> s -> [(s, s)]
tokenizeSubex re
    = tokenizeSubexRE (parseRegexExt re)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Sed-like editing function.
--
-- The input is cut into pieces with 'tokenizeRE''. Every matching piece is
-- replaced by the result of the editing function (the 1. argument), all
-- other pieces are copied unchanged, and the pieces are joined again.

sedRE           :: StringLike s => (s -> s) -> GenRegex s -> s -> s
sedRE edit re
    = concatS . map rewrite . tokenizeRE' re
    where
    rewrite (Left  gap) = gap
    rewrite (Right tok) = edit tok
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Convenience wrapper around 'sedRE': the regular expression is given
-- as a string and parsed with 'parseRegex'.
--
-- examples:
--
-- > sed (const "b") "a" "xaxax"       = "xbxbx"
-- > sed (\ x -> x ++ x) "a" "xax"     = "xaax"
-- > sed undefined       "[" "xxx"     = "xxx"

sed             :: StringLike s => (s -> s) -> s -> s -> s
sed edit re
    = sedRE edit (parseRegex re)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Same as 'sed', but the regular expression is parsed with
-- 'parseRegexExt', i.e. with the extended syntax.

sedExt          :: StringLike s => (s -> s) -> s -> s -> s
sedExt edit re
    = sedRE edit (parseRegexExt re)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Match a string with an already parsed regular expression.
--
-- This is 'matchWithRegex' under the name used by this module.

matchRE         :: StringLike s => GenRegex s -> s -> Bool
matchRE re
    = matchWithRegex re
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Convenience variant of 'matchRE': the regular expression is given
-- as a string and parsed with 'parseRegex'.
--
-- Examples:
--
-- > match "x*" "xxx" = True
-- > match "x"  "xxx" = False
-- > match "["  "xxx" = False

match           :: StringLike s => s -> s -> Bool
match re
    = matchWithRegex (parseRegex re)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Same as 'match', but the regular expression is parsed with
-- 'parseRegexExt', i.e. with the extended syntax.

matchExt        :: StringLike s => s -> s -> Bool
matchExt re
    = matchWithRegex (parseRegexExt re)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Match a string with a regular expression
-- and extract subexpression matches.
--
-- The result is the list of pairs of labels and submatches delivered by
-- 'matchWithRegex''; the empty list is returned when that function
-- delivers @Nothing@.
--
-- Only entries carrying a label are kept, so an entry without a label
-- is dropped instead of raising a @fromJust@ error.

matchSubexRE    :: StringLike s => GenRegex s -> s -> [(s, s)]
matchSubexRE re
    = labeled . fromMaybe [] . matchWithRegex' re
    where
    labeled subs
        = [ (label, sub) | (Just label, sub) <- subs ]
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Convenience wrapper around 'matchSubexRE': the regular expression is
-- given as a string and parsed with 'parseRegexExt' (extended syntax).
--
-- Examples:
--
-- > matchSubex "({1}x*)"                 "xxx"      = [("1","xxx")]
-- > matchSubex "({1}x*)"                 "y"        = []
-- > matchSubex "({w}[0-9]+)x({h}[0-9]+)" "800x600"  = [("w","800"),("h","600")]
-- > matchSubex "["                       "xxx"      = []

matchSubex      :: StringLike s => s -> s -> [(s, s)]
matchSubex re
    = matchSubexRE (parseRegexExt re)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Grep-like filter for lists of strings.
--
-- The regular expression may be prefixed with the usual context spec \"^\" for start of string,
-- and "\\<" for start of word,
-- and suffixed with \"$\" for end of text and "\\>" for end of word.
-- Word chars are defined by the multi char escape sequence "\\w".
--
-- Examples
--
-- > grep "a"    ["_a_", "_a", "a_", "a", "_"]      => ["_a_", "_a", "a_", "a"]
-- > grep "^a"   ["_a_", "_a", "a_", "a", "_"]      => ["a_", "a"]
-- > grep "a$"   ["_a_", "_a", "a_", "a", "_"]      => ["_a", "a"]
-- > grep "^a$"  ["_a_", "_a", "a_", "a", "_"]      => ["a"]
-- > grep "\\<a" ["x a b", " ax ", " xa ", "xab"]   => ["x a b", " ax "]
-- > grep "a\\>" ["x a b", " ax ", " xa ", "xab"]   => ["x a b", " xa "]

grep            :: StringLike s => s -> [s] -> [s]
grep re
    = grep' parseRegex' re
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Same as 'grep', but the regular expression is parsed with
-- 'parseRegexExt'', i.e. with the extended syntax.

grepExt         :: StringLike s => s -> [s] -> [s]
grepExt re
    = grep' parseRegexExt' re
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- Shared worker of 'grep' and 'grepExt': the regular expression is prepared
-- by 'parseContextRegex' with the given parser, then the list is filtered
-- with 'grepRE'.

grep'           :: StringLike s => (String -> GenRegex s) -> s -> [s] -> [s]
grep' parseRe re
    = grepRE (parseContextRegex parseRe re)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Grep with an already prepared regular expression (usually built with
-- 'parseContextRegex'): only the strings accepted by 'matchRE' are kept.

grepRE          :: StringLike s => GenRegex s -> [s] -> [s]
grepRE re
    = filter accepted
    where
    accepted = matchRE re
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- | Grep with an already prepared regular expression and line numbers.
--
-- The strings are numbered starting with 1; the pairs of number and string
-- whose string is accepted by 'matchRE' are kept.

grepREwithLineNum :: StringLike s => GenRegex s -> [s] -> [(Int, s)]
grepREwithLineNum re
    = filter (accepted . snd) . zip [1 ..]
    where
    accepted = matchRE re
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|