Blame src/Text/Regex/XMLSchema/Generic/RegexParser.hs

Packit 5b08af
{-# LANGUAGE FlexibleContexts #-}
Packit 5b08af
Packit 5b08af
-- ------------------------------------------------------------
Packit 5b08af
Packit 5b08af
{- |
Packit 5b08af
   Module     : Text.Regex.XMLSchema.RegexParser
Packit 5b08af
   Copyright  : Copyright (C) 2014- Uwe Schmidt
Packit 5b08af
   License    : MIT
Packit 5b08af
Packit 5b08af
   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Packit 5b08af
   Stability  : stable
Packit 5b08af
   Portability: portable
Packit 5b08af
Packit 5b08af
   W3C XML Schema Regular Expression Parser
Packit 5b08af
Packit 5b08af
   This parser supports the full W3C standard, the
Packit 5b08af
   complete grammar can be found under <http://www.w3.org/TR/xmlschema11-2/#regexs>
Packit 5b08af
   and extensions for all missing set operations, intersection,
Packit 5b08af
   difference, exclusive or, interleave, complement
Packit 5b08af
Packit 5b08af
-}
Packit 5b08af
Packit 5b08af
-- ------------------------------------------------------------
Packit 5b08af
Packit 5b08af
module Text.Regex.XMLSchema.Generic.RegexParser
Packit 5b08af
    ( parseRegex
Packit 5b08af
    , parseRegexExt
Packit 5b08af
    , parseRegex'
Packit 5b08af
    , parseRegexExt'
Packit 5b08af
    , parseContextRegex
Packit 5b08af
    )
Packit 5b08af
where
Packit 5b08af
Packit 5b08af
import           Data.Char.Properties.UnicodeBlocks
Packit 5b08af
import           Data.Char.Properties.UnicodeCharProps
Packit 5b08af
import           Data.Char.Properties.XMLCharProps
Packit 5b08af
Packit 5b08af
import           Data.List                               (isPrefixOf,
Packit 5b08af
                                                          isSuffixOf)
Packit 5b08af
import           Data.Maybe
Packit 5b08af
import           Data.Set.CharSet
Packit 5b08af
Packit 5b08af
import           Text.ParserCombinators.Parsec
Packit 5b08af
import           Text.Regex.XMLSchema.Generic.Regex
Packit 5b08af
import           Text.Regex.XMLSchema.Generic.StringLike
Packit 5b08af
Packit 5b08af
-- ------------------------------------------------------------
Packit 5b08af
Packit 5b08af
-- | parse a standard W3C XML Schema regular expression
Packit 5b08af
Packit 5b08af
parseRegex              :: StringLike s => s -> GenRegex s
Packit 5b08af
parseRegex              = parseRegex' . toString
Packit 5b08af
Packit 5b08af
parseRegex'             :: StringLike s => String -> GenRegex s
Packit 5b08af
parseRegex'             = parseRegex'' regExpStd
Packit 5b08af
Packit 5b08af
-- | parse an extended syntax W3C XML Schema regular expression
Packit 5b08af
--
Packit 5b08af
-- The Syntax of the W3C XML Schema spec is extended by
Packit 5b08af
-- further useful set operations, like intersection, difference, exor.
Packit 5b08af
-- Subexpression match becomes possible with \"named\" pairs of parentheses.
Packit 5b08af
-- The multi char escape sequence \\a represents any Unicode char,
Packit 5b08af
-- The multi char escape sequence \\A represents any Unicode word, (\\A = \\a*).
Packit 5b08af
-- All syntactically wrong inputs are mapped to the Zero expression representing the
Packit 5b08af
-- empty set of words. Zero contains as data field a string for an error message.
Packit 5b08af
-- So error checking after parsing becomes possible by checking against Zero ('isZero' predicate)
Packit 5b08af
Packit 5b08af
parseRegexExt   :: StringLike s => s -> GenRegex s
Packit 5b08af
parseRegexExt   = parseRegexExt' . toString
Packit 5b08af
Packit 5b08af
parseRegexExt'  :: StringLike s => String -> GenRegex s
Packit 5b08af
parseRegexExt'  = parseRegex'' regExpExt
Packit 5b08af
Packit 5b08af
parseRegex'' :: StringLike s => Parser (GenRegex s) -> String -> GenRegex s
Packit 5b08af
parseRegex'' regExp'
Packit 5b08af
  = either (mkZero' . ("syntax error: " ++) . show) id
Packit 5b08af
    . parse ( do
Packit 5b08af
                 r <- regExp'
Packit 5b08af
                 eof
Packit 5b08af
                 return r
Packit 5b08af
            ) ""
Packit 5b08af
Packit 5b08af
-- ------------------------------------------------------------
Packit 5b08af
Packit 5b08af
-- | parse a regular expression surrounded by contenxt spec
Packit 5b08af
--
Packit 5b08af
-- a leading @^@ denotes start of text,
Packit 5b08af
-- a trailing @$@ denotes end of text,
Packit 5b08af
-- a leading @\\<@ denotes word start,
Packit 5b08af
-- a trailing @\\>@ denotes word end.
Packit 5b08af
--
Packit 5b08af
-- The 1. param ist the regex parser ('parseRegex' or 'parseRegexExt')
Packit 5b08af
Packit 5b08af
parseContextRegex :: StringLike s => (String -> GenRegex s) -> s -> GenRegex s
Packit 5b08af
parseContextRegex parseRe re0
Packit 5b08af
    = re'
Packit 5b08af
    where
Packit 5b08af
      parseAW = parseRegexExt' "(\\A\\W)?"
Packit 5b08af
      parseWA = parseRegexExt' "(\\W\\A)?"
Packit 5b08af
Packit 5b08af
      re  = toString re0
Packit 5b08af
      re' = mkSeqs . concat $ [ startContext
Packit 5b08af
                              , (:[]) . parseRe $ re2
Packit 5b08af
                              , endContext
Packit 5b08af
                              ]
Packit 5b08af
      (startContext, re1)
Packit 5b08af
          | "^"   `isPrefixOf` re   = ([],             tail   re)
Packit 5b08af
          | "\\<" `isPrefixOf` re   = ([parseAW],      drop 2 re)
Packit 5b08af
          | otherwise               = ([mkStar mkDot],        re)
Packit 5b08af
      (endContext, re2)
Packit 5b08af
          | "$"   `isSuffixOf` re1  = ([],             init          re1)
Packit 5b08af
          | "\\>" `isSuffixOf` re1  = ([parseWA],      init . init $ re1)
Packit 5b08af
          | otherwise               = ([mkStar mkDot],               re1)
Packit 5b08af
Packit 5b08af
-- ------------------------------------------------------------
Packit 5b08af
Packit 5b08af
regExpExt  :: StringLike s => Parser (GenRegex s)
Packit 5b08af
regExpExt  = branchList orElseList
Packit 5b08af
Packit 5b08af
regExpStd  :: StringLike s => Parser (GenRegex s)
Packit 5b08af
regExpStd  = branchList seqListStd
Packit 5b08af
Packit 5b08af
branchList :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s)
Packit 5b08af
branchList exParser
Packit 5b08af
    = do
Packit 5b08af
      r1 <- exParser
Packit 5b08af
      rs <- many branchList1
Packit 5b08af
      return (foldr1 mkAlt $ r1:rs)     -- union is associative, so we use right ass.
Packit 5b08af
                                        -- as with seq, alt and exor
Packit 5b08af
    where
Packit 5b08af
    branchList1
Packit 5b08af
        = do
Packit 5b08af
          _ <- char '|'
Packit 5b08af
          exParser
Packit 5b08af
Packit 5b08af
orElseList      :: StringLike s => Parser (GenRegex s)
Packit 5b08af
orElseList
Packit 5b08af
    = do
Packit 5b08af
      r1 <- interleaveList
Packit 5b08af
      rs <- many orElseList1
Packit 5b08af
      return (foldr1 mkElse $ r1:rs)    -- orElse is associative, so we choose right ass.
Packit 5b08af
                                        -- as with seq and alt ops
Packit 5b08af
    where
Packit 5b08af
    orElseList1
Packit 5b08af
        = do
Packit 5b08af
          _ <- try (string "{|}")
Packit 5b08af
          interleaveList
Packit 5b08af
Packit 5b08af
interleaveList  :: StringLike s => Parser (GenRegex s)
Packit 5b08af
interleaveList
Packit 5b08af
    = do
Packit 5b08af
      r1 <- exorList
Packit 5b08af
      rs <- many interleaveList1
Packit 5b08af
      return (foldr1 mkInterleave $ r1:rs)      -- interleave is associative, so we choose right ass.
Packit 5b08af
                                                -- as with seq and alt ops
Packit 5b08af
    where
Packit 5b08af
    interleaveList1
Packit 5b08af
        = do
Packit 5b08af
          _ <- try (string "{:}")
Packit 5b08af
          exorList
Packit 5b08af
Packit 5b08af
exorList        :: StringLike s => Parser (GenRegex s)
Packit 5b08af
exorList
Packit 5b08af
    = do
Packit 5b08af
      r1 <- diffList
Packit 5b08af
      rs <- many exorList1
Packit 5b08af
      return (foldr1 mkExor $ r1:rs)    -- exor is associative, so we choose right ass.
Packit 5b08af
    where
Packit 5b08af
    exorList1
Packit 5b08af
        = do
Packit 5b08af
          _ <- try (string "{^}")
Packit 5b08af
          diffList
Packit 5b08af
Packit 5b08af
diffList        :: StringLike s => Parser (GenRegex s)
Packit 5b08af
diffList
Packit 5b08af
    = do
Packit 5b08af
      r1 <- intersectList
Packit 5b08af
      rs <- many diffList1
Packit 5b08af
      return (foldl1 mkDiff $ r1:rs)    -- diff is not associative, so we choose left ass.
Packit 5b08af
    where
Packit 5b08af
    diffList1
Packit 5b08af
        = do
Packit 5b08af
          _ <- try (string "{\\}")
Packit 5b08af
          intersectList
Packit 5b08af
Packit 5b08af
intersectList   :: StringLike s => Parser (GenRegex s)
Packit 5b08af
intersectList
Packit 5b08af
    = do
Packit 5b08af
      r1 <- seqListExt
Packit 5b08af
      rs <- many intersectList1
Packit 5b08af
      return (foldr1 mkIsect $ r1:rs)
Packit 5b08af
    where
Packit 5b08af
    intersectList1
Packit 5b08af
        = do
Packit 5b08af
          _ <- try (string "{&}")
Packit 5b08af
          seqListExt
Packit 5b08af
Packit 5b08af
seqListExt      :: StringLike s => Parser (GenRegex s)
Packit 5b08af
seqListExt      = seqList' regExpLabel multiCharEscExt
Packit 5b08af
Packit 5b08af
seqListStd      :: StringLike s => Parser (GenRegex s)
Packit 5b08af
seqListStd      = seqList' regExpStd multiCharEsc
Packit 5b08af
Packit 5b08af
seqList'        :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
Packit 5b08af
seqList' regExp' multiCharEsc'
Packit 5b08af
    = do
Packit 5b08af
      rs <- many piece
Packit 5b08af
      return $ mkSeqs rs
Packit 5b08af
    where
Packit 5b08af
    -- piece :: StringLike s => Parser (GenRegex s)
Packit 5b08af
    piece
Packit 5b08af
        = do
Packit 5b08af
          r <- atom
Packit 5b08af
          quantifier r
Packit 5b08af
Packit 5b08af
    -- atom :: StringLike s => Parser (GenRegex s)
Packit 5b08af
    atom
Packit 5b08af
        = char1
Packit 5b08af
          <|>
Packit 5b08af
          charClass
Packit 5b08af
          <|>
Packit 5b08af
          between (char '(') (char ')') regExp'
Packit 5b08af
Packit 5b08af
    -- charClass :: StringLike s => Parser (GenRegex s)
Packit 5b08af
    charClass
Packit 5b08af
        = charClassEsc multiCharEsc'
Packit 5b08af
          <|>
Packit 5b08af
          charClassExpr multiCharEsc'
Packit 5b08af
          <|>
Packit 5b08af
          wildCardEsc
Packit 5b08af
Packit 5b08af
Packit 5b08af
Packit 5b08af
quantifier      :: StringLike s => GenRegex s -> Parser (GenRegex s)
Packit 5b08af
quantifier r
Packit 5b08af
    = ( do
Packit 5b08af
        _ <- char '?'
Packit 5b08af
        return $ mkOpt r )
Packit 5b08af
      <|>
Packit 5b08af
      ( do
Packit 5b08af
        _ <- char '*'
Packit 5b08af
        return $ mkStar r )
Packit 5b08af
      <|>
Packit 5b08af
      ( do
Packit 5b08af
        _ <- char '+'
Packit 5b08af
        return $ mkRep 1 r )
Packit 5b08af
      <|>
Packit 5b08af
      try ( do
Packit 5b08af
            _ <- char '{'
Packit 5b08af
            res <- quantity r
Packit 5b08af
            _ <- char '}'
Packit 5b08af
            return res
Packit 5b08af
          )
Packit 5b08af
      <|>
Packit 5b08af
      ( return r )
Packit 5b08af
Packit 5b08af
quantity        :: StringLike s => GenRegex s -> Parser (GenRegex s)
Packit 5b08af
quantity r
Packit 5b08af
    = do
Packit 5b08af
      lb <- many1 digit
Packit 5b08af
      quantityRest r (read lb)
Packit 5b08af
Packit 5b08af
quantityRest    :: StringLike s => GenRegex s -> Int -> Parser (GenRegex s)
Packit 5b08af
quantityRest r lb
Packit 5b08af
    = ( do
Packit 5b08af
        _ <- char ','
Packit 5b08af
        ub <- many digit
Packit 5b08af
        return ( if null ub
Packit 5b08af
                 then mkRep lb r
Packit 5b08af
                 else mkRng lb (read ub) r
Packit 5b08af
               )
Packit 5b08af
      )
Packit 5b08af
      <|>
Packit 5b08af
      ( return $ mkRng lb lb r)
Packit 5b08af
Packit 5b08af
regExpLabel :: StringLike s => Parser (GenRegex s)
Packit 5b08af
regExpLabel
Packit 5b08af
    = do
Packit 5b08af
      lab <- option id (between (char '{') (char '}') label')
Packit 5b08af
      r    <- regExpExt
Packit 5b08af
      return $ lab r
Packit 5b08af
    where
Packit 5b08af
    label'
Packit 5b08af
        = do
Packit 5b08af
          l <- many1 (satisfy isXmlNameChar)
Packit 5b08af
          return $ mkBr' l
Packit 5b08af
Packit 5b08af
char1   :: StringLike s => Parser (GenRegex s)
Packit 5b08af
char1
Packit 5b08af
    = do
Packit 5b08af
      c <- satisfy (`notElem` ".\\?*+{}()|[]")
Packit 5b08af
      return $ mkSym1 c
Packit 5b08af
Packit 5b08af
charClassEsc    :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s)
Packit 5b08af
charClassEsc multiCharEsc'
Packit 5b08af
    = do
Packit 5b08af
      _ <- char '\\'
Packit 5b08af
      ( singleCharEsc
Packit 5b08af
        <|>
Packit 5b08af
        multiCharEsc'
Packit 5b08af
        <|>
Packit 5b08af
        catEsc
Packit 5b08af
        <|>
Packit 5b08af
        complEsc )
Packit 5b08af
Packit 5b08af
singleCharEsc   :: StringLike s => Parser (GenRegex s)
Packit 5b08af
singleCharEsc
Packit 5b08af
    = do
Packit 5b08af
      c <- singleCharEsc'
Packit 5b08af
      return $ mkSym1 c
Packit 5b08af
Packit 5b08af
singleCharEsc'  :: Parser Char
Packit 5b08af
singleCharEsc'
Packit 5b08af
    = do
Packit 5b08af
      c <- satisfy (`elem` "nrt\\|.?*+(){}-[]^")
Packit 5b08af
      return $ maybe c id . lookup c . zip "ntr" $ "\n\r\t"
Packit 5b08af
Packit 5b08af
multiCharEscExt    :: StringLike s => Parser (GenRegex s)
Packit 5b08af
multiCharEscExt
Packit 5b08af
    = multiCharEsc
Packit 5b08af
      <|>
Packit 5b08af
      ( do                      -- extension: \a represents the whole alphabet inclusive newline chars: \a == .|\n|\r
Packit 5b08af
        _ <- char 'a'
Packit 5b08af
        return mkDot )
Packit 5b08af
      <|>
Packit 5b08af
      ( do                      -- extension: \A represents all words: \A == \a* or \A == (.|\n|\r)*
Packit 5b08af
        _ <- char 'A'
Packit 5b08af
        return mkAll )
Packit 5b08af
Packit 5b08af
multiCharEsc    :: StringLike s => Parser (GenRegex s)
Packit 5b08af
multiCharEsc
Packit 5b08af
    = ( do
Packit 5b08af
        c <- satisfy (`elem` es)
Packit 5b08af
        return $ mkSym . fromJust . lookup c $ pm )
Packit 5b08af
    where
Packit 5b08af
    es = map fst pm
Packit 5b08af
    pm = [ ('s',        charPropXmlSpaceChar          )
Packit 5b08af
         , ('S', compCS charPropXmlSpaceChar          )
Packit 5b08af
         , ('i',        charPropXmlNameStartChar      )
Packit 5b08af
         , ('I', compCS charPropXmlNameStartChar      )
Packit 5b08af
         , ('c',        charPropXmlNameChar           )
Packit 5b08af
         , ('C', compCS charPropXmlNameChar           )
Packit 5b08af
         , ('d',        charPropDigit                 )
Packit 5b08af
         , ('D', compCS charPropDigit                 )
Packit 5b08af
         , ('w', compCS charPropNotWord               )
Packit 5b08af
         , ('W',        charPropNotWord               )
Packit 5b08af
         ]
Packit 5b08af
    charPropDigit   = rangeCS '0' '9'
Packit 5b08af
    charPropNotWord = charPropUnicodeP
Packit 5b08af
                      `unionCS`
Packit 5b08af
                      charPropUnicodeZ
Packit 5b08af
                      `unionCS`
Packit 5b08af
                      charPropUnicodeC
Packit 5b08af
Packit 5b08af
catEsc  :: StringLike s => Parser (GenRegex s)
Packit 5b08af
catEsc
Packit 5b08af
    = do
Packit 5b08af
      _ <- char 'p'
Packit 5b08af
      s <- between (char '{') (char '}') charProp
Packit 5b08af
      return $ mkSym s
Packit 5b08af
Packit 5b08af
charProp        :: Parser CharSet
Packit 5b08af
charProp
Packit 5b08af
    = isCategory
Packit 5b08af
      <|>
Packit 5b08af
      isBlock
Packit 5b08af
Packit 5b08af
isBlock         :: Parser CharSet
Packit 5b08af
isBlock
Packit 5b08af
    = do
Packit 5b08af
      _ <- string "Is"
Packit 5b08af
      name <- many1 (satisfy legalChar)
Packit 5b08af
      case lookup name codeBlocks of
Packit 5b08af
        Just b  -> return $ uncurry rangeCS b
Packit 5b08af
        Nothing -> fail $ "unknown Unicode code block " ++ show name
Packit 5b08af
    where
Packit 5b08af
    legalChar c  = 'A' <= c && c <= 'Z' ||
Packit 5b08af
                   'a' <= c && c <= 'z' ||
Packit 5b08af
                   '0' <= c && c <= '9' ||
Packit 5b08af
                   '-' == c
Packit 5b08af
Packit 5b08af
isCategory      :: Parser CharSet
Packit 5b08af
isCategory
Packit 5b08af
    = do
Packit 5b08af
      pr <- isCategory'
Packit 5b08af
      return $ fromJust (lookup pr categories)
Packit 5b08af
Packit 5b08af
categories      :: [(String, CharSet)]
Packit 5b08af
categories
Packit 5b08af
    = [ ("C",  charPropUnicodeC )
Packit 5b08af
      , ("Cc", charPropUnicodeCc)
Packit 5b08af
      , ("Cf", charPropUnicodeCf)
Packit 5b08af
      , ("Co", charPropUnicodeCo)
Packit 5b08af
      , ("Cs", charPropUnicodeCs)
Packit 5b08af
      , ("L",  charPropUnicodeL )
Packit 5b08af
      , ("Ll", charPropUnicodeLl)
Packit 5b08af
      , ("Lm", charPropUnicodeLm)
Packit 5b08af
      , ("Lo", charPropUnicodeLo)
Packit 5b08af
      , ("Lt", charPropUnicodeLt)
Packit 5b08af
      , ("Lu", charPropUnicodeLu)
Packit 5b08af
      , ("M",  charPropUnicodeM )
Packit 5b08af
      , ("Mc", charPropUnicodeMc)
Packit 5b08af
      , ("Me", charPropUnicodeMe)
Packit 5b08af
      , ("Mn", charPropUnicodeMn)
Packit 5b08af
      , ("N",  charPropUnicodeN )
Packit 5b08af
      , ("Nd", charPropUnicodeNd)
Packit 5b08af
      , ("Nl", charPropUnicodeNl)
Packit 5b08af
      , ("No", charPropUnicodeNo)
Packit 5b08af
      , ("P",  charPropUnicodeP )
Packit 5b08af
      , ("Pc", charPropUnicodePc)
Packit 5b08af
      , ("Pd", charPropUnicodePd)
Packit 5b08af
      , ("Pe", charPropUnicodePe)
Packit 5b08af
      , ("Pf", charPropUnicodePf)
Packit 5b08af
      , ("Pi", charPropUnicodePi)
Packit 5b08af
      , ("Po", charPropUnicodePo)
Packit 5b08af
      , ("Ps", charPropUnicodePs)
Packit 5b08af
      , ("S",  charPropUnicodeS )
Packit 5b08af
      , ("Sc", charPropUnicodeSc)
Packit 5b08af
      , ("Sk", charPropUnicodeSk)
Packit 5b08af
      , ("Sm", charPropUnicodeSm)
Packit 5b08af
      , ("So", charPropUnicodeSo)
Packit 5b08af
      , ("Z",  charPropUnicodeZ )
Packit 5b08af
      , ("Zl", charPropUnicodeZl)
Packit 5b08af
      , ("Zp", charPropUnicodeZp)
Packit 5b08af
      , ("Zs", charPropUnicodeZs)
Packit 5b08af
      ]
Packit 5b08af
Packit 5b08af
isCategory'     :: Parser String
Packit 5b08af
isCategory'
Packit 5b08af
    = ( foldr1 (<|>) . map (uncurry prop) $
Packit 5b08af
        [ ('L', "ultmo")
Packit 5b08af
        , ('M', "nce")
Packit 5b08af
        , ('N', "dlo")
Packit 5b08af
        , ('P', "cdseifo")
Packit 5b08af
        , ('Z', "slp")
Packit 5b08af
        , ('S', "mcko")
Packit 5b08af
        , ('C', "cfon")
Packit 5b08af
        ]
Packit 5b08af
      )  "illegal Unicode character property"
Packit 5b08af
    where
Packit 5b08af
    prop c1 cs2
Packit 5b08af
        = do
Packit 5b08af
          _ <- char c1
Packit 5b08af
          s2 <- option ""
Packit 5b08af
                ( do
Packit 5b08af
                  c2 <- satisfy (`elem` cs2)
Packit 5b08af
                  return [c2] )
Packit 5b08af
          return $ c1:s2
Packit 5b08af
Packit 5b08af
complEsc        :: StringLike s => Parser (GenRegex s)
Packit 5b08af
complEsc
Packit 5b08af
    = do
Packit 5b08af
      _ <- char 'P'
Packit 5b08af
      s <- between (char '{') (char '}') charProp
Packit 5b08af
      return $ mkSym $ compCS s
Packit 5b08af
Packit 5b08af
charClassExpr   :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s)
Packit 5b08af
charClassExpr multiCharEsc'
Packit 5b08af
    = between (char '[') (char ']') charGroup
Packit 5b08af
    where
Packit 5b08af
Packit 5b08af
    -- charGroup       :: StringLike s => Parser (GenRegex s)
Packit 5b08af
    charGroup
Packit 5b08af
        = do
Packit 5b08af
          r <- ( negCharGroup       -- a ^ at beginning denotes negation, not start of posCharGroup
Packit 5b08af
                 <|>
Packit 5b08af
                 posCharGroup
Packit 5b08af
               )
Packit 5b08af
          s <- option (mkZero' "")   -- charClassSub
Packit 5b08af
               ( do
Packit 5b08af
                 _ <- char '-'
Packit 5b08af
                 charClassExpr multiCharEsc'
Packit 5b08af
               )
Packit 5b08af
          return $ mkDiff r s
Packit 5b08af
Packit 5b08af
    -- posCharGroup    :: StringLike s => Parser (GenRegex s)
Packit 5b08af
    posCharGroup
Packit 5b08af
        = do
Packit 5b08af
          rs <- many1 (charRange <|> charClassEsc multiCharEsc')
Packit 5b08af
          return $ foldr1 mkAlt rs
Packit 5b08af
Packit 5b08af
    -- negCharGroup    :: StringLike s => Parser (GenRegex s)
Packit 5b08af
    negCharGroup
Packit 5b08af
        = do
Packit 5b08af
          _ <- char '^'
Packit 5b08af
          r <- posCharGroup
Packit 5b08af
          return $ mkDiff mkDot r
Packit 5b08af
Packit 5b08af
charRange       :: StringLike s => Parser (GenRegex s)
Packit 5b08af
charRange
Packit 5b08af
    = try seRange
Packit 5b08af
      <|>
Packit 5b08af
      xmlCharIncDash
Packit 5b08af
Packit 5b08af
seRange :: StringLike s => Parser (GenRegex s)
Packit 5b08af
seRange
Packit 5b08af
    = do
Packit 5b08af
      c1 <- charOrEsc'
Packit 5b08af
      _ <- char '-'
Packit 5b08af
      c2 <- charOrEsc'
Packit 5b08af
      return $ mkSymRng c1 c2
Packit 5b08af
Packit 5b08af
charOrEsc'      :: Parser Char
Packit 5b08af
charOrEsc'
Packit 5b08af
    = ( do
Packit 5b08af
        _ <- char '\\'
Packit 5b08af
        singleCharEsc'
Packit 5b08af
      )
Packit 5b08af
      <|>
Packit 5b08af
      satisfy (`notElem` "\\-[]")
Packit 5b08af
Packit 5b08af
xmlCharIncDash  :: StringLike s => Parser (GenRegex s)
Packit 5b08af
xmlCharIncDash
Packit 5b08af
    = try ( do                          -- dash is only allowed if not followed by a [, else charGroup differences do not parse correctly
Packit 5b08af
            _ <- char '-'
Packit 5b08af
            notFollowedBy (char '[')
Packit 5b08af
            return $ mkSym1 '-'
Packit 5b08af
          )
Packit 5b08af
      <|>
Packit 5b08af
      ( do
Packit 5b08af
        c <- satisfy (`notElem` "-\\[]")
Packit 5b08af
        return $ mkSym1 c
Packit 5b08af
      )
Packit 5b08af
Packit 5b08af
wildCardEsc     :: StringLike s => Parser (GenRegex s)
Packit 5b08af
wildCardEsc
Packit 5b08af
    = do
Packit 5b08af
      _ <- char '.'
Packit 5b08af
      return . mkSym . compCS $ stringCS "\n\r"
Packit 5b08af
Packit 5b08af
Packit 5b08af
-- ------------------------------------------------------------