Blame Text/XML/Stream/Parse.hs

Packit 899799
{-# LANGUAGE BangPatterns               #-}
Packit 899799
{-# LANGUAGE CPP                        #-}
Packit 899799
{-# LANGUAGE DeriveDataTypeable         #-}
Packit 899799
{-# LANGUAGE DeriveFunctor              #-}
Packit 899799
{-# LANGUAGE FlexibleContexts           #-}
Packit 899799
{-# LANGUAGE FlexibleInstances          #-}
Packit 899799
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
Packit 899799
{-# LANGUAGE OverloadedStrings          #-}
Packit 899799
{-# LANGUAGE PatternGuards              #-}
Packit 899799
{-# LANGUAGE RankNTypes                 #-}
Packit 899799
{-# LANGUAGE StandaloneDeriving         #-}
Packit 899799
{-# LANGUAGE TupleSections              #-}
Packit 899799
{-# LANGUAGE TypeFamilies               #-}
Packit 899799
-- | This module provides both a native Haskell solution for parsing XML
Packit 899799
-- documents into a stream of events, and a set of parser combinators for
Packit 899799
-- dealing with a stream of events.
Packit 899799
--
Packit 899799
-- As a simple example, if you have the following XML file:
Packit 899799
--
Packit 899799
-- > 
Packit 899799
-- > <people>
Packit 899799
-- >     <person age="25">Michael</person>
Packit 899799
-- >     <person age="2">Eliezer</person>
Packit 899799
-- > </people>
Packit 899799
--
Packit 899799
-- Then this code:
Packit 899799
--
Packit 899799
-- > {-# LANGUAGE OverloadedStrings #-}
Packit 899799
-- > import Control.Monad.Trans.Resource
Packit 899799
-- > import Data.Conduit (Consumer, Sink, ($$))
Packit 899799
-- > import Data.Text (Text, unpack)
Packit 899799
-- > import Text.XML.Stream.Parse
Packit 899799
-- > import Data.XML.Types (Event)
Packit 899799
-- >
Packit 899799
-- > data Person = Person Int Text
Packit 899799
-- >     deriving Show
Packit 899799
-- >
Packit 899799
-- > parsePerson :: MonadThrow m => Consumer Event m (Maybe Person)
Packit 899799
-- > parsePerson = tag' "person" (requireAttr "age") $ \age -> do
Packit 899799
-- >     name <- content
Packit 899799
-- >     return $ Person (read $ unpack age) name
Packit 899799
-- >
Packit 899799
-- > parsePeople :: MonadThrow m => Sink Event m (Maybe [Person])
Packit 899799
-- > parsePeople = tagNoAttr "people" $ many parsePerson
Packit 899799
-- >
Packit 899799
-- > main = do
Packit 899799
-- >     people <- runResourceT $
Packit 899799
-- >             parseFile def "people.xml" $$ force "people required" parsePeople
Packit 899799
-- >     print people
Packit 899799
--
Packit 899799
-- will produce:
Packit 899799
--
Packit 899799
-- > [Person 25 "Michael",Person 2 "Eliezer"]
Packit 899799
--
Packit 899799
-- This module also supports streaming results using 'yield'.
Packit 899799
-- This allows parser results to be processed using conduits
Packit 899799
-- while a particular parser (e.g. 'many') is still running.
Packit 899799
-- Without using streaming results, you have to wait until the parser finished
Packit 899799
-- before you can process the result list. Large XML files might be easier
Packit 899799
-- to process by using streaming results.
Packit 899799
-- See http://stackoverflow.com/q/21367423/2597135 for a related discussion.
Packit 899799
--
Packit 899799
-- > {-# LANGUAGE OverloadedStrings #-}
Packit 899799
-- > import Control.Monad (void)
Packit 899799
-- > import Control.Monad.Trans.Class (lift)
Packit 899799
-- > import Control.Monad.Trans.Resource
Packit 899799
-- > import Data.Conduit
Packit 899799
-- > import qualified Data.Conduit.List as CL
Packit 899799
-- > import Data.Text (Text, unpack)
Packit 899799
-- > import Data.XML.Types (Event)
Packit 899799
-- > import Text.XML.Stream.Parse
Packit 899799
-- >
Packit 899799
-- > data Person = Person Int Text deriving Show
Packit 899799
-- >
Packit 899799
-- > parsePerson :: MonadThrow m => Consumer Event m (Maybe Person)
Packit 899799
-- > parsePerson = tag' "person" (requireAttr "age") $ \age -> do
Packit 899799
-- >     name <- content
Packit 899799
-- >     return $ Person (read $ unpack age) name
Packit 899799
-- >
Packit 899799
-- > parsePeople :: MonadThrow m => Conduit Event m Person
Packit 899799
-- > parsePeople = void $ tagNoAttr "people" $ manyYield parsePerson
Packit 899799
-- >
Packit 899799
-- > main = runResourceT $
Packit 899799
-- >     parseFile def "people.xml" $$ parsePeople =$ CL.mapM_ (lift . print)
Packit 899799
--
Packit 899799
-- Previous versions of this module contained a number of more sophisticated
Packit 899799
-- functions written by Aristid Breitkreuz and Dmitry Olshansky. To keep this
Packit 899799
-- package simpler, those functions are being moved to a separate package. This
Packit 899799
-- note will be updated with the name of the package(s) when available.
Packit 899799
module Text.XML.Stream.Parse
Packit 899799
    ( -- * Parsing XML files
Packit 899799
      parseBytes
Packit 899799
    , parseBytesPos
Packit 899799
    , parseText'
Packit 899799
    , parseText
Packit 899799
    , parseTextPos
Packit 899799
    , detectUtf
Packit 899799
    , parseFile
Packit 899799
    , parseLBS
Packit 899799
      -- ** Parser settings
Packit 899799
    , ParseSettings
Packit 899799
    , def
Packit 899799
    , DecodeEntities
Packit 899799
    , psDecodeEntities
Packit 899799
    , psRetainNamespaces
Packit 899799
      -- *** Entity decoding
Packit 899799
    , decodeXmlEntities
Packit 899799
    , decodeHtmlEntities
Packit 899799
      -- * Event parsing
Packit 899799
    , tag
Packit 899799
    , tag'
Packit 899799
    , tagNoAttr
Packit 899799
    , tagIgnoreAttrs
Packit 899799
    , content
Packit 899799
    , contentMaybe
Packit 899799
      -- * Ignoring tags/trees
Packit 899799
    , ignoreTag
Packit 899799
    , ignoreEmptyTag
Packit 899799
    , ignoreTree
Packit 899799
    , ignoreTreeContent
Packit 899799
    , ignoreAnyTreeContent
Packit 899799
    , ignoreAllTreesContent
Packit 899799
      -- * Streaming events
Packit 899799
    , takeContent
Packit 899799
    , takeTree
Packit 899799
    , takeTreeContent
Packit 899799
    , takeAnyTreeContent
Packit 899799
    , takeAllTreesContent
Packit 899799
      -- * Tag name matching
Packit 899799
    , NameMatcher(..)
Packit 899799
    , matching
Packit 899799
    , anyOf
Packit 899799
    , anyName
Packit 899799
      -- * Attribute parsing
Packit 899799
    , AttrParser
Packit 899799
    , attr
Packit 899799
    , requireAttr
Packit 899799
    , optionalAttr
Packit 899799
    , requireAttrRaw
Packit 899799
    , optionalAttrRaw
Packit 899799
    , ignoreAttrs
Packit 899799
      -- * Combinators
Packit 899799
    , orE
Packit 899799
    , choose
Packit 899799
    , many
Packit 899799
    , many_
Packit 899799
    , manyIgnore
Packit 899799
    , many'
Packit 899799
    , force
Packit 899799
      -- * Streaming combinators
Packit 899799
    , manyYield
Packit 899799
    , manyYield'
Packit 899799
    , manyIgnoreYield
Packit 899799
      -- * Exceptions
Packit 899799
    , XmlException (..)
Packit 899799
      -- * Other types
Packit 899799
    , PositionRange
Packit 899799
    , EventPos
Packit 899799
    ) where
Packit 899799
import           Control.Applicative          (Alternative (empty, (<|>)),
Packit 899799
                                               Applicative (..), (<$>))
Packit 899799
import qualified Control.Applicative          as A
Packit 899799
import           Control.Arrow                ((***))
Packit 899799
import           Control.Exception            (Exception (..), SomeException)
Packit 899799
import           Control.Monad                (ap, liftM, void)
Packit 899799
import           Control.Monad.Fix            (fix)
Packit 899799
import           Control.Monad.Trans.Class    (lift)
Packit 899799
import           Control.Monad.Trans.Maybe    (MaybeT (..))
Packit 899799
import           Control.Monad.Trans.Resource (MonadResource, MonadThrow (..),
Packit 899799
                                               monadThrow)
Packit 899799
import           Data.Attoparsec.Text         (Parser, anyChar, char, manyTill,
Packit 899799
                                               skipWhile, string, takeWhile,
Packit 899799
                                               takeWhile1, try)
Packit 899799
import qualified Data.Attoparsec.Text         as AT
Packit 899799
import qualified Data.ByteString              as S
Packit 899799
import qualified Data.ByteString.Lazy         as L
Packit 899799
import           Data.Char                    (isSpace)
Packit 899799
import           Data.Conduit
Packit 899799
import           Data.Conduit.Attoparsec      (PositionRange, conduitParser)
Packit 899799
import           Data.Conduit.Binary          (sourceFile)
Packit 899799
import qualified Data.Conduit.List            as CL
Packit 899799
import qualified Data.Conduit.Text            as CT
Packit 899799
import           Data.Default.Class           (Default (..))
Packit 899799
import           Data.List                    (intercalate)
Packit 899799
import           Data.List                    (foldl')
Packit 899799
import qualified Data.Map                     as Map
Packit 899799
import           Data.Maybe                   (fromMaybe, isNothing)
Packit 899799
import           Data.String                  (IsString (..))
Packit 899799
import           Data.Text                    (Text, pack)
Packit 899799
import qualified Data.Text                    as T
Packit 899799
import           Data.Text.Encoding           (decodeUtf8With)
Packit 899799
import           Data.Text.Encoding.Error     (lenientDecode)
Packit 899799
import           Data.Typeable                (Typeable)
Packit 899799
import           Data.XML.Types               (Content (..), Event (..),
Packit 899799
                                               ExternalID (..),
Packit 899799
                                               Instruction (..), Name (..))
Packit 899799
import           Prelude                      hiding (takeWhile)
Packit 899799
import           Text.XML.Stream.Token
Packit 899799
Packit 899799
type Ents = [(Text, Text)]
Packit 899799
Packit 899799
tokenToEvent :: ParseSettings -> Ents -> [NSLevel] -> Token -> (Ents, [NSLevel], [Event])
Packit 899799
tokenToEvent _ es n (TokenXMLDeclaration _) = (es, n, [])
Packit 899799
tokenToEvent _ es n (TokenInstruction i) = (es, n, [EventInstruction i])
Packit 899799
tokenToEvent ps es n (TokenBeginElement name as isClosed _) =
Packit 899799
    (es, n', if isClosed then [begin, end] else [begin])
Packit 899799
  where
Packit 899799
    l0 = case n of
Packit 899799
            []  -> NSLevel Nothing Map.empty
Packit 899799
            x:_ -> x
Packit 899799
    (as', l') = foldl' go (id, l0) as
Packit 899799
    go (front, l) (TName kpref kname, val) =
Packit 899799
        (addNS front, l'')
Packit 899799
      where
Packit 899799
        isPrefixed = kpref == Just "xmlns"
Packit 899799
        isUnprefixed = isNothing kpref && kname == "xmlns"
Packit 899799
Packit 899799
        addNS
Packit 899799
            | not (psRetainNamespaces ps) && (isPrefixed || isUnprefixed) = id
Packit 899799
            | otherwise = (((tname, map resolve val):) .)
Packit 899799
          where
Packit 899799
            tname
Packit 899799
                | isPrefixed = TName Nothing ("xmlns:" `T.append` kname)
Packit 899799
                | otherwise = TName kpref kname
Packit 899799
Packit 899799
        l''
Packit 899799
            | isPrefixed =
Packit 899799
                l { prefixes = Map.insert kname (contentsToText val)
Packit 899799
                                     $ prefixes l }
Packit 899799
            | isUnprefixed =
Packit 899799
                l { defaultNS = if T.null $ contentsToText val
Packit 899799
                                            then Nothing
Packit 899799
                                            else Just $ contentsToText val }
Packit 899799
            | otherwise = l
Packit 899799
Packit 899799
    resolve (ContentEntity e)
Packit 899799
        | Just t <- lookup e es = ContentText t
Packit 899799
    resolve c = c
Packit 899799
    n' = if isClosed then n else l' : n
Packit 899799
    fixAttName (name', val) = (tnameToName True l' name', val)
Packit 899799
    elementName = tnameToName False l' name
Packit 899799
    begin = EventBeginElement elementName $ map fixAttName $ as' []
Packit 899799
    end = EventEndElement elementName
Packit 899799
tokenToEvent _ es n (TokenEndElement name) =
Packit 899799
    (es, n', [EventEndElement $ tnameToName False l name])
Packit 899799
  where
Packit 899799
    (l, n') =
Packit 899799
        case n of
Packit 899799
            []   -> (NSLevel Nothing Map.empty, [])
Packit 899799
            x:xs -> (x, xs)
Packit 899799
tokenToEvent _ es n (TokenContent (ContentEntity e))
Packit 899799
    | Just t <- lookup e es = (es, n, [EventContent $ ContentText t])
Packit 899799
tokenToEvent _ es n (TokenContent c) = (es, n, [EventContent c])
Packit 899799
tokenToEvent _ es n (TokenComment c) = (es, n, [EventComment c])
Packit 899799
tokenToEvent _ es n (TokenDoctype t eid es') = (es ++ es', n, [EventBeginDoctype t eid, EventEndDoctype])
Packit 899799
tokenToEvent _ es n (TokenCDATA t) = (es, n, [EventCDATA t])
Packit 899799
Packit 899799
tnameToName :: Bool -> NSLevel -> TName -> Name
Packit 899799
tnameToName _ _ (TName (Just "xml") name) =
Packit 899799
    Name name (Just "http://www.w3.org/XML/1998/namespace") (Just "xml")
Packit 899799
tnameToName isAttr (NSLevel def' _) (TName Nothing name) =
Packit 899799
    Name name (if isAttr then Nothing else def') Nothing
Packit 899799
tnameToName _ (NSLevel _ m) (TName (Just pref) name) =
Packit 899799
    case Map.lookup pref m of
Packit 899799
        Just ns -> Name name (Just ns) (Just pref)
Packit 899799
        Nothing -> Name name Nothing (Just pref) -- FIXME is this correct?
Packit 899799
Packit 899799
-- | Automatically determine which UTF variant is being used. This function
Packit 899799
-- first checks for BOMs, removing them as necessary, and then check for the
Packit 899799
-- equivalent of 
Packit 899799
-- defaults to assuming UTF-8.
Packit 899799
detectUtf :: MonadThrow m => Conduit S.ByteString m T.Text
Packit 899799
detectUtf =
Packit 899799
    conduit id
Packit 899799
  where
Packit 899799
    conduit front = await >>= maybe (return ()) (push front)
Packit 899799
Packit 899799
    push front bss =
Packit 899799
        either conduit
Packit 899799
               (uncurry checkXMLDecl)
Packit 899799
               (getEncoding front bss)
Packit 899799
Packit 899799
    getEncoding front bs'
Packit 899799
        | S.length bs < 4 =
Packit 899799
            Left (bs `S.append`)
Packit 899799
        | otherwise =
Packit 899799
            Right (bsOut, mcodec)
Packit 899799
      where
Packit 899799
        bs = front bs'
Packit 899799
        bsOut = S.append (S.drop toDrop x) y
Packit 899799
        (x, y) = S.splitAt 4 bs
Packit 899799
        (toDrop, mcodec) =
Packit 899799
            case S.unpack x of
Packit 899799
                [0x00, 0x00, 0xFE, 0xFF] -> (4, Just CT.utf32_be)
Packit 899799
                [0xFF, 0xFE, 0x00, 0x00] -> (4, Just CT.utf32_le)
Packit 899799
                0xFE : 0xFF: _           -> (2, Just CT.utf16_be)
Packit 899799
                0xFF : 0xFE: _           -> (2, Just CT.utf16_le)
Packit 899799
                0xEF : 0xBB: 0xBF : _    -> (3, Just CT.utf8)
Packit 899799
                [0x00, 0x00, 0x00, 0x3C] -> (0, Just CT.utf32_be)
Packit 899799
                [0x3C, 0x00, 0x00, 0x00] -> (0, Just CT.utf32_le)
Packit 899799
                [0x00, 0x3C, 0x00, 0x3F] -> (0, Just CT.utf16_be)
Packit 899799
                [0x3C, 0x00, 0x3F, 0x00] -> (0, Just CT.utf16_le)
Packit 899799
                _                        -> (0, Nothing) -- Assuming UTF-8
Packit 899799
Packit 899799
checkXMLDecl :: MonadThrow m
Packit 899799
             => S.ByteString
Packit 899799
             -> Maybe CT.Codec
Packit 899799
             -> Conduit S.ByteString m T.Text
Packit 899799
checkXMLDecl bs (Just codec) = leftover bs >> CT.decode codec
Packit 899799
checkXMLDecl bs0 Nothing =
Packit 899799
    loop [] (AT.parse (parseToken decodeXmlEntities)) bs0
Packit 899799
  where
Packit 899799
    loop chunks0 parser nextChunk =
Packit 899799
        case parser $ decodeUtf8With lenientDecode nextChunk of
Packit 899799
            AT.Fail{} -> fallback
Packit 899799
            AT.Partial f -> await >>= maybe fallback (loop chunks f)
Packit 899799
            AT.Done _ (TokenXMLDeclaration attrs) -> findEncoding attrs
Packit 899799
            AT.Done{} -> fallback
Packit 899799
      where
Packit 899799
        chunks = nextChunk : chunks0
Packit 899799
        fallback = complete CT.utf8
Packit 899799
        complete codec = mapM_ leftover chunks >> CT.decode codec
Packit 899799
Packit 899799
        findEncoding [] = fallback
Packit 899799
        findEncoding ((TName _ "encoding", [ContentText enc]):_) =
Packit 899799
            case T.toLower enc of
Packit 899799
                "iso-8859-1" -> complete CT.iso8859_1
Packit 899799
                "utf-8"      -> complete CT.utf8
Packit 899799
                _            -> complete CT.utf8
Packit 899799
        findEncoding (_:xs) = findEncoding xs
Packit 899799
Packit 899799
type EventPos = (Maybe PositionRange, Event)
Packit 899799
Packit 899799
-- | Parses a byte stream into 'Event's. This function is implemented fully in
Packit 899799
-- Haskell using attoparsec-text for parsing. The produced error messages do
Packit 899799
-- not give line/column information, so you may prefer to stick with the parser
Packit 899799
-- provided by libxml-enumerator. However, this has the advantage of not
Packit 899799
-- relying on any C libraries.
Packit 899799
--
Packit 899799
-- This relies on 'detectUtf' to determine character encoding, and 'parseText''
Packit 899799
-- to do the actual parsing.
Packit 899799
parseBytes :: MonadThrow m
Packit 899799
           => ParseSettings
Packit 899799
           -> Conduit S.ByteString m Event
Packit 899799
parseBytes = mapOutput snd . parseBytesPos
Packit 899799
Packit 899799
parseBytesPos :: MonadThrow m
Packit 899799
              => ParseSettings
Packit 899799
              -> Conduit S.ByteString m EventPos
Packit 899799
parseBytesPos ps = detectUtf =$= parseTextPos ps
Packit 899799
Packit 899799
dropBOM :: Monad m => Conduit T.Text m T.Text
Packit 899799
dropBOM =
Packit 899799
    await >>= maybe (return ()) push
Packit 899799
  where
Packit 899799
    push t =
Packit 899799
        case T.uncons t of
Packit 899799
            Nothing -> dropBOM
Packit 899799
            Just (c, cs) ->
Packit 899799
                let output
Packit 899799
                        | c == '\xfeef' = cs
Packit 899799
                        | otherwise = t
Packit 899799
                 in yield output >> idConduit
Packit 899799
    idConduit = await >>= maybe (return ()) (\x -> yield x >> idConduit)
Packit 899799
Packit 899799
-- | Parses a character stream into 'Event's. This function is implemented
Packit 899799
-- fully in Haskell using attoparsec-text for parsing. The produced error
Packit 899799
-- messages do not give line/column information, so you may prefer to stick
Packit 899799
-- with the parser provided by libxml-enumerator. However, this has the
Packit 899799
-- advantage of not relying on any C libraries.
Packit 899799
--
Packit 899799
-- Since 1.2.4
Packit 899799
parseText' :: MonadThrow m
Packit 899799
           => ParseSettings
Packit 899799
           -> Conduit T.Text m Event
Packit 899799
parseText' = mapOutput snd . parseTextPos
Packit 899799
Packit 899799
{-# DEPRECATED parseText "Please use 'parseText'' or 'parseTextPos'." #-}
Packit 899799
parseText :: MonadThrow m
Packit 899799
          => ParseSettings
Packit 899799
          -> Conduit T.Text m EventPos
Packit 899799
parseText = parseTextPos
Packit 899799
Packit 899799
-- | Same as 'parseText'', but includes the position of each event.
Packit 899799
--
Packit 899799
-- Since 1.2.4
Packit 899799
parseTextPos :: MonadThrow m
Packit 899799
          => ParseSettings
Packit 899799
          -> Conduit T.Text m EventPos
Packit 899799
parseTextPos de =
Packit 899799
    dropBOM
Packit 899799
        =$= tokenize
Packit 899799
        =$= toEventC de
Packit 899799
        =$= addBeginEnd
Packit 899799
  where
Packit 899799
    tokenize = conduitToken de
Packit 899799
    addBeginEnd = yield (Nothing, EventBeginDocument) >> addEnd
Packit 899799
    addEnd = await >>= maybe
Packit 899799
        (yield (Nothing, EventEndDocument))
Packit 899799
        (\e -> yield e >> addEnd)
Packit 899799
Packit 899799
toEventC :: Monad m => ParseSettings -> Conduit (PositionRange, Token) m EventPos
Packit 899799
toEventC ps =
Packit 899799
    go [] []
Packit 899799
  where
Packit 899799
    go !es !levels =
Packit 899799
        await >>= maybe (return ()) push
Packit 899799
      where
Packit 899799
        push (position, token) =
Packit 899799
            mapM_ (yield . (,) (Just position)) events >> go es' levels'
Packit 899799
          where
Packit 899799
            (es', levels', events) = tokenToEvent ps es levels token
Packit 899799
Packit 899799
data ParseSettings = ParseSettings
Packit 899799
    { psDecodeEntities   :: DecodeEntities
Packit 899799
    , psRetainNamespaces :: Bool
Packit 899799
    -- ^ Whether the original xmlns attributes should be retained in the parsed
Packit 899799
    -- values. For more information on motivation, see:
Packit 899799
    --
Packit 899799
    -- <https://github.com/snoyberg/xml/issues/38>
Packit 899799
    --
Packit 899799
    -- Default: False
Packit 899799
    --
Packit 899799
    -- Since 1.2.1
Packit 899799
    }
Packit 899799
Packit 899799
instance Default ParseSettings where
Packit 899799
    def = ParseSettings
Packit 899799
        { psDecodeEntities = decodeXmlEntities
Packit 899799
        , psRetainNamespaces = False
Packit 899799
        }
Packit 899799
Packit 899799
conduitToken :: MonadThrow m => ParseSettings -> Conduit T.Text m (PositionRange, Token)
Packit 899799
conduitToken = conduitParser . parseToken . psDecodeEntities
Packit 899799
Packit 899799
parseToken :: DecodeEntities -> Parser Token
Packit 899799
parseToken de = (char '<' >> parseLt) <|> TokenContent <$> parseContent de False False
Packit 899799
  where
Packit 899799
    parseLt =
Packit 899799
        (char '?' >> parseInstr) <|>
Packit 899799
        (char '!' >> (parseComment <|> parseCdata <|> parseDoctype)) <|>
Packit 899799
        parseBegin <|>
Packit 899799
        (char '/' >> parseEnd)
Packit 899799
    parseInstr = do
Packit 899799
        name <- parseIdent
Packit 899799
        if name == "xml"
Packit 899799
            then do
Packit 899799
                as <- A.many $ parseAttribute de
Packit 899799
                skipSpace
Packit 899799
                char' '?'
Packit 899799
                char' '>'
Packit 899799
                newline <|> return ()
Packit 899799
                return $ TokenXMLDeclaration as
Packit 899799
            else do
Packit 899799
                skipSpace
Packit 899799
                x <- T.pack <$> manyTill anyChar (try $ string "?>")
Packit 899799
                return $ TokenInstruction $ Instruction name x
Packit 899799
    parseComment = do
Packit 899799
        char' '-'
Packit 899799
        char' '-'
Packit 899799
        c <- T.pack <$> manyTill anyChar (string "-->") -- FIXME use takeWhile instead
Packit 899799
        return $ TokenComment c
Packit 899799
    parseCdata = do
Packit 899799
        _ <- string "[CDATA["
Packit 899799
        t <- T.pack <$> manyTill anyChar (string "]]>") -- FIXME use takeWhile instead
Packit 899799
        return $ TokenCDATA t
Packit 899799
    parseDoctype = do
Packit 899799
        _ <- string "DOCTYPE"
Packit 899799
        skipSpace
Packit 899799
        name <- parseName
Packit 899799
        let i =
Packit 899799
                case name of
Packit 899799
                    TName Nothing x  -> x
Packit 899799
                    TName (Just x) y -> T.concat [x, ":", y]
Packit 899799
        skipSpace
Packit 899799
        eid <- fmap Just parsePublicID <|>
Packit 899799
               fmap Just parseSystemID <|>
Packit 899799
               return Nothing
Packit 899799
        skipSpace
Packit 899799
        ents <- (do
Packit 899799
            char' '['
Packit 899799
            ents <- parseEntities id
Packit 899799
            skipSpace
Packit 899799
            return ents) <|> return []
Packit 899799
        char' '>'
Packit 899799
        newline <|> return ()
Packit 899799
        return $ TokenDoctype i eid ents
Packit 899799
    parseEntities front =
Packit 899799
        (char ']' >> return (front [])) <|>
Packit 899799
        (parseEntity >>= \e -> parseEntities (front . (e:))) <|>
Packit 899799
        (char '<' >> parseEntities front) <|>
Packit 899799
        (skipWhile (\t -> t /= ']' && t /= '<') >> parseEntities front)
Packit 899799
    parseEntity = try $ do
Packit 899799
        _ <- string "
Packit 899799
        skipSpace
Packit 899799
        i <- parseIdent
Packit 899799
        t <- quotedText
Packit 899799
        skipSpace
Packit 899799
        char' '>'
Packit 899799
        return (i, t)
Packit 899799
    parsePublicID = do
Packit 899799
        _ <- string "PUBLIC"
Packit 899799
        x <- quotedText
Packit 899799
        y <- quotedText
Packit 899799
        return $ PublicID x y
Packit 899799
    parseSystemID = do
Packit 899799
        _ <- string "SYSTEM"
Packit 899799
        x <- quotedText
Packit 899799
        return $ SystemID x
Packit 899799
    quotedText = do
Packit 899799
        skipSpace
Packit 899799
        between '"' <|> between '\''
Packit 899799
    between c = do
Packit 899799
        char' c
Packit 899799
        x <- takeWhile (/=c)
Packit 899799
        char' c
Packit 899799
        return x
Packit 899799
    parseEnd = do
Packit 899799
        skipSpace
Packit 899799
        n <- parseName
Packit 899799
        skipSpace
Packit 899799
        char' '>'
Packit 899799
        return $ TokenEndElement n
Packit 899799
    parseBegin = do
Packit 899799
        skipSpace
Packit 899799
        n <- parseName
Packit 899799
        as <- A.many $ parseAttribute de
Packit 899799
        skipSpace
Packit 899799
        isClose <- (char '/' >> skipSpace >> return True) <|> return False
Packit 899799
        char' '>'
Packit 899799
        return $ TokenBeginElement n as isClose 0
Packit 899799
Packit 899799
parseAttribute :: DecodeEntities -> Parser TAttribute
Packit 899799
parseAttribute de = do
Packit 899799
    skipSpace
Packit 899799
    key <- parseName
Packit 899799
    skipSpace
Packit 899799
    char' '='
Packit 899799
    skipSpace
Packit 899799
    val <- squoted <|> dquoted
Packit 899799
    return (key, val)
Packit 899799
  where
Packit 899799
    squoted = char '\'' *> manyTill (parseContent de False True) (char '\'')
Packit 899799
    dquoted = char  '"' *> manyTill (parseContent de True False) (char  '"')
Packit 899799
Packit 899799
parseName :: Parser TName
Packit 899799
parseName =
Packit 899799
  name <$> parseIdent <*> A.optional (char ':' >> parseIdent)
Packit 899799
  where
Packit 899799
    name i1 Nothing   = TName Nothing i1
Packit 899799
    name i1 (Just i2) = TName (Just i1) i2
Packit 899799
Packit 899799
parseIdent :: Parser Text
Packit 899799
parseIdent =
Packit 899799
    takeWhile1 valid
Packit 899799
  where
Packit 899799
    valid '&'  = False
Packit 899799
    valid '<'  = False
Packit 899799
    valid '>'  = False
Packit 899799
    valid ':'  = False
Packit 899799
    valid '?'  = False
Packit 899799
    valid '='  = False
Packit 899799
    valid '"'  = False
Packit 899799
    valid '\'' = False
Packit 899799
    valid '/'  = False
Packit 899799
    valid ';'  = False
Packit 899799
    valid '#'  = False
Packit 899799
    valid c    = not $ isXMLSpace c
Packit 899799
Packit 899799
parseContent :: DecodeEntities
Packit 899799
             -> Bool -- break on double quote
Packit 899799
             -> Bool -- break on single quote
Packit 899799
             -> Parser Content
Packit 899799
parseContent de breakDouble breakSingle = parseReference <|> parseTextContent where
Packit 899799
  parseReference = do
Packit 899799
    char' '&'
Packit 899799
    t <- parseEntityRef <|> parseHexCharRef <|> parseDecCharRef
Packit 899799
    char' ';'
Packit 899799
    return t
Packit 899799
  parseEntityRef = do
Packit 899799
    TName ma b <- parseName
Packit 899799
    let name = maybe "" (`T.append` ":") ma `T.append` b
Packit 899799
    return $ case name of
Packit 899799
      "lt" -> ContentText "<"
Packit 899799
      "gt" -> ContentText ">"
Packit 899799
      "amp" -> ContentText "&"
Packit 899799
      "quot" -> ContentText "\""
Packit 899799
      "apos" -> ContentText "'"
Packit 899799
      _ -> de name
Packit 899799
  parseHexCharRef = do
Packit 899799
    void $ string "#x"
Packit 899799
    n <- AT.hexadecimal
Packit 899799
    case toValidXmlChar n of
Packit 899799
      Nothing -> fail "Invalid character from hexadecimal character reference."
Packit 899799
      Just c -> return $ ContentText $ T.singleton c
Packit 899799
  parseDecCharRef = do
Packit 899799
    void $ string "#"
Packit 899799
    n <- AT.decimal
Packit 899799
    case toValidXmlChar n of
Packit 899799
      Nothing -> fail "Invalid character from decimal character reference."
Packit 899799
      Just c -> return $ ContentText $ T.singleton c
Packit 899799
  parseTextContent = ContentText <$> takeWhile1 valid
Packit 899799
  valid '"'  = not breakDouble
Packit 899799
  valid '\'' = not breakSingle
Packit 899799
  valid '&'  = False -- amp
Packit 899799
  valid '<'  = False -- lt
Packit 899799
  valid _    = True
Packit 899799
Packit 899799
-- | Is this codepoint a valid XML character? See
Packit 899799
-- <https://www.w3.org/TR/xml/#charsets>. This is proudly XML 1.0 only.
Packit 899799
toValidXmlChar :: Int -> Maybe Char
Packit 899799
toValidXmlChar n
Packit 899799
  | any checkRange ranges = Just (toEnum n)
Packit 899799
  | otherwise = Nothing
Packit 899799
  where
Packit 899799
    --Inclusive lower bound, inclusive upper bound.
Packit 899799
    ranges :: [(Int, Int)]
Packit 899799
    ranges =
Packit 899799
      [ (0x9, 0xA)
Packit 899799
      , (0xD, 0xD)
Packit 899799
      , (0x20, 0xD7FF)
Packit 899799
      , (0xE000, 0xFFFD)
Packit 899799
      , (0x10000, 0x10FFFF)
Packit 899799
      ]
Packit 899799
    checkRange (lb, ub) = lb <= n && n <= ub
Packit 899799
Packit 899799
skipSpace :: Parser ()
Packit 899799
skipSpace = skipWhile isXMLSpace
Packit 899799
Packit 899799
-- | Determines whether a character is an XML white space. The list of
Packit 899799
-- white spaces is given by
Packit 899799
--
Packit 899799
-- >  S ::= (#x20 | #x9 | #xD | #xA)+
Packit 899799
--
Packit 899799
-- in <http://www.w3.org/TR/2008/REC-xml-20081126/#sec-common-syn>.
Packit 899799
isXMLSpace :: Char -> Bool
Packit 899799
isXMLSpace ' '  = True
Packit 899799
isXMLSpace '\t' = True
Packit 899799
isXMLSpace '\r' = True
Packit 899799
isXMLSpace '\n' = True
Packit 899799
isXMLSpace _    = False
Packit 899799
Packit 899799
newline :: Parser ()
Packit 899799
newline = void $ (char '\r' >> char '\n') <|> char '\n'
Packit 899799
Packit 899799
char' :: Char -> Parser ()
Packit 899799
char' = void . char
Packit 899799
Packit 899799
data ContentType = Ignore | IsContent Text | IsError String | NotContent
Packit 899799
Packit 899799
-- | Grabs the next piece of content if available. This function skips over any
Packit 899799
-- comments and instructions and concatenates all content until the next start
Packit 899799
-- or end tag.
Packit 899799
contentMaybe :: MonadThrow m => Consumer Event m (Maybe Text)
Packit 899799
contentMaybe = do
Packit 899799
    x <- CL.peek
Packit 899799
    case pc' x of
Packit 899799
        Ignore      -> CL.drop 1 >> contentMaybe
Packit 899799
        IsContent t -> CL.drop 1 >> fmap Just (takeContents (t:))
Packit 899799
        IsError e   -> lift $ monadThrow $ InvalidEntity e x
Packit 899799
        NotContent  -> return Nothing
Packit 899799
  where
Packit 899799
    pc' Nothing  = NotContent
Packit 899799
    pc' (Just x) = pc x
Packit 899799
    pc (EventContent (ContentText t)) = IsContent t
Packit 899799
    pc (EventContent (ContentEntity e)) = IsError $ "Unknown entity: " ++ show e
Packit 899799
    pc (EventCDATA t) = IsContent t
Packit 899799
    pc EventBeginElement{} = NotContent
Packit 899799
    pc EventEndElement{} = NotContent
Packit 899799
    pc EventBeginDocument{} = Ignore
Packit 899799
    pc EventEndDocument = Ignore
Packit 899799
    pc EventBeginDoctype{} = Ignore
Packit 899799
    pc EventEndDoctype = Ignore
Packit 899799
    pc EventInstruction{} = Ignore
Packit 899799
    pc EventComment{} = Ignore
Packit 899799
    takeContents front = do
Packit 899799
        x <- CL.peek
Packit 899799
        case pc' x of
Packit 899799
            Ignore      -> CL.drop 1 >> takeContents front
Packit 899799
            IsContent t -> CL.drop 1 >> takeContents (front . (:) t)
Packit 899799
            IsError e   -> lift $ monadThrow $ InvalidEntity e x
Packit 899799
            NotContent  -> return $ T.concat $ front []
Packit 899799
Packit 899799
-- | Grabs the next piece of content. If none if available, returns 'T.empty'.
Packit 899799
-- This is simply a wrapper around 'contentMaybe'.
Packit 899799
content :: MonadThrow m => Consumer Event m Text
Packit 899799
content = fromMaybe T.empty <$> contentMaybe
Packit 899799
Packit 899799
Packit 899799
isWhitespace :: Event -> Bool
Packit 899799
isWhitespace EventBeginDocument             = True
Packit 899799
isWhitespace EventEndDocument               = True
Packit 899799
isWhitespace EventBeginDoctype{}            = True
Packit 899799
isWhitespace EventEndDoctype                = True
Packit 899799
isWhitespace EventInstruction{}             = True
Packit 899799
isWhitespace (EventContent (ContentText t)) = T.all isSpace t
Packit 899799
isWhitespace EventComment{}                 = True
Packit 899799
isWhitespace (EventCDATA t)                 = T.all isSpace t
Packit 899799
isWhitespace _                              = False
Packit 899799
Packit 899799
Packit 899799
-- | The most generic way to parse a tag. It takes a 'NameMatcher' to check whether
Packit 899799
-- this is a correct tag name, an 'AttrParser' to handle attributes, and
Packit 899799
-- then a parser to deal with content.
Packit 899799
--
Packit 899799
-- 'Events' are consumed if and only if the tag name and its attributes match.
Packit 899799
--
Packit 899799
-- This function automatically absorbs its balancing closing tag, and will
Packit 899799
-- throw an exception if not all of the attributes or child elements are
Packit 899799
-- consumed. If you want to allow extra attributes, see 'ignoreAttrs'.
Packit 899799
--
Packit 899799
-- This function automatically ignores comments, instructions and whitespace.
Packit 899799
tag :: MonadThrow m
Packit 899799
    => NameMatcher a -- ^ Check if this is a correct tag name
Packit 899799
                     --   and return a value that can be used to get an @AttrParser@.
Packit 899799
                     --   If this fails, the function will return @Nothing@
Packit 899799
    -> (a -> AttrParser b) -- ^ Given the value returned by the name checker, this function will
Packit 899799
                           --   be used to get an @AttrParser@ appropriate for the specific tag.
Packit 899799
                           --   If the @AttrParser@ fails, the function will also return @Nothing@
Packit 899799
    -> (b -> ConduitM Event o m c) -- ^ Handler function to handle the attributes and children
Packit 899799
                                   --   of a tag, given the value return from the @AttrParser@
Packit 899799
    -> ConduitM Event o m (Maybe c)
Packit 899799
tag nameMatcher attrParser f = do
Packit 899799
  (x, leftovers) <- dropWS []
Packit 899799
  res <- case x of
Packit 899799
    Just (EventBeginElement name as) -> case runNameMatcher nameMatcher name of
Packit 899799
      Just y -> case runAttrParser' (attrParser y) as of
Packit 899799
        Left _ -> return Nothing
Packit 899799
        Right z -> do
Packit 899799
          z' <- f z
Packit 899799
          (a, _leftovers') <- dropWS []
Packit 899799
          case a of
Packit 899799
            Just (EventEndElement name')
Packit 899799
              | name == name' -> return (Just z')
Packit 899799
            _ -> lift $ monadThrow $ InvalidEndElement name a
Packit 899799
      Nothing -> return Nothing
Packit 899799
    _ -> return Nothing
Packit 899799
Packit 899799
  case res of
Packit 899799
    -- Did not parse, put back all of the leading whitespace events and the
Packit 899799
    -- final observed event generated by dropWS
Packit 899799
    Nothing -> mapM_ leftover leftovers
Packit 899799
    -- Parse succeeded, discard all of those whitespace events and the
Packit 899799
    -- first parsed event
Packit 899799
    Just _  -> return ()
Packit 899799
Packit 899799
  return res
Packit 899799
  where
Packit 899799
    -- Drop Events until we encounter a non-whitespace element. Return all of
Packit 899799
    -- the events consumed here (including the first non-whitespace event) so
Packit 899799
    -- that the calling function can treat them as leftovers if the parse fails
Packit 899799
    dropWS leftovers = do
Packit 899799
        x <- await
Packit 899799
        let leftovers' = maybe id (:) x leftovers
Packit 899799
Packit 899799
        case isWhitespace <$> x of
Packit 899799
          Just True -> dropWS leftovers'
Packit 899799
          _         -> return (x, leftovers')
Packit 899799
    runAttrParser' p as =
Packit 899799
        case runAttrParser p as of
Packit 899799
            Left e          -> Left e
Packit 899799
            Right ([], x)   -> Right x
Packit 899799
            Right (attr, _) -> Left $ toException $ UnparsedAttributes attr
Packit 899799
Packit 899799
-- | A simplified version of 'tag' where the 'NameMatcher' result isn't forwarded to the attributes parser.
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
tag' :: MonadThrow m
Packit 899799
     => NameMatcher a -> AttrParser b -> (b -> ConduitM Event o m c)
Packit 899799
     -> ConduitM Event o m (Maybe c)
Packit 899799
tag' a b = tag a (const b)
Packit 899799
Packit 899799
-- | A further simplified tag parser, which requires that no attributes exist.
Packit 899799
tagNoAttr :: MonadThrow m
Packit 899799
          => NameMatcher a -- ^ Check if this is a correct tag name
Packit 899799
          -> ConduitM Event o m b -- ^ Handler function to handle the children of the matched tag
Packit 899799
          -> ConduitM Event o m (Maybe b)
Packit 899799
tagNoAttr name f = tag' name (return ()) $ const f
Packit 899799
Packit 899799
Packit 899799
-- | A further simplified tag parser, which ignores all attributes, if any exist
Packit 899799
tagIgnoreAttrs :: MonadThrow m
Packit 899799
               => NameMatcher a -- ^ Check if this is a correct tag name
Packit 899799
               -> ConduitM Event o m b -- ^ Handler function to handle the children of the matched tag
Packit 899799
               -> ConduitM Event o m (Maybe b)
Packit 899799
tagIgnoreAttrs name f = tag' name ignoreAttrs $ const f
Packit 899799
Packit 899799
Packit 899799
-- | Ignore an empty tag and all of its attributes.
Packit 899799
--   This does not ignore the tag recursively
Packit 899799
--   (i.e. it assumes there are no child elements).
Packit 899799
--   This function returns @Just ()@ if the tag matched.
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
ignoreEmptyTag :: MonadThrow m
Packit 899799
          => NameMatcher a -- ^ Check if this is a correct tag name
Packit 899799
          -> ConduitM Event o m (Maybe ())
Packit 899799
ignoreEmptyTag nameMatcher = tagIgnoreAttrs nameMatcher (return ())
Packit 899799
Packit 899799
Packit 899799
{-# DEPRECATED ignoreTag "Please use 'ignoreEmptyTag'." #-}
Packit 899799
ignoreTag :: MonadThrow m
Packit 899799
          => NameMatcher a -- ^ Check if this is a correct tag name
Packit 899799
          -> ConduitM Event o m (Maybe ())
Packit 899799
ignoreTag = ignoreEmptyTag
Packit 899799
Packit 899799
Packit 899799
-- | Ignore a tag, its attributes and its children subtrees recursively.
Packit 899799
--   Both content and text events are ignored.
Packit 899799
--   This function returns @Just ()@ if the tag matched.
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
ignoreTreeContent :: MonadThrow m
Packit 899799
                  => NameMatcher a -- ^ Check if this is a correct tag name
Packit 899799
                  -> ConduitM Event o m (Maybe ())
Packit 899799
ignoreTreeContent namePred = tagIgnoreAttrs namePred (void $ many ignoreAnyTreeContent)
Packit 899799
Packit 899799
{-# DEPRECATED ignoreTree "Please use 'ignoreTreeContent'." #-}
Packit 899799
ignoreTree :: MonadThrow m
Packit 899799
           => NameMatcher a -- ^ Check if this is a correct tag name
Packit 899799
           -> ConduitM Event o m (Maybe ())
Packit 899799
ignoreTree = ignoreTreeContent
Packit 899799
Packit 899799
-- | Like 'ignoreTreeContent', but matches any name and also ignores content events.
Packit 899799
ignoreAnyTreeContent :: MonadThrow m => ConduitM Event o m (Maybe ())
Packit 899799
ignoreAnyTreeContent = (void <$> contentMaybe) `orE` ignoreTreeContent anyName
Packit 899799
Packit 899799
{-# DEPRECATED ignoreAllTreesContent "Please use 'ignoreAnyTreeContent'." #-}
Packit 899799
ignoreAllTreesContent :: MonadThrow m => ConduitM Event o m (Maybe ())
Packit 899799
ignoreAllTreesContent = ignoreAnyTreeContent
Packit 899799
Packit 899799
-- | Get the value of the first parser which returns 'Just'. If no parsers
Packit 899799
-- succeed (i.e., return @Just@), this function returns 'Nothing'.
Packit 899799
--
Packit 899799
-- > orE a b = choose [a, b]
Packit 899799
orE :: Monad m
Packit 899799
    => Consumer Event m (Maybe a) -- ^ The first (preferred) parser
Packit 899799
    -> Consumer Event m (Maybe a) -- ^ The second parser, only executed if the first parser fails
Packit 899799
    -> Consumer Event m (Maybe a)
Packit 899799
orE a b = a >>= \x -> maybe b (const $ return x) x
Packit 899799
Packit 899799
-- | Get the value of the first parser which returns 'Just'. If no parsers
Packit 899799
-- succeed (i.e., return 'Just'), this function returns 'Nothing'.
Packit 899799
choose :: Monad m
Packit 899799
       => [ConduitM Event o m (Maybe a)] -- ^ List of parsers that will be tried in order.
Packit 899799
       -> ConduitM Event o m (Maybe a)   -- ^ Result of the first parser to succeed, or @Nothing@
Packit 899799
                                         --   if no parser succeeded
Packit 899799
choose []     = return Nothing
Packit 899799
choose (i:is) = i >>= maybe (choose is) (return . Just)
Packit 899799
Packit 899799
-- | Force an optional parser into a required parser. All of the 'tag'
Packit 899799
-- functions, 'attr', 'choose' and 'many' deal with 'Maybe' parsers. Use this when you
Packit 899799
-- want to finally force something to happen.
Packit 899799
force :: MonadThrow m
Packit 899799
      => String -- ^ Error message
Packit 899799
      -> m (Maybe a) -- ^ Optional parser to be forced
Packit 899799
      -> m a
Packit 899799
force msg i = i >>= maybe (throwM $ XmlException msg Nothing) return
Packit 899799
Packit 899799
-- | A helper function which reads a file from disk using 'enumFile', detects
Packit 899799
-- character encoding using 'detectUtf', parses the XML using 'parseBytes', and
Packit 899799
-- then hands off control to your supplied parser.
Packit 899799
parseFile :: MonadResource m
Packit 899799
          => ParseSettings
Packit 899799
          -> FilePath
Packit 899799
          -> Producer m Event
Packit 899799
parseFile ps fp = sourceFile fp =$= parseBytes ps
Packit 899799
Packit 899799
-- | Parse an event stream from a lazy 'L.ByteString'.
Packit 899799
parseLBS :: MonadThrow m
Packit 899799
         => ParseSettings
Packit 899799
         -> L.ByteString
Packit 899799
         -> Producer m Event
Packit 899799
parseLBS ps lbs = CL.sourceList (L.toChunks lbs) =$= parseBytes ps
Packit 899799
Packit 899799
data XmlException = XmlException
Packit 899799
    { xmlErrorMessage :: String
Packit 899799
    , xmlBadInput     :: Maybe Event
Packit 899799
    }
Packit 899799
                  | InvalidEndElement Name (Maybe Event)
Packit 899799
                  | InvalidEntity String (Maybe Event)
Packit 899799
                  | MissingAttribute String
Packit 899799
                  | UnparsedAttributes [(Name, [Content])]
Packit 899799
    deriving (Show, Typeable)
Packit 899799
Packit 899799
instance Exception XmlException where
Packit 899799
#if MIN_VERSION_base(4, 8, 0)
Packit 899799
  displayException (XmlException msg (Just event)) = "Error while parsing XML event " ++ show event ++ ": " ++ msg
Packit 899799
  displayException (XmlException msg _) = "Error while parsing XML: " ++ msg
Packit 899799
  displayException (InvalidEndElement name (Just event)) = "Error while parsing XML event: expected </" ++ T.unpack (nameLocalName name) ++ ">, got " ++ show event
Packit 899799
  displayException (InvalidEndElement name _) = "Error while parsing XML event: expected </" ++ show name ++ ">, got nothing"
Packit 899799
  displayException (InvalidEntity msg (Just event)) = "Error while parsing XML entity " ++ show event ++ ": " ++ msg
Packit 899799
  displayException (InvalidEntity msg _) = "Error while parsing XML entity: " ++ msg
Packit 899799
  displayException (MissingAttribute msg) = "Missing required attribute: " ++ msg
Packit 899799
  displayException (UnparsedAttributes attrs) = show (length attrs) ++ " remaining unparsed attributes: \n" ++ intercalate "\n" (show <$> attrs)
Packit 899799
#endif
Packit 899799
Packit 899799
Packit 899799
-- | A @NameMatcher@ describes which names a tag parser is allowed to match.
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
newtype NameMatcher a = NameMatcher { runNameMatcher :: Name -> Maybe a }
Packit 899799
Packit 899799
deriving instance Functor NameMatcher
Packit 899799
Packit 899799
instance Applicative NameMatcher where
Packit 899799
  pure a = NameMatcher $ const $ pure a
Packit 899799
  NameMatcher f <*> NameMatcher a = NameMatcher $ \name -> f name <*> a name
Packit 899799
Packit 899799
-- | 'NameMatcher's can be combined with @\<|\>@
Packit 899799
instance Alternative NameMatcher where
Packit 899799
  empty = NameMatcher $ const Nothing
Packit 899799
  NameMatcher f <|> NameMatcher g = NameMatcher (\a -> f a <|> g a)
Packit 899799
Packit 899799
-- | Match a single 'Name' in a concise way.
Packit 899799
-- Note that 'Name' is namespace sensitive: when using the 'IsString' instance,
Packit 899799
-- use @"{http:\/\/a\/b}c"@ to match the tag @c@ in the XML namespace @http://a/b@
Packit 899799
instance (a ~ Name) => IsString (NameMatcher a) where
Packit 899799
  fromString s = matching (== fromString s)
Packit 899799
Packit 899799
-- | @matching f@ matches @name@ iff @f name@ is true. Returns the matched 'Name'.
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
matching :: (Name -> Bool) -> NameMatcher Name
Packit 899799
matching f = NameMatcher $ \name -> if f name then Just name else Nothing
Packit 899799
Packit 899799
-- | Matches any 'Name'. Returns the matched 'Name'.
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
anyName :: NameMatcher Name
Packit 899799
anyName = matching (const True)
Packit 899799
Packit 899799
-- | Matches any 'Name' from the given list. Returns the matched 'Name'.
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
anyOf :: [Name] -> NameMatcher Name
Packit 899799
anyOf values = matching (`elem` values)
Packit 899799
Packit 899799
Packit 899799
-- | A monad for parsing attributes. By default, it requires you to deal with
Packit 899799
-- all attributes present on an element, and will throw an exception if there
Packit 899799
-- are unhandled attributes. Use the 'requireAttr', 'attr' et al
Packit 899799
-- functions for handling an attribute, and 'ignoreAttrs' if you would like to
Packit 899799
-- skip the rest of the attributes on an element.
Packit 899799
--
Packit 899799
-- 'Alternative' instance behaves like 'First' monoid: it chooses first
Packit 899799
-- parser which doesn't fail.
Packit 899799
newtype AttrParser a = AttrParser { runAttrParser :: [(Name, [Content])] -> Either SomeException ([(Name, [Content])], a) }
Packit 899799
Packit 899799
instance Monad AttrParser where
Packit 899799
    return a = AttrParser $ \as -> Right (as, a)
Packit 899799
    (AttrParser f) >>= g = AttrParser $ \as ->
Packit 899799
        either Left (\(as', f') -> runAttrParser (g f') as') (f as)
Packit 899799
instance Functor AttrParser where
Packit 899799
    fmap = liftM
Packit 899799
instance Applicative AttrParser where
Packit 899799
    pure = return
Packit 899799
    (<*>) = ap
Packit 899799
instance Alternative AttrParser where
Packit 899799
    empty = AttrParser $ const $ Left $ toException $ XmlException "AttrParser.empty" Nothing
Packit 899799
    AttrParser f <|> AttrParser g = AttrParser $ \x ->
Packit 899799
        either (const $ g x) Right (f x)
Packit 899799
instance MonadThrow AttrParser where
Packit 899799
    throwM = AttrParser . const . throwM
Packit 899799
Packit 899799
optionalAttrRaw :: ((Name, [Content]) -> Maybe b) -> AttrParser (Maybe b)
Packit 899799
optionalAttrRaw f =
Packit 899799
    AttrParser $ go id
Packit 899799
  where
Packit 899799
    go front [] = Right (front [], Nothing)
Packit 899799
    go front (a:as) =
Packit 899799
        maybe (go (front . (:) a) as)
Packit 899799
              (\b -> Right (front as, Just b))
Packit 899799
              (f a)
Packit 899799
Packit 899799
requireAttrRaw :: String -> ((Name, [Content]) -> Maybe b) -> AttrParser b
Packit 899799
requireAttrRaw msg f = optionalAttrRaw f >>=
Packit 899799
    maybe (AttrParser $ const $ Left $ toException $ MissingAttribute msg)
Packit 899799
          return
Packit 899799
Packit 899799
-- | Return the value for an attribute if present.
Packit 899799
attr :: Name -> AttrParser (Maybe Text)
Packit 899799
attr n = optionalAttrRaw
Packit 899799
    (\(x, y) -> if x == n then Just (contentsToText y) else Nothing)
Packit 899799
Packit 899799
-- | Shortcut composition of 'force' and 'attr'.
Packit 899799
requireAttr :: Name -> AttrParser Text
Packit 899799
requireAttr n = force ("Missing attribute: " ++ show n) $ attr n
Packit 899799
Packit 899799
Packit 899799
{-# DEPRECATED optionalAttr "Please use 'attr'." #-}
Packit 899799
optionalAttr :: Name -> AttrParser (Maybe Text)
Packit 899799
optionalAttr = attr
Packit 899799
Packit 899799
contentsToText :: [Content] -> Text
Packit 899799
contentsToText = T.concat . map toText where
Packit 899799
  toText (ContentText t)   = t
Packit 899799
  toText (ContentEntity e) = T.concat ["&", e, ";"]
Packit 899799
Packit 899799
-- | Skip the remaining attributes on an element. Since this will clear the
Packit 899799
-- list of attributes, you must call this /after/ any calls to 'requireAttr',
Packit 899799
-- 'optionalAttr', etc.
Packit 899799
ignoreAttrs :: AttrParser ()
Packit 899799
ignoreAttrs = AttrParser $ const $ Right ([], ())
Packit 899799
Packit 899799
-- | Keep parsing elements as long as the parser returns 'Just'.
Packit 899799
many :: Monad m
Packit 899799
     => ConduitM Event o m (Maybe a)
Packit 899799
     -> ConduitM Event o m [a]
Packit 899799
many i = manyIgnore i $ return Nothing
Packit 899799
Packit 899799
-- | Like 'many' but discards the results without building an intermediate list.
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
many_ :: MonadThrow m
Packit 899799
      => ConduitM Event o m (Maybe a)
Packit 899799
      -> ConduitM Event o m ()
Packit 899799
many_ consumer = manyIgnoreYield (return Nothing) (void <$> consumer)
Packit 899799
Packit 899799
-- | Keep parsing elements as long as the parser returns 'Just'
Packit 899799
--   or the ignore parser returns 'Just'.
Packit 899799
manyIgnore :: Monad m
Packit 899799
           => ConduitM Event o m (Maybe a)
Packit 899799
           -> ConduitM Event o m (Maybe b)
Packit 899799
           -> ConduitM Event o m [a]
Packit 899799
manyIgnore i ignored = go id where
Packit 899799
  go front = i >>= maybe (onFail front) (\y -> go $ front . (:) y)
Packit 899799
  -- onFail is called if the main parser fails
Packit 899799
  onFail front = ignored >>= maybe (return $ front []) (const $ go front)
Packit 899799
Packit 899799
-- | Like @many@, but any tags and content the consumer doesn't match on
Packit 899799
--   are silently ignored.
Packit 899799
many' :: MonadThrow m
Packit 899799
      => ConduitM Event o m (Maybe a)
Packit 899799
      -> ConduitM Event o m [a]
Packit 899799
many' consumer = manyIgnore consumer ignoreAllTreesContent
Packit 899799
Packit 899799
Packit 899799
-- | Like 'many', but uses 'yield' so the result list can be streamed
Packit 899799
--   to downstream conduits without waiting for 'manyYield' to finish
Packit 899799
manyYield :: Monad m
Packit 899799
          => ConduitM a b m (Maybe b)
Packit 899799
          -> Conduit a m b
Packit 899799
manyYield consumer = fix $ \loop ->
Packit 899799
  consumer >>= maybe (return ()) (\x -> yield x >> loop)
Packit 899799
Packit 899799
-- | Like 'manyIgnore', but uses 'yield' so the result list can be streamed
Packit 899799
--   to downstream conduits without waiting for 'manyIgnoreYield' to finish
Packit 899799
manyIgnoreYield :: MonadThrow m
Packit 899799
                => ConduitM Event b m (Maybe b) -- ^ Consuming parser that generates the result stream
Packit 899799
                -> ConduitM Event b m (Maybe ()) -- ^ Ignore parser that consumes elements to be ignored
Packit 899799
                -> Conduit Event m b
Packit 899799
manyIgnoreYield consumer ignoreParser = fix $ \loop ->
Packit 899799
  consumer >>= maybe (onFail loop) (\x -> yield x >> loop)
Packit 899799
  where onFail loop = ignoreParser >>= maybe (return ()) (const loop)
Packit 899799
Packit 899799
-- | Like 'many'', but uses 'yield' so the result list can be streamed
Packit 899799
--   to downstream conduits without waiting for 'manyYield'' to finish
Packit 899799
manyYield' :: MonadThrow m
Packit 899799
           => ConduitM Event b m (Maybe b)
Packit 899799
           -> Conduit Event m b
Packit 899799
manyYield' consumer = manyIgnoreYield consumer ignoreAllTreesContent
Packit 899799
Packit 899799
Packit 899799
-- | Stream a content 'Event'. If next event isn't a content, nothing is consumed.
Packit 899799
--
Packit 899799
-- Returns @Just ()@ if a content 'Event' was consumed, @Nothing@ otherwise.
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
takeContent :: MonadThrow m => ConduitM Event Event m (Maybe ())
Packit 899799
takeContent = do
Packit 899799
  event <- await
Packit 899799
  case event of
Packit 899799
    Just e@(EventContent ContentText{}) -> yield e >> return (Just ())
Packit 899799
    Just e@EventCDATA{}                 -> yield e >> return (Just ())
Packit 899799
    Just e -> if isWhitespace e then yield e >> takeContent else leftover e >> return Nothing
Packit 899799
    _ -> return Nothing
Packit 899799
Packit 899799
-- | Stream 'Event's corresponding to a single element that matches given 'NameMatcher' and 'AttrParser', from the opening- to the closing-tag.
Packit 899799
--
Packit 899799
-- If next 'Event' isn't an element, nothing is consumed.
Packit 899799
--
Packit 899799
-- If an opening-tag is consumed but no matching closing-tag is found, an 'XmlException' is thrown.
Packit 899799
--
Packit 899799
-- This function automatically ignores comments, instructions and whitespace.
Packit 899799
--
Packit 899799
-- Returns @Just ()@ if an element was consumed, 'Nothing' otherwise.
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
takeTree :: MonadThrow m => NameMatcher a -> AttrParser b -> ConduitM Event Event m (Maybe ())
Packit 899799
takeTree nameMatcher attrParser = do
Packit 899799
  event <- await
Packit 899799
  case event of
Packit 899799
    Just e@(EventBeginElement name as) -> case runNameMatcher nameMatcher name of
Packit 899799
      Just _ -> case runAttrParser attrParser as of
Packit 899799
        Right _ -> do
Packit 899799
          yield e
Packit 899799
          whileJust takeAnyTreeContent
Packit 899799
          endEvent <- await
Packit 899799
          case endEvent of
Packit 899799
            Just e'@(EventEndElement name') | name == name' -> yield e' >> return (Just ())
Packit 899799
            _ -> lift $ monadThrow $ InvalidEndElement name endEvent
Packit 899799
        _ -> leftover e >> return Nothing
Packit 899799
      _ -> leftover e >> return Nothing
Packit 899799
Packit 899799
    Just e -> if isWhitespace e then yield e >> takeTree nameMatcher attrParser else leftover e >> return Nothing
Packit 899799
    _ -> return Nothing
Packit 899799
  where
Packit 899799
    whileJust f = fix $ \loop -> f >>= maybe (return ()) (const loop)
Packit 899799
Packit 899799
-- | Like 'takeTree', but can also stream a content 'Event'.
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
takeTreeContent :: MonadThrow m
Packit 899799
                => NameMatcher a
Packit 899799
                -> AttrParser b
Packit 899799
                -> ConduitM Event Event m (Maybe ())
Packit 899799
takeTreeContent nameMatcher attrParser = runMaybeT $ MaybeT (takeTree nameMatcher attrParser) <|> MaybeT takeContent
Packit 899799
Packit 899799
-- | Like 'takeTreeContent', without checking for tag name or attributes.
Packit 899799
--
Packit 899799
-- >>> runResourceT $ parseLBS def "text" $$ takeAnyTreeContent =$= consume
Packit 899799
-- Just [ EventContent (ContentText "text") ]
Packit 899799
--
Packit 899799
-- >>> runResourceT $ parseLBS def "" $$ takeAnyTreeContent =$= consume
Packit 899799
-- Just [ ]
Packit 899799
--
Packit 899799
-- >>> runResourceT $ parseLBS def "<c></c>text" $$ takeAnyTreeContent =$= consume
Packit 899799
-- Just [ EventBeginElement "b" [], EventBeginElement "c" [], EventEndElement "c", EventEndElement "b" ]
Packit 899799
--
Packit 899799
-- Since 1.5.0
Packit 899799
takeAnyTreeContent :: MonadThrow m
Packit 899799
                => ConduitM Event Event m (Maybe ())
Packit 899799
takeAnyTreeContent = takeTreeContent anyName ignoreAttrs
Packit 899799
Packit 899799
{-# DEPRECATED takeAllTreesContent "Please use 'takeAnyTreeContent'." #-}
Packit 899799
takeAllTreesContent :: MonadThrow m => ConduitM Event Event m (Maybe ())
Packit 899799
takeAllTreesContent = takeAnyTreeContent
Packit 899799
Packit 899799
type DecodeEntities = Text -> Content
Packit 899799
Packit 899799
-- | Default implementation of 'DecodeEntities', which leaves the
Packit 899799
-- entity as-is. Numeric character references and the five standard
Packit 899799
-- entities (lt, gt, amp, quot, pos) are handled internally by the
Packit 899799
-- parser.
Packit 899799
decodeXmlEntities :: DecodeEntities
Packit 899799
decodeXmlEntities = ContentEntity
Packit 899799
Packit 899799
-- | HTML4-compliant entity decoder. Handles the additional 248
Packit 899799
-- entities defined by HTML 4 and XHTML 1.
Packit 899799
--
Packit 899799
-- Note that HTML 5 introduces a drastically larger number of entities, and
Packit 899799
-- this code does not recognize most of them.
Packit 899799
decodeHtmlEntities :: DecodeEntities
Packit 899799
decodeHtmlEntities t =
Packit 899799
  maybe (ContentEntity t) ContentText $ Map.lookup t htmlEntities
Packit 899799
Packit 899799
htmlEntities :: Map.Map T.Text T.Text
Packit 899799
htmlEntities = Map.fromList
Packit 899799
    $ map (pack *** pack) -- Work around the long-compile-time bug
Packit 899799
    [ ("nbsp", "\160")
Packit 899799
    , ("iexcl", "\161")
Packit 899799
    , ("cent", "\162")
Packit 899799
    , ("pound", "\163")
Packit 899799
    , ("curren", "\164")
Packit 899799
    , ("yen", "\165")
Packit 899799
    , ("brvbar", "\166")
Packit 899799
    , ("sect", "\167")
Packit 899799
    , ("uml", "\168")
Packit 899799
    , ("copy", "\169")
Packit 899799
    , ("ordf", "\170")
Packit 899799
    , ("laquo", "\171")
Packit 899799
    , ("not", "\172")
Packit 899799
    , ("shy", "\173")
Packit 899799
    , ("reg", "\174")
Packit 899799
    , ("macr", "\175")
Packit 899799
    , ("deg", "\176")
Packit 899799
    , ("plusmn", "\177")
Packit 899799
    , ("sup2", "\178")
Packit 899799
    , ("sup3", "\179")
Packit 899799
    , ("acute", "\180")
Packit 899799
    , ("micro", "\181")
Packit 899799
    , ("para", "\182")
Packit 899799
    , ("middot", "\183")
Packit 899799
    , ("cedil", "\184")
Packit 899799
    , ("sup1", "\185")
Packit 899799
    , ("ordm", "\186")
Packit 899799
    , ("raquo", "\187")
Packit 899799
    , ("frac14", "\188")
Packit 899799
    , ("frac12", "\189")
Packit 899799
    , ("frac34", "\190")
Packit 899799
    , ("iquest", "\191")
Packit 899799
    , ("Agrave", "\192")
Packit 899799
    , ("Aacute", "\193")
Packit 899799
    , ("Acirc", "\194")
Packit 899799
    , ("Atilde", "\195")
Packit 899799
    , ("Auml", "\196")
Packit 899799
    , ("Aring", "\197")
Packit 899799
    , ("AElig", "\198")
Packit 899799
    , ("Ccedil", "\199")
Packit 899799
    , ("Egrave", "\200")
Packit 899799
    , ("Eacute", "\201")
Packit 899799
    , ("Ecirc", "\202")
Packit 899799
    , ("Euml", "\203")
Packit 899799
    , ("Igrave", "\204")
Packit 899799
    , ("Iacute", "\205")
Packit 899799
    , ("Icirc", "\206")
Packit 899799
    , ("Iuml", "\207")
Packit 899799
    , ("ETH", "\208")
Packit 899799
    , ("Ntilde", "\209")
Packit 899799
    , ("Ograve", "\210")
Packit 899799
    , ("Oacute", "\211")
Packit 899799
    , ("Ocirc", "\212")
Packit 899799
    , ("Otilde", "\213")
Packit 899799
    , ("Ouml", "\214")
Packit 899799
    , ("times", "\215")
Packit 899799
    , ("Oslash", "\216")
Packit 899799
    , ("Ugrave", "\217")
Packit 899799
    , ("Uacute", "\218")
Packit 899799
    , ("Ucirc", "\219")
Packit 899799
    , ("Uuml", "\220")
Packit 899799
    , ("Yacute", "\221")
Packit 899799
    , ("THORN", "\222")
Packit 899799
    , ("szlig", "\223")
Packit 899799
    , ("agrave", "\224")
Packit 899799
    , ("aacute", "\225")
Packit 899799
    , ("acirc", "\226")
Packit 899799
    , ("atilde", "\227")
Packit 899799
    , ("auml", "\228")
Packit 899799
    , ("aring", "\229")
Packit 899799
    , ("aelig", "\230")
Packit 899799
    , ("ccedil", "\231")
Packit 899799
    , ("egrave", "\232")
Packit 899799
    , ("eacute", "\233")
Packit 899799
    , ("ecirc", "\234")
Packit 899799
    , ("euml", "\235")
Packit 899799
    , ("igrave", "\236")
Packit 899799
    , ("iacute", "\237")
Packit 899799
    , ("icirc", "\238")
Packit 899799
    , ("iuml", "\239")
Packit 899799
    , ("eth", "\240")
Packit 899799
    , ("ntilde", "\241")
Packit 899799
    , ("ograve", "\242")
Packit 899799
    , ("oacute", "\243")
Packit 899799
    , ("ocirc", "\244")
Packit 899799
    , ("otilde", "\245")
Packit 899799
    , ("ouml", "\246")
Packit 899799
    , ("divide", "\247")
Packit 899799
    , ("oslash", "\248")
Packit 899799
    , ("ugrave", "\249")
Packit 899799
    , ("uacute", "\250")
Packit 899799
    , ("ucirc", "\251")
Packit 899799
    , ("uuml", "\252")
Packit 899799
    , ("yacute", "\253")
Packit 899799
    , ("thorn", "\254")
Packit 899799
    , ("yuml", "\255")
Packit 899799
    , ("OElig", "\338")
Packit 899799
    , ("oelig", "\339")
Packit 899799
    , ("Scaron", "\352")
Packit 899799
    , ("scaron", "\353")
Packit 899799
    , ("Yuml", "\376")
Packit 899799
    , ("fnof", "\402")
Packit 899799
    , ("circ", "\710")
Packit 899799
    , ("tilde", "\732")
Packit 899799
    , ("Alpha", "\913")
Packit 899799
    , ("Beta", "\914")
Packit 899799
    , ("Gamma", "\915")
Packit 899799
    , ("Delta", "\916")
Packit 899799
    , ("Epsilon", "\917")
Packit 899799
    , ("Zeta", "\918")
Packit 899799
    , ("Eta", "\919")
Packit 899799
    , ("Theta", "\920")
Packit 899799
    , ("Iota", "\921")
Packit 899799
    , ("Kappa", "\922")
Packit 899799
    , ("Lambda", "\923")
Packit 899799
    , ("Mu", "\924")
Packit 899799
    , ("Nu", "\925")
Packit 899799
    , ("Xi", "\926")
Packit 899799
    , ("Omicron", "\927")
Packit 899799
    , ("Pi", "\928")
Packit 899799
    , ("Rho", "\929")
Packit 899799
    , ("Sigma", "\931")
Packit 899799
    , ("Tau", "\932")
Packit 899799
    , ("Upsilon", "\933")
Packit 899799
    , ("Phi", "\934")
Packit 899799
    , ("Chi", "\935")
Packit 899799
    , ("Psi", "\936")
Packit 899799
    , ("Omega", "\937")
Packit 899799
    , ("alpha", "\945")
Packit 899799
    , ("beta", "\946")
Packit 899799
    , ("gamma", "\947")
Packit 899799
    , ("delta", "\948")
Packit 899799
    , ("epsilon", "\949")
Packit 899799
    , ("zeta", "\950")
Packit 899799
    , ("eta", "\951")
Packit 899799
    , ("theta", "\952")
Packit 899799
    , ("iota", "\953")
Packit 899799
    , ("kappa", "\954")
Packit 899799
    , ("lambda", "\955")
Packit 899799
    , ("mu", "\956")
Packit 899799
    , ("nu", "\957")
Packit 899799
    , ("xi", "\958")
Packit 899799
    , ("omicron", "\959")
Packit 899799
    , ("pi", "\960")
Packit 899799
    , ("rho", "\961")
Packit 899799
    , ("sigmaf", "\962")
Packit 899799
    , ("sigma", "\963")
Packit 899799
    , ("tau", "\964")
Packit 899799
    , ("upsilon", "\965")
Packit 899799
    , ("phi", "\966")
Packit 899799
    , ("chi", "\967")
Packit 899799
    , ("psi", "\968")
Packit 899799
    , ("omega", "\969")
Packit 899799
    , ("thetasym", "\977")
Packit 899799
    , ("upsih", "\978")
Packit 899799
    , ("piv", "\982")
Packit 899799
    , ("ensp", "\8194")
Packit 899799
    , ("emsp", "\8195")
Packit 899799
    , ("thinsp", "\8201")
Packit 899799
    , ("zwnj", "\8204")
Packit 899799
    , ("zwj", "\8205")
Packit 899799
    , ("lrm", "\8206")
Packit 899799
    , ("rlm", "\8207")
Packit 899799
    , ("ndash", "\8211")
Packit 899799
    , ("mdash", "\8212")
Packit 899799
    , ("lsquo", "\8216")
Packit 899799
    , ("rsquo", "\8217")
Packit 899799
    , ("sbquo", "\8218")
Packit 899799
    , ("ldquo", "\8220")
Packit 899799
    , ("rdquo", "\8221")
Packit 899799
    , ("bdquo", "\8222")
Packit 899799
    , ("dagger", "\8224")
Packit 899799
    , ("Dagger", "\8225")
Packit 899799
    , ("bull", "\8226")
Packit 899799
    , ("hellip", "\8230")
Packit 899799
    , ("permil", "\8240")
Packit 899799
    , ("prime", "\8242")
Packit 899799
    , ("Prime", "\8243")
Packit 899799
    , ("lsaquo", "\8249")
Packit 899799
    , ("rsaquo", "\8250")
Packit 899799
    , ("oline", "\8254")
Packit 899799
    , ("frasl", "\8260")
Packit 899799
    , ("euro", "\8364")
Packit 899799
    , ("image", "\8465")
Packit 899799
    , ("weierp", "\8472")
Packit 899799
    , ("real", "\8476")
Packit 899799
    , ("trade", "\8482")
Packit 899799
    , ("alefsym", "\8501")
Packit 899799
    , ("larr", "\8592")
Packit 899799
    , ("uarr", "\8593")
Packit 899799
    , ("rarr", "\8594")
Packit 899799
    , ("darr", "\8595")
Packit 899799
    , ("harr", "\8596")
Packit 899799
    , ("crarr", "\8629")
Packit 899799
    , ("lArr", "\8656")
Packit 899799
    , ("uArr", "\8657")
Packit 899799
    , ("rArr", "\8658")
Packit 899799
    , ("dArr", "\8659")
Packit 899799
    , ("hArr", "\8660")
Packit 899799
    , ("forall", "\8704")
Packit 899799
    , ("part", "\8706")
Packit 899799
    , ("exist", "\8707")
Packit 899799
    , ("empty", "\8709")
Packit 899799
    , ("nabla", "\8711")
Packit 899799
    , ("isin", "\8712")
Packit 899799
    , ("notin", "\8713")
Packit 899799
    , ("ni", "\8715")
Packit 899799
    , ("prod", "\8719")
Packit 899799
    , ("sum", "\8721")
Packit 899799
    , ("minus", "\8722")
Packit 899799
    , ("lowast", "\8727")
Packit 899799
    , ("radic", "\8730")
Packit 899799
    , ("prop", "\8733")
Packit 899799
    , ("infin", "\8734")
Packit 899799
    , ("ang", "\8736")
Packit 899799
    , ("and", "\8743")
Packit 899799
    , ("or", "\8744")
Packit 899799
    , ("cap", "\8745")
Packit 899799
    , ("cup", "\8746")
Packit 899799
    , ("int", "\8747")
Packit 899799
    , ("there4", "\8756")
Packit 899799
    , ("sim", "\8764")
Packit 899799
    , ("cong", "\8773")
Packit 899799
    , ("asymp", "\8776")
Packit 899799
    , ("ne", "\8800")
Packit 899799
    , ("equiv", "\8801")
Packit 899799
    , ("le", "\8804")
Packit 899799
    , ("ge", "\8805")
Packit 899799
    , ("sub", "\8834")
Packit 899799
    , ("sup", "\8835")
Packit 899799
    , ("nsub", "\8836")
Packit 899799
    , ("sube", "\8838")
Packit 899799
    , ("supe", "\8839")
Packit 899799
    , ("oplus", "\8853")
Packit 899799
    , ("otimes", "\8855")
Packit 899799
    , ("perp", "\8869")
Packit 899799
    , ("sdot", "\8901")
Packit 899799
    , ("lceil", "\8968")
Packit 899799
    , ("rceil", "\8969")
Packit 899799
    , ("lfloor", "\8970")
Packit 899799
    , ("rfloor", "\8971")
Packit 899799
    , ("lang", "\9001")
Packit 899799
    , ("rang", "\9002")
Packit 899799
    , ("loz", "\9674")
Packit 899799
    , ("spades", "\9824")
Packit 899799
    , ("clubs", "\9827")
Packit 899799
    , ("hearts", "\9829")
Packit 899799
    , ("diams", "\9830")
Packit 899799
    ]