|
Packit |
899799 |
{-# LANGUAGE CPP #-}
|
|
Packit |
899799 |
{-# LANGUAGE DeriveDataTypeable #-}
|
|
Packit |
899799 |
{-# LANGUAGE FlexibleContexts #-}
|
|
Packit |
899799 |
{-# LANGUAGE OverloadedStrings #-}
|
|
Packit |
899799 |
{-# LANGUAGE PatternGuards #-}
|
|
Packit |
899799 |
{-# LANGUAGE RankNTypes #-}
|
|
Packit |
899799 |
-- | DOM-based parsing and rendering.
|
|
Packit |
899799 |
--
|
|
Packit |
899799 |
-- This module requires that all entities be resolved at parsing. If you need
|
|
Packit |
899799 |
-- to interact with unresolved entities, please use "Text.XML.Unresolved". This
|
|
Packit |
899799 |
-- is the recommended module for most uses cases.
|
|
Packit |
899799 |
--
|
|
Packit |
899799 |
-- While many of the datatypes in this module are simply re-exported from
|
|
Packit |
899799 |
-- @Data.XML.Types@, 'Document', 'Node' and 'Element' are all redefined here to
|
|
Packit |
899799 |
-- disallow the possibility of unresolved entities. Conversion functions are
|
|
Packit |
899799 |
-- provided to switch between the two sets of datatypes.
|
|
Packit |
899799 |
--
|
|
Packit |
899799 |
-- For simpler, bidirectional traversal of the DOM tree, see the
|
|
Packit |
899799 |
-- "Text.XML.Cursor" module.
|
|
Packit |
899799 |
module Text.XML
|
|
Packit |
899799 |
( -- * Data types
|
|
Packit |
899799 |
Document (..)
|
|
Packit |
899799 |
, Prologue (..)
|
|
Packit |
899799 |
, Instruction (..)
|
|
Packit |
899799 |
, Miscellaneous (..)
|
|
Packit |
899799 |
, Node (..)
|
|
Packit |
899799 |
, Element (..)
|
|
Packit |
899799 |
, Name (..)
|
|
Packit |
899799 |
, Doctype (..)
|
|
Packit |
899799 |
, ExternalID (..)
|
|
Packit |
899799 |
-- * Parsing
|
|
Packit |
899799 |
-- ** Files
|
|
Packit |
899799 |
, readFile
|
|
Packit |
899799 |
-- ** Bytes
|
|
Packit |
899799 |
, parseLBS
|
|
Packit |
899799 |
, parseLBS_
|
|
Packit |
899799 |
, sinkDoc
|
|
Packit |
899799 |
-- ** Text
|
|
Packit |
899799 |
, parseText
|
|
Packit |
899799 |
, parseText_
|
|
Packit |
899799 |
, sinkTextDoc
|
|
Packit |
899799 |
-- ** Other
|
|
Packit |
899799 |
, fromEvents
|
|
Packit |
899799 |
, UnresolvedEntityException (..)
|
|
Packit |
899799 |
, XMLException (..)
|
|
Packit |
899799 |
-- * Rendering
|
|
Packit |
899799 |
, writeFile
|
|
Packit |
899799 |
, renderLBS
|
|
Packit |
899799 |
, renderText
|
|
Packit |
899799 |
, renderBytes
|
|
Packit |
899799 |
-- * Settings
|
|
Packit |
899799 |
, def
|
|
Packit |
899799 |
-- ** Parsing
|
|
Packit |
899799 |
, ParseSettings
|
|
Packit |
899799 |
, psDecodeEntities
|
|
Packit |
899799 |
, P.psRetainNamespaces
|
|
Packit |
899799 |
-- *** Entity decoding
|
|
Packit |
899799 |
, P.decodeXmlEntities
|
|
Packit |
899799 |
, P.decodeHtmlEntities
|
|
Packit |
899799 |
-- ** Rendering
|
|
Packit |
899799 |
, R.RenderSettings
|
|
Packit |
899799 |
, R.rsPretty
|
|
Packit |
899799 |
, R.rsNamespaces
|
|
Packit |
899799 |
, R.rsAttrOrder
|
|
Packit |
899799 |
, R.rsUseCDATA
|
|
Packit |
899799 |
, R.rsXMLDeclaration
|
|
Packit |
899799 |
, R.orderAttrs
|
|
Packit |
899799 |
-- * Conversion
|
|
Packit |
899799 |
, toXMLDocument
|
|
Packit |
899799 |
, fromXMLDocument
|
|
Packit |
899799 |
, toXMLNode
|
|
Packit |
899799 |
, fromXMLNode
|
|
Packit |
899799 |
, toXMLElement
|
|
Packit |
899799 |
, fromXMLElement
|
|
Packit |
899799 |
) where
|
|
Packit |
899799 |
|
|
Packit |
899799 |
import Control.Applicative ((<$>))
|
|
Packit |
899799 |
import Control.DeepSeq (NFData (rnf))
|
|
Packit |
899799 |
import Control.Exception (Exception, SomeException, handle,
|
|
Packit |
899799 |
throw, throwIO)
|
|
Packit |
899799 |
import Control.Monad.ST (runST)
|
|
Packit |
899799 |
import Control.Monad.Trans.Resource (MonadThrow, monadThrow,
|
|
Packit |
899799 |
runExceptionT, runResourceT)
|
|
Packit |
899799 |
import Data.ByteString (ByteString)
|
|
Packit |
899799 |
import qualified Data.ByteString.Lazy as L
|
|
Packit |
899799 |
import Data.Data (Data)
|
|
Packit |
899799 |
import Data.Either (partitionEithers)
|
|
Packit |
899799 |
import qualified Data.Map as Map
|
|
Packit |
899799 |
import Data.Set (Set)
|
|
Packit |
899799 |
import qualified Data.Set as Set
|
|
Packit |
899799 |
import Data.Text (Text)
|
|
Packit |
899799 |
import qualified Data.Text as T
|
|
Packit |
899799 |
import Data.Typeable (Typeable)
|
|
Packit |
899799 |
import Data.XML.Types (Doctype (..), ExternalID (..),
|
|
Packit |
899799 |
Instruction (..),
|
|
Packit |
899799 |
Miscellaneous (..), Name (..),
|
|
Packit |
899799 |
Prologue (..))
|
|
Packit |
899799 |
import qualified Data.XML.Types as X
|
|
Packit |
899799 |
import Prelude hiding (readFile, writeFile)
|
|
Packit |
899799 |
import Text.XML.Stream.Parse (ParseSettings, def,
|
|
Packit |
899799 |
psDecodeEntities)
|
|
Packit |
899799 |
import qualified Text.XML.Stream.Parse as P
|
|
Packit |
899799 |
import qualified Text.XML.Stream.Render as R
|
|
Packit |
899799 |
import qualified Text.XML.Unresolved as D
|
|
Packit |
899799 |
|
|
Packit |
899799 |
import Control.Monad.Trans.Class (lift)
|
|
Packit |
899799 |
import Data.Conduit
|
|
Packit |
899799 |
import qualified Data.Conduit.Binary as CB
|
|
Packit |
899799 |
import Data.Conduit.Lazy (lazyConsume)
|
|
Packit |
899799 |
import qualified Data.Conduit.List as CL
|
|
Packit |
899799 |
import qualified Data.Text.Lazy as TL
|
|
Packit |
899799 |
import qualified Data.Text.Lazy.Encoding as TLE
|
|
Packit |
899799 |
import System.IO.Unsafe (unsafePerformIO)
|
|
Packit |
899799 |
|
|
Packit |
899799 |
import Control.Arrow (first)
|
|
Packit |
899799 |
import Data.List (foldl')
|
|
Packit |
899799 |
import Data.Monoid (mappend, mempty)
|
|
Packit |
899799 |
import Data.String (fromString)
|
|
Packit |
899799 |
import qualified Text.Blaze as B
|
|
Packit |
899799 |
import qualified Text.Blaze.Html as B
|
|
Packit |
899799 |
import qualified Text.Blaze.Html5 as B5
|
|
Packit |
899799 |
import qualified Text.Blaze.Internal as BI
|
|
Packit |
899799 |
|
|
Packit |
899799 |
data Document = Document
|
|
Packit |
899799 |
{ documentPrologue :: Prologue
|
|
Packit |
899799 |
, documentRoot :: Element
|
|
Packit |
899799 |
, documentEpilogue :: [Miscellaneous]
|
|
Packit |
899799 |
}
|
|
Packit |
899799 |
deriving (Show, Eq, Typeable, Data)
|
|
Packit |
899799 |
|
|
Packit |
899799 |
#if MIN_VERSION_containers(0, 4, 2)
|
|
Packit |
899799 |
instance NFData Document where
|
|
Packit |
899799 |
rnf (Document a b c) = rnf a `seq` rnf b `seq` rnf c `seq` ()
|
|
Packit |
899799 |
#endif
|
|
Packit |
899799 |
|
|
Packit |
899799 |
data Node
|
|
Packit |
899799 |
= NodeElement Element
|
|
Packit |
899799 |
| NodeInstruction Instruction
|
|
Packit |
899799 |
| NodeContent Text
|
|
Packit |
899799 |
| NodeComment Text
|
|
Packit |
899799 |
deriving (Show, Eq, Ord, Typeable, Data)
|
|
Packit |
899799 |
|
|
Packit |
899799 |
#if MIN_VERSION_containers(0, 4, 2)
|
|
Packit |
899799 |
instance NFData Node where
|
|
Packit |
899799 |
rnf (NodeElement e) = rnf e `seq` ()
|
|
Packit |
899799 |
rnf (NodeInstruction i) = rnf i `seq` ()
|
|
Packit |
899799 |
rnf (NodeContent t) = rnf t `seq` ()
|
|
Packit |
899799 |
rnf (NodeComment t) = rnf t `seq` ()
|
|
Packit |
899799 |
#endif
|
|
Packit |
899799 |
|
|
Packit |
899799 |
data Element = Element
|
|
Packit |
899799 |
{ elementName :: Name
|
|
Packit |
899799 |
, elementAttributes :: Map.Map Name Text
|
|
Packit |
899799 |
, elementNodes :: [Node]
|
|
Packit |
899799 |
}
|
|
Packit |
899799 |
deriving (Show, Eq, Ord, Typeable, Data)
|
|
Packit |
899799 |
|
|
Packit |
899799 |
#if MIN_VERSION_containers(0, 4, 2)
|
|
Packit |
899799 |
instance NFData Element where
|
|
Packit |
899799 |
rnf (Element a b c) = rnf a `seq` rnf b `seq` rnf c `seq` ()
|
|
Packit |
899799 |
#endif
|
|
Packit |
899799 |
|
|
Packit |
899799 |
{-
|
|
Packit |
899799 |
readFile :: FilePath -> ParseSettings -> IO (Either SomeException Document)
|
|
Packit |
899799 |
readFile_ :: FIlePath -> ParseSettings -> IO Document
|
|
Packit |
899799 |
-}
|
|
Packit |
899799 |
|
|
Packit |
899799 |
toXMLDocument :: Document -> X.Document
|
|
Packit |
899799 |
toXMLDocument = toXMLDocument' def
|
|
Packit |
899799 |
|
|
Packit |
899799 |
toXMLDocument' :: R.RenderSettings -> Document -> X.Document
|
|
Packit |
899799 |
toXMLDocument' rs (Document a b c) = X.Document a (toXMLElement' rs b) c
|
|
Packit |
899799 |
|
|
Packit |
899799 |
toXMLElement :: Element -> X.Element
|
|
Packit |
899799 |
toXMLElement = toXMLElement' def
|
|
Packit |
899799 |
|
|
Packit |
899799 |
toXMLElement' :: R.RenderSettings -> Element -> X.Element
|
|
Packit |
899799 |
toXMLElement' rs (Element name as nodes) =
|
|
Packit |
899799 |
X.Element name as' nodes'
|
|
Packit |
899799 |
where
|
|
Packit |
899799 |
as' = map (\(x, y) -> (x, [X.ContentText y])) $ R.rsAttrOrder rs name as
|
|
Packit |
899799 |
nodes' = map (toXMLNode' rs) nodes
|
|
Packit |
899799 |
|
|
Packit |
899799 |
toXMLNode :: Node -> X.Node
|
|
Packit |
899799 |
toXMLNode = toXMLNode' def
|
|
Packit |
899799 |
|
|
Packit |
899799 |
toXMLNode' :: R.RenderSettings -> Node -> X.Node
|
|
Packit |
899799 |
toXMLNode' rs (NodeElement e) = X.NodeElement $ toXMLElement' rs e
|
|
Packit |
899799 |
toXMLNode' _ (NodeContent t) = X.NodeContent $ X.ContentText t
|
|
Packit |
899799 |
toXMLNode' _ (NodeComment c) = X.NodeComment c
|
|
Packit |
899799 |
toXMLNode' _ (NodeInstruction i) = X.NodeInstruction i
|
|
Packit |
899799 |
|
|
Packit |
899799 |
fromXMLDocument :: X.Document -> Either (Set Text) Document
|
|
Packit |
899799 |
fromXMLDocument (X.Document a b c) =
|
|
Packit |
899799 |
case fromXMLElement b of
|
|
Packit |
899799 |
Left es -> Left es
|
|
Packit |
899799 |
Right b' -> Right $ Document a b' c
|
|
Packit |
899799 |
|
|
Packit |
899799 |
fromXMLElement :: X.Element -> Either (Set Text) Element
|
|
Packit |
899799 |
fromXMLElement (X.Element name as nodes) =
|
|
Packit |
899799 |
case (lnodes, las) of
|
|
Packit |
899799 |
([], []) -> Right $ Element name ras rnodes
|
|
Packit |
899799 |
(x, []) -> Left $ Set.unions x
|
|
Packit |
899799 |
([], y) -> Left $ Set.unions y
|
|
Packit |
899799 |
(x, y) -> Left $ Set.unions x `Set.union` Set.unions y
|
|
Packit |
899799 |
where
|
|
Packit |
899799 |
enodes = map fromXMLNode nodes
|
|
Packit |
899799 |
(lnodes, rnodes) = partitionEithers enodes
|
|
Packit |
899799 |
eas = map go as
|
|
Packit |
899799 |
(las, ras') = partitionEithers eas
|
|
Packit |
899799 |
ras = Map.fromList ras'
|
|
Packit |
899799 |
go (x, y) =
|
|
Packit |
899799 |
case go' [] id y of
|
|
Packit |
899799 |
Left es -> Left es
|
|
Packit |
899799 |
Right y' -> Right (x, y')
|
|
Packit |
899799 |
go' [] front [] = Right $ T.concat $ front []
|
|
Packit |
899799 |
go' errs _ [] = Left $ Set.fromList errs
|
|
Packit |
899799 |
go' errs front (X.ContentText t:ys) = go' errs (front . (:) t) ys
|
|
Packit |
899799 |
go' errs front (X.ContentEntity t:ys) = go' (t : errs) front ys
|
|
Packit |
899799 |
|
|
Packit |
899799 |
fromXMLNode :: X.Node -> Either (Set Text) Node
|
|
Packit |
899799 |
fromXMLNode (X.NodeElement e) = NodeElement <$> fromXMLElement e
|
|
Packit |
899799 |
fromXMLNode (X.NodeContent (X.ContentText t)) = Right $ NodeContent t
|
|
Packit |
899799 |
fromXMLNode (X.NodeContent (X.ContentEntity t)) = Left $ Set.singleton t
|
|
Packit |
899799 |
fromXMLNode (X.NodeComment c) = Right $ NodeComment c
|
|
Packit |
899799 |
fromXMLNode (X.NodeInstruction i) = Right $ NodeInstruction i
|
|
Packit |
899799 |
|
|
Packit |
899799 |
readFile :: ParseSettings -> FilePath -> IO Document
|
|
Packit |
899799 |
readFile ps fp = handle
|
|
Packit |
899799 |
(throwIO . InvalidXMLFile fp)
|
|
Packit |
899799 |
(runResourceT $ CB.sourceFile fp $$ sinkDoc ps)
|
|
Packit |
899799 |
|
|
Packit |
899799 |
data XMLException = InvalidXMLFile FilePath SomeException
|
|
Packit |
899799 |
deriving Typeable
|
|
Packit |
899799 |
|
|
Packit |
899799 |
instance Show XMLException where
|
|
Packit |
899799 |
show (InvalidXMLFile fp e) = concat
|
|
Packit |
899799 |
[ "Error parsing XML file "
|
|
Packit |
899799 |
, fp
|
|
Packit |
899799 |
, ": "
|
|
Packit |
899799 |
, show e
|
|
Packit |
899799 |
]
|
|
Packit |
899799 |
instance Exception XMLException
|
|
Packit |
899799 |
|
|
Packit |
899799 |
parseLBS :: ParseSettings -> L.ByteString -> Either SomeException Document
|
|
Packit |
899799 |
parseLBS ps lbs = runST
|
|
Packit |
899799 |
$ runExceptionT
|
|
Packit |
899799 |
$ CL.sourceList (L.toChunks lbs)
|
|
Packit |
899799 |
$$ sinkDoc ps
|
|
Packit |
899799 |
|
|
Packit |
899799 |
parseLBS_ :: ParseSettings -> L.ByteString -> Document
|
|
Packit |
899799 |
parseLBS_ ps = either throw id . parseLBS ps
|
|
Packit |
899799 |
|
|
Packit |
899799 |
sinkDoc :: MonadThrow m
|
|
Packit |
899799 |
=> ParseSettings
|
|
Packit |
899799 |
-> Consumer ByteString m Document
|
|
Packit |
899799 |
sinkDoc ps = P.parseBytesPos ps =$= fromEvents
|
|
Packit |
899799 |
|
|
Packit |
899799 |
parseText :: ParseSettings -> TL.Text -> Either SomeException Document
|
|
Packit |
899799 |
parseText ps tl = runST
|
|
Packit |
899799 |
$ runExceptionT
|
|
Packit |
899799 |
$ CL.sourceList (TL.toChunks tl)
|
|
Packit |
899799 |
$$ sinkTextDoc ps
|
|
Packit |
899799 |
|
|
Packit |
899799 |
parseText_ :: ParseSettings -> TL.Text -> Document
|
|
Packit |
899799 |
parseText_ ps = either throw id . parseText ps
|
|
Packit |
899799 |
|
|
Packit |
899799 |
sinkTextDoc :: MonadThrow m
|
|
Packit |
899799 |
=> ParseSettings
|
|
Packit |
899799 |
-> Consumer Text m Document
|
|
Packit |
899799 |
sinkTextDoc ps = P.parseTextPos ps =$= fromEvents
|
|
Packit |
899799 |
|
|
Packit |
899799 |
fromEvents :: MonadThrow m => Consumer P.EventPos m Document
|
|
Packit |
899799 |
fromEvents = do
|
|
Packit |
899799 |
d <- D.fromEvents
|
|
Packit |
899799 |
either (lift . monadThrow . UnresolvedEntityException) return $ fromXMLDocument d
|
|
Packit |
899799 |
|
|
Packit |
899799 |
data UnresolvedEntityException = UnresolvedEntityException (Set Text)
|
|
Packit |
899799 |
deriving (Show, Typeable)
|
|
Packit |
899799 |
instance Exception UnresolvedEntityException
|
|
Packit |
899799 |
|
|
Packit |
899799 |
--renderBytes :: MonadUnsafeIO m => R.RenderSettings -> Document -> Producer m ByteString
|
|
Packit |
899799 |
renderBytes rs doc = D.renderBytes rs $ toXMLDocument' rs doc
|
|
Packit |
899799 |
|
|
Packit |
899799 |
writeFile :: R.RenderSettings -> FilePath -> Document -> IO ()
|
|
Packit |
899799 |
writeFile rs fp doc =
|
|
Packit |
899799 |
runResourceT $ renderBytes rs doc $$ CB.sinkFile fp
|
|
Packit |
899799 |
|
|
Packit |
899799 |
renderLBS :: R.RenderSettings -> Document -> L.ByteString
|
|
Packit |
899799 |
renderLBS rs doc =
|
|
Packit |
899799 |
L.fromChunks $ unsafePerformIO
|
|
Packit |
899799 |
-- not generally safe, but we know that runResourceT
|
|
Packit |
899799 |
-- will not deallocate any of the resources being used
|
|
Packit |
899799 |
-- by the process
|
|
Packit |
899799 |
$ lazyConsume
|
|
Packit |
899799 |
$ renderBytes rs doc
|
|
Packit |
899799 |
|
|
Packit |
899799 |
renderText :: R.RenderSettings -> Document -> TL.Text
|
|
Packit |
899799 |
renderText rs = TLE.decodeUtf8 . renderLBS rs
|
|
Packit |
899799 |
|
|
Packit |
899799 |
instance B.ToMarkup Document where
|
|
Packit |
899799 |
toMarkup (Document _ root _) = B5.docType >> B.toMarkup root
|
|
Packit |
899799 |
|
|
Packit |
899799 |
-- | Note that the special element name
|
|
Packit |
899799 |
-- @{http://www.snoyman.com/xml2html}ie-cond@ with the single attribute @cond@
|
|
Packit |
899799 |
-- is used to indicate an IE conditional comment.
|
|
Packit |
899799 |
instance B.ToMarkup Element where
|
|
Packit |
899799 |
toMarkup (Element "{http://www.snoyman.com/xml2html}ie-cond" attrs children)
|
|
Packit |
899799 |
| [("cond", cond)] <- Map.toList attrs =
|
|
Packit |
899799 |
B.preEscapedToMarkup ("
|
|
Packit |
899799 |
`mappend` B.preEscapedToMarkup cond
|
|
Packit |
899799 |
`mappend` B.preEscapedToMarkup ("]>" :: T.Text)
|
|
Packit |
899799 |
`mappend` mapM_ B.toMarkup children
|
|
Packit |
899799 |
`mappend` B.preEscapedToMarkup ("" :: T.Text)
|
|
Packit |
899799 |
|
|
Packit |
899799 |
toMarkup (Element name' attrs children) =
|
|
Packit |
899799 |
if isVoid
|
|
Packit |
899799 |
then foldl' (B.!) leaf attrs'
|
|
Packit |
899799 |
else foldl' (B.!) parent attrs' childrenHtml
|
|
Packit |
899799 |
where
|
|
Packit |
899799 |
childrenHtml :: B.Html
|
|
Packit |
899799 |
childrenHtml =
|
|
Packit |
899799 |
case (name `elem` ["style", "script"], children) of
|
|
Packit |
899799 |
(True, [NodeContent t]) -> B.preEscapedToMarkup t
|
|
Packit |
899799 |
_ -> mapM_ B.toMarkup children
|
|
Packit |
899799 |
|
|
Packit |
899799 |
isVoid = nameLocalName name' `Set.member` voidElems
|
|
Packit |
899799 |
|
|
Packit |
899799 |
parent :: B.Html -> B.Html
|
|
Packit |
899799 |
parent = BI.Parent tag open close
|
|
Packit |
899799 |
leaf :: B.Html
|
|
Packit |
899799 |
#if MIN_VERSION_blaze_markup(0,8,0)
|
|
Packit |
899799 |
leaf = BI.Leaf tag open (fromString " />") ()
|
|
Packit |
899799 |
#else
|
|
Packit |
899799 |
leaf = BI.Leaf tag open (fromString " />")
|
|
Packit |
899799 |
#endif
|
|
Packit |
899799 |
|
|
Packit |
899799 |
name = T.unpack $ nameLocalName name'
|
|
Packit |
899799 |
tag = fromString name
|
|
Packit |
899799 |
open = fromString $ '<' : name
|
|
Packit |
899799 |
close = fromString $ concat ["</", name, ">"]
|
|
Packit |
899799 |
|
|
Packit |
899799 |
attrs' :: [B.Attribute]
|
|
Packit |
899799 |
attrs' = map (goAttr . first nameLocalName) $ Map.toList attrs
|
|
Packit |
899799 |
goAttr (key, value) = B.customAttribute (B.textTag key) $ B.toValue value
|
|
Packit |
899799 |
|
|
Packit |
899799 |
instance B.ToMarkup Node where
|
|
Packit |
899799 |
toMarkup (NodeElement e) = B.toMarkup e
|
|
Packit |
899799 |
toMarkup (NodeContent t) = B.toMarkup t
|
|
Packit |
899799 |
toMarkup _ = mempty
|
|
Packit |
899799 |
|
|
Packit |
899799 |
voidElems :: Set.Set T.Text
|
|
Packit |
899799 |
voidElems = Set.fromAscList $ T.words $ T.pack "area base br col command embed hr img input keygen link meta param source track wbr"
|