Blame src/Text/Pandoc/Writers/TEI.hs

Packit Service d2f85f
{-# LANGUAGE OverloadedStrings #-}
Packit Service d2f85f
{-# LANGUAGE PatternGuards     #-}
Packit Service d2f85f
{-
Packit Service d2f85f
Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
Packit Service d2f85f
Packit Service d2f85f
This program is free software; you can redistribute it and/or modify
Packit Service d2f85f
it under the terms of the GNU General Public License as published by
Packit Service d2f85f
the Free Software Foundation; either version 2 of the License, or
Packit Service d2f85f
(at your option) any later version.
Packit Service d2f85f
Packit Service d2f85f
This program is distributed in the hope that it will be useful,
Packit Service d2f85f
but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service d2f85f
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit Service d2f85f
GNU General Public License for more details.
Packit Service d2f85f
Packit Service d2f85f
You should have received a copy of the GNU General Public License
Packit Service d2f85f
along with this program; if not, write to the Free Software
Packit Service d2f85f
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
Packit Service d2f85f
-}
Packit Service d2f85f
Packit Service d2f85f
{- |
Packit Service d2f85f
   Module      : Text.Pandoc.Writers.Docbook
Packit Service d2f85f
   Copyright   : Copyright (C) 2006-2017 John MacFarlane
Packit Service d2f85f
   License     : GNU GPL, version 2 or above
Packit Service d2f85f
Packit Service d2f85f
   Maintainer  : John MacFarlane <jgm@berkeley.edu>
Packit Service d2f85f
   Stability   : alpha
Packit Service d2f85f
   Portability : portable
Packit Service d2f85f
Packit Service d2f85f
Conversion of 'Pandoc' documents to Docbook XML.
Packit Service d2f85f
-}
Packit Service d2f85f
module Text.Pandoc.Writers.TEI (writeTEI) where
Packit Service d2f85f
import Data.Char (toLower)
Packit Service d2f85f
import Data.List (isPrefixOf, stripPrefix)
Packit Service d2f85f
import Data.Text (Text)
Packit Service d2f85f
import qualified Text.Pandoc.Builder as B
Packit Service d2f85f
import Text.Pandoc.Class (PandocMonad, report)
Packit Service d2f85f
import Text.Pandoc.Definition
Packit Service d2f85f
import Text.Pandoc.Highlighting (languages, languagesByExtension)
Packit Service d2f85f
import Text.Pandoc.ImageSize
Packit Service d2f85f
import Text.Pandoc.Logging
Packit Service d2f85f
import Text.Pandoc.Options
Packit Service d2f85f
import Text.Pandoc.Pretty
Packit Service d2f85f
import Text.Pandoc.Shared
Packit Service d2f85f
import Text.Pandoc.Templates (renderTemplate')
Packit Service d2f85f
import Text.Pandoc.Writers.Shared
Packit Service d2f85f
import Text.Pandoc.XML
Packit Service d2f85f
Packit Service d2f85f
-- | Convert list of authors to a docbook <author> section
Packit Service d2f85f
authorToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m B.Inlines
Packit Service d2f85f
authorToTEI opts name' = do
Packit Service d2f85f
  name <- render Nothing <$> inlinesToTEI opts name'
Packit Service d2f85f
  let colwidth = if writerWrapText opts == WrapAuto
Packit Service d2f85f
                    then Just $ writerColumns opts
Packit Service d2f85f
                    else Nothing
Packit Service d2f85f
  return $ B.rawInline "tei" $ render colwidth $
Packit Service d2f85f
      inTagsSimple "author" (text $ escapeStringForXML name)
Packit Service d2f85f
Packit Service d2f85f
-- | Convert Pandoc document to string in Docbook format.
Packit Service d2f85f
writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m Text
Packit Service d2f85f
writeTEI opts (Pandoc meta blocks) = do
Packit Service d2f85f
  let elements = hierarchicalize blocks
Packit Service d2f85f
      colwidth = if writerWrapText opts == WrapAuto
Packit Service d2f85f
                    then Just $ writerColumns opts
Packit Service d2f85f
                    else Nothing
Packit Service d2f85f
      render' :: Doc -> Text
Packit Service d2f85f
      render' = render colwidth
Packit Service d2f85f
      startLvl = case writerTopLevelDivision opts of
Packit Service d2f85f
                   TopLevelPart    -> -1
Packit Service d2f85f
                   TopLevelChapter -> 0
Packit Service d2f85f
                   TopLevelSection -> 1
Packit Service d2f85f
                   TopLevelDefault -> 1
Packit Service d2f85f
  auths'      <- mapM (authorToTEI opts) $ docAuthors meta
Packit Service d2f85f
  let meta'    = B.setMeta "author" auths' meta
Packit Service d2f85f
  metadata <- metaToJSON opts
Packit Service d2f85f
                 (fmap (render' . vcat) .
Packit Service d2f85f
                   mapM (elementToTEI opts startLvl) . hierarchicalize)
Packit Service d2f85f
                 (fmap render' . inlinesToTEI opts)
Packit Service d2f85f
                 meta'
Packit Service d2f85f
  main    <- (render' . vcat) <$> mapM (elementToTEI opts startLvl) elements
Packit Service d2f85f
  let context = defField "body" main
Packit Service d2f85f
              $
Packit Service d2f85f
                  defField "mathml" (case writerHTMLMathMethod opts of
Packit Service d2f85f
                                          MathML -> True
Packit Service d2f85f
                                          _      -> False) metadata
Packit Service d2f85f
  case writerTemplate opts of
Packit Service d2f85f
       Nothing  -> return main
Packit Service d2f85f
       Just tpl -> renderTemplate' tpl context
Packit Service d2f85f
Packit Service d2f85f
-- | Convert an Element to TEI.
Packit Service d2f85f
elementToTEI :: PandocMonad m => WriterOptions -> Int -> Element -> m Doc
Packit Service d2f85f
elementToTEI opts _   (Blk block) = blockToTEI opts block
Packit Service d2f85f
elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) = do
Packit Service d2f85f
  -- TEI doesn't allow sections with no content, so insert some if needed
Packit Service d2f85f
  let elements' = if null elements
Packit Service d2f85f
                    then [Blk (Para [])]
Packit Service d2f85f
                    else elements
Packit Service d2f85f
      -- level numbering correspond to LaTeX internals
Packit Service d2f85f
      divType = case lvl of
Packit Service d2f85f
                 n | n == -1          -> "part"
Packit Service d2f85f
                   | n == 0           -> "chapter"
Packit Service d2f85f
                   | n >= 1 && n <= 5 -> "level" ++ show n
Packit Service d2f85f
                   | otherwise        -> "section"
Packit Service d2f85f
  contents <- vcat <$> mapM (elementToTEI opts (lvl + 1)) elements'
Packit Service d2f85f
  titleContents <- inlinesToTEI opts title
Packit Service d2f85f
  return $ inTags True "div" (("type", divType) :
Packit Service d2f85f
    [("id", writerIdentifierPrefix opts ++ id') | not (null id')]) $
Packit Service d2f85f
      inTagsSimple "head" titleContents $$ contents
Packit Service d2f85f
Packit Service d2f85f
-- | Convert a list of Pandoc blocks to TEI.
Packit Service d2f85f
blocksToTEI :: PandocMonad m => WriterOptions -> [Block] -> m Doc
Packit Service d2f85f
blocksToTEI opts bs = vcat <$> mapM (blockToTEI opts) bs
Packit Service d2f85f
Packit Service d2f85f
-- | Auxiliary function to convert Plain block to Para.
Packit Service d2f85f
plainToPara :: Block -> Block
Packit Service d2f85f
plainToPara (Plain x) = Para x
Packit Service d2f85f
plainToPara x         = x
Packit Service d2f85f
Packit Service d2f85f
-- | Convert a list of pairs of terms and definitions into a TEI
Packit Service d2f85f
-- list with labels and items.
Packit Service d2f85f
deflistItemsToTEI :: PandocMonad m
Packit Service d2f85f
                  => WriterOptions -> [([Inline],[[Block]])] -> m Doc
Packit Service d2f85f
deflistItemsToTEI opts items =
Packit Service d2f85f
 vcat <$> mapM (uncurry (deflistItemToTEI opts)) items
Packit Service d2f85f
Packit Service d2f85f
-- | Convert a term and a list of blocks into a TEI varlistentry.
Packit Service d2f85f
deflistItemToTEI :: PandocMonad m
Packit Service d2f85f
                 => WriterOptions -> [Inline] -> [[Block]] -> m Doc
Packit Service d2f85f
deflistItemToTEI opts term defs = do
Packit Service d2f85f
  term' <- inlinesToTEI opts term
Packit Service d2f85f
  defs' <- blocksToTEI opts $ concatMap (map plainToPara) defs
Packit Service d2f85f
  return $ inTagsIndented "label" term' $$
Packit Service d2f85f
           inTagsIndented "item" defs'
Packit Service d2f85f
Packit Service d2f85f
-- | Convert a list of lists of blocks to a list of TEI list items.
Packit Service d2f85f
listItemsToTEI :: PandocMonad m => WriterOptions -> [[Block]] -> m Doc
Packit Service d2f85f
listItemsToTEI opts items = vcat <$> mapM (listItemToTEI opts) items
Packit Service d2f85f
Packit Service d2f85f
-- | Convert a list of blocks into a TEI list item.
Packit Service d2f85f
listItemToTEI :: PandocMonad m => WriterOptions -> [Block] -> m Doc
Packit Service d2f85f
listItemToTEI opts item =
Packit Service d2f85f
  inTagsIndented "item" <$> blocksToTEI opts (map plainToPara item)
Packit Service d2f85f
Packit Service d2f85f
imageToTEI :: PandocMonad m => WriterOptions -> Attr -> String -> m Doc
Packit Service d2f85f
imageToTEI _ attr src = return $ selfClosingTag "graphic" $
Packit Service d2f85f
  ("url", src) : idAndRole attr ++ dims
Packit Service d2f85f
  where
Packit Service d2f85f
    dims = go Width "width" ++ go Height "depth"
Packit Service d2f85f
    go dir dstr = case dimension dir attr of
Packit Service d2f85f
                    Just a  -> [(dstr, show a)]
Packit Service d2f85f
                    Nothing -> []
Packit Service d2f85f
Packit Service d2f85f
-- | Convert a Pandoc block element to TEI.
Packit Service d2f85f
blockToTEI :: PandocMonad m => WriterOptions -> Block -> m Doc
Packit Service d2f85f
blockToTEI _ Null = return empty
Packit Service d2f85f
-- Add ids to paragraphs in divs with ids - this is needed for
Packit Service d2f85f
-- pandoc-citeproc to get link anchors in bibliographies:
Packit Service d2f85f
blockToTEI opts (Div (ident,_,_) [Para lst]) = do
Packit Service d2f85f
  let attribs = [("id", ident) | not (null ident)]
Packit Service d2f85f
  inTags False "p" attribs <$> inlinesToTEI opts lst
Packit Service d2f85f
blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs
Packit Service d2f85f
blockToTEI _ h@Header{} = do
Packit Service d2f85f
  -- should not occur after hierarchicalize, except inside lists/blockquotes
Packit Service d2f85f
  report $ BlockNotRendered h
Packit Service d2f85f
  return empty
Packit Service d2f85f
-- For TEI simple, text must be within containing block element, so
Packit Service d2f85f
-- we use treat as Para to ensure that Plain text ends up contained by
Packit Service d2f85f
-- something:
Packit Service d2f85f
blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst
Packit Service d2f85f
-- title beginning with fig: indicates that the image is a figure
Packit Service d2f85f
--blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) =
Packit Service d2f85f
--  let alt  = inlinesToTEI opts txt
Packit Service d2f85f
--      capt = if null txt
Packit Service d2f85f
--                then empty
Packit Service d2f85f
--                else inTagsSimple "title" alt
Packit Service d2f85f
--  in  inTagsIndented "figure" $
Packit Service d2f85f
--        capt $$
Packit Service d2f85f
--        (inTagsIndented "mediaobject" $
Packit Service d2f85f
--           (inTagsIndented "imageobject"
Packit Service d2f85f
--             (imageToTEI opts attr src)) $$
Packit Service d2f85f
--           inTagsSimple "textobject" (inTagsSimple "phrase" alt))
Packit Service d2f85f
blockToTEI opts (Para lst) =
Packit Service d2f85f
  inTags False "p" [] <$> inlinesToTEI opts lst
Packit Service d2f85f
blockToTEI opts (LineBlock lns) =
Packit Service d2f85f
  blockToTEI opts $ linesToPara lns
Packit Service d2f85f
blockToTEI opts (BlockQuote blocks) =
Packit Service d2f85f
  inTagsIndented "quote" <$> blocksToTEI opts blocks
Packit Service d2f85f
blockToTEI _ (CodeBlock (_,classes,_) str) =
Packit Service d2f85f
  return $ text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <>
Packit Service d2f85f
     flush (text (escapeStringForXML str) <> cr <> text "</ab>")
Packit Service d2f85f
    where lang  = if null langs
Packit Service d2f85f
                     then ""
Packit Service d2f85f
                     else escapeStringForXML (head langs)
Packit Service d2f85f
          isLang l    = map toLower l `elem` map (map toLower) languages
Packit Service d2f85f
          langsFrom s = if isLang s
Packit Service d2f85f
                           then [s]
Packit Service d2f85f
                           else languagesByExtension . map toLower $ s
Packit Service d2f85f
          langs       = concatMap langsFrom classes
Packit Service d2f85f
blockToTEI opts (BulletList lst) = do
Packit Service d2f85f
  let attribs = [("type", "unordered")]
Packit Service d2f85f
  inTags True "list" attribs <$> listItemsToTEI opts lst
Packit Service d2f85f
blockToTEI _ (OrderedList _ []) = return empty
Packit Service d2f85f
blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = do
Packit Service d2f85f
  let attribs = case numstyle of
Packit Service d2f85f
                       DefaultStyle -> []
Packit Service d2f85f
                       Decimal      -> [("type", "ordered:arabic")]
Packit Service d2f85f
                       Example      -> [("type", "ordered:arabic")]
Packit Service d2f85f
                       UpperAlpha   -> [("type", "ordered:upperalpha")]
Packit Service d2f85f
                       LowerAlpha   -> [("type", "ordered:loweralpha")]
Packit Service d2f85f
                       UpperRoman   -> [("type", "ordered:upperroman")]
Packit Service d2f85f
                       LowerRoman   -> [("type", "ordered:lowerroman")]
Packit Service d2f85f
  items <- if start == 1
Packit Service d2f85f
              then listItemsToTEI opts (first:rest)
Packit Service d2f85f
              else do
Packit Service d2f85f
                fi <- blocksToTEI opts $ map plainToPara first
Packit Service d2f85f
                re <- listItemsToTEI opts rest
Packit Service d2f85f
                return $ inTags True "item" [("n",show start)] fi $$ re
Packit Service d2f85f
  return $ inTags True "list" attribs items
Packit Service d2f85f
blockToTEI opts (DefinitionList lst) = do
Packit Service d2f85f
  let attribs = [("type", "definition")]
Packit Service d2f85f
  inTags True "list" attribs <$> deflistItemsToTEI opts lst
Packit Service d2f85f
blockToTEI _ b@(RawBlock f str)
Packit Service d2f85f
  | f == "tei"     = return $ text str
Packit Service d2f85f
  -- raw TEI block (should such a thing exist).
Packit Service d2f85f
  | otherwise      = do
Packit Service d2f85f
    report $ BlockNotRendered b
Packit Service d2f85f
    return empty
Packit Service d2f85f
blockToTEI _ HorizontalRule = return $
Packit Service d2f85f
  selfClosingTag "milestone" [("unit","undefined")
Packit Service d2f85f
                             ,("type","separator")
Packit Service d2f85f
                             ,("rendition","line")]
Packit Service d2f85f
Packit Service d2f85f
-- | TEI Tables
Packit Service d2f85f
-- TEI Simple's tables are composed of cells and rows; other
Packit Service d2f85f
-- table info in the AST is here lossily discard.
Packit Service d2f85f
blockToTEI opts (Table _ _ _ headers rows) = do
Packit Service d2f85f
  headers' <- tableHeadersToTEI opts headers
Packit Service d2f85f
  rows' <- mapM (tableRowToTEI opts) rows
Packit Service d2f85f
  return $ inTags True "table" [] $ headers' $$ vcat rows'
Packit Service d2f85f
Packit Service d2f85f
tableRowToTEI :: PandocMonad m
Packit Service d2f85f
              => WriterOptions
Packit Service d2f85f
              -> [[Block]]
Packit Service d2f85f
              -> m Doc
Packit Service d2f85f
tableRowToTEI opts cols =
Packit Service d2f85f
  (inTagsIndented "row" . vcat) <$> mapM (tableItemToTEI opts) cols
Packit Service d2f85f
Packit Service d2f85f
tableHeadersToTEI :: PandocMonad m
Packit Service d2f85f
                  => WriterOptions
Packit Service d2f85f
                  -> [[Block]]
Packit Service d2f85f
                  -> m Doc
Packit Service d2f85f
tableHeadersToTEI opts cols =
Packit Service d2f85f
  (inTags True "row" [("role","label")] . vcat) <$>
Packit Service d2f85f
    mapM (tableItemToTEI opts) cols
Packit Service d2f85f
Packit Service d2f85f
tableItemToTEI :: PandocMonad m
Packit Service d2f85f
               => WriterOptions
Packit Service d2f85f
               -> [Block]
Packit Service d2f85f
               -> m Doc
Packit Service d2f85f
tableItemToTEI opts item =
Packit Service d2f85f
  (inTags False "cell" [] . vcat) <$> mapM (blockToTEI opts) item
Packit Service d2f85f
Packit Service d2f85f
-- | Convert a list of inline elements to TEI.
Packit Service d2f85f
inlinesToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m Doc
Packit Service d2f85f
inlinesToTEI opts lst = hcat <$> mapM (inlineToTEI opts) lst
Packit Service d2f85f
Packit Service d2f85f
-- | Convert an inline element to TEI.
Packit Service d2f85f
inlineToTEI :: PandocMonad m => WriterOptions -> Inline -> m Doc
Packit Service d2f85f
inlineToTEI _ (Str str) = return $ text $ escapeStringForXML str
Packit Service d2f85f
inlineToTEI opts (Emph lst) =
Packit Service d2f85f
  inTags False "hi" [("rendition","simple:italic")] <$> inlinesToTEI opts lst
Packit Service d2f85f
inlineToTEI opts (Strong lst) =
Packit Service d2f85f
  inTags False "hi" [("rendition", "simple:bold")] <$> inlinesToTEI opts lst
Packit Service d2f85f
inlineToTEI opts (Strikeout lst) =
Packit Service d2f85f
  inTags False "hi" [("rendition", "simple:strikethrough")] <$>
Packit Service d2f85f
  inlinesToTEI opts lst
Packit Service d2f85f
inlineToTEI opts (Superscript lst) =
Packit Service d2f85f
  inTags False "hi" [("rendition", "simple:superscript")] <$>
Packit Service d2f85f
    inlinesToTEI opts lst
Packit Service d2f85f
inlineToTEI opts (Subscript lst) =
Packit Service d2f85f
  inTags False "hi" [("rendition", "simple:subscript")] <$>
Packit Service d2f85f
    inlinesToTEI opts lst
Packit Service d2f85f
inlineToTEI opts (SmallCaps lst) =
Packit Service d2f85f
  inTags False "hi" [("rendition", "simple:smallcaps")] <$>
Packit Service d2f85f
    inlinesToTEI opts lst
Packit Service d2f85f
inlineToTEI opts (Quoted _ lst) =
Packit Service d2f85f
  inTagsSimple "quote" <$> inlinesToTEI opts lst
Packit Service d2f85f
inlineToTEI opts (Cite _ lst) =
Packit Service d2f85f
  inlinesToTEI opts lst
Packit Service d2f85f
inlineToTEI opts (Span _ ils) =
Packit Service d2f85f
  inlinesToTEI opts ils
Packit Service d2f85f
inlineToTEI _ (Code _ str) = return $
Packit Service d2f85f
  inTags False "seg" [("type","code")] $ text (escapeStringForXML str)
Packit Service d2f85f
-- Distinguish display from inline math by wrapping the former in a "figure."
Packit Service d2f85f
inlineToTEI _ (Math t str) = return $
Packit Service d2f85f
  case t of
Packit Service d2f85f
    InlineMath  -> inTags False "formula" [("notation","TeX")] $
Packit Service d2f85f
                   text str
Packit Service d2f85f
    DisplayMath -> inTags True "figure" [("type","math")] $
Packit Service d2f85f
                   inTags False "formula" [("notation","TeX")] $ text str
Packit Service d2f85f
Packit Service d2f85f
inlineToTEI _ il@(RawInline f x) | f == "tei"     = return $ text x
Packit Service d2f85f
                                 | otherwise      = empty <$
Packit Service d2f85f
                                     report (InlineNotRendered il)
Packit Service d2f85f
inlineToTEI _ LineBreak = return $ selfClosingTag "lb" []
Packit Service d2f85f
inlineToTEI _ Space =
Packit Service d2f85f
            return space
Packit Service d2f85f
-- because we use \n for LineBreak, we can't do soft breaks:
Packit Service d2f85f
inlineToTEI _ SoftBreak =
Packit Service d2f85f
            return space
Packit Service d2f85f
inlineToTEI opts (Link attr txt (src, _))
Packit Service d2f85f
  | Just email <- stripPrefix "mailto:" src = do
Packit Service d2f85f
      let emailLink = text $
Packit Service d2f85f
                      escapeStringForXML email
Packit Service d2f85f
      case txt of
Packit Service d2f85f
           [Str s] | escapeURI s == email ->
Packit Service d2f85f
                       return emailLink
Packit Service d2f85f
           _             -> do
Packit Service d2f85f
              linktext <- inlinesToTEI opts txt
Packit Service d2f85f
              return $ linktext <+> char '(' <> emailLink <> char ')'
Packit Service d2f85f
  | otherwise =
Packit Service d2f85f
      (if "#" `isPrefixOf` src
Packit Service d2f85f
            then inTags False "ref" $ ("target", drop 1 src) : idAndRole attr
Packit Service d2f85f
            else inTags False "ref" $ ("target", src) : idAndRole attr ) <$>
Packit Service d2f85f
        inlinesToTEI opts txt
Packit Service d2f85f
inlineToTEI opts (Image attr description (src, tit)) = do
Packit Service d2f85f
  let titleDoc = if null tit
Packit Service d2f85f
                   then empty
Packit Service d2f85f
                   else inTags False "figDesc" []
Packit Service d2f85f
                           (text $ escapeStringForXML tit)
Packit Service d2f85f
  imageDesc <- if null description
Packit Service d2f85f
                  then return empty
Packit Service d2f85f
                  else inTags False "head" []
Packit Service d2f85f
                         <$> inlinesToTEI opts description
Packit Service d2f85f
  img <- imageToTEI opts attr src
Packit Service d2f85f
  return $ inTagsIndented "figure" $ imageDesc $$ img $$ titleDoc
Packit Service d2f85f
inlineToTEI opts (Note contents) =
Packit Service d2f85f
  inTagsIndented "note" <$> blocksToTEI opts contents
Packit Service d2f85f
Packit Service d2f85f
idAndRole :: Attr -> [(String, String)]
Packit Service d2f85f
idAndRole (id',cls,_) = ident ++ role
Packit Service d2f85f
  where
Packit Service d2f85f
    ident = if null id'
Packit Service d2f85f
               then []
Packit Service d2f85f
               else [("id", id')]
Packit Service d2f85f
    role  = if null cls
Packit Service d2f85f
               then []
Packit Service d2f85f
               else [("role", unwords cls)]