Blame src/Text/Pandoc/Readers/Docx/Lists.hs

Packit Service d2f85f
{-
Packit Service d2f85f
Copyright (C) 2014-2017 Jesse Rosenthal <jrosenthal@jhu.edu>
Packit Service d2f85f
Packit Service d2f85f
This program is free software; you can redistribute it and/or modify
Packit Service d2f85f
it under the terms of the GNU General Public License as published by
Packit Service d2f85f
the Free Software Foundation; either version 2 of the License, or
Packit Service d2f85f
(at your option) any later version.
Packit Service d2f85f
Packit Service d2f85f
This program is distributed in the hope that it will be useful,
Packit Service d2f85f
but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service d2f85f
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit Service d2f85f
GNU General Public License for more details.
Packit Service d2f85f
Packit Service d2f85f
You should have received a copy of the GNU General Public License
Packit Service d2f85f
along with this program; if not, write to the Free Software
Packit Service d2f85f
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
Packit Service d2f85f
-}
Packit Service d2f85f
Packit Service d2f85f
{- |
Packit Service d2f85f
   Module      : Text.Pandoc.Readers.Docx.Lists
Packit Service d2f85f
   Copyright   : Copyright (C) 2014-2017 Jesse Rosenthal
Packit Service d2f85f
   License     : GNU GPL, version 2 or above
Packit Service d2f85f
Packit Service d2f85f
   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
Packit Service d2f85f
   Stability   : alpha
Packit Service d2f85f
   Portability : portable
Packit Service d2f85f
Packit Service d2f85f
Functions for converting flat docx paragraphs into nested lists.
Packit Service d2f85f
-}
Packit Service d2f85f
Packit Service d2f85f
module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
Packit Service d2f85f
                                      , blocksToDefinitions
Packit Service d2f85f
                                      , listParagraphDivs
Packit Service d2f85f
                                      ) where
Packit Service d2f85f
Packit Service d2f85f
import Data.List
Packit Service d2f85f
import Data.Maybe
Packit Service d2f85f
import Text.Pandoc.Generic (bottomUp)
Packit Service d2f85f
import Text.Pandoc.JSON
Packit Service d2f85f
import Text.Pandoc.Shared (trim)
Packit Service d2f85f
Packit Service d2f85f
isListItem :: Block -> Bool
Packit Service d2f85f
isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True
Packit Service d2f85f
isListItem _                       = False
Packit Service d2f85f
Packit Service d2f85f
getLevel :: Block -> Maybe Integer
Packit Service d2f85f
getLevel (Div (_, _, kvs) _) =  fmap read $ lookup "level" kvs
Packit Service d2f85f
getLevel _                   = Nothing
Packit Service d2f85f
Packit Service d2f85f
getLevelN :: Block -> Integer
Packit Service d2f85f
getLevelN b = fromMaybe (-1) (getLevel b)
Packit Service d2f85f
Packit Service d2f85f
getNumId :: Block -> Maybe Integer
Packit Service d2f85f
getNumId (Div (_, _, kvs) _) =  fmap read $ lookup "num-id" kvs
Packit Service d2f85f
getNumId _                   = Nothing
Packit Service d2f85f
Packit Service d2f85f
getNumIdN :: Block -> Integer
Packit Service d2f85f
getNumIdN b = fromMaybe (-1) (getNumId b)
Packit Service d2f85f
Packit Service d2f85f
getText :: Block -> Maybe String
Packit Service d2f85f
getText (Div (_, _, kvs) _) = lookup "text" kvs
Packit Service d2f85f
getText _                   = Nothing
Packit Service d2f85f
Packit Service d2f85f
data ListType = Itemized | Enumerated ListAttributes
Packit Service d2f85f
Packit Service d2f85f
listStyleMap :: [(String, ListNumberStyle)]
Packit Service d2f85f
listStyleMap = [("upperLetter", UpperAlpha),
Packit Service d2f85f
                ("lowerLetter", LowerAlpha),
Packit Service d2f85f
                ("upperRoman", UpperRoman),
Packit Service d2f85f
                ("lowerRoman", LowerRoman),
Packit Service d2f85f
                ("decimal", Decimal)]
Packit Service d2f85f
Packit Service d2f85f
listDelimMap :: [(String, ListNumberDelim)]
Packit Service d2f85f
listDelimMap = [("%1)", OneParen),
Packit Service d2f85f
                ("(%1)", TwoParens),
Packit Service d2f85f
                ("%1.", Period)]
Packit Service d2f85f
Packit Service d2f85f
getListType :: Block -> Maybe ListType
Packit Service d2f85f
getListType b@(Div (_, _, kvs) _) | isListItem b =
Packit Service d2f85f
  let
Packit Service d2f85f
    start = lookup "start" kvs
Packit Service d2f85f
    frmt = lookup "format" kvs
Packit Service d2f85f
    txt  = lookup "text" kvs
Packit Service d2f85f
  in
Packit Service d2f85f
   case frmt of
Packit Service d2f85f
     Just "bullet" -> Just Itemized
Packit Service d2f85f
     Just f        ->
Packit Service d2f85f
       case txt of
Packit Service d2f85f
         Just t -> Just $ Enumerated (
Packit Service d2f85f
                  read (fromMaybe "1" start) :: Int,
Packit Service d2f85f
                  fromMaybe DefaultStyle (lookup f listStyleMap),
Packit Service d2f85f
                  fromMaybe DefaultDelim (lookup t listDelimMap))
Packit Service d2f85f
         Nothing -> Nothing
Packit Service d2f85f
     _ -> Nothing
Packit Service d2f85f
getListType _ = Nothing
Packit Service d2f85f
Packit Service d2f85f
listParagraphDivs :: [String]
Packit Service d2f85f
listParagraphDivs = ["ListParagraph"]
Packit Service d2f85f
Packit Service d2f85f
-- This is a first stab at going through and attaching meaning to list
Packit Service d2f85f
-- paragraphs, without an item marker, following a list item. We
Packit Service d2f85f
-- assume that these are paragraphs in the same item.
Packit Service d2f85f
Packit Service d2f85f
handleListParagraphs :: [Block] -> [Block]
Packit Service d2f85f
handleListParagraphs [] = []
Packit Service d2f85f
handleListParagraphs (
Packit Service d2f85f
  Div attr1@(_, classes1, _) blks1 :
Packit Service d2f85f
  Div (ident2, classes2, kvs2) blks2 :
Packit Service d2f85f
  blks
Packit Service d2f85f
  ) | "list-item" `elem` classes1 &&
Packit Service d2f85f
    notElem "list-item" classes2 &&
Packit Service d2f85f
    (not . null) (listParagraphDivs `intersect` classes2) =
Packit Service d2f85f
      -- We don't want to keep this indent.
Packit Service d2f85f
      let newDiv2 =
Packit Service d2f85f
            Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2
Packit Service d2f85f
      in
Packit Service d2f85f
       handleListParagraphs (Div attr1 (blks1 ++ [newDiv2]) : blks)
Packit Service d2f85f
handleListParagraphs (blk:blks) = blk : handleListParagraphs blks
Packit Service d2f85f
Packit Service d2f85f
separateBlocks' :: Block -> [[Block]] -> [[Block]]
Packit Service d2f85f
separateBlocks' blk [[]] = [[blk]]
Packit Service d2f85f
separateBlocks' b@(BulletList _) acc = init acc ++ [last acc ++ [b]]
Packit Service d2f85f
separateBlocks' b@(OrderedList _ _) acc = init acc ++ [last acc ++ [b]]
Packit Service d2f85f
-- The following is for the invisible bullet lists. This is how
Packit Service d2f85f
-- pandoc-generated ooxml does multiparagraph item lists.
Packit Service d2f85f
separateBlocks' b acc | fmap trim (getText b) == Just "" =
Packit Service d2f85f
  init acc ++ [last acc ++ [b]]
Packit Service d2f85f
separateBlocks' b acc = acc ++ [[b]]
Packit Service d2f85f
Packit Service d2f85f
separateBlocks :: [Block] -> [[Block]]
Packit Service d2f85f
separateBlocks blks = foldr separateBlocks' [[]] (reverse blks)
Packit Service d2f85f
Packit Service d2f85f
flatToBullets' :: Integer -> [Block] -> [Block]
Packit Service d2f85f
flatToBullets' _ [] = []
Packit Service d2f85f
flatToBullets' num xs@(b : elems)
Packit Service d2f85f
  | getLevelN b == num = b : flatToBullets' num elems
Packit Service d2f85f
  | otherwise =
Packit Service d2f85f
    let bNumId = getNumIdN b
Packit Service d2f85f
        bLevel = getLevelN b
Packit Service d2f85f
        (children, remaining) =
Packit Service d2f85f
          span
Packit Service d2f85f
          (\b' ->
Packit Service d2f85f
            (getLevelN b') > bLevel ||
Packit Service d2f85f
             ((getLevelN b') == bLevel && (getNumIdN b') == bNumId))
Packit Service d2f85f
          xs
Packit Service d2f85f
    in
Packit Service d2f85f
     case getListType b of
Packit Service d2f85f
       Just (Enumerated attr) ->
Packit Service d2f85f
         OrderedList attr (separateBlocks $ flatToBullets' bLevel children) :
Packit Service d2f85f
         flatToBullets' num remaining
Packit Service d2f85f
       _ ->
Packit Service d2f85f
         BulletList (separateBlocks $ flatToBullets' bLevel children) :
Packit Service d2f85f
         flatToBullets' num remaining
Packit Service d2f85f
Packit Service d2f85f
flatToBullets :: [Block] -> [Block]
Packit Service d2f85f
flatToBullets elems = flatToBullets' (-1) elems
Packit Service d2f85f
Packit Service d2f85f
singleItemHeaderToHeader :: Block -> Block
Packit Service d2f85f
singleItemHeaderToHeader (OrderedList _ [[h@Header{}]]) = h
Packit Service d2f85f
singleItemHeaderToHeader blk                            = blk
Packit Service d2f85f
Packit Service d2f85f
Packit Service d2f85f
blocksToBullets :: [Block] -> [Block]
Packit Service d2f85f
blocksToBullets blks =
Packit Service d2f85f
  map singleItemHeaderToHeader $
Packit Service d2f85f
  bottomUp removeListDivs $flatToBullets (handleListParagraphs blks)
Packit Service d2f85f
Packit Service d2f85f
plainParaInlines :: Block -> [Inline]
Packit Service d2f85f
plainParaInlines (Plain ils) = ils
Packit Service d2f85f
plainParaInlines (Para ils)  = ils
Packit Service d2f85f
plainParaInlines _           = []
Packit Service d2f85f
Packit Service d2f85f
blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block]
Packit Service d2f85f
blocksToDefinitions' []     acc [] = reverse acc
Packit Service d2f85f
blocksToDefinitions' defAcc acc [] =
Packit Service d2f85f
  reverse $ DefinitionList (reverse defAcc) : acc
Packit Service d2f85f
blocksToDefinitions' defAcc acc
Packit Service d2f85f
  (Div (_, classes1, _) blks1 : Div (ident2, classes2, kvs2) blks2 : blks)
Packit Service d2f85f
  | "DefinitionTerm" `elem` classes1 && "Definition"  `elem` classes2 =
Packit Service d2f85f
    let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
Packit Service d2f85f
        pair = if remainingAttr2 == ("", [], []) then (concatMap plainParaInlines blks1, [blks2]) else (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
Packit Service d2f85f
    in
Packit Service d2f85f
     blocksToDefinitions' (pair : defAcc) acc blks
Packit Service d2f85f
blocksToDefinitions' defAcc acc
Packit Service d2f85f
  (Div (ident2, classes2, kvs2) blks2 : blks)
Packit Service d2f85f
  | (not . null) defAcc && "Definition"  `elem` classes2 =
Packit Service d2f85f
    let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
Packit Service d2f85f
        defItems2 = case remainingAttr2 == ("", [], []) of
Packit Service d2f85f
          True  -> blks2
Packit Service d2f85f
          False -> [Div remainingAttr2 blks2]
Packit Service d2f85f
        ((defTerm, defItems):defs) = defAcc
Packit Service d2f85f
        defAcc' = case null defItems of
Packit Service d2f85f
          True -> (defTerm, [defItems2]) : defs
Packit Service d2f85f
          False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
Packit Service d2f85f
    in
Packit Service d2f85f
     blocksToDefinitions' defAcc' acc blks
Packit Service d2f85f
blocksToDefinitions' [] acc (b:blks) =
Packit Service d2f85f
  blocksToDefinitions' [] (b:acc) blks
Packit Service d2f85f
blocksToDefinitions' defAcc acc (b:blks) =
Packit Service d2f85f
  blocksToDefinitions' [] (b : DefinitionList (reverse defAcc) : acc) blks
Packit Service d2f85f
Packit Service d2f85f
removeListDivs' :: Block -> [Block]
Packit Service d2f85f
removeListDivs' (Div (ident, classes, kvs) blks)
Packit Service d2f85f
  | "list-item" `elem` classes =
Packit Service d2f85f
    case delete "list-item" classes of
Packit Service d2f85f
      []       -> blks
Packit Service d2f85f
      classes' -> [Div (ident, classes', kvs) blks]
Packit Service d2f85f
removeListDivs' (Div (ident, classes, kvs) blks)
Packit Service d2f85f
  | not $ null $ listParagraphDivs `intersect` classes =
Packit Service d2f85f
    case classes \\ listParagraphDivs of
Packit Service d2f85f
      []       -> blks
Packit Service d2f85f
      classes' -> [Div (ident, classes', kvs) blks]
Packit Service d2f85f
removeListDivs' blk = [blk]
Packit Service d2f85f
Packit Service d2f85f
removeListDivs :: [Block] -> [Block]
Packit Service d2f85f
removeListDivs = concatMap removeListDivs'
Packit Service d2f85f
Packit Service d2f85f
Packit Service d2f85f
Packit Service d2f85f
blocksToDefinitions :: [Block] -> [Block]
Packit Service d2f85f
blocksToDefinitions = blocksToDefinitions' [] []