aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2019-09-21 11:39:15 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2019-09-21 11:39:15 -0700
commit780079aaec6b82d3d235e20afda06cdfc8b486d5 (patch)
tree6994f0b88d34cdc7df9baad28d2198e0819e2f29
parente3a6648e8f2553bb37a158729ec7cfbdd942fbcb (diff)
parent9dbfd23c566efb5bf80deaf4e34b09cf38a97197 (diff)
downloadpandoc-780079aaec6b82d3d235e20afda06cdfc8b486d5.tar.gz
Merge branch 'lierdakil-docx-reader-styles'
-rw-r--r--.gitignore1
-rw-r--r--MANUAL.txt6
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs212
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs25
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs315
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs127
-rw-r--r--test/Tests/Readers/Docx.hs9
-rw-r--r--test/docx/0_level_headers.native4
-rw-r--r--test/docx/adjacent_codeblocks.docxbin22437 -> 22264 bytes
-rw-r--r--test/docx/compact-style-removal.docxbin0 -> 9951 bytes
-rw-r--r--test/docx/compact-style-removal.native5
-rw-r--r--test/docx/custom-style-with-styles.native6
-rw-r--r--test/docx/lists-compact.docxbin0 -> 9952 bytes
-rw-r--r--test/docx/lists-compact.native5
-rw-r--r--test/docx/lists.docxbin19845 -> 9473 bytes
-rw-r--r--test/docx/lists.native2
-rw-r--r--test/docx/nested_anchors_in_header.native2
17 files changed, 407 insertions, 312 deletions
diff --git a/.gitignore b/.gitignore
index 32ef66e8b..e8690c4eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,4 @@ windows/*.wixobj
data/reference.docx
data/reference.odt
.stack-work
+cabal.project.local
diff --git a/MANUAL.txt b/MANUAL.txt
index 3c9f158ef..7e01a5002 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5321,17 +5321,17 @@ And with the extension:
$ pandoc test/docx/custom-style-reference.docx -f docx+styles -t markdown
- ::: {custom-style="FirstParagraph"}
+ ::: {custom-style="First Paragraph"}
This is some text.
:::
- ::: {custom-style="BodyText"}
+ ::: {custom-style="Body Text"}
This is text with an [emphasized]{custom-style="Emphatic"} text style.
And this is text with a [strengthened]{custom-style="Strengthened"}
text style.
:::
- ::: {custom-style="MyBlockStyle"}
+ ::: {custom-style="My Block Style"}
> Here is a styled paragraph that inherits from Block Text.
:::
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 4f44d18e7..9d17ab118 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -2,6 +2,8 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE MultiWayIf #-}
+{-# LANGUAGE FlexibleContexts #-}
{- |
Module : Text.Pandoc.Readers.Docx
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -64,6 +66,7 @@ import Control.Monad.State.Strict
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect)
+import Data.Char (isSpace)
import qualified Data.Map as M
import Data.Maybe (isJust, fromMaybe)
import Data.Sequence (ViewL (..), viewl)
@@ -129,16 +132,16 @@ instance Default DEnv where
type DocxContext m = ReaderT DEnv (StateT DState m)
evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a
-evalDocxContext ctx env st = flip evalStateT st $flip runReaderT env ctx
+evalDocxContext ctx env st = flip evalStateT st $ runReaderT ctx env
-- This is empty, but we put it in for future-proofing.
-spansToKeep :: [String]
+spansToKeep :: [CharStyleName]
spansToKeep = []
-divsToKeep :: [String]
-divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
+divsToKeep :: [ParaStyleName]
+divsToKeep = ["Definition", "Definition Term"]
-metaStyles :: M.Map String String
+metaStyles :: M.Map ParaStyleName String
metaStyles = M.fromList [ ("Title", "title")
, ("Subtitle", "subtitle")
, ("Author", "author")
@@ -150,7 +153,7 @@ sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp)
isMetaPar :: BodyPart -> Bool
isMetaPar (Paragraph pPr _) =
- not $ null $ intersect (pStyle pPr) (M.keys metaStyles)
+ not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys metaStyles)
isMetaPar _ = False
isEmptyPar :: BodyPart -> Bool
@@ -167,7 +170,7 @@ bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String M
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
- , (c : _)<- (pStyle pPr) `intersect` (M.keys metaStyles)
+ , (c : _)<- getStyleNames (pStyle pPr) `intersect` M.keys metaStyles
, (Just metaField) <- M.lookup c metaStyles = do
inlines <- smushInlines <$> mapM parPartToInlines parParts
remaining <- bodyPartsToMeta' bps
@@ -197,11 +200,29 @@ fixAuthors (MetaBlocks blks) =
g _ = MetaInlines []
fixAuthors mv = mv
-codeStyles :: [String]
-codeStyles = ["VerbatimChar"]
+isInheritedFromStyles :: (Eq (StyleName s), HasStyleName s, HasParentStyle s) => [StyleName s] -> s -> Bool
+isInheritedFromStyles names sty
+ | getStyleName sty `elem` names = True
+ | Just psty <- getParentStyle sty = isInheritedFromStyles names psty
+ | otherwise = False
-codeDivs :: [String]
-codeDivs = ["SourceCode"]
+hasStylesInheritedFrom :: [ParaStyleName] -> ParagraphStyle -> Bool
+hasStylesInheritedFrom ns s = any (isInheritedFromStyles ns) $ pStyle s
+
+removeStyleNamed :: ParaStyleName -> ParagraphStyle -> ParagraphStyle
+removeStyleNamed sn ps = ps{pStyle = filter (\psd -> getStyleName psd /= sn) $ pStyle ps}
+
+isCodeCharStyle :: CharStyle -> Bool
+isCodeCharStyle = isInheritedFromStyles ["Verbatim Char"]
+
+isCodeDiv :: ParagraphStyle -> Bool
+isCodeDiv = hasStylesInheritedFrom ["Source Code"]
+
+isBlockQuote :: ParStyle -> Bool
+isBlockQuote =
+ isInheritedFromStyles [
+ "Quote", "Block Text", "Block Quote", "Block Quotation"
+ ]
runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s) = text s
@@ -227,57 +248,31 @@ parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
parPartToString _ = ""
-blacklistedCharStyles :: [String]
+blacklistedCharStyles :: [CharStyleName]
blacklistedCharStyles = ["Hyperlink"]
resolveDependentRunStyle :: PandocMonad m => RunStyle -> DocxContext m RunStyle
resolveDependentRunStyle rPr
- | Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles =
+ | Just s <- rParentStyle rPr
+ , getStyleName s `elem` blacklistedCharStyles =
return rPr
- | Just (_, cs) <- rStyle rPr = do
+ | Just s <- rParentStyle rPr = do
opts <- asks docxOptions
if isEnabled Ext_styles opts
then return rPr
- else do rPr' <- resolveDependentRunStyle cs
- return $
- RunStyle { isBold = case isBold rPr of
- Just bool -> Just bool
- Nothing -> isBold rPr'
- , isItalic = case isItalic rPr of
- Just bool -> Just bool
- Nothing -> isItalic rPr'
- , isSmallCaps = case isSmallCaps rPr of
- Just bool -> Just bool
- Nothing -> isSmallCaps rPr'
- , isStrike = case isStrike rPr of
- Just bool -> Just bool
- Nothing -> isStrike rPr'
- , isRTL = case isRTL rPr of
- Just bool -> Just bool
- Nothing -> isRTL rPr'
- , rVertAlign = case rVertAlign rPr of
- Just valign -> Just valign
- Nothing -> rVertAlign rPr'
- , rUnderline = case rUnderline rPr of
- Just ulstyle -> Just ulstyle
- Nothing -> rUnderline rPr'
- , rStyle = rStyle rPr
- }
+ else leftBiasedMergeRunStyle rPr <$> resolveDependentRunStyle (cStyleData s)
| otherwise = return rPr
runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines)
runStyleToTransform rPr
- | Just (s, _) <- rStyle rPr
- , s `elem` spansToKeep = do
- transform <- runStyleToTransform rPr{rStyle = Nothing}
- return $ spanWith ("", [s], []) . transform
- | Just (s, _) <- rStyle rPr = do
- opts <- asks docxOptions
- let extraInfo = if isEnabled Ext_styles opts
- then spanWith ("", [], [("custom-style", s)])
- else id
- transform <- runStyleToTransform rPr{rStyle = Nothing}
- return $ extraInfo . transform
+ | Just sn <- getStyleName <$> rParentStyle rPr
+ , sn `elem` spansToKeep = do
+ transform <- runStyleToTransform rPr{rParentStyle = Nothing}
+ return $ spanWith ("", [normalizeToClassName sn], []) . transform
+ | Just s <- rParentStyle rPr = do
+ ei <- extraInfo spanWith s
+ transform <- runStyleToTransform rPr{rParentStyle = Nothing}
+ return $ ei . transform
| Just True <- isItalic rPr = do
transform <- runStyleToTransform rPr{isItalic = Nothing}
return $ emph . transform
@@ -309,8 +304,7 @@ runStyleToTransform rPr
runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
- | Just (s, _) <- rStyle rs
- , s `elem` codeStyles = do
+ | maybe False isCodeCharStyle $ rParentStyle rs = do
rPr <- resolveDependentRunStyle rs
let codeString = code $ concatMap runElemToString runElems
return $ case rVertAlign rPr of
@@ -525,55 +519,49 @@ trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils
isSp LineBreak = True
isSp _ = False
+extraInfo :: (Eq (StyleName a), PandocMonad m, HasStyleName a)
+ => (Attr -> i -> i) -> a -> DocxContext m (i -> i)
+extraInfo f s = do
+ opts <- asks docxOptions
+ return $ if | isEnabled Ext_styles opts
+ -> f ("", [], [("custom-style", fromStyleName $ getStyleName s)])
+ | otherwise -> id
+
parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
parStyleToTransform pPr
| (c:cs) <- pStyle pPr
- , c `elem` divsToKeep = do
+ , getStyleName c `elem` divsToKeep = do
let pPr' = pPr { pStyle = cs }
transform <- parStyleToTransform pPr'
- return $ divWith ("", [c], []) . transform
+ return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform
| (c:cs) <- pStyle pPr,
- c `elem` listParagraphDivs = do
+ getStyleName c `elem` listParagraphStyles = do
let pPr' = pPr { pStyle = cs, indentation = Nothing}
transform <- parStyleToTransform pPr'
- return $ divWith ("", [c], []) . transform
- | (c:cs) <- pStyle pPr
- , Just True <- pBlockQuote pPr = do
- opts <- asks docxOptions
- let pPr' = pPr { pStyle = cs }
- transform <- parStyleToTransform pPr'
- let extraInfo = if isEnabled Ext_styles opts
- then divWith ("", [], [("custom-style", c)])
- else id
- return $ extraInfo . blockQuote . transform
+ return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform
| (c:cs) <- pStyle pPr = do
- opts <- asks docxOptions
- let pPr' = pPr { pStyle = cs}
+ let pPr' = pPr { pStyle = cs }
transform <- parStyleToTransform pPr'
- let extraInfo = if isEnabled Ext_styles opts
- then divWith ("", [], [("custom-style", c)])
- else id
- return $ extraInfo . transform
+ ei <- extraInfo divWith c
+ return $ ei . (if isBlockQuote c then blockQuote else id) . transform
| null (pStyle pPr)
- , Just left <- indentation pPr >>= leftParIndent
- , Just hang <- indentation pPr >>= hangingParIndent = do
+ , Just left <- indentation pPr >>= leftParIndent = do
let pPr' = pPr { indentation = Nothing }
+ hang = fromMaybe 0 $ indentation pPr >>= hangingParIndent
transform <- parStyleToTransform pPr'
- return $ case (left - hang) > 0 of
- True -> blockQuote . transform
- False -> transform
- | null (pStyle pPr),
- Just left <- indentation pPr >>= leftParIndent = do
- let pPr' = pPr { indentation = Nothing }
- transform <- parStyleToTransform pPr'
- return $ case left > 0 of
- True -> blockQuote . transform
- False -> transform
+ return $ if (left - hang) > 0
+ then blockQuote . transform
+ else transform
parStyleToTransform _ = return id
+normalizeToClassName :: (FromStyleName a) => a -> String
+normalizeToClassName = map go . fromStyleName
+ where go c | isSpace c = '-'
+ | otherwise = c
+
bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
bodyPartToBlocks (Paragraph pPr parparts)
- | not $ null $ codeDivs `intersect` (pStyle pPr) = do
+ | isCodeDiv pPr = do
transform <- parStyleToTransform pPr
return $
transform $
@@ -583,62 +571,60 @@ bodyPartToBlocks (Paragraph pPr parparts)
ils <-local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
makeHeaderAnchor $
- headerWith ("", delete style (pStyle pPr), []) n ils
+ headerWith ("", map normalizeToClassName . delete style $ getStyleNames (pStyle pPr), []) n ils
| otherwise = do
- ils <- (trimSps . smushInlines) <$> mapM parPartToInlines parparts
+ ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts
prevParaIls <- gets docxPrevPara
dropIls <- gets docxDropCap
let ils' = dropIls <> ils
- if dropCap pPr
+ let (paraOrPlain, pPr')
+ | hasStylesInheritedFrom ["Compact"] pPr = (plain, removeStyleNamed "Compact" pPr)
+ | otherwise = (para, pPr)
+ if dropCap pPr'
then do modify $ \s -> s { docxDropCap = ils' }
return mempty
else do modify $ \s -> s { docxDropCap = mempty }
let ils'' = prevParaIls <>
(if isNull prevParaIls then mempty else space) <>
ils'
+ handleInsertion = do
+ modify $ \s -> s {docxPrevPara = mempty}
+ transform <- parStyleToTransform pPr'
+ return $ transform $ paraOrPlain ils''
opts <- asks docxOptions
- case () of
-
- _ | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) ->
+ if | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) ->
return mempty
- _ | Just (TrackedChange Insertion _) <- pChange pPr
- , AcceptChanges <- readerTrackChanges opts -> do
- modify $ \s -> s {docxPrevPara = mempty}
- transform <- parStyleToTransform pPr
- return $ transform $ para ils''
- _ | Just (TrackedChange Insertion _) <- pChange pPr
+ | Just (TrackedChange Insertion _) <- pChange pPr'
+ , AcceptChanges <- readerTrackChanges opts ->
+ handleInsertion
+ | Just (TrackedChange Insertion _) <- pChange pPr'
, RejectChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = ils''}
return mempty
- _ | Just (TrackedChange Insertion cInfo) <- pChange pPr
+ | Just (TrackedChange Insertion cInfo) <- pChange pPr'
, AllChanges <- readerTrackChanges opts
, ChangeInfo _ cAuthor cDate <- cInfo -> do
let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
- transform <- parStyleToTransform pPr
+ transform <- parStyleToTransform pPr'
return $ transform $
- para $ ils'' <> insertMark
- _ | Just (TrackedChange Deletion _) <- pChange pPr
+ paraOrPlain $ ils'' <> insertMark
+ | Just (TrackedChange Deletion _) <- pChange pPr'
, AcceptChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = ils''}
return mempty
- _ | Just (TrackedChange Deletion _) <- pChange pPr
- , RejectChanges <- readerTrackChanges opts -> do
- modify $ \s -> s {docxPrevPara = mempty}
- transform <- parStyleToTransform pPr
- return $ transform $ para ils''
- _ | Just (TrackedChange Deletion cInfo) <- pChange pPr
+ | Just (TrackedChange Deletion _) <- pChange pPr'
+ , RejectChanges <- readerTrackChanges opts ->
+ handleInsertion
+ | Just (TrackedChange Deletion cInfo) <- pChange pPr'
, AllChanges <- readerTrackChanges opts
, ChangeInfo _ cAuthor cDate <- cInfo -> do
let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
- transform <- parStyleToTransform pPr
+ transform <- parStyleToTransform pPr'
return $ transform $
- para $ ils'' <> insertMark
- _ | otherwise -> do
- modify $ \s -> s {docxPrevPara = mempty}
- transform <- parStyleToTransform pPr
- return $ transform $ para ils''
+ paraOrPlain $ ils'' <> insertMark
+ | otherwise -> handleInsertion
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
-- We check whether this current numId has previously been used,
-- since Docx expects us to pick up where we left off.
@@ -658,7 +644,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
blks <- bodyPartToBlocks (Paragraph pPr parparts)
return $ divWith ("", ["list-item"], kvs) blks
bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
- let pPr' = pPr {pStyle = "ListParagraph": pStyle pPr}
+ let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr}
in
bodyPartToBlocks $ Paragraph pPr' parparts
bodyPartToBlocks (Tbl _ _ _ []) =
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index cc390f122..eb24640c5 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Lists
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -14,13 +15,16 @@ Functions for converting flat docx paragraphs into nested lists.
module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
, blocksToDefinitions
, listParagraphDivs
+ , listParagraphStyles
) where
import Prelude
import Data.List
import Data.Maybe
+import Data.String (fromString)
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.JSON
+import Text.Pandoc.Readers.Docx.Parse (ParaStyleName)
import Text.Pandoc.Shared (trim, safeRead)
isListItem :: Block -> Bool
@@ -79,7 +83,10 @@ getListType b@(Div (_, _, kvs) _) | isListItem b =
getListType _ = Nothing
listParagraphDivs :: [String]
-listParagraphDivs = ["ListParagraph"]
+listParagraphDivs = ["list-paragraph"]
+
+listParagraphStyles :: [ParaStyleName]
+listParagraphStyles = map fromString listParagraphDivs
-- This is a first stab at going through and attaching meaning to list
-- paragraphs, without an item marker, following a list item. We
@@ -160,7 +167,7 @@ blocksToDefinitions' defAcc acc [] =
reverse $ DefinitionList (reverse defAcc) : acc
blocksToDefinitions' defAcc acc
(Div (_, classes1, _) blks1 : Div (ident2, classes2, kvs2) blks2 : blks)
- | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 =
+ | "Definition-Term" `elem` classes1 && "Definition" `elem` classes2 =
let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
pair = if remainingAttr2 == ("", [], []) then (concatMap plainParaInlines blks1, [blks2]) else (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
in
@@ -169,12 +176,12 @@ blocksToDefinitions' ((defTerm, defItems):defs) acc
(Div (ident2, classes2, kvs2) blks2 : blks)
| "Definition" `elem` classes2 =
let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
- defItems2 = case remainingAttr2 == ("", [], []) of
- True -> blks2
- False -> [Div remainingAttr2 blks2]
- defAcc' = case null defItems of
- True -> (defTerm, [defItems2]) : defs
- False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
+ defItems2 = if remainingAttr2 == ("", [], [])
+ then blks2
+ else [Div remainingAttr2 blks2]
+ defAcc' = if null defItems
+ then (defTerm, [defItems2]) : defs
+ else (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
in
blocksToDefinitions' defAcc' acc blks
blocksToDefinitions' [] acc (b:blks) =
@@ -198,7 +205,5 @@ removeListDivs' blk = [blk]
removeListDivs :: [Block] -> [Block]
removeListDivs = concatMap removeListDivs'
-
-
blocksToDefinitions :: [Block] -> [Block]
blocksToDefinitions = blocksToDefinitions' [] []
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index f725660b9..00c5fb0be 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,7 +1,11 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -31,6 +35,8 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, VertAlign(..)
, ParIndentation(..)
, ParagraphStyle(..)
+ , ParStyle
+ , CharStyle(cStyleData)
, Row(..)
, Cell(..)
, TrackedChange(..)
@@ -38,8 +44,17 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, ChangeInfo(..)
, FieldInfo(..)
, Level(..)
+ , ParaStyleName
+ , CharStyleName
+ , FromStyleName(..)
+ , HasStyleName(..)
+ , HasParentStyle(..)
, archiveToDocx
, archiveToDocxWithWarnings
+ , getStyleNames
+ , pHeading
+ , constructBogusParStyleData
+ , leftBiasedMergeRunStyle
) where
import Prelude
import Codec.Archive.Zip
@@ -49,10 +64,13 @@ import Control.Monad.Reader
import Control.Monad.State.Strict
import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
-import Data.Char (chr, ord, readLitChar)
+import Data.Char (chr, ord, readLitChar, toLower)
import Data.List
+import Data.Function (on)
+import Data.String (IsString(..))
import qualified Data.Map as M
import Data.Maybe
+import Data.Coerce
import System.FilePath
import Text.Pandoc.Readers.Docx.Util
import Text.Pandoc.Readers.Docx.Fields
@@ -121,9 +139,9 @@ unwrap :: NameSpaces -> Content -> [Content]
unwrap ns (Elem element)
| isElem ns "w" "sdt" element
, Just sdtContent <- findChildByName ns "w" "sdtContent" element
- = concatMap ((unwrap ns) . Elem) (elChildren sdtContent)
+ = concatMap (unwrap ns . Elem) (elChildren sdtContent)
| isElem ns "w" "smartTag" element
- = concatMap ((unwrap ns) . Elem) (elChildren element)
+ = concatMap (unwrap ns . Elem) (elChildren element)
unwrap _ content = [content]
unwrapChild :: NameSpaces -> Content -> Content
@@ -149,24 +167,20 @@ walkDocument ns element =
_ -> Nothing
-data Docx = Docx Document
+newtype Docx = Docx Document
deriving Show
data Document = Document NameSpaces Body
deriving Show
-data Body = Body [BodyPart]
+newtype Body = Body [BodyPart]
deriving Show
type Media = [(FilePath, B.ByteString)]
-type CharStyle = (String, RunStyle)
+type CharStyleMap = M.Map CharStyleId CharStyle
-type ParStyle = (String, ParStyleData)
-
-type CharStyleMap = M.Map String RunStyle
-
-type ParStyleMap = M.Map String ParStyleData
+type ParStyleMap = M.Map ParaStyleId ParStyle
data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
deriving Show
@@ -213,12 +227,9 @@ data ChangeInfo = ChangeInfo ChangeId Author ChangeDate
data TrackedChange = TrackedChange ChangeType ChangeInfo
deriving Show
-data ParagraphStyle = ParagraphStyle { pStyle :: [String]
+data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle]
, indentation :: Maybe ParIndentation
, dropCap :: Bool
- , pHeading :: Maybe (String, Int)
- , pNumInfo :: Maybe (String, String)
- , pBlockQuote :: Maybe Bool
, pChange :: Maybe TrackedChange
}
deriving Show
@@ -227,9 +238,6 @@ defaultParagraphStyle :: ParagraphStyle
defaultParagraphStyle = ParagraphStyle { pStyle = []
, indentation = Nothing
, dropCap = False
- , pHeading = Nothing
- , pNumInfo = Nothing
- , pBlockQuote = Nothing
, pChange = Nothing
}
@@ -242,18 +250,61 @@ data BodyPart = Paragraph ParagraphStyle [ParPart]
type TblGrid = [Integer]
-data TblLook = TblLook {firstRowFormatting::Bool}
+newtype TblLook = TblLook {firstRowFormatting::Bool}
deriving Show
defaultTblLook :: TblLook
defaultTblLook = TblLook{firstRowFormatting = False}
-data Row = Row [Cell]
+newtype Row = Row [Cell]
deriving Show
-data Cell = Cell [BodyPart]
+newtype Cell = Cell [BodyPart]
deriving Show
+newtype CharStyleId = CharStyleId { fromCharStyleId :: String }
+ deriving (Show, Eq, Ord, FromStyleId)
+newtype ParaStyleId = ParaStyleId { fromParaStyleId :: String }
+ deriving (Show, Eq, Ord, FromStyleId)
+
+newtype CharStyleName = CharStyleName { fromCharStyleName :: CIString }
+ deriving (Show, Eq, Ord, IsString, FromStyleName)
+newtype ParaStyleName = ParaStyleName { fromParaStyleName :: CIString }
+ deriving (Show, Eq, Ord, IsString, FromStyleName)
+
+-- Case-insensitive comparisons
+newtype CIString = CIString String deriving (Show, IsString, FromStyleName)
+
+class FromStyleName a where
+ fromStyleName :: a -> String
+
+instance FromStyleName String where
+ fromStyleName = id
+
+class FromStyleId a where
+ fromStyleId :: a -> String
+
+instance FromStyleId String where
+ fromStyleId = id
+
+instance Eq CIString where
+ (==) = (==) `on` map toLower . coerce
+
+instance Ord CIString where
+ compare = compare `on` map toLower . coerce
+
+leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle
+leftBiasedMergeRunStyle a b = RunStyle
+ { isBold = isBold a <|> isBold b
+ , isItalic = isItalic a <|> isItalic b
+ , isSmallCaps = isSmallCaps a <|> isSmallCaps b
+ , isStrike = isStrike a <|> isStrike b
+ , isRTL = isRTL a <|> isRTL b
+ , rVertAlign = rVertAlign a <|> rVertAlign b
+ , rUnderline = rUnderline a <|> rUnderline b
+ , rParentStyle = rParentStyle a
+ }
+
-- (width, height) in EMUs
type Extent = Maybe (Double, Double)
@@ -285,21 +336,28 @@ data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
data VertAlign = BaseLn | SupScrpt | SubScrpt
deriving Show
-data RunStyle = RunStyle { isBold :: Maybe Bool
- , isItalic :: Maybe Bool
- , isSmallCaps :: Maybe Bool
- , isStrike :: Maybe Bool
- , isRTL :: Maybe Bool
- , rVertAlign :: Maybe VertAlign
- , rUnderline :: Maybe String
- , rStyle :: Maybe CharStyle
+data CharStyle = CharStyle { cStyleId :: CharStyleId
+ , cStyleName :: CharStyleName
+ , cStyleData :: RunStyle
+ } deriving (Show)
+
+data RunStyle = RunStyle { isBold :: Maybe Bool
+ , isItalic :: Maybe Bool
+ , isSmallCaps :: Maybe Bool
+ , isStrike :: Maybe Bool
+ , isRTL :: Maybe Bool
+ , rVertAlign :: Maybe VertAlign
+ , rUnderline :: Maybe String
+ , rParentStyle :: Maybe CharStyle
}
deriving Show
-data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
- , isBlockQuote :: Maybe Bool
- , numInfo :: Maybe (String, String)
- , psStyle :: Maybe ParStyle}
+data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
+ , numInfo :: Maybe (String, String)
+ , psParentStyle :: Maybe ParStyle
+ , pStyleName :: ParaStyleName
+ , pStyleId :: ParaStyleId
+ }
deriving Show
defaultRunStyle :: RunStyle
@@ -310,7 +368,7 @@ defaultRunStyle = RunStyle { isBold = Nothing
, isRTL = Nothing
, rVertAlign = Nothing
, rUnderline = Nothing
- , rStyle = Nothing
+ , rParentStyle = Nothing
}
type Target = String
@@ -390,7 +448,10 @@ elemToBody ns element | isElem ns "w" "body" element =
elemToBody _ _ = throwError WrongElem
archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
-archiveToStyles zf =
+archiveToStyles = archiveToStyles' getStyleId getStyleId
+archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
+ (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
+archiveToStyles' conv1 conv2 zf =
let stylesElem = findEntryByPath "word/styles.xml" zf >>=
(parseXMLDoc . UTF8.toStringLazy . fromEntry)
in
@@ -399,19 +460,17 @@ archiveToStyles zf =
Just styElem ->
let namespaces = elemToNameSpaces styElem
in
- ( M.fromList $ buildBasedOnList namespaces styElem
- (Nothing :: Maybe CharStyle),
- M.fromList $ buildBasedOnList namespaces styElem
- (Nothing :: Maybe ParStyle) )
+ ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
+ M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
-isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool
+isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
isBasedOnStyle ns element parentStyle
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
findAttrByName ns "w" "val"
- , Just ps <- parentStyle = basedOnVal == getStyleId ps
+ , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
@@ -419,30 +478,70 @@ isBasedOnStyle ns element parentStyle
, Nothing <- parentStyle = True
| otherwise = False
-class ElemToStyle a where
+class HasStyleId a => ElemToStyle a where
cStyleType :: Maybe a -> String
elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
- getStyleId :: a -> String
+
+class FromStyleId (StyleId a) => HasStyleId a where
+ type StyleId a
+ getStyleId :: a -> StyleId a
+
+class FromStyleName (StyleName a) => HasStyleName a where
+ type StyleName a
+ getStyleName :: a -> StyleName a
+
+class HasParentStyle a where
+ getParentStyle :: a -> Maybe a
+
+instance HasParentStyle CharStyle where
+ getParentStyle = rParentStyle . cStyleData
+
+instance HasParentStyle ParStyle where
+ getParentStyle = psParentStyle
+
+getStyleNames :: (Functor t, HasStyleName a) => t a -> t (StyleName a)
+getStyleNames = fmap getStyleName
+
+constructBogusParStyleData :: ParaStyleName -> ParStyle
+constructBogusParStyleData stName = ParStyle
+ { headingLev = Nothing
+ , numInfo = Nothing
+ , psParentStyle = Nothing
+ , pStyleName = stName
+ , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName
+ }
instance ElemToStyle CharStyle where
cStyleType _ = "character"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
- , Just "character" <- findAttrByName ns "w" "type" element
- , Just styleId <- findAttrByName ns "w" "styleId" element =
- Just (styleId, elemToRunStyle ns element parentStyle)
+ , Just "character" <- findAttrByName ns "w" "type" element =
+ elemToCharStyle ns element parentStyle
| otherwise = Nothing
- getStyleId s = fst s
+
+instance HasStyleId CharStyle where
+ type StyleId CharStyle = CharStyleId
+ getStyleId = cStyleId
+
+instance HasStyleName CharStyle where
+ type StyleName CharStyle = CharStyleName
+ getStyleName = cStyleName
instance ElemToStyle ParStyle where
cStyleType _ = "paragraph"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
, Just "paragraph" <- findAttrByName ns "w" "type" element
- , Just styleId <- findAttrByName ns "w" "styleId" element =
- Just (styleId, elemToParStyleData ns element parentStyle)
+ = elemToParStyleData ns element parentStyle
| otherwise = Nothing
- getStyleId s = fst s
+
+instance HasStyleId ParStyle where
+ type StyleId ParStyle = ParaStyleId
+ getStyleId = pStyleId
+
+instance HasStyleName ParStyle where
+ type StyleName ParStyle = ParaStyleName
+ getStyleName = pStyleName
getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
getStyleChildren ns element parentStyle
@@ -495,7 +594,7 @@ filePathToRelType "word/_rels/endnotes.xml.rels" _ = Just InEndnote
-- -- to see if it's a documentPath, we have to check against the dynamic
-- -- docPath specified in "_rels/.rels"
filePathToRelType path docXmlPath =
- if path == "word/_rels/" ++ (takeFileName docXmlPath) ++ ".rels"
+ if path == "word/_rels/" ++ takeFileName docXmlPath ++ ".rels"
then Just InDocument
else Nothing
@@ -537,7 +636,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
case lvlOverride of
Just (LevelOverride _ _ (Just lvl')) -> Just lvl'
Just (LevelOverride _ (Just strt) _) ->
- lookup ilvl $ map (\(Level i fmt s _) -> (i, (Level i fmt s (Just strt)))) lvls
+ lookup ilvl $ map (\(Level i fmt s _) -> (i, Level i fmt s (Just strt))) lvls
_ ->
lookup ilvl $ map (\l@(Level i _ _ _) -> (i, l)) lvls
@@ -693,6 +792,12 @@ testBitMask bitMaskS n =
stringToInteger :: String -> Maybe Integer
stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int)
+pHeading = getParStyleField headingLev . pStyle
+
+pNumInfo :: ParagraphStyle -> Maybe (String, String)
+pNumInfo = getParStyleField numInfo . pStyle
+
elemToBodyPart :: NameSpaces -> Element -> D BodyPart
elemToBodyPart ns element
| isElem ns "w" "p" element
@@ -703,23 +808,19 @@ elemToBodyPart ns element
elemToBodyPart ns element
| isElem ns "w" "p" element
, Just (numId, lvl) <- getNumInfo ns element = do
- sty <- asks envParStyles
- let parstyle = elemToParagraphStyle ns element sty
+ parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
parparts <- mapD (elemToParPart ns) (elChildren element)
- num <- asks envNumbering
- let levelInfo = lookupLevel numId lvl num
+ levelInfo <- lookupLevel numId lvl <$> asks envNumbering
return $ ListItem parstyle numId lvl levelInfo parparts
elemToBodyPart ns element
| isElem ns "w" "p" element = do
- sty <- asks envParStyles
- let parstyle = elemToParagraphStyle ns element sty
+ parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
parparts <- mapD (elemToParPart ns) (elChildren element)
-- Word uses list enumeration for numbered headings, so we only
-- want to infer a list from the styles if it is NOT a heading.
case pHeading parstyle of
Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
- num <- asks envNumbering
- let levelInfo = lookupLevel numId lvl num
+ levelInfo <- lookupLevel numId lvl <$> asks envNumbering
return $ ListItem parstyle numId lvl levelInfo parparts
_ -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
@@ -727,7 +828,7 @@ elemToBodyPart ns element
let caption' = findChildByName ns "w" "tblPr" element
>>= findChildByName ns "w" "tblCaption"
>>= findAttrByName ns "w" "val"
- caption = (fromMaybe "" caption')
+ caption = fromMaybe "" caption'
grid' = case findChildByName ns "w" "tblGrid" element of
Just g -> elemToTblGrid ns g
Nothing -> return []
@@ -1007,20 +1108,18 @@ elemToRun ns element
return $ Run runStyle runElems
elemToRun _ _ = throwError WrongElem
-getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a
+getParentStyleValue :: (ParStyle -> Maybe a) -> ParStyle -> Maybe a
getParentStyleValue field style
| Just value <- field style = Just value
- | Just parentStyle <- psStyle style
- = getParentStyleValue field (snd parentStyle)
+ | Just parentStyle <- psParentStyle style
+ = getParentStyleValue field parentStyle
getParentStyleValue _ _ = Nothing
-getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] ->
- Maybe a
-getParStyleField field stylemap styles
- | x <- mapMaybe (\x -> M.lookup x stylemap) styles
- , (y:_) <- mapMaybe (getParentStyleValue field) x
+getParStyleField :: (ParStyle -> Maybe a) -> [ParStyle] -> Maybe a
+getParStyleField field styles
+ | (y:_) <- mapMaybe (getParentStyleValue field) styles
= Just y
-getParStyleField _ _ _ = Nothing
+getParStyleField _ _ = Nothing
getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
getTrackedChange ns element
@@ -1042,10 +1141,10 @@ elemToParagraphStyle ns element sty
| Just pPr <- findChildByName ns "w" "pPr" element =
let style =
mapMaybe
- (findAttrByName ns "w" "val")
+ (fmap ParaStyleId . findAttrByName ns "w" "val")
(findChildrenByName ns "w" "pStyle" pPr)
in ParagraphStyle
- {pStyle = style
+ {pStyle = mapMaybe (`M.lookup` sty) style
, indentation =
findChildByName ns "w" "ind" pPr >>=
elemToParIndentation ns
@@ -1057,9 +1156,6 @@ elemToParagraphStyle ns element sty
Just "none" -> False
Just _ -> True
Nothing -> False
- , pHeading = getParStyleField headingLev sty style
- , pNumInfo = getParStyleField numInfo sty style
- , pBlockQuote = getParStyleField isBlockQuote sty style
, pChange = findChildByName ns "w" "rPr" pPr >>=
filterChild (\e -> isElem ns "w" "ins" e ||
isElem ns "w" "moveTo" e ||
@@ -1089,16 +1185,20 @@ elemToRunStyleD :: NameSpaces -> Element -> D RunStyle
elemToRunStyleD ns element
| Just rPr <- findChildByName ns "w" "rPr" element = do
charStyles <- asks envCharStyles
- let parentSty = case
+ let parentSty =
findChildByName ns "w" "rStyle" rPr >>=
- findAttrByName ns "w" "val"
- of
- Just styName | Just style <- M.lookup styName charStyles ->
- Just (styName, style)
- _ -> Nothing
+ findAttrByName ns "w" "val" >>=
+ flip M.lookup charStyles . CharStyleId
return $ elemToRunStyle ns element parentSty
elemToRunStyleD _ _ = return defaultRunStyle
+elemToCharStyle :: NameSpaces
+ -> Element -> Maybe CharStyle -> Maybe CharStyle
+elemToCharStyle ns element parentStyle
+ = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
+ <*> getElementStyleName ns element
+ <*> (Just $ elemToRunStyle ns element parentStyle)
+
elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
elemToRunStyle ns element parentStyle
| Just rPr <- findChildByName ns "w" "rPr" element =
@@ -1121,38 +1221,23 @@ elemToRunStyle ns element parentStyle
, rUnderline =
findChildByName ns "w" "u" rPr >>=
findAttrByName ns "w" "val"
- , rStyle = parentStyle
+ , rParentStyle = parentStyle
}
elemToRunStyle _ _ _ = defaultRunStyle
-getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int)
+getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
getHeaderLevel ns element
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , Just index <- stripPrefix "Heading" styleId
- , Just n <- stringToInteger index
- , n > 0 = Just (styleId, fromInteger n)
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , Just index <- findChildByName ns "w" "name" element >>=
- findAttrByName ns "w" "val" >>=
- stripPrefix "heading "
- , Just n <- stringToInteger index
- , n > 0 = Just (styleId, fromInteger n)
+ | Just styleName <- getElementStyleName ns element
+ , Just n <- stringToInteger =<<
+ (stripPrefix "heading " . map toLower $
+ fromStyleName styleName)
+ , n > 0 = Just (styleName, fromInteger n)
getHeaderLevel _ _ = Nothing
-blockQuoteStyleIds :: [String]
-blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"]
-
-blockQuoteStyleNames :: [String]
-blockQuoteStyleNames = ["Quote", "Block Text"]
-
-getBlockQuote :: NameSpaces -> Element -> Maybe Bool
-getBlockQuote ns element
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , styleId `elem` blockQuoteStyleIds = Just True
- | Just styleName <- findChildByName ns "w" "name" element >>=
- findAttrByName ns "w" "val"
- , styleName `elem` blockQuoteStyleNames = Just True
-getBlockQuote _ _ = Nothing
+getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a
+getElementStyleName ns el = coerce <$>
+ ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
+ <|> findAttrByName ns "w" "styleId" el)
getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
getNumInfo ns element = do
@@ -1167,15 +1252,19 @@ getNumInfo ns element = do
return (numId, lvl)
-elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData
-elemToParStyleData ns element parentStyle =
- ParStyleData
+elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
+elemToParStyleData ns element parentStyle
+ | Just styleId <- findAttrByName ns "w" "styleId" element
+ , Just styleName <- getElementStyleName ns element
+ = Just $ ParStyle
{
headingLev = getHeaderLevel ns element
- , isBlockQuote = getBlockQuote ns element
, numInfo = getNumInfo ns element
- , psStyle = parentStyle
- }
+ , psParentStyle = parentStyle
+ , pStyleName = styleName
+ , pStyleId = ParaStyleId styleId
+ }
+elemToParStyleData _ _ _ = Nothing
elemToRunElem :: NameSpaces -> Element -> D RunElem
elemToRunElem ns element
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 02db23db5..d62dbeedb 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -208,7 +208,7 @@ writeDocx opts doc@(Pandoc meta _) = do
let doc' = walk fixDisplayMath doc
username <- P.lookupEnv "USERNAME"
utctime <- P.getCurrentTime
- distArchive <- (toArchive . BL.fromStrict) <$> do
+ distArchive <- toArchive . BL.fromStrict <$> do
oldUserDataDir <- P.getUserDataDir
P.setUserDataDir Nothing
res <- P.readDefaultDataFile "reference.docx"
@@ -216,7 +216,7 @@ writeDocx opts doc@(Pandoc meta _) = do
return res
refArchive <- case writerReferenceDoc opts of
Just f -> toArchive <$> P.readFileLazy f
- Nothing -> (toArchive . BL.fromStrict) <$>
+ Nothing -> toArchive . BL.fromStrict <$>
P.readDataFile "reference.docx"
parsedDoc <- parseXml refArchive distArchive "word/document.xml"
@@ -237,7 +237,7 @@ writeDocx opts doc@(Pandoc meta _) = do
>>= subtrct mbAttrMarRight
>>= subtrct mbAttrMarLeft
where
- subtrct mbStr = \x -> mbStr >>= safeRead >>= (\y -> Just $ x - y)
+ subtrct mbStr x = mbStr >>= safeRead >>= (\y -> Just $ x - y)
-- styles
mblang <- toLang $ getLang opts meta
@@ -285,7 +285,7 @@ writeDocx opts doc@(Pandoc meta _) = do
envRTL = isRTLmeta
, envChangesAuthor = fromMaybe "unknown" username
, envChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime
- , envPrintWidth = maybe 420 (\x -> quot x 20) pgContentWidth
+ , envPrintWidth = maybe 420 (`quot` 20) pgContentWidth
}
@@ -366,7 +366,7 @@ writeDocx opts doc@(Pandoc meta _) = do
map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
"application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++
map mkImageOverride imgs ++
- map mkMediaOverride [ eRelativePath e | e <- zEntries refArchive
+ [ mkMediaOverride (eRelativePath e) | e <- zEntries refArchive
, "word/media/" `isPrefixOf` eRelativePath e ]
let defaultnodes = [mknode "Default"
@@ -589,8 +589,8 @@ writeDocx opts doc@(Pandoc meta _) = do
mapMaybe (fmap ("word/" ++) . extractTarget)
(headers ++ footers)
let miscRelEntries = [ e | e <- zEntries refArchive
- , "word/_rels/" `isPrefixOf` (eRelativePath e)
- , ".xml.rels" `isSuffixOf` (eRelativePath e)
+ , "word/_rels/" `isPrefixOf` eRelativePath e
+ , ".xml.rels" `isSuffixOf` eRelativePath e
, eRelativePath e /= "word/_rels/document.xml.rels"
, eRelativePath e /= "word/_rels/footnotes.xml.rels" ]
let otherMediaEntries = [ e | e <- zEntries refArchive
@@ -778,24 +778,24 @@ makeTOC opts = do
tocTitle <- gets stTocTitle
title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle])
return
- [mknode "w:sdt" [] ([
+ [mknode "w:sdt" [] [
mknode "w:sdtPr" [] (
- mknode "w:docPartObj" [] (
+ mknode "w:docPartObj" []
[mknode "w:docPartGallery" [("w:val","Table of Contents")] (),
mknode "w:docPartUnique" [] ()]
- ) -- w:docPartObj
+ -- w:docPartObj
), -- w:sdtPr
mknode "w:sdtContent" [] (title++[
mknode "w:p" [] (
- mknode "w:r" [] ([
+ mknode "w:r" [] [
mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (),
mknode "w:instrText" [("xml:space","preserve")] tocCmd,
mknode "w:fldChar" [("w:fldCharType","separate")] (),
mknode "w:fldChar" [("w:fldCharType","end")] ()
- ]) -- w:r
+ ] -- w:r
) -- w:p
])
- ])] -- w:sdt
+ ]] -- w:sdt
-- | Convert Pandoc document to two lists of
-- OpenXML elements (the main document and footnotes).
@@ -809,12 +809,12 @@ writeOpenXML opts (Pandoc meta blocks) = do
let includeTOC = writerTableOfContents opts || lookupMetaBool "toc" meta
title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
- authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $
+ authors <- withParaPropM (pStyleM "Author") $ blocksToOpenXML opts $
map Para auths
date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
abstract <- if null abstract'
then return []
- else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract'
+ else withParaPropM (pStyleM "Abstract") $ blocksToOpenXML opts abstract'
let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
convertSpace xs = xs
@@ -848,18 +848,12 @@ writeOpenXML opts (Pandoc meta blocks) = do
blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element]
blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
-pCustomStyle :: String -> Element
-pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
-
pStyleM :: (PandocMonad m) => String -> WS m XML.Element
pStyleM styleName = do
styleMaps <- gets stStyleMaps
let sty' = getStyleId styleName $ sParaStyleMap styleMaps
return $ mknode "w:pStyle" [("w:val",sty')] ()
-rCustomStyle :: String -> Element
-rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
-
rStyleM :: (PandocMonad m) => String -> WS m XML.Element
rStyleM styleName = do
styleMaps <- gets stStyleMaps
@@ -921,19 +915,19 @@ blockToOpenXML' opts (Header lev (ident,_,_) lst) = do
blockToOpenXML' opts (Plain lst) = do
isInTable <- gets stInTable
let block = blockToOpenXML opts (Para lst)
- para <- if isInTable then withParaProp (pCustomStyle "Compact") block else block
+ prop <- pStyleM "Compact"
+ para <- if isInTable then withParaProp prop block else block
return $ para
-
-- title beginning with fig: indicates that the image is a figure
blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do
setFirstPara
- let prop = pCustomStyle $
+ prop <- pStyleM $
if null alt
then "Figure"
- else "CaptionedFigure"
+ else "Captioned Figure"
paraProps <- local (\env -> env { envParaProperties = EnvProps (Just prop) [] <> envParaProperties env }) (getParaProps False)
contents <- inlinesToOpenXML opts [Image attr alt (src,tit)]
- captionNode <- withParaProp (pCustomStyle "ImageCaption")
+ captionNode <- withParaPropM (pStyleM "Image Caption")
$ blockToOpenXML opts (Para alt)
return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
blockToOpenXML' opts (Para lst)
@@ -944,10 +938,10 @@ blockToOpenXML' opts (Para lst)
[x] -> isDisplayMath x
_ -> False
paraProps <- getParaProps displayMathPara
- bodyTextStyle <- pStyleM "Body Text"
+ bodyTextStyle <- if isFirstPara
+ then pStyleM "First Paragraph"
+ else pStyleM "Body Text"
let paraProps' = case paraProps of
- [] | isFirstPara -> [mknode "w:pPr" []
- [pCustomStyle "FirstParagraph"]]
[] -> [mknode "w:pPr" [] [bodyTextStyle]]
ps -> ps
modify $ \s -> s { stFirstPara = False }
@@ -965,7 +959,7 @@ blockToOpenXML' opts (BlockQuote blocks) = do
setFirstPara
return p
blockToOpenXML' opts (CodeBlock attrs@(ident, _, _) str) = do
- p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str])
+ p <- withParaPropM (pStyleM "Source Code") (blockToOpenXML opts $ Para [Code attrs str])
setFirstPara
wrapBookmark ident p
blockToOpenXML' _ HorizontalRule = do
@@ -981,7 +975,7 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do
let captionStr = stringify caption
caption' <- if null caption
then return []
- else withParaProp (pCustomStyle "TableCaption")
+ else withParaPropM (pStyleM "Table Caption")
$ blockToOpenXML opts (Para caption)
let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
-- Table cells require a <w:p> element, even an empty one!
@@ -997,7 +991,8 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do
[ mknode "w:tcBorders" []
$ mknode "w:bottom" [("w:val","single")] ()
, mknode "w:vAlign" [("w:val","bottom")] () ]
- let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] [pCustomStyle "Compact"]]]
+ compactStyle <- pStyleM "Compact"
+ let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
let mkcell border contents = mknode "w:tc" []
$ [ borderProps | border ] ++
if null contents
@@ -1030,20 +1025,17 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do
: [ mkrow True headers' | hasHeader ] ++
map (mkrow False) rows'
)]
-blockToOpenXML' opts (BulletList lst) = do
- let marker = BulletMarker
- addList marker
- numid <- getNumId
- l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
- setFirstPara
- return l
-blockToOpenXML' opts (OrderedList (start, numstyle, numdelim) lst) = do
- let marker = NumberMarker numstyle numdelim start
- addList marker
- numid <- getNumId
- l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
- setFirstPara
- return l
+blockToOpenXML' opts el
+ | BulletList lst <- el = addOpenXMLList BulletMarker lst
+ | OrderedList (start, numstyle, numdelim) lst <- el
+ = addOpenXMLList (NumberMarker numstyle numdelim start) lst
+ where
+ addOpenXMLList marker lst = do
+ addList marker
+ numid <- getNumId
+ l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
+ setFirstPara
+ return l
blockToOpenXML' opts (DefinitionList items) = do
l <- concat `fmap` mapM (definitionListItemToOpenXML opts) items
setFirstPara
@@ -1051,9 +1043,9 @@ blockToOpenXML' opts (DefinitionList items) = do
definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element]
definitionListItemToOpenXML opts (term,defs) = do
- term' <- withParaProp (pCustomStyle "DefinitionTerm")
+ term' <- withParaPropM (pStyleM "Definition Term")
$ blockToOpenXML opts (Para term)
- defs' <- withParaProp (pCustomStyle "Definition")
+ defs' <- withParaPropM (pStyleM "Definition")
$ concat `fmap` mapM (blocksToOpenXML opts) defs
return $ term' ++ defs'
@@ -1159,7 +1151,7 @@ inlineToOpenXML' _ (Str str) =
formattedString str
inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ")
inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ")
-inlineToOpenXML' opts (Span (_,["underline"],_) ils) = do
+inlineToOpenXML' opts (Span (_,["underline"],_) ils) =
withTextProp (mknode "w:u" [("w:val","single")] ()) $
inlinesToOpenXML opts ils
inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do
@@ -1192,18 +1184,21 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
Just "rtl" -> local (\env -> env { envRTL = True })
Just "ltr" -> local (\env -> env { envRTL = False })
_ -> id
- let off x = withTextProp (mknode x [("w:val","0")] ())
- let pmod = (if "csl-no-emph" `elem` classes then off "w:i" else id) .
+ off x = withTextProp (mknode x [("w:val","0")] ())
+ pmod = (if "csl-no-emph" `elem` classes then off "w:i" else id) .
(if "csl-no-strong" `elem` classes then off "w:b" else id) .
(if "csl-no-smallcaps" `elem` classes
then off "w:smallCaps"
else id)
+ getChangeAuthorDate = do
+ defaultAuthor <- asks envChangesAuthor
+ defaultDate <- asks envChangesDate
+ let author = fromMaybe defaultAuthor (lookup "author" kvs)
+ date = fromMaybe defaultDate (lookup "date" kvs)
+ return (author, date)
insmod <- if "insertion" `elem` classes
then do
- defaultAuthor <- asks envChangesAuthor
- defaultDate <- asks envChangesDate
- let author = fromMaybe defaultAuthor (lookup "author" kvs)
- date = fromMaybe defaultDate (lookup "date" kvs)
+ (author, date) <- getChangeAuthorDate
insId <- gets stInsId
modify $ \s -> s{stInsId = insId + 1}
return $ \f -> do
@@ -1215,10 +1210,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
else return id
delmod <- if "deletion" `elem` classes
then do
- defaultAuthor <- asks envChangesAuthor
- defaultDate <- asks envChangesDate
- let author = fromMaybe defaultAuthor (lookup "author" kvs)
- date = fromMaybe defaultDate (lookup "date" kvs)
+ (author, date) <- getChangeAuthorDate
delId <- gets stDelId
modify $ \s -> s{stDelId = delId + 1}
return $ \f -> local (\env->env{envInDel=True}) $ do
@@ -1266,14 +1258,17 @@ inlineToOpenXML' opts (Math mathType str) = do
Left il -> inlineToOpenXML' opts il
inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst
inlineToOpenXML' opts (Code attrs str) = do
+ let alltoktypes = [KeywordTok ..]
+ tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (show tt)) alltoktypes
let unhighlighted = intercalate [br] `fmap`
mapM formattedString (lines str)
formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
- toHlTok (toktype,tok) = mknode "w:r" []
- [ mknode "w:rPr" []
- [ rCustomStyle (show toktype) ]
- , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ]
- withTextProp (rCustomStyle "VerbatimChar")
+ toHlTok (toktype,tok) =
+ mknode "w:r" []
+ [ mknode "w:rPr" [] $
+ maybeToList (lookup toktype tokTypesMap)
+ , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ]
+ withTextPropM (rStyleM "Verbatim Char")
$ if isNothing (writerHighlightStyle opts)
then unhighlighted
else case highlight (writerSyntaxMap opts)
@@ -1431,12 +1426,12 @@ defaultFootnotes :: [Element]
defaultFootnotes = [ mknode "w:footnote"
[("w:type", "separator"), ("w:id", "-1")]
[ mknode "w:p" []
- [mknode "w:r" [] $
+ [mknode "w:r" []
[ mknode "w:separator" [] ()]]]
, mknode "w:footnote"
[("w:type", "continuationSeparator"), ("w:id", "0")]
[ mknode "w:p" []
- [ mknode "w:r" [] $
+ [ mknode "w:r" []
[ mknode "w:continuationSeparator" [] ()]]]]
diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs
index 9d0913e55..583a6ec18 100644
--- a/test/Tests/Readers/Docx.hs
+++ b/test/Tests/Readers/Docx.hs
@@ -256,6 +256,10 @@ tests = [ testGroup "document"
"docx/lists.docx"
"docx/lists.native"
, testCompare
+ "compact lists"
+ "docx/lists-compact.docx"
+ "docx/lists-compact.native"
+ , testCompare
"lists with level overrides"
"docx/lists_level_override.docx"
"docx/lists_level_override.native"
@@ -425,6 +429,11 @@ tests = [ testGroup "document"
"custom styles (`+styles`) enabled"
"docx/custom-style-reference.docx"
"docx/custom-style-with-styles.native"
+ , testCompareWithOpts
+ def{readerExtensions=extensionsFromList [Ext_styles]}
+ "custom styles (`+styles`): Compact style is removed from output"
+ "docx/compact-style-removal.docx"
+ "docx/compact-style-removal.native"
]
, testGroup "metadata"
[ testCompareWithOpts def{readerStandalone=True}
diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native
index 804ad8732..6d8269b21 100644
--- a/test/docx/0_level_headers.native
+++ b/test/docx/0_level_headers.native
@@ -15,10 +15,10 @@
,Para [Str "FIGURES",Space,Str "iv"]
,Para [Str "TABLES",Space,Str "v"]
,Para [Str "SECTION",Space,Str "1",Space,Str "Introduction",Space,Str "2"]
-,Header 1 ("figures",["Heading0"],[]) [Str "FIGURES"]
+,Header 1 ("figures",["Heading-0"],[]) [Str "FIGURES"]
,Para [Strong [Str "Figure",Space,Str "Page"]]
,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
-,Header 1 ("tables",["Heading0"],[]) [Str "TABLES"]
+,Header 1 ("tables",["Heading-0"],[]) [Str "TABLES"]
,Para [Strong [Str "Table",Space,Str "Page"]]
,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
,Header 1 ("introduction",[],[]) [Str "Introduction"]
diff --git a/test/docx/adjacent_codeblocks.docx b/test/docx/adjacent_codeblocks.docx
index d61fb45d5..0fd44a183 100644
--- a/test/docx/adjacent_codeblocks.docx
+++ b/test/docx/adjacent_codeblocks.docx
Binary files differ
diff --git a/test/docx/compact-style-removal.docx b/test/docx/compact-style-removal.docx
new file mode 100644
index 000000000..fde0064db
--- /dev/null
+++ b/test/docx/compact-style-removal.docx
Binary files differ
diff --git a/test/docx/compact-style-removal.native b/test/docx/compact-style-removal.native
new file mode 100644
index 000000000..340878ba0
--- /dev/null
+++ b/test/docx/compact-style-removal.native
@@ -0,0 +1,5 @@
+[OrderedList (1,Decimal,Period)
+ [[Plain [Str "One"]]
+ ,[Plain [Str "Two"]]
+ ,[Plain [Str "Three"]]
+ ,[Plain [Str "Four"]]]]
diff --git a/test/docx/custom-style-with-styles.native b/test/docx/custom-style-with-styles.native
index 61f11911d..1ad7d88cc 100644
--- a/test/docx/custom-style-with-styles.native
+++ b/test/docx/custom-style-with-styles.native
@@ -1,7 +1,7 @@
-[Div ("",[],[("custom-style","FirstParagraph")])
+[Div ("",[],[("custom-style","First Paragraph")])
[Para [Str "This",Space,Str "is",Space,Str "some",Space,Str "text."]]
-,Div ("",[],[("custom-style","BodyText")])
+,Div ("",[],[("custom-style","Body Text")])
[Para [Str "This",Space,Str "is",Space,Str "text",Space,Str "with",Space,Str "an",Space,Span ("",[],[("custom-style","Emphatic")]) [Str "emphasized"],Space,Str "text",Space,Str "style.",Space,Str "And",Space,Str "this",Space,Str "is",Space,Str "text",Space,Str "with",Space,Str "a",Space,Span ("",[],[("custom-style","Strengthened")]) [Str "strengthened"],Space,Str "text",Space,Str "style."]]
-,Div ("",[],[("custom-style","MyBlockStyle")])
+,Div ("",[],[("custom-style","My Block Style")])
[BlockQuote
[Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "styled",Space,Str "paragraph",Space,Str "that",Space,Str "inherits",Space,Str "from",Space,Str "Block",Space,Str "Text."]]]]
diff --git a/test/docx/lists-compact.docx b/test/docx/lists-compact.docx
new file mode 100644
index 000000000..d7f9e4a06
--- /dev/null
+++ b/test/docx/lists-compact.docx
Binary files differ
diff --git a/test/docx/lists-compact.native b/test/docx/lists-compact.native
new file mode 100644
index 000000000..340878ba0
--- /dev/null
+++ b/test/docx/lists-compact.native
@@ -0,0 +1,5 @@
+[OrderedList (1,Decimal,Period)
+ [[Plain [Str "One"]]
+ ,[Plain [Str "Two"]]
+ ,[Plain [Str "Three"]]
+ ,[Plain [Str "Four"]]]]
diff --git a/test/docx/lists.docx b/test/docx/lists.docx
index 8b46351d9..356dc1ea9 100644
--- a/test/docx/lists.docx
+++ b/test/docx/lists.docx
Binary files differ
diff --git a/test/docx/lists.native b/test/docx/lists.native
index af922b335..1192da709 100644
--- a/test/docx/lists.native
+++ b/test/docx/lists.native
@@ -15,4 +15,4 @@
,Para [Str "Sub",Space,Str "paragraph"]]]]]]
,[Para [Str "Same",Space,Str "list"]]]
,BulletList
- [[Para [Str "Different",Space,Str "list",Space,Str "adjacent",Space,Str "to",Space,Str "the",Space,Str "one",Space,Str "above."]]]]
+ [[Plain [Str "Different",Space,Str "list",Space,Str "adjacent",Space,Str "to",Space,Str "the",Space,Str "one",Space,Str "above."]]]]
diff --git a/test/docx/nested_anchors_in_header.native b/test/docx/nested_anchors_in_header.native
index 562f60215..314b31663 100644
--- a/test/docx/nested_anchors_in_header.native
+++ b/test/docx/nested_anchors_in_header.native
@@ -1,4 +1,4 @@
-[Header 1 ("\1086\1075\1083\1072\1074\1083\1077\1085\1080\1077",["TOCHeading"],[]) [Str "\1054\1075\1083\1072\1074\1083\1077\1085\1080\1077"]
+[Header 1 ("\1086\1075\1083\1072\1074\1083\1077\1085\1080\1077",["TOC-Heading"],[]) [Str "\1054\1075\1083\1072\1074\1083\1077\1085\1080\1077"]
,Para [Link ("",[],[]) [Str "Short",Space,Str "instructions",Space,Str "1"] ("#short-instructions","")]
,Para [Link ("",[],[]) [Str "Some",Space,Str "instructions",Space,Str "1"] ("#some-instructions","")]
,Para [Link ("",[],[]) [Str "Remote",Space,Str "folder",Space,Str "or",Space,Str "longlonglonglonglong",Space,Str "file",Space,Str "with",Space,Str "manymanymanymany",Space,Str "letters",Space,Str "inside",Space,Str "opening",Space,Str "2"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")]