aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx.hs')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs162
1 files changed, 84 insertions, 78 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index a26986af2..9d17ab118 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -3,6 +3,7 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE MultiWayIf #-}
+{-# LANGUAGE FlexibleContexts #-}
{- |
Module : Text.Pandoc.Readers.Docx
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -65,6 +66,7 @@ import Control.Monad.State.Strict
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect)
+import Data.Char (isSpace)
import qualified Data.Map as M
import Data.Maybe (isJust, fromMaybe)
import Data.Sequence (ViewL (..), viewl)
@@ -133,13 +135,13 @@ evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a
evalDocxContext ctx env st = flip evalStateT st $ runReaderT ctx env
-- This is empty, but we put it in for future-proofing.
-spansToKeep :: [String]
+spansToKeep :: [CharStyleName]
spansToKeep = []
-divsToKeep :: [String]
-divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
+divsToKeep :: [ParaStyleName]
+divsToKeep = ["Definition", "Definition Term"]
-metaStyles :: M.Map String String
+metaStyles :: M.Map ParaStyleName String
metaStyles = M.fromList [ ("Title", "title")
, ("Subtitle", "subtitle")
, ("Author", "author")
@@ -151,7 +153,7 @@ sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp)
isMetaPar :: BodyPart -> Bool
isMetaPar (Paragraph pPr _) =
- not $ null $ intersect (pStyle pPr) (M.keys metaStyles)
+ not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys metaStyles)
isMetaPar _ = False
isEmptyPar :: BodyPart -> Bool
@@ -168,7 +170,7 @@ bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String M
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
- , (c : _)<- (pStyle pPr) `intersect` (M.keys metaStyles)
+ , (c : _)<- getStyleNames (pStyle pPr) `intersect` M.keys metaStyles
, (Just metaField) <- M.lookup c metaStyles = do
inlines <- smushInlines <$> mapM parPartToInlines parParts
remaining <- bodyPartsToMeta' bps
@@ -198,11 +200,29 @@ fixAuthors (MetaBlocks blks) =
g _ = MetaInlines []
fixAuthors mv = mv
-codeStyles :: [String]
-codeStyles = ["VerbatimChar"]
+isInheritedFromStyles :: (Eq (StyleName s), HasStyleName s, HasParentStyle s) => [StyleName s] -> s -> Bool
+isInheritedFromStyles names sty
+ | getStyleName sty `elem` names = True
+ | Just psty <- getParentStyle sty = isInheritedFromStyles names psty
+ | otherwise = False
-codeDivs :: [String]
-codeDivs = ["SourceCode"]
+hasStylesInheritedFrom :: [ParaStyleName] -> ParagraphStyle -> Bool
+hasStylesInheritedFrom ns s = any (isInheritedFromStyles ns) $ pStyle s
+
+removeStyleNamed :: ParaStyleName -> ParagraphStyle -> ParagraphStyle
+removeStyleNamed sn ps = ps{pStyle = filter (\psd -> getStyleName psd /= sn) $ pStyle ps}
+
+isCodeCharStyle :: CharStyle -> Bool
+isCodeCharStyle = isInheritedFromStyles ["Verbatim Char"]
+
+isCodeDiv :: ParagraphStyle -> Bool
+isCodeDiv = hasStylesInheritedFrom ["Source Code"]
+
+isBlockQuote :: ParStyle -> Bool
+isBlockQuote =
+ isInheritedFromStyles [
+ "Quote", "Block Text", "Block Quote", "Block Quotation"
+ ]
runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s) = text s
@@ -228,57 +248,31 @@ parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
parPartToString _ = ""
-blacklistedCharStyles :: [String]
+blacklistedCharStyles :: [CharStyleName]
blacklistedCharStyles = ["Hyperlink"]
resolveDependentRunStyle :: PandocMonad m => RunStyle -> DocxContext m RunStyle
resolveDependentRunStyle rPr
- | Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles =
+ | Just s <- rParentStyle rPr
+ , getStyleName s `elem` blacklistedCharStyles =
return rPr
- | Just (_, cs) <- rStyle rPr = do
+ | Just s <- rParentStyle rPr = do
opts <- asks docxOptions
if isEnabled Ext_styles opts
then return rPr
- else do rPr' <- resolveDependentRunStyle cs
- return $
- RunStyle { isBold = case isBold rPr of
- Just bool -> Just bool
- Nothing -> isBold rPr'
- , isItalic = case isItalic rPr of
- Just bool -> Just bool
- Nothing -> isItalic rPr'
- , isSmallCaps = case isSmallCaps rPr of
- Just bool -> Just bool
- Nothing -> isSmallCaps rPr'
- , isStrike = case isStrike rPr of
- Just bool -> Just bool
- Nothing -> isStrike rPr'
- , isRTL = case isRTL rPr of
- Just bool -> Just bool
- Nothing -> isRTL rPr'
- , rVertAlign = case rVertAlign rPr of
- Just valign -> Just valign
- Nothing -> rVertAlign rPr'
- , rUnderline = case rUnderline rPr of
- Just ulstyle -> Just ulstyle
- Nothing -> rUnderline rPr'
- , rStyle = rStyle rPr
- }
+ else leftBiasedMergeRunStyle rPr <$> resolveDependentRunStyle (cStyleData s)
| otherwise = return rPr
runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines)
runStyleToTransform rPr
- | Just (s, _) <- rStyle rPr
- , s `elem` spansToKeep = do
- transform <- runStyleToTransform rPr{rStyle = Nothing}
- return $ spanWith ("", [s], []) . transform
- | Just (s, _) <- rStyle rPr = do
- opts <- asks docxOptions
- let extraInfo = if isEnabled Ext_styles opts
- then spanWith ("", [], [("custom-style", s)])
- else id
- transform <- runStyleToTransform rPr{rStyle = Nothing}
- return $ extraInfo . transform
+ | Just sn <- getStyleName <$> rParentStyle rPr
+ , sn `elem` spansToKeep = do
+ transform <- runStyleToTransform rPr{rParentStyle = Nothing}
+ return $ spanWith ("", [normalizeToClassName sn], []) . transform
+ | Just s <- rParentStyle rPr = do
+ ei <- extraInfo spanWith s
+ transform <- runStyleToTransform rPr{rParentStyle = Nothing}
+ return $ ei . transform
| Just True <- isItalic rPr = do
transform <- runStyleToTransform rPr{isItalic = Nothing}
return $ emph . transform
@@ -310,8 +304,7 @@ runStyleToTransform rPr
runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
- | Just (s, _) <- rStyle rs
- , s `elem` codeStyles = do
+ | maybe False isCodeCharStyle $ rParentStyle rs = do
rPr <- resolveDependentRunStyle rs
let codeString = code $ concatMap runElemToString runElems
return $ case rVertAlign rPr of
@@ -526,39 +519,49 @@ trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils
isSp LineBreak = True
isSp _ = False
+extraInfo :: (Eq (StyleName a), PandocMonad m, HasStyleName a)
+ => (Attr -> i -> i) -> a -> DocxContext m (i -> i)
+extraInfo f s = do
+ opts <- asks docxOptions
+ return $ if | isEnabled Ext_styles opts
+ -> f ("", [], [("custom-style", fromStyleName $ getStyleName s)])
+ | otherwise -> id
+
parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
parStyleToTransform pPr
| (c:cs) <- pStyle pPr
- , c `elem` divsToKeep = do
+ , getStyleName c `elem` divsToKeep = do
let pPr' = pPr { pStyle = cs }
transform <- parStyleToTransform pPr'
- return $ divWith ("", [c], []) . transform
+ return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform
| (c:cs) <- pStyle pPr,
- c `elem` listParagraphDivs = do
+ getStyleName c `elem` listParagraphStyles = do
let pPr' = pPr { pStyle = cs, indentation = Nothing}
transform <- parStyleToTransform pPr'
- return $ divWith ("", [c], []) . transform
+ return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform
| (c:cs) <- pStyle pPr = do
- opts <- asks docxOptions
- let pPr' = pPr { pStyle = cs}
+ let pPr' = pPr { pStyle = cs }
transform <- parStyleToTransform pPr'
- let extraInfo = if isEnabled Ext_styles opts
- then divWith ("", [], [("custom-style", c)])
- else id
- return $ extraInfo . (if fromMaybe False (pBlockQuote pPr) then blockQuote else id) . transform
+ ei <- extraInfo divWith c
+ return $ ei . (if isBlockQuote c then blockQuote else id) . transform
| null (pStyle pPr)
, Just left <- indentation pPr >>= leftParIndent = do
let pPr' = pPr { indentation = Nothing }
hang = fromMaybe 0 $ indentation pPr >>= hangingParIndent
transform <- parStyleToTransform pPr'
- return $ if (left - hang) > 0
+ return $ if (left - hang) > 0
then blockQuote . transform
else transform
parStyleToTransform _ = return id
+normalizeToClassName :: (FromStyleName a) => a -> String
+normalizeToClassName = map go . fromStyleName
+ where go c | isSpace c = '-'
+ | otherwise = c
+
bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
bodyPartToBlocks (Paragraph pPr parparts)
- | not $ null $ codeDivs `intersect` (pStyle pPr) = do
+ | isCodeDiv pPr = do
transform <- parStyleToTransform pPr
return $
transform $
@@ -568,13 +571,16 @@ bodyPartToBlocks (Paragraph pPr parparts)
ils <-local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
makeHeaderAnchor $
- headerWith ("", delete style (pStyle pPr), []) n ils
+ headerWith ("", map normalizeToClassName . delete style $ getStyleNames (pStyle pPr), []) n ils
| otherwise = do
ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts
prevParaIls <- gets docxPrevPara
dropIls <- gets docxDropCap
let ils' = dropIls <> ils
- if dropCap pPr
+ let (paraOrPlain, pPr')
+ | hasStylesInheritedFrom ["Compact"] pPr = (plain, removeStyleNamed "Compact" pPr)
+ | otherwise = (para, pPr)
+ if dropCap pPr'
then do modify $ \s -> s { docxDropCap = ils' }
return mempty
else do modify $ \s -> s { docxDropCap = mempty }
@@ -583,41 +589,41 @@ bodyPartToBlocks (Paragraph pPr parparts)
ils'
handleInsertion = do
modify $ \s -> s {docxPrevPara = mempty}
- transform <- parStyleToTransform pPr
- return $ transform $ para ils''
+ transform <- parStyleToTransform pPr'
+ return $ transform $ paraOrPlain ils''
opts <- asks docxOptions
if | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) ->
return mempty
- | Just (TrackedChange Insertion _) <- pChange pPr
+ | Just (TrackedChange Insertion _) <- pChange pPr'
, AcceptChanges <- readerTrackChanges opts ->
handleInsertion
- | Just (TrackedChange Insertion _) <- pChange pPr
+ | Just (TrackedChange Insertion _) <- pChange pPr'
, RejectChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = ils''}
return mempty
- | Just (TrackedChange Insertion cInfo) <- pChange pPr
+ | Just (TrackedChange Insertion cInfo) <- pChange pPr'
, AllChanges <- readerTrackChanges opts
, ChangeInfo _ cAuthor cDate <- cInfo -> do
let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
- transform <- parStyleToTransform pPr
+ transform <- parStyleToTransform pPr'
return $ transform $
- para $ ils'' <> insertMark
- | Just (TrackedChange Deletion _) <- pChange pPr
+ paraOrPlain $ ils'' <> insertMark
+ | Just (TrackedChange Deletion _) <- pChange pPr'
, AcceptChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = ils''}
return mempty
- | Just (TrackedChange Deletion _) <- pChange pPr
+ | Just (TrackedChange Deletion _) <- pChange pPr'
, RejectChanges <- readerTrackChanges opts ->
handleInsertion
- | Just (TrackedChange Deletion cInfo) <- pChange pPr
+ | Just (TrackedChange Deletion cInfo) <- pChange pPr'
, AllChanges <- readerTrackChanges opts
, ChangeInfo _ cAuthor cDate <- cInfo -> do
let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
- transform <- parStyleToTransform pPr
+ transform <- parStyleToTransform pPr'
return $ transform $
- para $ ils'' <> insertMark
+ paraOrPlain $ ils'' <> insertMark
| otherwise -> handleInsertion
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
-- We check whether this current numId has previously been used,
@@ -638,7 +644,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
blks <- bodyPartToBlocks (Paragraph pPr parparts)
return $ divWith ("", ["list-item"], kvs) blks
bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
- let pPr' = pPr {pStyle = "ListParagraph": pStyle pPr}
+ let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr}
in
bodyPartToBlocks $ Paragraph pPr' parparts
bodyPartToBlocks (Tbl _ _ _ []) =