aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx.hs')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs76
1 files changed, 39 insertions, 37 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 387c3c7e2..cd4ff01db 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -4,6 +4,7 @@
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.Docx
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -68,12 +69,12 @@ import Data.Default (Default)
import Data.List (delete, intersect)
import Data.Char (isSpace)
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe (isJust, fromMaybe)
import Data.Sequence (ViewL (..), viewl)
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import Text.Pandoc.Builder
--- import Text.Pandoc.Definition
import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.Options
import Text.Pandoc.Readers.Docx.Combine
@@ -101,14 +102,14 @@ readDocx opts bytes
readDocx _ _ =
throwError $ PandocSomeError "couldn't parse docx file"
-data DState = DState { docxAnchorMap :: M.Map String String
- , docxAnchorSet :: Set.Set String
- , docxImmedPrevAnchor :: Maybe String
+data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
+ , docxAnchorSet :: Set.Set T.Text
+ , docxImmedPrevAnchor :: Maybe T.Text
, docxMediaBag :: MediaBag
, docxDropCap :: Inlines
-- keep track of (numId, lvl) values for
-- restarting
- , docxListState :: M.Map (String, String) Integer
+ , docxListState :: M.Map (T.Text, T.Text) Integer
, docxPrevPara :: Inlines
}
@@ -142,7 +143,7 @@ spansToKeep = []
divsToKeep :: [ParaStyleName]
divsToKeep = ["Definition", "Definition Term"]
-metaStyles :: M.Map ParaStyleName String
+metaStyles :: M.Map ParaStyleName T.Text
metaStyles = M.fromList [ ("Title", "title")
, ("Subtitle", "subtitle")
, ("Author", "author")
@@ -167,7 +168,7 @@ isEmptyPar (Paragraph _ parParts) =
isEmptyElem _ = True
isEmptyPar _ = False
-bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue)
+bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue)
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
@@ -232,22 +233,22 @@ runElemToInlines Tab = space
runElemToInlines SoftHyphen = text "\xad"
runElemToInlines NoBreakHyphen = text "\x2011"
-runElemToString :: RunElem -> String
-runElemToString (TextRun s) = s
-runElemToString LnBrk = ['\n']
-runElemToString Tab = ['\t']
-runElemToString SoftHyphen = ['\xad']
-runElemToString NoBreakHyphen = ['\x2011']
+runElemToText :: RunElem -> T.Text
+runElemToText (TextRun s) = s
+runElemToText LnBrk = T.singleton '\n'
+runElemToText Tab = T.singleton '\t'
+runElemToText SoftHyphen = T.singleton '\xad'
+runElemToText NoBreakHyphen = T.singleton '\x2011'
-runToString :: Run -> String
-runToString (Run _ runElems) = concatMap runElemToString runElems
-runToString _ = ""
+runToText :: Run -> T.Text
+runToText (Run _ runElems) = T.concat $ map runElemToText runElems
+runToText _ = ""
-parPartToString :: ParPart -> String
-parPartToString (PlainRun run) = runToString run
-parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
-parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
-parPartToString _ = ""
+parPartToText :: ParPart -> T.Text
+parPartToText (PlainRun run) = runToText run
+parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs
+parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs
+parPartToText _ = ""
blacklistedCharStyles :: [CharStyleName]
blacklistedCharStyles = ["Hyperlink"]
@@ -310,7 +311,7 @@ runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
| maybe False isCodeCharStyle $ rParentStyle rs = do
rPr <- resolveDependentRunStyle rs
- let codeString = code $ concatMap runElemToString runElems
+ let codeString = code $ T.concat $ map runElemToText runElems
return $ case rVertAlign rPr of
Just SupScrpt -> superscript codeString
Just SubScrpt -> subscript codeString
@@ -328,17 +329,17 @@ runToInlines (Endnote bps) = do
return $ note blksList
runToInlines (InlineDrawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
- return $ imageWith (extentToAttr ext) fp title $ text alt
+ return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"
extentToAttr :: Extent -> Attr
extentToAttr (Just (w, h)) =
("", [], [("width", showDim w), ("height", showDim h)] )
where
- showDim d = show (d / 914400) ++ "in"
+ showDim d = tshow (d / 914400) <> "in"
extentToAttr _ = nullAttr
-blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines
+blocksToInlinesWarn :: PandocMonad m => T.Text -> Blocks -> DocxContext m Inlines
blocksToInlinesWarn cmtId blks = do
let blkList = toList blks
notParaOrPlain :: Block -> Bool
@@ -347,7 +348,7 @@ blocksToInlinesWarn cmtId blks = do
notParaOrPlain _ = True
unless ( not (any notParaOrPlain blkList)) $
lift $ P.report $ DocxParserWarning $
- "Docx comment " ++ cmtId ++ " will not retain formatting"
+ "Docx comment " <> cmtId <> " will not retain formatting"
return $ blocksToInlines' blkList
-- The majority of work in this function is done in the primed
@@ -440,12 +441,12 @@ parPartToInlines' (BookMark _ anchor) =
return $ spanWith (newAnchor, ["anchor"], []) mempty
parPartToInlines' (Drawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
- return $ imageWith (extentToAttr ext) fp title $ text alt
+ return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
parPartToInlines' Chart =
return $ spanWith ("", ["chart"], []) $ text "[CHART]"
parPartToInlines' (InternalHyperLink anchor runs) = do
ils <- smushInlines <$> mapM runToInlines runs
- return $ link ('#' : anchor) "" ils
+ return $ link ("#" <> anchor) "" ils
parPartToInlines' (ExternalHyperLink target runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link target "" ils
@@ -463,7 +464,7 @@ isAnchorSpan (Span (_, classes, kvs) _) =
null kvs
isAnchorSpan _ = False
-dummyAnchors :: [String]
+dummyAnchors :: [T.Text]
dummyAnchors = ["_GoBack"]
makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks
@@ -477,7 +478,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
, (Span (anchIdent, ["anchor"], _) cIls) <- c = do
hdrIDMap <- gets docxAnchorMap
exts <- readerExtensions <$> asks docxOptions
- let newIdent = if null ident
+ let newIdent = if T.null ident
then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
newIls = concatMap f ils where f il | il == c = cIls
@@ -490,7 +491,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) =
do
hdrIDMap <- gets docxAnchorMap
exts <- readerExtensions <$> asks docxOptions
- let newIdent = if null ident
+ let newIdent = if T.null ident
then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
@@ -558,8 +559,8 @@ parStyleToTransform pPr
else transform
parStyleToTransform _ = return id
-normalizeToClassName :: (FromStyleName a) => a -> String
-normalizeToClassName = map go . fromStyleName
+normalizeToClassName :: (FromStyleName a) => a -> T.Text
+normalizeToClassName = T.map go . fromStyleName
where go c | isSpace c = '-'
| otherwise = c
@@ -574,7 +575,8 @@ bodyPartToBlocks (Paragraph pPr parparts)
return $
transform $
codeBlock $
- concatMap parPartToString parparts
+ T.concat $
+ map parPartToText parparts
| Just (style, n) <- pHeading pPr = do
ils <-local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
@@ -646,7 +648,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
, ("num-id", numId)
, ("format", fmt)
, ("text", txt)
- , ("start", show start)
+ , ("start", tshow start)
]
modify $ \st -> st{ docxListState =
-- expire all the continuation data for lists of level > this one:
@@ -705,12 +707,12 @@ bodyPartToBlocks (OMathPara e) =
-- replace targets with generated anchors.
rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
-rewriteLink' l@(Link attr ils ('#':target, title)) = do
+rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do
anchorMap <- gets docxAnchorMap
case M.lookup target anchorMap of
Just newTarget -> do
modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)}
- return $ Link attr ils ('#':newTarget, title)
+ return $ Link attr ils ("#" <> newTarget, title)
Nothing -> do
modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)}
return l