diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 77 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 31 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 191 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 9 |
4 files changed, 207 insertions, 101 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 71baa5dde..61c17156e 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -110,6 +110,11 @@ data DEnv = DEnv { docxOptions :: ReaderOptions type DocxContext = ReaderT DEnv (State DState) +updateDState :: (DState -> DState) -> DocxContext () +updateDState f = do + st <- get + put $ f st + evalDocxContext :: DocxContext a -> DEnv -> DState -> a evalDocxContext ctx env st = evalState (runReaderT ctx env) st @@ -148,42 +153,48 @@ runStyleToContainers rPr = in classContainers ++ formatters - -divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] -divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c = - [Container $ \_ -> - Header n ("", delete ("Heading" ++ show n) cs, []) []] -divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = - (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) -divAttrToContainers (c:cs) kvs | c `elem` codeDivs = +parStyleToContainers :: ParagraphStyle -> [Container Block] +parStyleToContainers pPr | (c:cs) <- pStyle pPr, Just n <- isHeaderClass c = + [Container $ \_ -> Header n ("", delete ("Heading" ++ show n) cs, []) []] +parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` divsToKeep = + let pPr' = pPr { pStyle = cs } + in + (Container $ Div ("", [c], [])) : (parStyleToContainers pPr') +parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` codeDivs = -- This is a bit of a cludge. We make the codeblock from the raw -- parparts in bodyPartToBlocks. But we need something to match against. - (Container $ \_ -> CodeBlock ("", [], []) "") : (divAttrToContainers cs kvs) -divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = - let kvs' = filter (\(k,_) -> k /= "indent") kvs + let pPr' = pPr { pStyle = cs } in - (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs') -divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = - (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) -divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs -divAttrToContainers [] kvs | Just numString <- lookup "indent" kvs = - let kvs' = filter (\(k,_) -> k /= "indent") kvs + (Container $ \_ -> CodeBlock ("", [], []) "") : (parStyleToContainers pPr') +parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` listParagraphDivs = + let pPr' = pPr { pStyle = cs, indentation = Nothing} in - case numString of - "0" -> divAttrToContainers [] kvs' - ('-' : _) -> divAttrToContainers [] kvs' - _ -> (Container BlockQuote) : divAttrToContainers [] kvs' -divAttrToContainers _ _ = [] - + (Container $ Div ("", [c], [])) : (parStyleToContainers pPr') -parStyleToContainers :: ParagraphStyle -> [Container Block] -parStyleToContainers pPr = - let classes = pStyle pPr - kvs = case indent pPr of - Just n -> [("indent", show n)] - Nothing -> [] +parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` blockQuoteDivs = + let pPr' = pPr { pStyle = cs \\ blockQuoteDivs } + in + (Container BlockQuote) : (parStyleToContainers pPr') +parStyleToContainers pPr | (_:cs) <- pStyle pPr = + let pPr' = pPr { pStyle = cs} + in + parStyleToContainers pPr' +parStyleToContainers pPr | null (pStyle pPr), + Just left <- indentation pPr >>= leftParIndent, + Just hang <- indentation pPr >>= hangingParIndent = + let pPr' = pPr { indentation = Nothing } + in + case (left - hang) > 0 of + True -> (Container BlockQuote) : (parStyleToContainers pPr') + False -> parStyleToContainers pPr' +parStyleToContainers pPr | null (pStyle pPr), + Just left <- indentation pPr >>= leftParIndent = + let pPr' = pPr { indentation = Nothing } in - divAttrToContainers classes kvs + case left > 0 of + True -> (Container BlockQuote) : (parStyleToContainers pPr') + False -> parStyleToContainers pPr' +parStyleToContainers _ = [] strToInlines :: String -> [Inline] @@ -289,7 +300,7 @@ parPartToInlines (BookMark _ anchor) = let newAnchor = case anchor `elem` (M.elems anchorMap) of True -> uniqueIdent [Str anchor] (M.elems anchorMap) False -> anchor - put DState{ docxAnchorMap = M.insert anchor newAnchor anchorMap} + updateDState $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap} return [Span (anchor, ["anchor"], []) []] parPartToInlines (Drawing relid) = do (Docx _ _ _ rels _) <- asks docxDocument @@ -329,7 +340,7 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) do hdrIDMap <- gets docxAnchorMap let newIdent = uniqueIdent ils (M.elems hdrIDMap) - put DState{docxAnchorMap = M.insert ident newIdent hdrIDMap} + updateDState $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs)) -- Otherwise we just give it a name, and register that name (associate -- it with itself.) @@ -337,7 +348,7 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) = do hdrIDMap <- gets docxAnchorMap let newIdent = uniqueIdent ils (M.elems hdrIDMap) - put DState{docxAnchorMap = M.insert newIdent newIdent hdrIDMap} + updateDState $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) ils makeHeaderAnchor blk = return blk diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 07f34450d..537c5c272 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -42,6 +42,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , Relationship , Media , RunStyle(..) + , ParIndentation(..) , ParagraphStyle(..) , Row(..) , Cell(..) @@ -341,16 +342,37 @@ testBitMask bitMaskS n = [] -> False ((n', _) : _) -> ((n' .|. n) /= 0) +data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer + , rightParIndent :: Maybe Integer + , hangingParIndent :: Maybe Integer} + deriving Show + data ParagraphStyle = ParagraphStyle { pStyle :: [String] - , indent :: Maybe Integer + , indentation :: Maybe ParIndentation } deriving Show defaultParagraphStyle :: ParagraphStyle defaultParagraphStyle = ParagraphStyle { pStyle = [] - , indent = Nothing + , indentation = Nothing } +elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation +elemToParIndentation ns element + | qName (elName element) == "ind" && + qURI (elName element) == (lookup "w" ns) = + Just $ ParIndentation { + leftParIndent = + findAttr (QName "left" (lookup "w" ns) (Just "w")) element >>= + stringToInteger + , rightParIndent = + findAttr (QName "right" (lookup "w" ns) (Just "w")) element >>= + stringToInteger + , hangingParIndent = + findAttr (QName "hanging" (lookup "w" ns) (Just "w")) element >>= + stringToInteger} +elemToParIndentation _ _ = Nothing + elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle elemToParagraphStyle ns element = case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of @@ -360,10 +382,9 @@ elemToParagraphStyle ns element = mapMaybe (findAttr (QName "val" (lookup "w" ns) (Just "w"))) (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) - , indent = + , indentation = findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= - findAttr (QName "left" (lookup "w" ns) (Just "w")) >>= - stringToInteger + elemToParIndentation ns } Nothing -> defaultParagraphStyle diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 5b0d9b6b4..4a536330d 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -55,6 +55,8 @@ module Text.Pandoc.Shared ( normalizeSpaces, extractSpaces, normalize, + normalizeInlines, + normalizeBlocks, stringify, compactify, compactify', @@ -84,7 +86,6 @@ module Text.Pandoc.Shared ( import Text.Pandoc.Definition import Text.Pandoc.Walk -import Text.Pandoc.Generic import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..)) import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.UTF8 as UTF8 @@ -350,72 +351,142 @@ extractSpaces f is = -- | Normalize @Pandoc@ document, consolidating doubled 'Space's, -- combining adjacent 'Str's and 'Emph's, remove 'Null's and -- empty elements, etc. -normalize :: (Eq a, Data a) => a -> a -normalize = topDown removeEmptyBlocks . - topDown consolidateInlines . - bottomUp (removeEmptyInlines . removeTrailingInlineSpaces) - -removeEmptyBlocks :: [Block] -> [Block] -removeEmptyBlocks (Null : xs) = removeEmptyBlocks xs -removeEmptyBlocks (BulletList [] : xs) = removeEmptyBlocks xs -removeEmptyBlocks (OrderedList _ [] : xs) = removeEmptyBlocks xs -removeEmptyBlocks (DefinitionList [] : xs) = removeEmptyBlocks xs -removeEmptyBlocks (RawBlock _ [] : xs) = removeEmptyBlocks xs -removeEmptyBlocks (x:xs) = x : removeEmptyBlocks xs -removeEmptyBlocks [] = [] - -removeEmptyInlines :: [Inline] -> [Inline] -removeEmptyInlines (Emph [] : zs) = removeEmptyInlines zs -removeEmptyInlines (Strong [] : zs) = removeEmptyInlines zs -removeEmptyInlines (Subscript [] : zs) = removeEmptyInlines zs -removeEmptyInlines (Superscript [] : zs) = removeEmptyInlines zs -removeEmptyInlines (SmallCaps [] : zs) = removeEmptyInlines zs -removeEmptyInlines (Strikeout [] : zs) = removeEmptyInlines zs -removeEmptyInlines (RawInline _ [] : zs) = removeEmptyInlines zs -removeEmptyInlines (Code _ [] : zs) = removeEmptyInlines zs -removeEmptyInlines (Str "" : zs) = removeEmptyInlines zs -removeEmptyInlines (x : xs) = x : removeEmptyInlines xs -removeEmptyInlines [] = [] - -removeTrailingInlineSpaces :: [Inline] -> [Inline] -removeTrailingInlineSpaces = reverse . removeLeadingInlineSpaces . reverse - -removeLeadingInlineSpaces :: [Inline] -> [Inline] -removeLeadingInlineSpaces = dropWhile isSpaceOrEmpty - -consolidateInlines :: [Inline] -> [Inline] -consolidateInlines (Str x : ys) = +normalize :: Pandoc -> Pandoc +normalize (Pandoc (Meta meta) blocks) = + Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks) + where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs + go (MetaBlocks xs) = MetaBlocks $ normalizeBlocks xs + go (MetaList ms) = MetaList $ map go ms + go (MetaMap m) = MetaMap $ M.map go m + go x = x + +normalizeBlocks :: [Block] -> [Block] +normalizeBlocks (Null : xs) = normalizeBlocks xs +normalizeBlocks (Div attr bs : xs) = + Div attr (normalizeBlocks bs) : normalizeBlocks xs +normalizeBlocks (BlockQuote bs : xs) = + case normalizeBlocks bs of + [] -> normalizeBlocks xs + bs' -> BlockQuote bs' : normalizeBlocks xs +normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs +normalizeBlocks (BulletList items : xs) = + BulletList (map normalizeBlocks items) : normalizeBlocks xs +normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs +normalizeBlocks (OrderedList attr items : xs) = + OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs +normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs +normalizeBlocks (DefinitionList items : xs) = + DefinitionList (map go items) : normalizeBlocks xs + where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs) +normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs +normalizeBlocks (Para ils : xs) = + case normalizeInlines ils of + [] -> normalizeBlocks xs + ils' -> Para ils' : normalizeBlocks xs +normalizeBlocks (Plain ils : xs) = + case normalizeInlines ils of + [] -> normalizeBlocks xs + ils' -> Plain ils' : normalizeBlocks xs +normalizeBlocks (Header lev attr ils : xs) = + Header lev attr (normalizeInlines ils) : normalizeBlocks xs +normalizeBlocks (Table capt aligns widths hdrs rows : xs) = + Table (normalizeInlines capt) aligns widths + (map normalizeBlocks hdrs) (map (map normalizeBlocks) rows) + : normalizeBlocks xs +normalizeBlocks (x:xs) = x : normalizeBlocks xs +normalizeBlocks [] = [] + +normalizeInlines :: [Inline] -> [Inline] +normalizeInlines (Str x : ys) = case concat (x : map fromStr strs) of - "" -> consolidateInlines rest - n -> Str n : consolidateInlines rest + "" -> rest + n -> Str n : rest where - (strs, rest) = span isStr ys + (strs, rest) = span isStr $ normalizeInlines ys isStr (Str _) = True isStr _ = False fromStr (Str z) = z - fromStr _ = error "consolidateInlines - fromStr - not a Str" -consolidateInlines (Space : ys) = Space : rest + fromStr _ = error "normalizeInlines - fromStr - not a Str" +normalizeInlines (Space : ys) = + if null rest + then [] + else Space : rest where isSp Space = True isSp _ = False - rest = consolidateInlines $ dropWhile isSp ys -consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $ - Emph (xs ++ ys) : zs -consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $ - Strong (xs ++ ys) : zs -consolidateInlines (Subscript xs : Subscript ys : zs) = consolidateInlines $ - Subscript (xs ++ ys) : zs -consolidateInlines (Superscript xs : Superscript ys : zs) = consolidateInlines $ - Superscript (xs ++ ys) : zs -consolidateInlines (SmallCaps xs : SmallCaps ys : zs) = consolidateInlines $ - SmallCaps (xs ++ ys) : zs -consolidateInlines (Strikeout xs : Strikeout ys : zs) = consolidateInlines $ - Strikeout (xs ++ ys) : zs -consolidateInlines (RawInline f x : RawInline f' y : zs) | f == f' = - consolidateInlines $ RawInline f (x ++ y) : zs -consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 = - consolidateInlines $ Code a1 (x ++ y) : zs -consolidateInlines (x : xs) = x : consolidateInlines xs -consolidateInlines [] = [] + rest = dropWhile isSp $ normalizeInlines ys +normalizeInlines (Emph xs : zs) = + case normalizeInlines zs of + (Emph ys : rest) -> normalizeInlines $ + Emph (normalizeInlines $ xs ++ ys) : rest + rest -> case normalizeInlines xs of + [] -> rest + xs' -> Emph xs' : rest +normalizeInlines (Strong xs : zs) = + case normalizeInlines zs of + (Strong ys : rest) -> normalizeInlines $ + Strong (normalizeInlines $ xs ++ ys) : rest + rest -> case normalizeInlines xs of + [] -> rest + xs' -> Strong xs' : rest +normalizeInlines (Subscript xs : zs) = + case normalizeInlines zs of + (Subscript ys : rest) -> normalizeInlines $ + Subscript (normalizeInlines $ xs ++ ys) : rest + rest -> case normalizeInlines xs of + [] -> rest + xs' -> Subscript xs' : rest +normalizeInlines (Superscript xs : zs) = + case normalizeInlines zs of + (Superscript ys : rest) -> normalizeInlines $ + Superscript (normalizeInlines $ xs ++ ys) : rest + rest -> case normalizeInlines xs of + [] -> rest + xs' -> Superscript xs' : rest +normalizeInlines (SmallCaps xs : zs) = + case normalizeInlines zs of + (SmallCaps ys : rest) -> normalizeInlines $ + SmallCaps (normalizeInlines $ xs ++ ys) : rest + rest -> case normalizeInlines xs of + [] -> rest + xs' -> SmallCaps xs' : rest +normalizeInlines (Strikeout xs : zs) = + case normalizeInlines zs of + (Strikeout ys : rest) -> normalizeInlines $ + Strikeout (normalizeInlines $ xs ++ ys) : rest + rest -> case normalizeInlines xs of + [] -> rest + xs' -> Strikeout xs' : rest +normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys +normalizeInlines (RawInline f xs : zs) = + case normalizeInlines zs of + (RawInline f' ys : rest) | f == f' -> normalizeInlines $ + RawInline f (xs ++ ys) : rest + rest -> RawInline f xs : rest +normalizeInlines (Code _ "" : ys) = normalizeInlines ys +normalizeInlines (Code attr xs : zs) = + case normalizeInlines zs of + (Code attr' ys : rest) | attr == attr' -> normalizeInlines $ + Code attr (xs ++ ys) : rest + rest -> Code attr xs : rest +-- allow empty spans, they may carry identifiers etc. +-- normalizeInlines (Span _ [] : ys) = normalizeInlines ys +normalizeInlines (Span attr xs : zs) = + case normalizeInlines zs of + (Span attr' ys : rest) | attr == attr' -> normalizeInlines $ + Span attr (normalizeInlines $ xs ++ ys) : rest + rest -> Span attr (normalizeInlines xs) : rest +normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) : + normalizeInlines ys +normalizeInlines (Quoted qt ils : ys) = + Quoted qt (normalizeInlines ils) : normalizeInlines ys +normalizeInlines (Link ils t : ys) = + Link (normalizeInlines ils) t : normalizeInlines ys +normalizeInlines (Image ils t : ys) = + Image (normalizeInlines ils) t : normalizeInlines ys +normalizeInlines (Cite cs ils : ys) = + Cite cs (normalizeInlines ils) : normalizeInlines ys +normalizeInlines (x : xs) = x : normalizeInlines xs +normalizeInlines [] = [] -- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 31e64f14e..4b787b023 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -514,8 +514,11 @@ blockToOpenXML :: WriterOptions -> Block -> WS [Element] blockToOpenXML _ Null = return [] blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do - contents <- withParaProp (pStyle $ "Heading" ++ show lev) $ - blockToOpenXML opts (Para lst) + + paraProps <- withParaProp (pStyle $ "Heading" ++ show lev) $ + getParaProps False + contents <- inlinesToOpenXML opts lst + usedIdents <- gets stSectionIds let bookmarkName = if null ident then uniqueIdent lst usedIdents @@ -525,7 +528,7 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () - return $ [bookmarkStart] ++ contents ++ [bookmarkEnd] + return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure |