From 41f4476aabe4eacec06e47c5d6dba2503b1eb1f0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 23 Jan 2017 15:18:34 +0100 Subject: OpenDocument writer: don't profilerate text styles unnecessarily. This change makes the writer create only as many temporary text styles as are absolutely necessary. It also consolidates adjacent nodes with the same style. Closes #3371. --- src/Text/Pandoc/Writers/OpenDocument.hs | 87 ++++++++++++------ tests/writer.opendocument | 152 +++++++++----------------------- 2 files changed, 102 insertions(+), 137 deletions(-) diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index dec394797..5107cb499 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -41,8 +41,11 @@ import Text.Printf ( printf ) import Control.Arrow ( (***), (>>>) ) import Control.Monad.State hiding ( when ) import Data.Char (chr) +import qualified Data.Set as Set import qualified Data.Map as Map import Text.Pandoc.Writers.Shared +import Data.List (sortBy) +import Data.Ord (comparing) -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block @@ -58,8 +61,8 @@ data WriterState = , stTableStyles :: [Doc] , stParaStyles :: [Doc] , stListStyles :: [(Int, [Doc])] - , stTextStyles :: [Doc] - , stTextStyleAttr :: Map.Map TextStyle [(String,String)] + , stTextStyles :: Map.Map (Set.Set TextStyle) (String, Doc) + , stTextStyleAttr :: Set.Set TextStyle , stIndentPara :: Int , stInDefinition :: Bool , stTight :: Bool @@ -73,8 +76,8 @@ defaultWriterState = , stTableStyles = [] , stParaStyles = [] , stListStyles = [] - , stTextStyles = [] - , stTextStyleAttr = Map.empty + , stTextStyles = Map.empty + , stTextStyleAttr = Set.empty , stIndentPara = 0 , stInDefinition = False , stTight = False @@ -94,14 +97,13 @@ addNote i = modify $ \s -> s { stNotes = i : stNotes s } addParaStyle :: Doc -> State WriterState () addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s } -addTextStyle :: Doc -> State WriterState () -addTextStyle i = modify $ \s -> s { stTextStyles = i : stTextStyles s } +addTextStyle :: Set.Set TextStyle -> (String, Doc) -> State WriterState () +addTextStyle attrs i = modify $ \s -> + s { stTextStyles = Map.insert attrs i (stTextStyles s) } -addTextStyleAttr :: (TextStyle, [(String,String)]) -> State WriterState () -addTextStyleAttr (ts, xs) = modify $ \s -> s { stTextStyleAttr = Map.insert ts xs (stTextStyleAttr s) } - -rmTextStyleAttr :: TextStyle -> State WriterState () -rmTextStyleAttr ts = modify $ \s -> s { stTextStyleAttr = Map.delete ts (stTextStyleAttr s) } +addTextStyleAttr :: TextStyle -> State WriterState () +addTextStyleAttr t = modify $ \s -> + s { stTextStyleAttr = Set.insert t (stTextStyleAttr s) } increaseIndent :: State WriterState () increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s } @@ -136,20 +138,33 @@ inSpanTags :: String -> Doc -> Doc inSpanTags s = inTags False "text:span" [("text:style-name",s)] withTextStyle :: TextStyle -> State WriterState a -> State WriterState a -withTextStyle s f = addTextStyleAttr (s,textStyleAttr s) >> - f >>= \r -> rmTextStyleAttr s >> return r +withTextStyle s f = do + oldTextStyleAttr <- gets stTextStyleAttr + addTextStyleAttr s + res <- f + modify $ \st -> st{ stTextStyleAttr = oldTextStyleAttr } + return res inTextStyle :: Doc -> State WriterState Doc inTextStyle d = do at <- gets stTextStyleAttr - if Map.null at + if Set.null at then return d else do - tn <- (+) 1 . length <$> gets stTextStyles - addTextStyle $ inTags False "style:style" [("style:name" , "T" ++ show tn) - ,("style:family", "text" )] - $ selfClosingTag "style:text-properties" (concatMap snd $ Map.toList at) - return $ inTags False "text:span" [("text:style-name","T" ++ show tn)] d + styles <- gets stTextStyles + case Map.lookup at styles of + Just (styleName, _) -> return $ + inTags False "text:span" [("text:style-name",styleName)] d + Nothing -> do + let styleName = "T" ++ show (Map.size styles + 1) + addTextStyle at (styleName, + inTags False "style:style" + [("style:name", styleName) + ,("style:family", "text")] + $ selfClosingTag "style:text-properties" + (concatMap textStyleAttr (Set.toList at))) + return $ inTags False + "text:span" [("text:style-name",styleName)] d inHeaderTags :: Int -> Doc -> State WriterState Doc inHeaderTags i d = @@ -188,7 +203,9 @@ writeOpenDocument opts (Pandoc meta blocks) = meta b <- render' `fmap` blocksToOpenDocument opts blocks return (b, m) - styles = stTableStyles s ++ stParaStyles s ++ stTextStyles s + styles = stTableStyles s ++ stParaStyles s ++ + map snd (reverse $ sortBy (comparing fst) $ + Map.elems (stTextStyles s)) listStyle (n,l) = inTags True "text:list-style" [("style:name", "L" ++ show n)] (vcat l) listStyles = map listStyle (stListStyles s) @@ -371,20 +388,40 @@ tableItemToOpenDocument o tn (n,i) = -- | Convert a list of inline elements to OpenDocument. inlinesToOpenDocument :: WriterOptions -> [Inline] -> State WriterState Doc -inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l +inlinesToOpenDocument o l = hcat <$> toChunks o l + +toChunks :: WriterOptions -> [Inline] -> State WriterState [Doc] +toChunks _ [] = return [] +toChunks o (x : xs) + | isChunkable x = do + contents <- (inTextStyle . hcat) =<< + mapM (inlineToOpenDocument o) (x:ys) + rest <- toChunks o zs + return (contents : rest) + | otherwise = do + contents <- inlineToOpenDocument o x + rest <- toChunks o xs + return (contents : rest) + where (ys, zs) = span isChunkable xs + +isChunkable :: Inline -> Bool +isChunkable (Str _) = True +isChunkable Space = True +isChunkable SoftBreak = True +isChunkable _ = False -- | Convert an inline element to OpenDocument. inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc inlineToOpenDocument o ils = case ils of - Space -> inTextStyle space + Space -> return space SoftBreak | writerWrapText o == WrapPreserve - -> inTextStyle (preformatted "\n") - | otherwise -> inTextStyle space + -> return $ preformatted "\n" + | otherwise -> return $ space Span _ xs -> inlinesToOpenDocument o xs LineBreak -> return $ selfClosingTag "text:line-break" [] - Str s -> inTextStyle $ handleSpaces $ escapeStringForXML s + Str s -> return $ handleSpaces $ escapeStringForXML s Emph l -> withTextStyle Italic $ inlinesToOpenDocument o l Strong l -> withTextStyle Bold $ inlinesToOpenDocument o l Strikeout l -> withTextStyle Strike $ inlinesToOpenDocument o l diff --git a/tests/writer.opendocument b/tests/writer.opendocument index 6c84ae31b..d613ab5b8 100644 --- a/tests/writer.opendocument +++ b/tests/writer.opendocument @@ -632,73 +632,13 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + @@ -871,7 +811,7 @@ link Level 5 Level 1 Level 2 with -emphasis +emphasis Level 3 with no blank line Level 2 @@ -1244,11 +1184,11 @@ fruit yellow fruit Multiple blocks with italics: -apple +apple red fruitcontains seeds, crisp, pleasant to taste -orange +orange orange fruit{ orange code block }orange block quote Multiple definitions, @@ -1295,9 +1235,9 @@ indentation: Interpreted markdown in a table: This is -emphasized +emphasized And this is -strong +strong Here’s a simple block: foo This should be a code block, @@ -1323,47 +1263,38 @@ spaces on the line: Inline Markup This is -emphasized, and so -is -this. +emphasized, and so +is this. This is -strong, and so -is -this. +strong, and so +is this. An -emphasized -link. -This -is -strong -and -em. +emphasized +link. +This is +strong and em. So is -this word. -This -is -strong -and -em. +this word. +This is +strong and em. So is -this word. +this word. This is code: >, $, \, \$, <html>. -This -is -strikeout. +This is +strikeout. Superscripts: -abcd -ahello -ahello there. +abcd +ahello +ahello there. Subscripts: -H2O, -H23O, -Hmany of themO. +H2O, +H23O, +Hmany of themO. These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d. @@ -1395,16 +1326,16 @@ five. 2 + 2 = 4 - x ∈ y + x ∈ y - α ∧ ω + α ∧ ω 223 - p-Tree + p-Tree Here’s some display math: @@ -1412,7 +1343,7 @@ five. Here’s one that has a line break in it: - α + ω × x2. + α + ω × x2. These shouldn’t be math: @@ -1423,7 +1354,7 @@ five. $22,000 is a - lot of money. So is $34,000. + lot of money. So is $34,000. (It worked if “lot” is emphasized.) @@ -1432,10 +1363,7 @@ five. Escaped $: $73 - this - should - be - emphasized + this should be emphasized 23$. @@ -1585,10 +1513,10 @@ indented to show that they belong to the footnote (as with list items).{ <code> }If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. This -should not be a footnote +should not be a footnote reference, because it contains a space.[^my note] Here is an inline note.3This -is easier to type. Inline notes +is easier to type. Inline notes may contain links and ] verbatim -- cgit v1.2.3