From b1eba3c65ca2981c78b66d1ee9218f3dfeda89c7 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 29 Jun 2014 08:14:36 -0400 Subject: Docx Reader: Update state properly Previously, a fresh state was created for the purpose of updating. In the future, when there is more than one field in the state, this obviously won't work. --- src/Text/Pandoc/Readers/Docx.hs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/Text/Pandoc/Readers/Docx.hs') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 71baa5dde..2e10ae3a4 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -110,6 +110,11 @@ data DEnv = DEnv { docxOptions :: ReaderOptions type DocxContext = ReaderT DEnv (State DState) +updateDState :: (DState -> DState) -> DocxContext () +updateDState f = do + st <- get + put $ f st + evalDocxContext :: DocxContext a -> DEnv -> DState -> a evalDocxContext ctx env st = evalState (runReaderT ctx env) st @@ -289,7 +294,7 @@ parPartToInlines (BookMark _ anchor) = let newAnchor = case anchor `elem` (M.elems anchorMap) of True -> uniqueIdent [Str anchor] (M.elems anchorMap) False -> anchor - put DState{ docxAnchorMap = M.insert anchor newAnchor anchorMap} + updateDState $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap} return [Span (anchor, ["anchor"], []) []] parPartToInlines (Drawing relid) = do (Docx _ _ _ rels _) <- asks docxDocument @@ -329,7 +334,7 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) do hdrIDMap <- gets docxAnchorMap let newIdent = uniqueIdent ils (M.elems hdrIDMap) - put DState{docxAnchorMap = M.insert ident newIdent hdrIDMap} + updateDState $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs)) -- Otherwise we just give it a name, and register that name (associate -- it with itself.) @@ -337,7 +342,7 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) = do hdrIDMap <- gets docxAnchorMap let newIdent = uniqueIdent ils (M.elems hdrIDMap) - put DState{docxAnchorMap = M.insert newIdent newIdent hdrIDMap} + updateDState $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) ils makeHeaderAnchor blk = return blk -- cgit v1.2.3 From 0f59196e0ef2f0977d404699ae16a48009fa7632 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 29 Jun 2014 22:50:29 -0400 Subject: Docx reader: Make use of new ParIndentation info. Here, when hanging indents are greater than or equal to left indents, we don't set it to block quote. Such indents are frequently used in academic bibliographies. (Thanks to Caleb McDaniel.) --- src/Text/Pandoc/Readers/Docx.hs | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) (limited to 'src/Text/Pandoc/Readers/Docx.hs') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 71baa5dde..8357b4cca 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -94,6 +94,7 @@ import System.FilePath (combine) import qualified Data.Map as M import Control.Monad.Reader import Control.Monad.State +import Control.Applicative (liftA2) readDocx :: ReaderOptions -> B.ByteString @@ -148,7 +149,6 @@ runStyleToContainers rPr = in classContainers ++ formatters - divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c = [Container $ \_ -> @@ -166,22 +166,37 @@ divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs -divAttrToContainers [] kvs | Just numString <- lookup "indent" kvs = - let kvs' = filter (\(k,_) -> k /= "indent") kvs +divAttrToContainers [] kvs | Just _ <- lookup "indent" kvs + , Just flInd <- lookup "first-line-indent" kvs = + let + kvs' = filter (\(k,_) -> notElem k ["indent", "first-line-indent"]) kvs in - case numString of - "0" -> divAttrToContainers [] kvs' - ('-' : _) -> divAttrToContainers [] kvs' - _ -> (Container BlockQuote) : divAttrToContainers [] kvs' + case flInd of + "0" -> divAttrToContainers [] kvs' + ('-':_) -> divAttrToContainers [] kvs' + _ -> (Container BlockQuote) : divAttrToContainers [] kvs' +divAttrToContainers [] kvs | Just ind <- lookup "indent" kvs = + let + kvs' = filter (\(k,_) -> notElem k ["indent"]) kvs + in + case ind of + "0" -> divAttrToContainers [] kvs' + ('-':_) -> divAttrToContainers [] kvs' + _ -> (Container BlockQuote) : divAttrToContainers [] kvs' + divAttrToContainers _ _ = [] parStyleToContainers :: ParagraphStyle -> [Container Block] parStyleToContainers pPr = let classes = pStyle pPr - kvs = case indent pPr of - Just n -> [("indent", show n)] - Nothing -> [] + indent = indentation pPr >>= leftParIndent + hanging = indentation pPr >>= hangingParIndent + firstLineIndent = liftA2 (-) indent hanging + kvs = mapMaybe id + [ indent >>= (\n -> Just ("indent", show n)), + firstLineIndent >>= (\n -> Just ("first-line-indent", show n)) + ] in divAttrToContainers classes kvs -- cgit v1.2.3 From 0abfd386a4697bab78ca098fe439623aad5f4069 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 30 Jun 2014 11:19:06 -0400 Subject: Docx reader: clean up parStyle processing. This gets rid of `divAttrToContainers`: an internal convenience function which had become pretty inconvenient. Rather than converting classes and indentations to string lists and back, we deal with the `pPr` attribute directly. --- src/Text/Pandoc/Readers/Docx.hs | 81 ++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 45 deletions(-) (limited to 'src/Text/Pandoc/Readers/Docx.hs') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 6e9cf44b5..61c17156e 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -94,7 +94,6 @@ import System.FilePath (combine) import qualified Data.Map as M import Control.Monad.Reader import Control.Monad.State -import Control.Applicative (liftA2) readDocx :: ReaderOptions -> B.ByteString @@ -154,56 +153,48 @@ runStyleToContainers rPr = in classContainers ++ formatters -divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] -divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c = - [Container $ \_ -> - Header n ("", delete ("Heading" ++ show n) cs, []) []] -divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = - (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) -divAttrToContainers (c:cs) kvs | c `elem` codeDivs = +parStyleToContainers :: ParagraphStyle -> [Container Block] +parStyleToContainers pPr | (c:cs) <- pStyle pPr, Just n <- isHeaderClass c = + [Container $ \_ -> Header n ("", delete ("Heading" ++ show n) cs, []) []] +parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` divsToKeep = + let pPr' = pPr { pStyle = cs } + in + (Container $ Div ("", [c], [])) : (parStyleToContainers pPr') +parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` codeDivs = -- This is a bit of a cludge. We make the codeblock from the raw -- parparts in bodyPartToBlocks. But we need something to match against. - (Container $ \_ -> CodeBlock ("", [], []) "") : (divAttrToContainers cs kvs) -divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = - let kvs' = filter (\(k,_) -> k /= "indent") kvs - in - (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs') -divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = - (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) -divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs -divAttrToContainers [] kvs | Just _ <- lookup "indent" kvs - , Just flInd <- lookup "first-line-indent" kvs = - let - kvs' = filter (\(k,_) -> notElem k ["indent", "first-line-indent"]) kvs + let pPr' = pPr { pStyle = cs } in - case flInd of - "0" -> divAttrToContainers [] kvs' - ('-':_) -> divAttrToContainers [] kvs' - _ -> (Container BlockQuote) : divAttrToContainers [] kvs' -divAttrToContainers [] kvs | Just ind <- lookup "indent" kvs = - let - kvs' = filter (\(k,_) -> notElem k ["indent"]) kvs + (Container $ \_ -> CodeBlock ("", [], []) "") : (parStyleToContainers pPr') +parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` listParagraphDivs = + let pPr' = pPr { pStyle = cs, indentation = Nothing} in - case ind of - "0" -> divAttrToContainers [] kvs' - ('-':_) -> divAttrToContainers [] kvs' - _ -> (Container BlockQuote) : divAttrToContainers [] kvs' - -divAttrToContainers _ _ = [] + (Container $ Div ("", [c], [])) : (parStyleToContainers pPr') - -parStyleToContainers :: ParagraphStyle -> [Container Block] -parStyleToContainers pPr = - let classes = pStyle pPr - indent = indentation pPr >>= leftParIndent - hanging = indentation pPr >>= hangingParIndent - firstLineIndent = liftA2 (-) indent hanging - kvs = mapMaybe id - [ indent >>= (\n -> Just ("indent", show n)), - firstLineIndent >>= (\n -> Just ("first-line-indent", show n)) - ] +parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` blockQuoteDivs = + let pPr' = pPr { pStyle = cs \\ blockQuoteDivs } + in + (Container BlockQuote) : (parStyleToContainers pPr') +parStyleToContainers pPr | (_:cs) <- pStyle pPr = + let pPr' = pPr { pStyle = cs} + in + parStyleToContainers pPr' +parStyleToContainers pPr | null (pStyle pPr), + Just left <- indentation pPr >>= leftParIndent, + Just hang <- indentation pPr >>= hangingParIndent = + let pPr' = pPr { indentation = Nothing } + in + case (left - hang) > 0 of + True -> (Container BlockQuote) : (parStyleToContainers pPr') + False -> parStyleToContainers pPr' +parStyleToContainers pPr | null (pStyle pPr), + Just left <- indentation pPr >>= leftParIndent = + let pPr' = pPr { indentation = Nothing } in - divAttrToContainers classes kvs + case left > 0 of + True -> (Container BlockQuote) : (parStyleToContainers pPr') + False -> parStyleToContainers pPr' +parStyleToContainers _ = [] strToInlines :: String -> [Inline] -- cgit v1.2.3