From c0fcc8a7891892357854cf498ce262b256fac1ca Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 29 Jun 2014 18:44:22 -0400 Subject: Docx reader: Add ParIndentation type to parser. This lets us keep more information about the indentation, and act accordingly in the reader. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 07f34450d..537c5c272 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -42,6 +42,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , Relationship , Media , RunStyle(..) + , ParIndentation(..) , ParagraphStyle(..) , Row(..) , Cell(..) @@ -341,16 +342,37 @@ testBitMask bitMaskS n = [] -> False ((n', _) : _) -> ((n' .|. n) /= 0) +data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer + , rightParIndent :: Maybe Integer + , hangingParIndent :: Maybe Integer} + deriving Show + data ParagraphStyle = ParagraphStyle { pStyle :: [String] - , indent :: Maybe Integer + , indentation :: Maybe ParIndentation } deriving Show defaultParagraphStyle :: ParagraphStyle defaultParagraphStyle = ParagraphStyle { pStyle = [] - , indent = Nothing + , indentation = Nothing } +elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation +elemToParIndentation ns element + | qName (elName element) == "ind" && + qURI (elName element) == (lookup "w" ns) = + Just $ ParIndentation { + leftParIndent = + findAttr (QName "left" (lookup "w" ns) (Just "w")) element >>= + stringToInteger + , rightParIndent = + findAttr (QName "right" (lookup "w" ns) (Just "w")) element >>= + stringToInteger + , hangingParIndent = + findAttr (QName "hanging" (lookup "w" ns) (Just "w")) element >>= + stringToInteger} +elemToParIndentation _ _ = Nothing + elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle elemToParagraphStyle ns element = case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of @@ -360,10 +382,9 @@ elemToParagraphStyle ns element = mapMaybe (findAttr (QName "val" (lookup "w" ns) (Just "w"))) (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) - , indent = + , indentation = findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= - findAttr (QName "left" (lookup "w" ns) (Just "w")) >>= - stringToInteger + elemToParIndentation ns } Nothing -> defaultParagraphStyle -- cgit v1.2.3 From 0f59196e0ef2f0977d404699ae16a48009fa7632 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 29 Jun 2014 22:50:29 -0400 Subject: Docx reader: Make use of new ParIndentation info. Here, when hanging indents are greater than or equal to left indents, we don't set it to block quote. Such indents are frequently used in academic bibliographies. (Thanks to Caleb McDaniel.) --- src/Text/Pandoc/Readers/Docx.hs | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 71baa5dde..8357b4cca 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -94,6 +94,7 @@ import System.FilePath (combine) import qualified Data.Map as M import Control.Monad.Reader import Control.Monad.State +import Control.Applicative (liftA2) readDocx :: ReaderOptions -> B.ByteString @@ -148,7 +149,6 @@ runStyleToContainers rPr = in classContainers ++ formatters - divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c = [Container $ \_ -> @@ -166,22 +166,37 @@ divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs -divAttrToContainers [] kvs | Just numString <- lookup "indent" kvs = - let kvs' = filter (\(k,_) -> k /= "indent") kvs +divAttrToContainers [] kvs | Just _ <- lookup "indent" kvs + , Just flInd <- lookup "first-line-indent" kvs = + let + kvs' = filter (\(k,_) -> notElem k ["indent", "first-line-indent"]) kvs in - case numString of - "0" -> divAttrToContainers [] kvs' - ('-' : _) -> divAttrToContainers [] kvs' - _ -> (Container BlockQuote) : divAttrToContainers [] kvs' + case flInd of + "0" -> divAttrToContainers [] kvs' + ('-':_) -> divAttrToContainers [] kvs' + _ -> (Container BlockQuote) : divAttrToContainers [] kvs' +divAttrToContainers [] kvs | Just ind <- lookup "indent" kvs = + let + kvs' = filter (\(k,_) -> notElem k ["indent"]) kvs + in + case ind of + "0" -> divAttrToContainers [] kvs' + ('-':_) -> divAttrToContainers [] kvs' + _ -> (Container BlockQuote) : divAttrToContainers [] kvs' + divAttrToContainers _ _ = [] parStyleToContainers :: ParagraphStyle -> [Container Block] parStyleToContainers pPr = let classes = pStyle pPr - kvs = case indent pPr of - Just n -> [("indent", show n)] - Nothing -> [] + indent = indentation pPr >>= leftParIndent + hanging = indentation pPr >>= hangingParIndent + firstLineIndent = liftA2 (-) indent hanging + kvs = mapMaybe id + [ indent >>= (\n -> Just ("indent", show n)), + firstLineIndent >>= (\n -> Just ("first-line-indent", show n)) + ] in divAttrToContainers classes kvs -- cgit v1.2.3