From c0fcc8a7891892357854cf498ce262b256fac1ca Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 29 Jun 2014 18:44:22 -0400 Subject: Docx reader: Add ParIndentation type to parser. This lets us keep more information about the indentation, and act accordingly in the reader. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 07f34450d..537c5c272 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -42,6 +42,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , Relationship , Media , RunStyle(..) + , ParIndentation(..) , ParagraphStyle(..) , Row(..) , Cell(..) @@ -341,16 +342,37 @@ testBitMask bitMaskS n = [] -> False ((n', _) : _) -> ((n' .|. n) /= 0) +data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer + , rightParIndent :: Maybe Integer + , hangingParIndent :: Maybe Integer} + deriving Show + data ParagraphStyle = ParagraphStyle { pStyle :: [String] - , indent :: Maybe Integer + , indentation :: Maybe ParIndentation } deriving Show defaultParagraphStyle :: ParagraphStyle defaultParagraphStyle = ParagraphStyle { pStyle = [] - , indent = Nothing + , indentation = Nothing } +elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation +elemToParIndentation ns element + | qName (elName element) == "ind" && + qURI (elName element) == (lookup "w" ns) = + Just $ ParIndentation { + leftParIndent = + findAttr (QName "left" (lookup "w" ns) (Just "w")) element >>= + stringToInteger + , rightParIndent = + findAttr (QName "right" (lookup "w" ns) (Just "w")) element >>= + stringToInteger + , hangingParIndent = + findAttr (QName "hanging" (lookup "w" ns) (Just "w")) element >>= + stringToInteger} +elemToParIndentation _ _ = Nothing + elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle elemToParagraphStyle ns element = case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of @@ -360,10 +382,9 @@ elemToParagraphStyle ns element = mapMaybe (findAttr (QName "val" (lookup "w" ns) (Just "w"))) (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) - , indent = + , indentation = findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= - findAttr (QName "left" (lookup "w" ns) (Just "w")) >>= - stringToInteger + elemToParIndentation ns } Nothing -> defaultParagraphStyle -- cgit v1.2.3 From 0f59196e0ef2f0977d404699ae16a48009fa7632 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 29 Jun 2014 22:50:29 -0400 Subject: Docx reader: Make use of new ParIndentation info. Here, when hanging indents are greater than or equal to left indents, we don't set it to block quote. Such indents are frequently used in academic bibliographies. (Thanks to Caleb McDaniel.) --- src/Text/Pandoc/Readers/Docx.hs | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 71baa5dde..8357b4cca 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -94,6 +94,7 @@ import System.FilePath (combine) import qualified Data.Map as M import Control.Monad.Reader import Control.Monad.State +import Control.Applicative (liftA2) readDocx :: ReaderOptions -> B.ByteString @@ -148,7 +149,6 @@ runStyleToContainers rPr = in classContainers ++ formatters - divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c = [Container $ \_ -> @@ -166,22 +166,37 @@ divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs -divAttrToContainers [] kvs | Just numString <- lookup "indent" kvs = - let kvs' = filter (\(k,_) -> k /= "indent") kvs +divAttrToContainers [] kvs | Just _ <- lookup "indent" kvs + , Just flInd <- lookup "first-line-indent" kvs = + let + kvs' = filter (\(k,_) -> notElem k ["indent", "first-line-indent"]) kvs in - case numString of - "0" -> divAttrToContainers [] kvs' - ('-' : _) -> divAttrToContainers [] kvs' - _ -> (Container BlockQuote) : divAttrToContainers [] kvs' + case flInd of + "0" -> divAttrToContainers [] kvs' + ('-':_) -> divAttrToContainers [] kvs' + _ -> (Container BlockQuote) : divAttrToContainers [] kvs' +divAttrToContainers [] kvs | Just ind <- lookup "indent" kvs = + let + kvs' = filter (\(k,_) -> notElem k ["indent"]) kvs + in + case ind of + "0" -> divAttrToContainers [] kvs' + ('-':_) -> divAttrToContainers [] kvs' + _ -> (Container BlockQuote) : divAttrToContainers [] kvs' + divAttrToContainers _ _ = [] parStyleToContainers :: ParagraphStyle -> [Container Block] parStyleToContainers pPr = let classes = pStyle pPr - kvs = case indent pPr of - Just n -> [("indent", show n)] - Nothing -> [] + indent = indentation pPr >>= leftParIndent + hanging = indentation pPr >>= hangingParIndent + firstLineIndent = liftA2 (-) indent hanging + kvs = mapMaybe id + [ indent >>= (\n -> Just ("indent", show n)), + firstLineIndent >>= (\n -> Just ("first-line-indent", show n)) + ] in divAttrToContainers classes kvs -- cgit v1.2.3 From 1405e7b7091dedb50578c9e0cafd62f2c77ca689 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 29 Jun 2014 23:37:00 -0400 Subject: Docx reader: Add tests for hanging indent handline. We want to treat it as a plain paragraph if the hanging amount is greater to or equal to the left indent---i.e., if the first line has zero indentation. But we still want it to be a block quote if it starts to the right of the margin. Someone might format verse with wrapping lines with a hanging indent, for example. --- tests/Tests/Readers/Docx.hs | 4 ++++ tests/docx.hanging_indent.docx | Bin 0 -> 29924 bytes tests/docx.hanging_indent.native | 3 +++ 3 files changed, 7 insertions(+) create mode 100644 tests/docx.hanging_indent.docx create mode 100644 tests/docx.hanging_indent.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 8c51217cf..a379bbf23 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -112,6 +112,10 @@ tests = [ testGroup "inlines" "blockquotes (parsing indent as blockquote)" "docx.block_quotes.docx" "docx.block_quotes_parse_indent.native" + , testCompare + "hanging indents" + "docx.hanging_indent.docx" + "docx.hanging_indent.native" , testCompare "tables" "docx.tables.docx" diff --git a/tests/docx.hanging_indent.docx b/tests/docx.hanging_indent.docx new file mode 100644 index 000000000..6f62dc731 Binary files /dev/null and b/tests/docx.hanging_indent.docx differ diff --git a/tests/docx.hanging_indent.native b/tests/docx.hanging_indent.native new file mode 100644 index 000000000..138a6967f --- /dev/null +++ b/tests/docx.hanging_indent.native @@ -0,0 +1,3 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "hanging",Space,Str "indent,",Space,Str "with",Space,Str "the",Space,Str "left",Space,Str "side",Space,Str "set",Space,Str "to",Space,Str "the",Space,Str "left",Space,Str "margin,",Space,Str "and",Space,Str "it",Space,Str "wraps",Space,Str "around",Space,Str "the",Space,Str "line."] +,BlockQuote + [Para [Str "Five",Space,Str "years",Space,Str "have",Space,Str "passed,",Space,Str "five",Space,Str "summers",Space,Str "with",Space,Str "the",Space,Str "length"]]] -- cgit v1.2.3