From 86fc44d6b3f82a2b274d4b592d1dd6152bd1eaf5 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 17:53:52 -0400 Subject: Add literal tabs to parser. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 22e9dd909..18200bcf9 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -466,7 +466,7 @@ data Run = Run RunStyle [RunElem] | Endnote String deriving Show -data RunElem = TextRun String | LnBrk +data RunElem = TextRun String | LnBrk | Tab deriving Show data RunStyle = RunStyle { isBold :: Bool @@ -545,6 +545,9 @@ elemToRunElem ns element | qName (elName element) == "br" && qURI (elName element) == (lookup "w" ns) = Just $ LnBrk + | qName (elName element) == "tab" && + qURI (elName element) == (lookup "w" ns) = + Just $ Tab | otherwise = Nothing -- cgit v1.2.3 From 0e7d2dbd4304902cb6c6d4e9618592b5148dc598 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 17:55:02 -0400 Subject: Have Docx reader properly interpret tabs. --- src/Text/Pandoc/Readers/Docx.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 4035cde99..c43879ed9 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -134,10 +134,12 @@ codeDivs = ["SourceCode"] runElemToInlines :: RunElem -> [Inline] runElemToInlines (TextRun s) = strToInlines s runElemToInlines (LnBrk) = [LineBreak] +runElemToInlines (Tab) = [Space] runElemToString :: RunElem -> String runElemToString (TextRun s) = s runElemToString (LnBrk) = ['\n'] +runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString -- cgit v1.2.3 From a934db9a320ec76e15e62954b75d0e8d2d972244 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 19:28:55 -0400 Subject: Introduce blockNormalize This will help take care of spaces introduced at the beginning of strings. --- src/Text/Pandoc/Readers/Docx.hs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index c43879ed9..9c1d0c5e6 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -150,6 +150,19 @@ strNormalize (Str "" : ils) = strNormalize ils strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) strNormalize (il:ils) = il : (strNormalize ils) +blockNormalize :: Block -> Block +blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils) +blockNormalize (Plain ils) = Plain $ strNormalize ils +blockNormalize (Para (Space : ils)) = blockNormalize (Para ils) +blockNormalize (Para ils) = Para $ strNormalize ils +blockNormalize (Header n attr (Space : ils)) = + blockNormalize $ Header n attr ils +blockNormalize (Table (Space : ils) align width hdr cells) = + blockNormalize $ Table ils align width hdr cells +blockNormalize (Table ils align width hdr cells) = + Table (strNormalize ils) align width hdr cells +blockNormalize blk = blk + runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] runToInlines _ _ (Run rs runElems) | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = @@ -296,7 +309,7 @@ makeImagesSelfContained _ inline = inline bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] bodyToBlocks opts docx (Body bps) = bottomUp removeEmptyPars $ - bottomUp strNormalize $ + bottomUp blockNormalize $ bottomUp spanRemove $ bottomUp divRemove $ map (makeHeaderAnchors) $ -- cgit v1.2.3