From 86fc44d6b3f82a2b274d4b592d1dd6152bd1eaf5 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 17:53:52 -0400 Subject: Add literal tabs to parser. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 22e9dd909..18200bcf9 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -466,7 +466,7 @@ data Run = Run RunStyle [RunElem] | Endnote String deriving Show -data RunElem = TextRun String | LnBrk +data RunElem = TextRun String | LnBrk | Tab deriving Show data RunStyle = RunStyle { isBold :: Bool @@ -545,6 +545,9 @@ elemToRunElem ns element | qName (elName element) == "br" && qURI (elName element) == (lookup "w" ns) = Just $ LnBrk + | qName (elName element) == "tab" && + qURI (elName element) == (lookup "w" ns) = + Just $ Tab | otherwise = Nothing -- cgit v1.2.3 From 0e7d2dbd4304902cb6c6d4e9618592b5148dc598 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 17:55:02 -0400 Subject: Have Docx reader properly interpret tabs. --- src/Text/Pandoc/Readers/Docx.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 4035cde99..c43879ed9 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -134,10 +134,12 @@ codeDivs = ["SourceCode"] runElemToInlines :: RunElem -> [Inline] runElemToInlines (TextRun s) = strToInlines s runElemToInlines (LnBrk) = [LineBreak] +runElemToInlines (Tab) = [Space] runElemToString :: RunElem -> String runElemToString (TextRun s) = s runElemToString (LnBrk) = ['\n'] +runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString -- cgit v1.2.3 From a934db9a320ec76e15e62954b75d0e8d2d972244 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 19:28:55 -0400 Subject: Introduce blockNormalize This will help take care of spaces introduced at the beginning of strings. --- src/Text/Pandoc/Readers/Docx.hs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index c43879ed9..9c1d0c5e6 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -150,6 +150,19 @@ strNormalize (Str "" : ils) = strNormalize ils strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) strNormalize (il:ils) = il : (strNormalize ils) +blockNormalize :: Block -> Block +blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils) +blockNormalize (Plain ils) = Plain $ strNormalize ils +blockNormalize (Para (Space : ils)) = blockNormalize (Para ils) +blockNormalize (Para ils) = Para $ strNormalize ils +blockNormalize (Header n attr (Space : ils)) = + blockNormalize $ Header n attr ils +blockNormalize (Table (Space : ils) align width hdr cells) = + blockNormalize $ Table ils align width hdr cells +blockNormalize (Table ils align width hdr cells) = + Table (strNormalize ils) align width hdr cells +blockNormalize blk = blk + runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] runToInlines _ _ (Run rs runElems) | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = @@ -296,7 +309,7 @@ makeImagesSelfContained _ inline = inline bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] bodyToBlocks opts docx (Body bps) = bottomUp removeEmptyPars $ - bottomUp strNormalize $ + bottomUp blockNormalize $ bottomUp spanRemove $ bottomUp divRemove $ map (makeHeaderAnchors) $ -- cgit v1.2.3 From d19996d7438fbd2ce56cf3ce46b99cd71437cacb Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 19:29:59 -0400 Subject: Fix notes test. This previously allowed spaces at the beginning of a paragraph. --- tests/docx.notes.native | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/docx.notes.native b/tests/docx.notes.native index 1e9b6bba4..5a94b1999 100644 --- a/tests/docx.notes.native +++ b/tests/docx.notes.native @@ -1,2 +1,2 @@ [Header 2 ("",[],[]) [Str "A",Space,Str "footnote"] -,Para [Str "Test",Space,Str "footnote.",Note [Para [Space,Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Space,Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]] +,Para [Str "Test",Space,Str "footnote.",Note [Para [Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]] -- cgit v1.2.3 From da0d1d27ac98ca28e66bc2df3de2bce738068fb8 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 19:33:22 -0400 Subject: Add tabs tests. --- tests/Tests/Readers/Docx.hs | 7 ++++++- tests/docx.tabs.docx | Bin 0 -> 12919 bytes tests/docx.tabs.native | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/docx.tabs.docx create mode 100644 tests/docx.tabs.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 273f03f4d..3a13641a9 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -44,7 +44,12 @@ tests = [ testGroup "inlines" , testCompare "handling unicode input" "docx.unicode.docx" - "docx.unicode.native"] + "docx.unicode.native" + , testCompare + "literal tabs" + "docx.tabs.docx" + "docx.tabs.native" + ] , testGroup "blocks" [ testCompare "headers" diff --git a/tests/docx.tabs.docx b/tests/docx.tabs.docx new file mode 100644 index 000000000..6ff5f4bb1 Binary files /dev/null and b/tests/docx.tabs.docx differ diff --git a/tests/docx.tabs.native b/tests/docx.tabs.native new file mode 100644 index 000000000..05461f20b --- /dev/null +++ b/tests/docx.tabs.native @@ -0,0 +1,2 @@ +[Para [Str "Some",Space,Str "text",Space,Str "separated",Space,Str "by",Space,Str "a",Space,Str "tab."] +,Para [Str "Tab-indented",Space,Str "text."]] -- cgit v1.2.3