diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-06-19 22:47:32 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-06-19 22:47:32 -0700 |
commit | 0d8e0e567479e723c986b2384c39a997f9065fa2 (patch) | |
tree | cb1a367c616c275a1dae941ceafc9ec6a5d0b29b | |
parent | 5cb53a48d541b97b5f60968715a5969133196d70 (diff) | |
parent | da0d1d27ac98ca28e66bc2df3de2bce738068fb8 (diff) | |
download | pandoc-0d8e0e567479e723c986b2384c39a997f9065fa2.tar.gz |
Merge pull request #1354 from jkr/literalTab
Parse literal tabs in docx
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 17 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 5 | ||||
-rw-r--r-- | tests/Tests/Readers/Docx.hs | 7 | ||||
-rw-r--r-- | tests/docx.notes.native | 2 | ||||
-rw-r--r-- | tests/docx.tabs.docx | bin | 0 -> 12919 bytes | |||
-rw-r--r-- | tests/docx.tabs.native | 2 |
6 files changed, 29 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 4035cde99..9c1d0c5e6 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -134,10 +134,12 @@ codeDivs = ["SourceCode"]
 runElemToInlines :: RunElem -> [Inline]
 runElemToInlines (TextRun s) = strToInlines s
 runElemToInlines (LnBrk) = [LineBreak]
+runElemToInlines (Tab) = [Space]
 
 runElemToString :: RunElem -> String
 runElemToString (TextRun s) = s
 runElemToString (LnBrk) = ['\n']
+runElemToString (Tab) = ['\t']
 
 runElemsToString :: [RunElem] -> String
 runElemsToString = concatMap runElemToString
@@ -148,6 +150,19 @@ strNormalize (Str "" : ils) = strNormalize ils
 strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l)
 strNormalize (il:ils) = il : (strNormalize ils)
 
+blockNormalize :: Block -> Block
+blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils)
+blockNormalize (Plain ils) = Plain $ strNormalize ils
+blockNormalize (Para (Space : ils)) = blockNormalize (Para ils)
+blockNormalize (Para ils) = Para $ strNormalize ils
+blockNormalize (Header n attr (Space : ils)) =
+  blockNormalize $ Header n attr ils
+blockNormalize (Table (Space : ils) align width hdr cells) =
+  blockNormalize $ Table ils align width hdr cells
+blockNormalize (Table ils align width hdr cells) =
+  Table (strNormalize ils) align width hdr cells
+blockNormalize blk = blk
+
 runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
 runToInlines _ _ (Run rs runElems)
   | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans =
@@ -294,7 +309,7 @@ makeImagesSelfContained _ inline = inline
 bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
 bodyToBlocks opts docx (Body bps) =
   bottomUp removeEmptyPars $
-  bottomUp strNormalize $
+  bottomUp blockNormalize $
   bottomUp spanRemove $
   bottomUp divRemove $
   map (makeHeaderAnchors) $
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 22e9dd909..18200bcf9 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -466,7 +466,7 @@ data Run = Run RunStyle [RunElem]
          | Endnote String
            deriving Show
 
-data RunElem = TextRun String | LnBrk
+data RunElem = TextRun String | LnBrk | Tab
              deriving Show
 
 data RunStyle = RunStyle { isBold :: Bool
@@ -545,6 +545,9 @@ elemToRunElem ns element
   | qName (elName element) == "br" &&
     qURI (elName element) == (lookup "w" ns) =
     Just $ LnBrk
+  | qName (elName element) == "tab" &&
+    qURI (elName element) == (lookup "w" ns) =
+    Just $ Tab
   | otherwise = Nothing
 
 
diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs
index 273f03f4d..3a13641a9 100644
--- a/tests/Tests/Readers/Docx.hs
+++ b/tests/Tests/Readers/Docx.hs
@@ -44,7 +44,12 @@ tests = [ testGroup "inlines"
           , testCompare
             "handling unicode input"
             "docx.unicode.docx"
-            "docx.unicode.native"]
+            "docx.unicode.native"
+          , testCompare
+            "literal tabs"
+            "docx.tabs.docx"
+            "docx.tabs.native"
+          ]
         , testGroup "blocks"
           [ testCompare
             "headers"
diff --git a/tests/docx.notes.native b/tests/docx.notes.native
index 1e9b6bba4..5a94b1999 100644
--- a/tests/docx.notes.native
+++ b/tests/docx.notes.native
@@ -1,2 +1,2 @@
 [Header 2 ("",[],[]) [Str "A",Space,Str "footnote"]
-,Para [Str "Test",Space,Str "footnote.",Note [Para [Space,Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Space,Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]]
+,Para [Str "Test",Space,Str "footnote.",Note [Para [Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]]
diff --git a/tests/docx.tabs.docx b/tests/docx.tabs.docx
Binary files differ
new file mode 100644
index 000000000..6ff5f4bb1
--- /dev/null
+++ b/tests/docx.tabs.docx
diff --git a/tests/docx.tabs.native b/tests/docx.tabs.native
new file mode 100644
index 000000000..05461f20b
--- /dev/null
+++ b/tests/docx.tabs.native
@@ -0,0 +1,2 @@
+[Para [Str "Some",Space,Str "text",Space,Str "separated",Space,Str "by",Space,Str "a",Space,Str "tab."]
+,Para [Str "Tab-indented",Space,Str "text."]]