about summary refs log tree commit diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2014-06-19 22:47:32 -0700
committer John MacFarlane <jgm@berkeley.edu> 2014-06-19 22:47:32 -0700
commit 0d8e0e567479e723c986b2384c39a997f9065fa2 (patch)
tree cb1a367c616c275a1dae941ceafc9ec6a5d0b29b
parent 5cb53a48d541b97b5f60968715a5969133196d70 (diff)
parent da0d1d27ac98ca28e66bc2df3de2bce738068fb8 (diff)
download pandoc-0d8e0e567479e723c986b2384c39a997f9065fa2.tar.gz
Merge pull request #1354 from jkr/literalTab
Parse literal tabs in docx
-rw-r--r-- src/Text/Pandoc/Readers/Docx.hs 17
-rw-r--r-- src/Text/Pandoc/Readers/Docx/Parse.hs 5
-rw-r--r-- tests/Tests/Readers/Docx.hs 7
-rw-r--r-- tests/docx.notes.native 2
-rw-r--r-- tests/docx.tabs.docx bin 0 -> 12919 bytes
-rw-r--r-- tests/docx.tabs.native 2
6 files changed, 29 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 4035cde99..9c1d0c5e6 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -134,10 +134,12 @@ codeDivs = ["SourceCode"]
runElemToInlines :: RunElem -> [Inline]
runElemToInlines (TextRun s) = strToInlines s
runElemToInlines (LnBrk) = [LineBreak]
+runElemToInlines (Tab) = [Space]
runElemToString :: RunElem -> String
runElemToString (TextRun s) = s
runElemToString (LnBrk) = ['\n']
+runElemToString (Tab) = ['\t']
runElemsToString :: [RunElem] -> String
runElemsToString = concatMap runElemToString
@@ -148,6 +150,19 @@ strNormalize (Str "" : ils) = strNormalize ils
strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l)
strNormalize (il:ils) = il : (strNormalize ils)
+blockNormalize :: Block -> Block
+blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils)
+blockNormalize (Plain ils) = Plain $ strNormalize ils
+blockNormalize (Para (Space : ils)) = blockNormalize (Para ils)
+blockNormalize (Para ils) = Para $ strNormalize ils
+blockNormalize (Header n attr (Space : ils)) =
+ blockNormalize $ Header n attr ils
+blockNormalize (Table (Space : ils) align width hdr cells) =
+ blockNormalize $ Table ils align width hdr cells
+blockNormalize (Table ils align width hdr cells) =
+ Table (strNormalize ils) align width hdr cells
+blockNormalize blk = blk
+
runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
runToInlines _ _ (Run rs runElems)
| isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans =
@@ -294,7 +309,7 @@ makeImagesSelfContained _ inline = inline
bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
bodyToBlocks opts docx (Body bps) =
bottomUp removeEmptyPars $
- bottomUp strNormalize $
+ bottomUp blockNormalize $
bottomUp spanRemove $
bottomUp divRemove $
map (makeHeaderAnchors) $
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 22e9dd909..18200bcf9 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -466,7 +466,7 @@ data Run = Run RunStyle [RunElem]
| Endnote String
deriving Show
-data RunElem = TextRun String | LnBrk
+data RunElem = TextRun String | LnBrk | Tab
deriving Show
data RunStyle = RunStyle { isBold :: Bool
@@ -545,6 +545,9 @@ elemToRunElem ns element
| qName (elName element) == "br" &&
qURI (elName element) == (lookup "w" ns) =
Just $ LnBrk
+ | qName (elName element) == "tab" &&
+ qURI (elName element) == (lookup "w" ns) =
+ Just $ Tab
| otherwise = Nothing
diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs
index 273f03f4d..3a13641a9 100644
--- a/tests/Tests/Readers/Docx.hs
+++ b/tests/Tests/Readers/Docx.hs
@@ -44,7 +44,12 @@ tests = [ testGroup "inlines"
, testCompare
"handling unicode input"
"docx.unicode.docx"
- "docx.unicode.native"]
+ "docx.unicode.native"
+ , testCompare
+ "literal tabs"
+ "docx.tabs.docx"
+ "docx.tabs.native"
+ ]
, testGroup "blocks"
[ testCompare
"headers"
diff --git a/tests/docx.notes.native b/tests/docx.notes.native
index 1e9b6bba4..5a94b1999 100644
--- a/tests/docx.notes.native
+++ b/tests/docx.notes.native
@@ -1,2 +1,2 @@
[Header 2 ("",[],[]) [Str "A",Space,Str "footnote"]
-,Para [Str "Test",Space,Str "footnote.",Note [Para [Space,Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Space,Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]]
+,Para [Str "Test",Space,Str "footnote.",Note [Para [Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]]
diff --git a/tests/docx.tabs.docx b/tests/docx.tabs.docx
new file mode 100644
index 000000000..6ff5f4bb1
--- /dev/null
+++ b/tests/docx.tabs.docx
Binary files differ
diff --git a/tests/docx.tabs.native b/tests/docx.tabs.native
new file mode 100644
index 000000000..05461f20b
--- /dev/null
+++ b/tests/docx.tabs.native
@@ -0,0 +1,2 @@
+[Para [Str "Some",Space,Str "text",Space,Str "separated",Space,Str "by",Space,Str "a",Space,Str "tab."]
+,Para [Str "Tab-indented",Space,Str "text."]]