From 0b899ce7efa80df700b17b7a4b9f6de1fb88ce8c Mon Sep 17 00:00:00 2001 From: Ophir Lifshitz Date: Sun, 4 Oct 2015 06:07:23 -0400 Subject: Docx Reader: Parse soft, no-break hyphen elements --- src/Text/Pandoc/Readers/Docx.hs | 4 ++++ src/Text/Pandoc/Readers/Docx/Parse.hs | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 67a97ae85..8b8d1ede1 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -206,11 +206,15 @@ runElemToInlines :: RunElem -> Inlines runElemToInlines (TextRun s) = text s runElemToInlines (LnBrk) = linebreak runElemToInlines (Tab) = space +runElemToInlines (SoftHyphen) = text "\xad" +runElemToInlines (NoBreakHyphen) = text "\x2011" runElemToString :: RunElem -> String runElemToString (TextRun s) = s runElemToString (LnBrk) = ['\n'] runElemToString (Tab) = ['\t'] +runElemToString (SoftHyphen) = ['\xad'] +runElemToString (NoBreakHyphen) = ['\x2011'] runToString :: Run -> String runToString (Run _ runElems) = concatMap runElemToString runElems diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index cce80fb48..53af19dfd 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -208,7 +208,7 @@ data Run = Run RunStyle [RunElem] | InlineDrawing FilePath B.ByteString deriving Show -data RunElem = TextRun String | LnBrk | Tab +data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show data VertAlign = BaseLn | SupScrpt | SubScrpt @@ -877,6 +877,8 @@ elemToRunElem ns element map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str | isElem ns "w" "br" element = return LnBrk | isElem ns "w" "tab" element = return Tab + | isElem ns "w" "softHyphen" element = return SoftHyphen + | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen | isElem ns "w" "sym" element = return (getSymChar ns element) | otherwise = throwError WrongElem where -- cgit v1.2.3 From dfd06467eadace7b37cb3ebc53c943755d0436eb Mon Sep 17 00:00:00 2001 From: Ophir Lifshitz Date: Sun, 4 Oct 2015 06:08:17 -0400 Subject: Docx Reader: Create special punctuation test --- tests/Tests/Readers/Docx.hs | 4 ++++ tests/docx/special_punctuation.docx | Bin 0 -> 8408 bytes tests/docx/special_punctuation.native | 2 ++ 3 files changed, 6 insertions(+) create mode 100644 tests/docx/special_punctuation.docx create mode 100644 tests/docx/special_punctuation.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 47292bc99..7e3f1979e 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -130,6 +130,10 @@ tests = [ testGroup "inlines" "literal tabs" "docx/tabs.docx" "docx/tabs.native" + , testCompare + "special punctuation" + "docx/special_punctuation.docx" + "docx/special_punctuation.native" , testCompare "normalizing inlines" "docx/normalize.docx" diff --git a/tests/docx/special_punctuation.docx b/tests/docx/special_punctuation.docx new file mode 100644 index 000000000..8e0bb55c9 Binary files /dev/null and b/tests/docx/special_punctuation.docx differ diff --git a/tests/docx/special_punctuation.native b/tests/docx/special_punctuation.native new file mode 100644 index 000000000..304289f44 --- /dev/null +++ b/tests/docx/special_punctuation.native @@ -0,0 +1,2 @@ +[Para [Str "Soft",Space,Str "hyphen:",Space,Str "[\173]"] +,Para [Str "Non-breaking",Space,Str "hyphen:",Space,Str "[\8209]"]] -- cgit v1.2.3