aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
author John MacFarlane <fiddlosopher@gmail.com> 2011-12-12 00:12:44 -0800
committer John MacFarlane <fiddlosopher@gmail.com> 2011-12-12 00:12:44 -0800
commit 1adb8074071f543f0ff541916fef9a750538b0c9 (patch)
tree 73fb5fde10a9930a696bef80311d0c80588126f6 /src/Text/Pandoc
parent 9f9a57de19058154221f2a80bd9b307b22d7bde1 (diff)
download pandoc-1adb8074071f543f0ff541916fef9a750538b0c9.tar.gz
EPUB: Use UTF-8 rather than decimal entities.
This addresses a problem with kindlegen pointed out by Axel Kielhorn.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- src/Text/Pandoc/Writers/EPUB.hs 14
1 files changed, 12 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index cf5cad253..1bd614952 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -45,7 +45,7 @@ import Text.XML.Light hiding (ppTopElement)
import Text.Pandoc.UUID
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.Markdown ( writePlain )
-import Data.Char ( toLower )
+import Data.Char ( toLower, isDigit )
import Network.URI ( unEscapeString )
-- | Produce an EPUB file from a Pandoc document.
@@ -276,7 +276,17 @@ transformBlock x = x
-- | Version of 'ppTopElement' that specifies UTF-8 encoding.
ppTopElement :: Element -> String
-ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement
+ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
+  -- unEntity replaces decimal character references for characters above
+  -- '\127' (as introduced by ppElement) with the characters themselves;
+  -- kindlegen seems to choke on these references.  References to ASCII
+  -- characters, and anything that is not a well-formed "&#<digits>;"
+  -- reference, are passed through unchanged.
+  where unEntity [] = ""
+        unEntity ('&':'#':d:xs) | isDigit d =
+          -- 'rest' still begins with the ';' terminator when one is present,
+          -- so the fallback branch below re-emits it verbatim.
+          let (ds, rest) = span isDigit xs
+              digits     = d : ds
+          -- The number is decoded by parsing the Haskell character literal
+          -- '\<digits>'.  'reads' is used instead of 'read' so that a number
+          -- beyond the largest code point is a failed parse, not a crash.
+          in  case (rest, reads ('\'' : '\\' : digits ++ "'")) of
+                   (';':ys, [(c, "")]) | c > '\127' -> c : unEntity ys
+                   _ -> '&' : '#' : digits ++ unEntity rest
+        unEntity (x:xs) = x : unEntity xs
imageTypeOf :: FilePath -> Maybe String
imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of