diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2011-12-12 00:12:44 -0800 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2011-12-12 00:12:44 -0800 |
commit | 1adb8074071f543f0ff541916fef9a750538b0c9 (patch) | |
tree | 73fb5fde10a9930a696bef80311d0c80588126f6 /src/Text/Pandoc/Writers | |
parent | 9f9a57de19058154221f2a80bd9b307b22d7bde1 (diff) | |
download | pandoc-1adb8074071f543f0ff541916fef9a750538b0c9.tar.gz |
EPUB: Use UTF-8 rather than decimal entities.
This addresses a problem with kindlegen pointed out by
Axel Kielhorn.
Diffstat (limited to 'src/Text/Pandoc/Writers')
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 14 |
1 file changed, 12 insertions, 2 deletions
```diff
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index cf5cad253..1bd614952 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -45,7 +45,7 @@ import Text.XML.Light hiding (ppTopElement)
 import Text.Pandoc.UUID
 import Text.Pandoc.Writers.HTML
 import Text.Pandoc.Writers.Markdown ( writePlain )
-import Data.Char ( toLower )
+import Data.Char ( toLower, isDigit )
 import Network.URI ( unEscapeString )
 
 -- | Produce an EPUB file from a Pandoc document.
@@ -276,7 +276,17 @@ transformBlock x = x
 
 -- | Version of 'ppTopElement' that specifies UTF-8 encoding.
 ppTopElement :: Element -> String
-ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement
+ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
+  -- unEntity removes decimal entities introduced by ppElement
+  -- (kindlegen seems to choke on these).
+  where unEntity [] = ""
+        unEntity ('&':'#':d:xs) | isDigit d =
+          let ds = takeWhile isDigit xs
+              c = read $ '\'' : '\\' : d : ds ++ "'"
+          in  if c > '\127'
+                 then c : unEntity (drop (length ds + 2) xs)
+                 else '&':'#':d:ds ++ unEntity (drop (length ds + 2) xs)
+        unEntity (x:xs) = x : unEntity xs
 
 imageTypeOf :: FilePath -> Maybe String
 imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
```