From 4e5745134a8867d68a4678285272869d1c0ebce3 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Tue, 2 Jan 2007 00:29:22 +0000 Subject: Use entities for all characters above 127 in docbook output. Though XML tools should support unicode, some people will be using SGML tools, and these do not. Using entities makes the docbook files more portable. Also refactored encodeEntities and charToHtmlEntity in HtmlEntities.hs git-svn-id: https://pandoc.googlecode.com/svn/trunk@394 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/HtmlEntities.hs | 17 ++++++++++------- src/Text/Pandoc/Writers/Docbook.hs | 12 +++++++----- 2 files changed, 17 insertions(+), 12 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/HtmlEntities.hs b/src/Text/Pandoc/HtmlEntities.hs index 07eb3fb53..157588262 100644 --- a/src/Text/Pandoc/HtmlEntities.hs +++ b/src/Text/Pandoc/HtmlEntities.hs @@ -57,7 +57,10 @@ decodeEntities str = -- | Returns a string with characters replaced with entity references where -- possible. encodeEntities :: String -> String -encodeEntities = concatMap (\c -> fromMaybe [c] (charToHtmlEntity c)) +encodeEntities [] = [] +encodeEntities (c:cs) = if ord c < 127 + then c:(encodeEntities cs) + else (charToHtmlEntity c) ++ (encodeEntities cs) -- | If the string is a valid entity reference, returns @Just@ the character, -- otherwise @Nothing@. @@ -69,14 +72,14 @@ htmlEntityToChar entity = Just (_, _, _, [sub]) -> Just (chr (read sub)) Nothing -> Nothing --- | If there is an entity reference corresponding to the character, returns --- @Just@ the entity reference, otherwise @Nothing@. -charToHtmlEntity :: Char -> Maybe String +-- | Returns a string containing an entity reference for the character. +charToHtmlEntity :: Char -> String charToHtmlEntity char = - let matches = filter (\(entity, character) -> (character == char)) htmlEntityTable in + let matches = filter (\(entity, character) -> (character == char)) + htmlEntityTable in if (length matches) == 0 - then Nothing - else Just (fst (head matches)) + then "&#" ++ show (ord char) ++ ";" + else fst (head matches) htmlEntityTable :: [(String, Char)] htmlEntityTable = [ diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 3916aa214..61f24807b 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -33,9 +33,10 @@ module Text.Pandoc.Writers.Docbook ( import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Writers.HTML ( stringToSmartHtml, stringToHtml ) +import Text.Pandoc.HtmlEntities ( encodeEntities ) import Text.Html ( stringToHtmlString ) import Text.Regex ( mkRegex, matchRegex ) -import Data.Char ( toLower ) +import Data.Char ( toLower, ord ) import Data.List ( isPrefixOf, partition ) import Text.PrettyPrint.HughesPJ hiding ( Str ) @@ -179,9 +180,10 @@ wrap options lst = fsep $ map (hcat . (map (inlineToDocbook options))) (splitByS -- | Escape a string for XML (with "smart" option if specified). stringToXML :: WriterOptions -> String -> String -stringToXML options = if writerSmart options - then stringToSmartHtml - else stringToHtml +stringToXML options = encodeEntities . + (if writerSmart options + then stringToSmartHtml + else stringToHtml) -- | Escape string to XML appropriate for attributes attributeStringToXML :: String -> String @@ -189,7 +191,7 @@ attributeStringToXML = gsub "\"" """ . codeStringToXML -- | Escape a literal string for XML. codeStringToXML :: String -> String -codeStringToXML = gsub "<" "<" . gsub "&" "&" +codeStringToXML = encodeEntities . gsub "<" "<" . gsub "&" "&" -- | Convert a list of inline elements to Docbook. inlinesToDocbook :: WriterOptions -> [Inline] -> Doc -- cgit v1.2.3