diff options
author | John MacFarlane <jgm@berkeley.edu> | 2018-11-01 15:59:40 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-11-01 15:59:59 -0700 |
commit | 5350ce26b463ccb8edc7b0c7b1be72558e87992c (patch) | |
tree | 5335f1aec0eab047e6f8ac57af2fda3c656e4bf1 | |
parent | 614b1c0c6188589128c4bfebeab65fa6306a916c (diff) | |
download | pandoc-5350ce26b463ccb8edc7b0c7b1be72558e87992c.tar.gz |
Text.Pandoc.XML: add toHtml5Entities.
[API change]
-rw-r--r-- | src/Text/Pandoc/XML.hs | 18 |
1 files changed, 18 insertions, 0 deletions
```diff
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index add46bd6c..169951591 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -35,6 +35,7 @@ module Text.Pandoc.XML ( escapeCharForXML,
                          inTagsSimple,
                          inTagsIndented,
                          toEntities,
+                         toHtml5Entities,
                          fromEntities ) where
 
 import Prelude
@@ -43,6 +44,8 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Pretty
+import qualified Data.Map as M
+import Text.HTML.TagSoup.Entity (htmlEntities)
 
 -- | Escape one character as needed for XML.
 escapeCharForXML :: Char -> String
@@ -100,6 +103,21 @@ toEntities = T.concatMap go
   where go c | isAscii c = T.singleton c
              | otherwise = T.pack ("&#" ++ show (ord c) ++ ";")
 
+-- | Escape all non-ascii characters using HTML5 entities, falling
+-- back to numerical entities.
+toHtml5Entities :: Text -> Text
+toHtml5Entities = T.concatMap go
+  where go c | isAscii c = T.singleton c
+             | otherwise =
+                 case M.lookup c html5EntityMap of
+                   Just t -> T.singleton '&' <> t <> T.singleton ';'
+                   Nothing -> T.pack ("&#" ++ show (ord c) ++ ";")
+
+html5EntityMap :: M.Map Char Text
+html5EntityMap = M.fromList [(c, T.takeWhile (/=';') (T.pack ent))
+                             | (ent@(_:_), [c]) <- htmlEntities
+                             , last ent == ';']
+
 -- Unescapes XML entities
 fromEntities :: String -> String
 fromEntities ('&':xs) =
```