From 550b931c3cf3ece7226b98aa0547efb3facdd816 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sun, 5 Feb 2012 14:37:33 -0800
Subject: Text.Pandoc.XML: Export fromEntities. Remove old 'deEntities' from pandoc.hs.

---
 src/Text/Pandoc/XML.hs | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 1532e790b..e3df51e0d 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -34,10 +34,12 @@ module Text.Pandoc.XML ( stripTags,
                          selfClosingTag,
                          inTagsSimple,
                          inTagsIndented,
-                         toEntities ) where
+                         toEntities,
+                         fromEntities ) where
 
 import Text.Pandoc.Pretty
 import Data.Char (ord, isAscii)
+import Text.HTML.TagSoup.Entity (lookupEntity)
 
 -- | Remove everything between <...>
 stripTags :: String -> String
@@ -98,3 +100,15 @@ toEntities [] = ""
 toEntities (c:cs)
   | isAscii c = c : toEntities cs
   | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
+
+-- | Unescape entities known to 'lookupEntity'; anything else is kept verbatim.
+fromEntities :: String -> String
+fromEntities ('&':xs) =
+  case lookupEntity ent of
+        Just c  -> c : fromEntities rest
+        Nothing -> '&' : fromEntities xs  -- not an entity: keep the text after '&'
+    where (ent, rest) = case break (==';') xs of
+                             (zs,';':ys) -> (zs,ys)  -- drop the terminating ';'
+                             (zs,    ys) -> (zs,ys)  -- no ';' found: ys is empty
+fromEntities (x:xs) = x : fromEntities xs
+fromEntities []     = []
-- 
cgit v1.2.3