diff options
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/XML.hs | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 1532e790b..e3df51e0d 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -34,10 +34,12 @@ module Text.Pandoc.XML ( stripTags,
                          selfClosingTag,
                          inTagsSimple,
                          inTagsIndented,
-                         toEntities ) where
+                         toEntities,
+                         fromEntities ) where
 
 import Text.Pandoc.Pretty
 import Data.Char (ord, isAscii)
+import Text.HTML.TagSoup.Entity (lookupEntity)
 
 -- | Remove everything between <...>
 stripTags :: String -> String
@@ -98,3 +100,15 @@ toEntities [] = ""
 toEntities (c:cs)
   | isAscii c = c : toEntities cs
   | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
+
+-- | Unescape XML entities; an @&@ that starts no known entity is kept verbatim.
+fromEntities :: String -> String
+fromEntities ('&':xs) =
+  case lookupEntity ent of
+    Just c  -> c : fromEntities rest  -- known entity: emit it, resume after ';'
+    Nothing -> '&' : fromEntities xs  -- unknown: keep '&', rescan all text after it
+  where (ent, rest) = case break (==';') xs of
+          (zs,';':ys) -> (zs,ys)      -- drop the terminating ';'
+          (zs,ys)     -> (zs,ys)      -- no ';' found, so ys is empty here
+fromEntities (x:xs) = x : fromEntities xs
+fromEntities [] = []