about | summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- src/Text/Pandoc/XML.hs 16
1 files changed, 15 insertions, 1 deletions
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 1532e790b..e3df51e0d 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -34,10 +34,12 @@ module Text.Pandoc.XML ( stripTags,
selfClosingTag,
inTagsSimple,
inTagsIndented,
- toEntities ) where
+ toEntities,
+ fromEntities ) where
import Text.Pandoc.Pretty
import Data.Char (ord, isAscii)
+import Text.HTML.TagSoup.Entity (lookupEntity)
-- | Remove everything between <...>
stripTags :: String -> String
@@ -98,3 +100,15 @@ toEntities [] = ""
toEntities (c:cs)
| isAscii c = c : toEntities cs
| otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
+
+-- | Unescape XML entities; an '&' that starts no known entity is kept verbatim.
+fromEntities :: String -> String
+fromEntities ('&':xs) =
+  case lookupEntity ent of
+       Just c  -> c : fromEntities rest   -- known entity: emit its character
+       Nothing -> '&' : fromEntities xs   -- unknown: keep '&' and rescan the text after it
+  where (ent, rest) = case break (==';') xs of   -- candidate name runs up to the first ';'
+                           (zs,';':ys) -> (zs,ys)
+                           (zs,ys)     -> (zs,ys)
+fromEntities (x:xs) = x : fromEntities xs
+fromEntities [] = []