aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
author John MacFarlane <fiddlosopher@gmail.com> 2012-02-05 14:37:33 -0800
committer John MacFarlane <fiddlosopher@gmail.com> 2012-02-05 14:37:33 -0800
commit 550b931c3cf3ece7226b98aa0547efb3facdd816 (patch)
tree 256d81109569f54836931d068825201d1326229f /src/Text/Pandoc
parent 4dec972cfe6827f4c7a40cf1c50e4a0120d4e442 (diff)
download pandoc-550b931c3cf3ece7226b98aa0547efb3facdd816.tar.gz
Text.Pandoc.XML: Export fromEntities.
Remove old 'deEntities' from pandoc.hs.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- src/Text/Pandoc/XML.hs 16
1 files changed, 15 insertions, 1 deletions
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 1532e790b..e3df51e0d 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -34,10 +34,12 @@ module Text.Pandoc.XML ( stripTags,
selfClosingTag,
inTagsSimple,
inTagsIndented,
- toEntities ) where
+ toEntities,
+ fromEntities ) where
import Text.Pandoc.Pretty
import Data.Char (ord, isAscii)
+import Text.HTML.TagSoup.Entity (lookupEntity)
-- | Remove everything between <...>
stripTags :: String -> String
@@ -98,3 +100,15 @@ toEntities [] = ""
toEntities (c:cs)
| isAscii c = c : toEntities cs
| otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
+
+-- | Unescape XML character entities; an ampersand that does not start
+-- a known entity is kept literally, together with the text after it.
+fromEntities :: String -> String
+fromEntities ('&':xs) =
+  case lookupEntity ent of
+       Just c  -> c : fromEntities rest
+       Nothing -> '&' : fromEntities xs  -- not an entity: resume right after '&'
+  where (ent, ys) = break (== ';') xs    -- candidate name, up to the first ';'
+        rest      = drop 1 ys            -- text after the terminating ';', if any
+fromEntities (x:xs) = x : fromEntities xs
+fromEntities [] = []