diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2012-02-05 14:37:33 -0800 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2012-02-05 14:37:33 -0800 |
commit | 550b931c3cf3ece7226b98aa0547efb3facdd816 (patch) | |
tree | 256d81109569f54836931d068825201d1326229f /src | |
parent | 4dec972cfe6827f4c7a40cf1c50e4a0120d4e442 (diff) | |
download | pandoc-550b931c3cf3ece7226b98aa0547efb3facdd816.tar.gz |
Text.Pandoc.XML: Export fromEntities.
Remove old 'deEntities' from pandoc.hs.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/XML.hs | 16 | ||||
-rw-r--r-- | src/pandoc.hs | 16 |
2 files changed, 17 insertions, 15 deletions
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index 1532e790b..e3df51e0d 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -34,10 +34,12 @@ module Text.Pandoc.XML ( stripTags, selfClosingTag, inTagsSimple, inTagsIndented, - toEntities ) where + toEntities, + fromEntities ) where import Text.Pandoc.Pretty import Data.Char (ord, isAscii) +import Text.HTML.TagSoup.Entity (lookupEntity) -- | Remove everything between <...> stripTags :: String -> String @@ -98,3 +100,15 @@ toEntities [] = "" toEntities (c:cs) | isAscii c = c : toEntities cs | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs + +-- Unescapes XML entities +fromEntities :: String -> String +fromEntities ('&':xs) = + case lookupEntity ent of + Just c -> c : fromEntities rest + Nothing -> '&' : fromEntities rest + where (ent, rest) = case break (==';') xs of + (zs,';':ys) -> (zs,ys) + (zs,ys) -> (zs,ys) +fromEntities (x:xs) = x : fromEntities xs +fromEntities [] = [] diff --git a/src/pandoc.hs b/src/pandoc.hs index 3e6ade9a6..aa166b7e3 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -34,6 +34,7 @@ import Text.Pandoc.PDF (tex2pdf) import Text.Pandoc.Readers.LaTeX (handleIncludes) import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile, headerShift, findDataFile, normalize, err, warn ) +import Text.Pandoc.XML ( toEntities, fromEntities ) import Text.Pandoc.SelfContained ( makeSelfContained ) import Text.Pandoc.Highlighting ( languages, Style, tango, pygments, espresso, kate, haddock, monochrome ) @@ -55,7 +56,6 @@ import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B import Data.ByteString.Lazy.UTF8 (toString ) -import Text.HTML.TagSoup.Entity (lookupEntity) import Codec.Binary.UTF8.String (decodeString, encodeString) import Text.CSL.Reference (Reference(..)) @@ -663,18 +663,6 @@ options = ] --- Unescapes XML entities -deEntity :: String -> String -deEntity ('&':xs) = - case lookupEntity ent of - Just c -> c : deEntity rest - Nothing -> '&' : deEntity rest - where (ent, rest) = case break (==';') xs of - (zs,';':ys) -> (zs,ys) - (zs,ys) -> (zs,ys) -deEntity (x:xs) = x : deEntity xs -deEntity [] = [] - -- Returns usage message usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String usageMessage programName = usageInfo @@ -902,7 +890,7 @@ main = do -- unescape reference ids, which may contain XML entities, so -- that we can do lookups with regular string equality - let unescapeRefId ref = ref{ refId = deEntity (refId ref) } + let unescapeRefId ref = ref{ refId = fromEntities (refId ref) } refs <- mapM (\f -> catch (CSL.readBiblioFile f) $ \e -> err 23 $ "Error reading bibliography `" ++ f ++ "'" ++ "\n" ++ show e) |