diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-02-18 18:11:27 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-02-18 18:11:27 -0800 |
commit | ef642e2bbc1f46056fc27560ceba791f27f2daa6 (patch) | |
tree | 46392182f41876b082b1a7d8fb370dfa84693ed2 /src/Text/Pandoc | |
parent | 0f5c56dfb171e27745f4fe8530325223ecefe52a (diff) | |
download | pandoc-ef642e2bbc1f46056fc27560ceba791f27f2daa6.tar.gz |
T.P.XML Improve fromEntities.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/XML.hs | 30 |
1 files changed, 13 insertions, 17 deletions
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index c4e3ed1e7..6dbbce1d2 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,6 +1,5 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.XML Copyright : Copyright (C) 2006-2021 John MacFarlane @@ -123,23 +122,20 @@ html5EntityMap = foldr go mempty htmlEntities -- Unescapes XML entities fromEntities :: Text -> Text -fromEntities = T.pack . fromEntities' +fromEntities t + = let (x, y) = T.break (== '&') t + in if T.null y + then t + else x <> + let (ent, rest) = T.break (\c -> isSpace c || c == ';') y + rest' = case T.uncons rest of + Just (';',ys) -> ys + _ -> rest + ent' = T.drop 1 ent <> ";" + in case T.pack <$> lookupEntity (T.unpack ent') of + Just c -> c <> fromEntities rest' + Nothing -> ent <> fromEntities rest -fromEntities' :: Text -> String -fromEntities' (T.uncons -> Just ('&', xs)) = - case lookupEntity $ T.unpack ent' of - Just c -> c <> fromEntities' rest - Nothing -> "&" <> fromEntities' xs - where (ent, rest) = case T.break (\c -> isSpace c || c == ';') xs of - (zs,T.uncons -> Just (';',ys)) -> (zs,ys) - (zs, ys) -> (zs,ys) - ent' - | Just ys <- T.stripPrefix "#X" ent = "#x" <> ys -- workaround tagsoup bug - | Just ('#', _) <- T.uncons ent = ent - | otherwise = ent <> ";" -fromEntities' t = case T.uncons t of - Just (x, xs) -> x : fromEntities' xs - Nothing -> "" html5Attributes :: Set.Set Text html5Attributes = Set.fromList |