about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-01-06 19:47:05 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-01-06 19:47:05 +0000
commit: 58dcef062556067b753892c8020d7f8c4e0bed0c (patch)
tree: 6716c906de2d3e4ae3ba71a8bf045980efc0a291
parent: cbfe29f20577654942c04e54763ce5d4ce40a031 (diff)
download: pandoc-58dcef062556067b753892c8020d7f8c4e0bed0c.tar.gz
Added support for hexadecimal entities: e.g. &#xA0AB;
git-svn-id: https://pandoc.googlecode.com/svn/trunk@441 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r-- src/Text/Pandoc/Entities.hs 12
1 files changed, 6 insertions, 6 deletions
diff --git a/src/Text/Pandoc/Entities.hs b/src/Text/Pandoc/Entities.hs
index 26785b9a8..ea5676b79 100644
--- a/src/Text/Pandoc/Entities.hs
+++ b/src/Text/Pandoc/Entities.hs
@@ -39,13 +39,13 @@ import Data.Char ( chr, ord )
import Text.Regex ( mkRegex, matchRegexAll, Regex )
import Maybe ( fromMaybe )
--- | Regular expression for decimal coded entity.
-decimalCodedEntity :: Text.Regex.Regex
-decimalCodedEntity = mkRegex "&#([0-9]+);"
+-- | Regular expression for numerical coded entity.
+numericalEntity :: Text.Regex.Regex
+numericalEntity = mkRegex "&#([0-9]+|[xX][0-9A-Fa-f]+);"
-- | Regular expression for character entity.
characterEntity :: Text.Regex.Regex
-characterEntity = mkRegex "&#[0-9]+;|&[A-Za-z0-9]+;"
+characterEntity = mkRegex "&#[0-9]+;|&#[xX][0-9A-Fa-f]+;|&[A-Za-z0-9]+;"
-- | Return a string with all entity references decoded to unicode characters
-- where possible.
@@ -73,8 +73,8 @@ entityToChar :: String -> Maybe Char
entityToChar entity =
case (lookup entity entityTable) of
Just ch -> Just ch
- Nothing -> case (matchRegexAll decimalCodedEntity entity) of
- Just (_, _, _, [sub]) -> Just (chr (read sub))
+ Nothing -> case (matchRegexAll numericalEntity entity) of
+ Just (_, _, _, [sub]) -> Just (chr (read ('0':sub)))
Nothing -> Nothing
-- | Returns a string containing an entity reference for the character.