From 58dcef062556067b753892c8020d7f8c4e0bed0c Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sat, 6 Jan 2007 19:47:05 +0000 Subject: Added support for hexadecimal entities: e.g. &#xA2AB; git-svn-id: https://pandoc.googlecode.com/svn/trunk@441 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Entities.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Entities.hs b/src/Text/Pandoc/Entities.hs index 26785b9a8..ea5676b79 100644 --- a/src/Text/Pandoc/Entities.hs +++ b/src/Text/Pandoc/Entities.hs @@ -39,13 +39,13 @@ import Data.Char ( chr, ord ) import Text.Regex ( mkRegex, matchRegexAll, Regex ) import Maybe ( fromMaybe ) --- | Regular expression for decimal coded entity. -decimalCodedEntity :: Text.Regex.Regex -decimalCodedEntity = mkRegex "&#([0-9]+);" +-- | Regular expression for numerical coded entity. +numericalEntity :: Text.Regex.Regex +numericalEntity = mkRegex "&#([0-9]+|[xX][0-9A-Fa-f]+);" -- | Regular expression for character entity. characterEntity :: Text.Regex.Regex -characterEntity = mkRegex "&#[0-9]+;|&[A-Za-z0-9]+;" +characterEntity = mkRegex "&#[0-9]+;|&#[xX][0-9A-Fa-f]+;|&[A-Za-z0-9]+;" -- | Return a string with all entity references decoded to unicode characters -- where possible. @@ -73,8 +73,8 @@ entityToChar :: String -> Maybe Char entityToChar entity = case (lookup entity entityTable) of Just ch -> Just ch - Nothing -> case (matchRegexAll decimalCodedEntity entity) of - Just (_, _, _, [sub]) -> Just (chr (read sub)) + Nothing -> case (matchRegexAll numericalEntity entity) of + Just (_, _, _, [sub]) -> Just (chr (read ('0':sub))) Nothing -> Nothing -- | Returns a string containing an entity reference for the character. -- cgit v1.2.3