diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-06 19:47:05 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-06 19:47:05 +0000 |
commit | 58dcef062556067b753892c8020d7f8c4e0bed0c (patch) | |
tree | 6716c906de2d3e4ae3ba71a8bf045980efc0a291 | |
parent | cbfe29f20577654942c04e54763ce5d4ce40a031 (diff) | |
download | pandoc-58dcef062556067b753892c8020d7f8c4e0bed0c.tar.gz |
Added support for hexadecimal entities: e.g. `&#xA0AB;`
git-svn-id: https://pandoc.googlecode.com/svn/trunk@441 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r-- | src/Text/Pandoc/Entities.hs | 12 |
1 files changed, 6 insertions, 6 deletions
```diff
diff --git a/src/Text/Pandoc/Entities.hs b/src/Text/Pandoc/Entities.hs
index 26785b9a8..ea5676b79 100644
--- a/src/Text/Pandoc/Entities.hs
+++ b/src/Text/Pandoc/Entities.hs
@@ -39,13 +39,13 @@ import Data.Char ( chr, ord )
 import Text.Regex ( mkRegex, matchRegexAll, Regex )
 import Maybe ( fromMaybe )
 
--- | Regular expression for decimal coded entity.
-decimalCodedEntity :: Text.Regex.Regex
-decimalCodedEntity = mkRegex "&#([0-9]+);"
+-- | Regular expression for numerical coded entity.
+numericalEntity :: Text.Regex.Regex
+numericalEntity = mkRegex "&#([0-9]+|[xX][0-9A-Fa-f]+);"
 
 -- | Regular expression for character entity.
 characterEntity :: Text.Regex.Regex
-characterEntity = mkRegex "&#[0-9]+;|&[A-Za-z0-9]+;"
+characterEntity = mkRegex "&#[0-9]+;|&#[xX][0-9A-Fa-f]+;|&[A-Za-z0-9]+;"
 
 -- | Return a string with all entity references decoded to unicode characters
 -- where possible.
@@ -73,8 +73,8 @@ entityToChar :: String -> Maybe Char
 entityToChar entity =
   case (lookup entity entityTable) of
     Just ch -> Just ch
-    Nothing -> case (matchRegexAll decimalCodedEntity entity) of
-      Just (_, _, _, [sub]) -> Just (chr (read sub))
+    Nothing -> case (matchRegexAll numericalEntity entity) of
+      Just (_, _, _, [sub]) -> Just (chr (read ('0':sub)))
       Nothing -> Nothing
 
 -- | Returns a string containing an entity reference for the character.
```