aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <fiddlosopher@gmail.com>2012-02-05 22:52:00 -0800
committerJohn MacFarlane <fiddlosopher@gmail.com>2012-02-05 22:52:00 -0800
commite2c157f86f985e4ab5c702fff87b647f4ae842c8 (patch)
tree86461eed10458276a391a0a326c47675d292a654 /src/Text/Pandoc
parent9bf6e665eabca101bd88bf2634b8b5a4736493b1 (diff)
downloadpandoc-e2c157f86f985e4ab5c702fff87b647f4ae842c8.tar.gz
Removed module Text.Pandoc.CharacterReferences.
Moved characterReference parser to Text.Pandoc.Parsing. decodeCharacterReferences is now replaced by fromEntities in Text.Pandoc.XML.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/CharacterReferences.hs72
-rw-r--r--src/Text/Pandoc/Parsing.hs12
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs6
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs5
4 files changed, 16 insertions, 79 deletions
diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs
deleted file mode 100644
index 8157d94d3..000000000
--- a/src/Text/Pandoc/CharacterReferences.hs
+++ /dev/null
@@ -1,72 +0,0 @@
-{-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.CharacterReferences
- Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Functions for parsing character references.
--}
-module Text.Pandoc.CharacterReferences (
- characterReference,
- decodeCharacterReferences,
- ) where
-import Text.ParserCombinators.Parsec
-import Text.HTML.TagSoup.Entity ( lookupNamedEntity, lookupNumericEntity )
-import Data.Maybe ( fromMaybe )
-
--- | Parse character entity.
-characterReference :: GenParser Char st Char
-characterReference = try $ do
- char '&'
- character <- numRef <|> entity
- char ';'
- return character
-
-numRef :: GenParser Char st Char
-numRef = do
- char '#'
- num <- hexNum <|> decNum
- return $ fromMaybe '?' $ lookupNumericEntity num
-
-hexNum :: GenParser Char st [Char]
-hexNum = do
- x <- oneOf "Xx"
- num <- many1 hexDigit
- return (x:num)
-
-decNum :: GenParser Char st [Char]
-decNum = many1 digit
-
-entity :: GenParser Char st Char
-entity = do
- body <- many1 alphaNum
- return $ fromMaybe '?' $ lookupNamedEntity body
-
--- | Convert entities in a string to characters.
-decodeCharacterReferences :: String -> String
-decodeCharacterReferences str =
- case parse (many (characterReference <|> anyChar)) str str of
- Left err -> error $ "\nError: " ++ show err
- Right result -> result
-
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index bb0ac18cf..08769a4f4 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -51,6 +51,7 @@ module Text.Pandoc.Parsing ( (>>~),
failIfStrict,
failUnlessLHS,
escaped,
+ characterReference,
anyOrderedListMarker,
orderedListMarker,
charRef,
@@ -78,7 +79,6 @@ import Text.Pandoc.Definition
import Text.Pandoc.Generic
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.ParserCombinators.Parsec
-import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
import Data.List ( intercalate, transpose )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
@@ -86,6 +86,7 @@ import Control.Monad ( join, liftM, guard )
import Text.Pandoc.Shared
import qualified Data.Map as M
import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
+import Text.HTML.TagSoup.Entity ( lookupEntity )
-- | Like >>, but returns the operation on the left.
-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
@@ -337,6 +338,15 @@ escaped :: GenParser Char st Char -- ^ Parser for character to escape
-> GenParser Char st Char
escaped parser = try $ char '\\' >> parser
+-- | Parse character entity.
+characterReference :: GenParser Char st Char
+characterReference = try $ do
+ char '&'
+ ent <- manyTill nonspaceChar (char ';')
+ case lookupEntity ent of
+ Just c -> return c
+ Nothing -> return '?'
+
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
upperRoman :: GenParser Char st (ListNumberStyle, Int)
upperRoman = do
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 8da0f7c16..607d0971a 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -42,7 +42,7 @@ import Text.Pandoc.Parsing
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
isTextTag, isCommentTag )
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.Pandoc.XML ( fromEntities )
import Text.ParserCombinators.Parsec
import Control.Monad (when, liftM, guard, mzero)
import Text.HTML.TagSoup
@@ -244,7 +244,7 @@ referenceTitle = try $ do
<|> do delim <- char '\'' <|> char '"'
manyTill litChar (try (char delim >> skipSpaces >>
notFollowedBy (noneOf ")\n")))
- return $ decodeCharacterReferences tit
+ return $ fromEntities tit
noteMarker :: GenParser Char ParserState [Char]
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
@@ -1176,7 +1176,7 @@ linkTitle = try $ do
skipSpaces
delim <- oneOf "'\""
tit <- manyTill litChar (try (char delim >> skipSpaces >> eof))
- return $ decodeCharacterReferences tit
+ return $ fromEntities tit
link :: GenParser Char ParserState Inline
link = try $ do
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index f46d08570..f35b29370 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -31,7 +31,6 @@ Conversion of 'Pandoc' documents to HTML.
-}
module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
import Text.Pandoc.Definition
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.Pandoc.Shared
import Text.Pandoc.Templates
import Text.Pandoc.Generic
@@ -39,7 +38,7 @@ import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Slides
import Text.Pandoc.Highlighting ( highlight, styleToCss,
formatHtmlInline, formatHtmlBlock )
-import Text.Pandoc.XML (stripTags, escapeStringForXML)
+import Text.Pandoc.XML (stripTags, escapeStringForXML, fromEntities)
import Network.HTTP ( urlEncode )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
@@ -344,7 +343,7 @@ obfuscateChar char =
-- | Obfuscate string using entities.
obfuscateString :: String -> String
-obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
+obfuscateString = concatMap obfuscateChar . fromEntities
attrsToHtml :: WriterOptions -> Attr -> [Attribute]
attrsToHtml opts (id',classes',keyvals) =