From e2c157f86f985e4ab5c702fff87b647f4ae842c8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 5 Feb 2012 22:52:00 -0800 Subject: Removed module Text.Pandoc.CharacterReferences. Moved characterReference parser to Text.Pandoc.Parsing. decodeCharacterReferences is now replaced by fromEntities in Text.Pandoc.XML. --- src/Text/Pandoc/CharacterReferences.hs | 72 ---------------------------------- src/Text/Pandoc/Parsing.hs | 12 +++++- src/Text/Pandoc/Readers/Markdown.hs | 6 +-- src/Text/Pandoc/Writers/HTML.hs | 5 +-- 4 files changed, 16 insertions(+), 79 deletions(-) delete mode 100644 src/Text/Pandoc/CharacterReferences.hs (limited to 'src') diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs deleted file mode 100644 index 8157d94d3..000000000 --- a/src/Text/Pandoc/CharacterReferences.hs +++ /dev/null @@ -1,72 +0,0 @@ -{- -Copyright (C) 2006-2010 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.CharacterReferences - Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Functions for parsing character references. --} -module Text.Pandoc.CharacterReferences ( - characterReference, - decodeCharacterReferences, - ) where -import Text.ParserCombinators.Parsec -import Text.HTML.TagSoup.Entity ( lookupNamedEntity, lookupNumericEntity ) -import Data.Maybe ( fromMaybe ) - --- | Parse character entity. -characterReference :: GenParser Char st Char -characterReference = try $ do - char '&' - character <- numRef <|> entity - char ';' - return character - -numRef :: GenParser Char st Char -numRef = do - char '#' - num <- hexNum <|> decNum - return $ fromMaybe '?' $ lookupNumericEntity num - -hexNum :: GenParser Char st [Char] -hexNum = do - x <- oneOf "Xx" - num <- many1 hexDigit - return (x:num) - -decNum :: GenParser Char st [Char] -decNum = many1 digit - -entity :: GenParser Char st Char -entity = do - body <- many1 alphaNum - return $ fromMaybe '?' $ lookupNamedEntity body - --- | Convert entities in a string to characters. -decodeCharacterReferences :: String -> String -decodeCharacterReferences str = - case parse (many (characterReference <|> anyChar)) str str of - Left err -> error $ "\nError: " ++ show err - Right result -> result - diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index bb0ac18cf..08769a4f4 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -51,6 +51,7 @@ module Text.Pandoc.Parsing ( (>>~), failIfStrict, failUnlessLHS, escaped, + characterReference, anyOrderedListMarker, orderedListMarker, charRef, @@ -78,7 +79,6 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.ParserCombinators.Parsec -import Text.Pandoc.CharacterReferences ( characterReference ) import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) @@ -86,6 +86,7 @@ import Control.Monad ( join, liftM, guard ) import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions) +import Text.HTML.TagSoup.Entity ( lookupEntity ) -- | Like >>, but returns the operation on the left. -- (Suggested by Tillmann Rendel on Haskell-cafe list.) @@ -337,6 +338,15 @@ escaped :: GenParser Char st Char -- ^ Parser for character to escape -> GenParser Char st Char escaped parser = try $ char '\\' >> parser +-- | Parse character entity. +characterReference :: GenParser Char st Char +characterReference = try $ do + char '&' + ent <- manyTill nonspaceChar (char ';') + case lookupEntity ent of + Just c -> return c + Nothing -> return '?' + -- | Parses an uppercase roman numeral and returns (UpperRoman, number). upperRoman :: GenParser Char st (ListNumberStyle, Int) upperRoman = do diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 8da0f7c16..607d0971a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -42,7 +42,7 @@ import Text.Pandoc.Parsing import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) +import Text.Pandoc.XML ( fromEntities ) import Text.ParserCombinators.Parsec import Control.Monad (when, liftM, guard, mzero) import Text.HTML.TagSoup @@ -244,7 +244,7 @@ referenceTitle = try $ do <|> do delim <- char '\'' <|> char '"' manyTill litChar (try (char delim >> skipSpaces >> notFollowedBy (noneOf ")\n"))) - return $ decodeCharacterReferences tit + return $ fromEntities tit noteMarker :: GenParser Char ParserState [Char] noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') @@ -1176,7 +1176,7 @@ linkTitle = try $ do skipSpaces delim <- oneOf "'\"" tit <- manyTill litChar (try (char delim >> skipSpaces >> eof)) - return $ decodeCharacterReferences tit + return $ fromEntities tit link :: GenParser Char ParserState Inline link = try $ do diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index f46d08570..f35b29370 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -31,7 +31,6 @@ Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where import Text.Pandoc.Definition -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.Pandoc.Shared import Text.Pandoc.Templates import Text.Pandoc.Generic @@ -39,7 +38,7 @@ import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Slides import Text.Pandoc.Highlighting ( highlight, styleToCss, formatHtmlInline, formatHtmlBlock ) -import Text.Pandoc.XML (stripTags, escapeStringForXML) +import Text.Pandoc.XML (stripTags, escapeStringForXML, fromEntities) import Network.HTTP ( urlEncode ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) @@ -344,7 +343,7 @@ obfuscateChar char = -- | Obfuscate string using entities. obfuscateString :: String -> String -obfuscateString = concatMap obfuscateChar . decodeCharacterReferences +obfuscateString = concatMap obfuscateChar . fromEntities attrsToHtml :: WriterOptions -> Attr -> [Attribute] attrsToHtml opts (id',classes',keyvals) = -- cgit v1.2.3