diff options
-rw-r--r-- | src/Text/Pandoc/Entities.hs | 18 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 14 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 18 |
6 files changed, 25 insertions, 43 deletions
diff --git a/src/Text/Pandoc/Entities.hs b/src/Text/Pandoc/Entities.hs index 696f943a6..e91cf3864 100644 --- a/src/Text/Pandoc/Entities.hs +++ b/src/Text/Pandoc/Entities.hs @@ -34,7 +34,6 @@ module Text.Pandoc.Entities ( encodeEntities, decodeEntities, escapeSGMLChar, - stringToSGML, characterEntity ) where import Data.Char ( chr, ord ) @@ -115,23 +114,6 @@ decodeEntities str = Left err -> error $ "\nError: " ++ show err Right result -> result --- | Escape string for SGML, preserving entity references. -stringToSGML :: String -> String -stringToSGML str = - let regular = do - str <- many1 (satisfy (not . needsEscaping)) - return str - special = do - notFollowedBy characterEntity - c <- anyChar - return $ escapeSGMLChar c - entity = do - ent <- manyTill anyChar (char ';') - return (ent ++ ";") in - case parse (many (regular <|> special <|> entity)) str str of - Left err -> error $ "\nError: " ++ show err - Right result -> concat result - entityTable :: [(String, Char)] entityTable = [ (""", chr 34), diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index fc06b657e..3fcb33698 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -445,7 +445,9 @@ extractAttribute name [] = Nothing extractAttribute name ((attrName, contents):rest) = let name' = map toLower name attrName' = map toLower attrName in - if (attrName' == name') then Just contents else extractAttribute name rest + if (attrName' == name') + then Just (decodeEntities contents) + else extractAttribute name rest link = try (do (tag, attributes) <- htmlTag "a" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 0f1ef348d..a7456426f 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -42,7 +42,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType, htmlBlockElement ) -import Text.Pandoc.Entities ( characterEntity ) +import Text.Pandoc.Entities ( characterEntity, decodeEntities ) import Text.ParserCombinators.Parsec -- | Read markdown from an input string and return a Pandoc document. @@ -144,14 +144,14 @@ authorsLine = try (do skipSpaces authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") newline - return (map removeLeadingTrailingSpace authors)) + return (map (decodeEntities . removeLeadingTrailingSpace) authors)) dateLine = try (do char '%' skipSpaces date <- many (noneOf "\n") newline - return (removeTrailingSpace date)) + return (decodeEntities $ removeTrailingSpace date)) titleBlock = try (do failIfStrict @@ -894,7 +894,7 @@ titleWith startChar endChar = try (do char endChar skipSpaces notFollowedBy (noneOf ")\n"))) - return tit) + return $ decodeEntities tit) title = choice [ titleWith '(' ')', titleWith '"' '"', diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 02f8782b2..83b64b4fb 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -72,7 +72,7 @@ module Text.Pandoc.Shared ( ) where import Text.Pandoc.Definition import Text.ParserCombinators.Parsec as Parsec -import Text.Pandoc.Entities ( decodeEntities, encodeEntities, stringToSGML ) +import Text.Pandoc.Entities ( decodeEntities, encodeEntities ) import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>), ($$), nest, Doc, isEmpty ) import Data.Char ( toLower, ord ) @@ -524,8 +524,8 @@ replaceRefLinksInline keytable other = other -- | Return a text object with a string of formatted SGML attributes. attributeList :: [(String, String)] -> Doc attributeList = text . concatMap - (\(a, b) -> " " ++ stringToSGML a ++ "=\"" ++ - stringToSGML b ++ "\"") + (\(a, b) -> " " ++ encodeEntities a ++ "=\"" ++ + encodeEntities b ++ "\"") -- | Put the supplied contents between start and end tags of tagType, -- with specified attributes and (if specified) indentation. diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 7e50f8ede..405b2978a 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -32,7 +32,7 @@ module Text.Pandoc.Writers.Docbook ( ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Entities ( encodeEntities, stringToSGML ) +import Text.Pandoc.Entities ( encodeEntities ) import Data.Char ( toLower, ord ) import Data.List ( isPrefixOf, partition, drop ) import Text.PrettyPrint.HughesPJ hiding ( Str ) @@ -64,8 +64,8 @@ authorToDocbook name = inTagsIndented "author" $ then -- last name first let (lastname, rest) = break (==',') name firstname = removeLeadingSpace rest in - inTagsSimple "firstname" (text $ stringToSGML firstname) <> - inTagsSimple "surname" (text $ stringToSGML lastname) + inTagsSimple "firstname" (text $ encodeEntities firstname) <> + inTagsSimple "surname" (text $ encodeEntities lastname) else -- last name last let namewords = words name lengthname = length namewords @@ -73,8 +73,8 @@ authorToDocbook name = inTagsIndented "author" $ 0 -> ("","") 1 -> ("", name) n -> (joinWithSep " " (take (n-1) namewords), last namewords) in - inTagsSimple "firstname" (text $ stringToSGML firstname) $$ - inTagsSimple "surname" (text $ stringToSGML lastname) + inTagsSimple "firstname" (text $ encodeEntities firstname) $$ + inTagsSimple "surname" (text $ encodeEntities lastname) -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String @@ -86,7 +86,7 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) = then inTagsIndented "articleinfo" $ (inTagsSimple "title" (wrap opts title)) $$ (vcat (map authorToDocbook authors)) $$ - (inTagsSimple "date" (text $ stringToSGML date)) + (inTagsSimple "date" (text $ encodeEntities date)) else empty blocks' = replaceReferenceLinks blocks (noteBlocks, blocks'') = partition isNoteBlock blocks' @@ -227,7 +227,7 @@ inlineToDocbook opts (Image alt (Src src tit)) = then empty else inTagsIndented "objectinfo" $ inTagsIndented "title" - (text $ stringToSGML tit) in + (text $ encodeEntities tit) in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 0f2a2b5dc..8a654e3c9 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -32,7 +32,7 @@ module Text.Pandoc.Writers.HTML ( ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Entities ( encodeEntities, stringToSGML ) +import Text.Pandoc.Entities ( encodeEntities ) import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) @@ -127,11 +127,11 @@ htmlHeader opts (Meta title authors date) = then empty else selfClosingTag "meta" [("name", "author"), ("content", - joinWithSep ", " (map stringToSGML authors))] + joinWithSep ", " (map encodeEntities authors))] datetext = if (date == "") then empty else selfClosingTag "meta" [("name", "date"), - ("content", stringToSGML date)] in + ("content", encodeEntities date)] in text (writerHeader opts) $$ authortext $$ datetext $$ titletext $$ text "</head>\n<body>" @@ -248,20 +248,18 @@ inlineToHtml opts (TeX str) = text $ encodeEntities str inlineToHtml opts (HtmlInline str) = text str inlineToHtml opts (LineBreak) = selfClosingTag "br" [] inlineToHtml opts Space = space -inlineToHtml opts (Link txt (Src src tit)) = - let title = stringToSGML tit in +inlineToHtml opts (Link txt (Src src title)) = if (isPrefixOf "mailto:" src) then obfuscateLink opts txt src - else inTags False "a" ([("href", encodeEntities src)] ++ - if null tit then [] else [("title", title)]) + else inTags False "a" ([("href", src)] ++ + if null title then [] else [("title", title)]) (inlineListToHtml opts txt) inlineToHtml opts (Link txt (Ref ref)) = char '[' <> (inlineListToHtml opts txt) <> text "][" <> (inlineListToHtml opts ref) <> char ']' -- this is what markdown does, for better or worse -inlineToHtml opts (Image alt (Src source tit)) = - let title = stringToSGML tit - alternate = render $ inlineListToHtml opts alt in +inlineToHtml opts (Image alt (Src source title)) = + let alternate = render $ inlineListToHtml opts alt in selfClosingTag "img" $ [("src", source)] ++ (if null alternate then [] else [("alt", alternate)]) ++ [("title", title)] -- note: null title is included, as in Markdown.pl |