aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Entities.hs18
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs4
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs8
-rw-r--r--src/Text/Pandoc/Shared.hs6
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs14
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs18
6 files changed, 25 insertions, 43 deletions
diff --git a/src/Text/Pandoc/Entities.hs b/src/Text/Pandoc/Entities.hs
index 696f943a6..e91cf3864 100644
--- a/src/Text/Pandoc/Entities.hs
+++ b/src/Text/Pandoc/Entities.hs
@@ -34,7 +34,6 @@ module Text.Pandoc.Entities (
encodeEntities,
decodeEntities,
escapeSGMLChar,
- stringToSGML,
characterEntity
) where
import Data.Char ( chr, ord )
@@ -115,23 +114,6 @@ decodeEntities str =
Left err -> error $ "\nError: " ++ show err
Right result -> result
--- | Escape string for SGML, preserving entity references.
-stringToSGML :: String -> String
-stringToSGML str =
- let regular = do
- str <- many1 (satisfy (not . needsEscaping))
- return str
- special = do
- notFollowedBy characterEntity
- c <- anyChar
- return $ escapeSGMLChar c
- entity = do
- ent <- manyTill anyChar (char ';')
- return (ent ++ ";") in
- case parse (many (regular <|> special <|> entity)) str str of
- Left err -> error $ "\nError: " ++ show err
- Right result -> concat result
-
entityTable :: [(String, Char)]
entityTable = [
("&quot;", chr 34),
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index fc06b657e..3fcb33698 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -445,7 +445,9 @@ extractAttribute name [] = Nothing
extractAttribute name ((attrName, contents):rest) =
let name' = map toLower name
attrName' = map toLower attrName in
- if (attrName' == name') then Just contents else extractAttribute name rest
+ if (attrName' == name')
+ then Just (decodeEntities contents)
+ else extractAttribute name rest
link = try (do
(tag, attributes) <- htmlTag "a"
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 0f1ef348d..a7456426f 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -42,7 +42,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock,
anyHtmlTag, anyHtmlEndTag,
htmlEndTag, extractTagType,
htmlBlockElement )
-import Text.Pandoc.Entities ( characterEntity )
+import Text.Pandoc.Entities ( characterEntity, decodeEntities )
import Text.ParserCombinators.Parsec
-- | Read markdown from an input string and return a Pandoc document.
@@ -144,14 +144,14 @@ authorsLine = try (do
skipSpaces
authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
newline
- return (map removeLeadingTrailingSpace authors))
+ return (map (decodeEntities . removeLeadingTrailingSpace) authors))
dateLine = try (do
char '%'
skipSpaces
date <- many (noneOf "\n")
newline
- return (removeTrailingSpace date))
+ return (decodeEntities $ removeTrailingSpace date))
titleBlock = try (do
failIfStrict
@@ -894,7 +894,7 @@ titleWith startChar endChar = try (do
char endChar
skipSpaces
notFollowedBy (noneOf ")\n")))
- return tit)
+ return $ decodeEntities tit)
title = choice [ titleWith '(' ')',
titleWith '"' '"',
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 02f8782b2..83b64b4fb 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -72,7 +72,7 @@ module Text.Pandoc.Shared (
) where
import Text.Pandoc.Definition
import Text.ParserCombinators.Parsec as Parsec
-import Text.Pandoc.Entities ( decodeEntities, encodeEntities, stringToSGML )
+import Text.Pandoc.Entities ( decodeEntities, encodeEntities )
import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>),
($$), nest, Doc, isEmpty )
import Data.Char ( toLower, ord )
@@ -524,8 +524,8 @@ replaceRefLinksInline keytable other = other
-- | Return a text object with a string of formatted SGML attributes.
attributeList :: [(String, String)] -> Doc
attributeList = text . concatMap
- (\(a, b) -> " " ++ stringToSGML a ++ "=\"" ++
- stringToSGML b ++ "\"")
+ (\(a, b) -> " " ++ encodeEntities a ++ "=\"" ++
+ encodeEntities b ++ "\"")
-- | Put the supplied contents between start and end tags of tagType,
-- with specified attributes and (if specified) indentation.
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 7e50f8ede..405b2978a 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -32,7 +32,7 @@ module Text.Pandoc.Writers.Docbook (
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Entities ( encodeEntities, stringToSGML )
+import Text.Pandoc.Entities ( encodeEntities )
import Data.Char ( toLower, ord )
import Data.List ( isPrefixOf, partition, drop )
import Text.PrettyPrint.HughesPJ hiding ( Str )
@@ -64,8 +64,8 @@ authorToDocbook name = inTagsIndented "author" $
then -- last name first
let (lastname, rest) = break (==',') name
firstname = removeLeadingSpace rest in
- inTagsSimple "firstname" (text $ stringToSGML firstname) <>
- inTagsSimple "surname" (text $ stringToSGML lastname)
+ inTagsSimple "firstname" (text $ encodeEntities firstname) <>
+ inTagsSimple "surname" (text $ encodeEntities lastname)
else -- last name last
let namewords = words name
lengthname = length namewords
@@ -73,8 +73,8 @@ authorToDocbook name = inTagsIndented "author" $
0 -> ("","")
1 -> ("", name)
n -> (joinWithSep " " (take (n-1) namewords), last namewords) in
- inTagsSimple "firstname" (text $ stringToSGML firstname) $$
- inTagsSimple "surname" (text $ stringToSGML lastname)
+ inTagsSimple "firstname" (text $ encodeEntities firstname) $$
+ inTagsSimple "surname" (text $ encodeEntities lastname)
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: WriterOptions -> Pandoc -> String
@@ -86,7 +86,7 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) =
then inTagsIndented "articleinfo" $
(inTagsSimple "title" (wrap opts title)) $$
(vcat (map authorToDocbook authors)) $$
- (inTagsSimple "date" (text $ stringToSGML date))
+ (inTagsSimple "date" (text $ encodeEntities date))
else empty
blocks' = replaceReferenceLinks blocks
(noteBlocks, blocks'') = partition isNoteBlock blocks'
@@ -227,7 +227,7 @@ inlineToDocbook opts (Image alt (Src src tit)) =
then empty
else inTagsIndented "objectinfo" $
inTagsIndented "title"
- (text $ stringToSGML tit) in
+ (text $ encodeEntities tit) in
inTagsIndented "inlinemediaobject" $
inTagsIndented "imageobject" $
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 0f2a2b5dc..8a654e3c9 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -32,7 +32,7 @@ module Text.Pandoc.Writers.HTML (
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Entities ( encodeEntities, stringToSGML )
+import Text.Pandoc.Entities ( encodeEntities )
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
@@ -127,11 +127,11 @@ htmlHeader opts (Meta title authors date) =
then empty
else selfClosingTag "meta" [("name", "author"),
("content",
- joinWithSep ", " (map stringToSGML authors))]
+ joinWithSep ", " (map encodeEntities authors))]
datetext = if (date == "")
then empty
else selfClosingTag "meta" [("name", "date"),
- ("content", stringToSGML date)] in
+ ("content", encodeEntities date)] in
text (writerHeader opts) $$ authortext $$ datetext $$ titletext $$
text "</head>\n<body>"
@@ -248,20 +248,18 @@ inlineToHtml opts (TeX str) = text $ encodeEntities str
inlineToHtml opts (HtmlInline str) = text str
inlineToHtml opts (LineBreak) = selfClosingTag "br" []
inlineToHtml opts Space = space
-inlineToHtml opts (Link txt (Src src tit)) =
- let title = stringToSGML tit in
+inlineToHtml opts (Link txt (Src src title)) =
if (isPrefixOf "mailto:" src)
then obfuscateLink opts txt src
- else inTags False "a" ([("href", encodeEntities src)] ++
- if null tit then [] else [("title", title)])
+ else inTags False "a" ([("href", src)] ++
+ if null title then [] else [("title", title)])
(inlineListToHtml opts txt)
inlineToHtml opts (Link txt (Ref ref)) =
char '[' <> (inlineListToHtml opts txt) <> text "][" <>
(inlineListToHtml opts ref) <> char ']'
-- this is what markdown does, for better or worse
-inlineToHtml opts (Image alt (Src source tit)) =
- let title = stringToSGML tit
- alternate = render $ inlineListToHtml opts alt in
+inlineToHtml opts (Image alt (Src source title)) =
+ let alternate = render $ inlineListToHtml opts alt in
selfClosingTag "img" $ [("src", source)] ++
(if null alternate then [] else [("alt", alternate)]) ++
[("title", title)] -- note: null title is included, as in Markdown.pl