about | summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 15
-rw-r--r-- src/Text/Pandoc/Readers/Markdown.hs 13
2 files changed, 14 insertions, 14 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 135a90ea8..fc06b657e 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -44,7 +44,7 @@ import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Entities ( decodeEntities, entityToChar )
+import Text.Pandoc.Entities ( characterEntity, decodeEntities )
import Maybe ( fromMaybe )
import Data.List ( intersect, takeWhile, dropWhile )
import Data.Char ( toUpper, toLower, isAlphaNum )
@@ -391,14 +391,9 @@ text = choice [ entity, strong, emph, code, str, linebreak, whitespace ] <?> "t
special = choice [ link, image, rawHtmlInline ] <?>
"link, inline html, or image"
-entity = try (do
- char '&'
- body <- choice [(many1 letter), (try (do
- char '#'
- num <- many1 digit
- return ("#" ++ num)))]
- char ';'
- return (Str [fromMaybe '?' (entityToChar ("&" ++ body ++ ";"))]))
+entity = do
+ ent <- characterEntity
+ return $ Str [ent]
code = try (do
htmlTag "code"
@@ -439,7 +434,7 @@ linebreak = do
str = do
result <- many1 (noneOf "<& \t\n")
- return (Str (decodeEntities result))
+ return (Str result)
--
-- links and images
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 3aa0a6f12..9b3f047e9 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -42,7 +42,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock,
anyHtmlTag, anyHtmlEndTag,
htmlEndTag, extractTagType,
htmlBlockElement )
-import Text.Pandoc.Entities ( decodeEntities )
+import Text.Pandoc.Entities ( characterEntity )
import Text.ParserCombinators.Parsec
-- | Read markdown from an input string and return a Pandoc document.
@@ -88,12 +88,13 @@ blockQuoteChar = '>'
hyphenChar = '-'
ellipsesChar = '.'
listColSepChar = '|'
+entityStart = '&'
-- treat these as potentially non-text when parsing inline:
specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd,
emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd,
autoLinkStart, mathStart, mathEnd, imageStart, noteStart,
- hyphenChar, ellipsesChar] ++ quoteChars
+ hyphenChar, ellipsesChar, entityStart] ++ quoteChars
--
-- auxiliary functions
@@ -674,7 +675,7 @@ text = choice [ escapedChar, math, strong, emph, smartPunctuation,
code, ltSign, symbol,
str, linebreak, tabchar, whitespace, endline ] <?> "text"
-inline = choice [ rawLaTeXInline', escapedChar, special, text ] <?> "inline"
+inline = choice [ rawLaTeXInline', escapedChar, entity, special, text ] <?> "inline"
special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline',
autoLink, image ] <?> "link, inline html, note, or image"
@@ -827,9 +828,13 @@ linebreak = try (do
nonEndline = noneOf endLineChars
+entity = do
+ ent <- characterEntity
+ return $ Str [ent]
+
str = do
result <- many1 ((noneOf (specialChars ++ spaceChars ++ endLineChars)))
- return (Str (decodeEntities result))
+ return (Str result)
-- an endline character that can be treated as a space, not a structural break
endline = try (do