about summary refs log tree commit diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2010-12-07 20:44:43 -0800
committer John MacFarlane <jgm@berkeley.edu> 2010-12-07 20:44:43 -0800
commit 33ba35da9fd584a17b6b1011daa76a8232799c18 (patch)
tree 8516d3b07845d0ff3b948d32786eabd80c9fd0c8
parent 3a5fceeef9c5f55b1769fbe95bf420ca3a20ea35 (diff)
download pandoc-33ba35da9fd584a17b6b1011daa76a8232799c18.tar.gz
Smart punctuation: recognize entities.
Now &ldquo;Hi&rdquo; gets parsed as a Quoted DoubleQuote inline.
-rw-r--r-- src/Text/Pandoc/Parsing.hs 30
-rw-r--r-- src/Text/Pandoc/Readers/Markdown.hs 2
2 files changed, 23 insertions, 9 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 104b75343..1b206e4c7 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -730,10 +730,16 @@ failIfInQuoteContext context = do
then fail "already inside quotes"
else return ()
+charOrRef :: [Char] -> GenParser Char st Char
+charOrRef cs =
+ oneOf cs <|> try (do c <- characterReference
+ guard (c `elem` cs)
+ return c)
+
singleQuoteStart :: GenParser Char ParserState ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
- try $ do oneOf "'\8216"
+ try $ do charOrRef "'\8216"
notFollowedBy (oneOf ")!],.;:-? \t\n")
notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
satisfy (not . isAlphaNum)))
@@ -742,28 +748,36 @@ singleQuoteStart = do
singleQuoteEnd :: GenParser Char st ()
singleQuoteEnd = try $ do
- oneOf "'\8217"
+ charOrRef "'\8217"
notFollowedBy alphaNum
doubleQuoteStart :: GenParser Char ParserState ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
- try $ do oneOf "\"\8220"
+ try $ do charOrRef "\"\8220"
notFollowedBy (oneOf " \t\n")
doubleQuoteEnd :: GenParser Char st ()
-doubleQuoteEnd = oneOf "\"\8221" >> return ()
+doubleQuoteEnd = do
+ charOrRef "\"\8221"
+ return ()
ellipses :: GenParser Char st Inline
-ellipses = try $ string "..." >> return Ellipses
+ellipses = do
+ try (charOrRef "…") <|> try (string "..." >> return '…')
+ return Ellipses
dash :: GenParser Char st Inline
dash = enDash <|> emDash
enDash :: GenParser Char st Inline
-enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
+enDash = do
+ try (charOrRef "–") <|>
+ try (char '-' >> notFollowedBy (noneOf "0123456789") >> return '–')
+ return EnDash
emDash :: GenParser Char st Inline
-emDash = oneOfStrings ["---", "--"] >> return EmDash
-
+emDash = do
+ try (charOrRef "—") <|> (oneOfStrings ["---", "--"] >> return '—')
+ return EmDash
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index c9f6adcb3..41eea16c5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -900,7 +900,6 @@ inlineParsers = [ str
, whitespace
, endline
, code
- , charRef
, (fourOrMore '*' <|> fourOrMore '_')
, strong
, emph
@@ -919,6 +918,7 @@ inlineParsers = [ str
, escapedChar
, exampleRef
, smartPunctuation inline
+ , charRef
, symbol
, ltSign ]