aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Markdown.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs74
1 files changed, 62 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 30d6a11df..7fab2ad01 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -86,12 +86,13 @@ titleOpeners = "\"'("
setextHChars = ['=','-']
blockQuoteChar = '>'
hyphenChar = '-'
+ellipsesChar = '.'
-- treat these as potentially non-text when parsing inline:
specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd,
emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd,
autoLinkStart, mathStart, mathEnd, imageStart, noteStart,
- hyphenChar]
+ hyphenChar, ellipsesChar] ++ quoteChars
--
-- auxiliary functions
@@ -120,6 +121,11 @@ failUnlessBeginningOfLine = do
pos <- getPosition
if sourceColumn pos == 1 then return () else fail "not beginning of line"
+-- | Fail unless we're in "smart typography" mode.
+failUnlessSmart = do
+ state <- getState
+ if stateSmart state then return () else fail "Smart typography feature"
+
--
-- document structure
--
@@ -519,11 +525,11 @@ rawLaTeXEnvironment' = do
-- inline
--
-text = choice [ math, strong, emph, code, str, linebreak, tabchar,
- whitespace, endline ] <?> "text"
+text = choice [ escapedChar, math, strong, emph, smartPunctuation,
+ code, ltSign, symbol,
+ str, linebreak, tabchar, whitespace, endline ] <?> "text"
-inline = choice [ rawLaTeXInline', escapedChar, special, hyphens, text,
- ltSign, symbol ] <?> "inline"
+inline = choice [ rawLaTeXInline', escapedChar, special, text ] <?> "inline"
special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline',
autoLink, image ] <?> "link, inline html, note, or image"
@@ -531,6 +537,7 @@ special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline',
escapedChar = escaped anyChar
ltSign = try (do
+ notFollowedBy (noneOf "<") -- continue only if it's a <
notFollowedBy' rawHtmlBlocks -- don't return < if it starts html
char '<'
return (Str ['<']))
@@ -541,13 +548,6 @@ symbol = do
result <- oneOf specialCharsMinusLt
return (Str [result])
-hyphens = try (do
- result <- many1 (char '-')
- if (length result) == 1
- then skipEndline -- don't want to treat endline after hyphen as a space
- else do{ string ""; return Space }
- return (Str result))
-
-- parses inline code, between n codeStarts and n codeEnds
code = try (do
starts <- many1 (char codeStart)
@@ -583,6 +583,56 @@ strong = do
(count 2 (char emphEndAlt)) inline) ]
return (Strong (normalizeSpaces result))
+smartPunctuation = do
+ failUnlessSmart
+ choice [ quoted, apostrophe, dash, ellipses ]
+
+apostrophe = do
+ char '\'' <|> char '\8217'
+ return Apostrophe
+
+quoted = do
+ doubleQuoted <|> singleQuoted
+
+singleQuoted = try (do
+ result <- enclosed singleQuoteStart singleQuoteEnd
+ (do{notFollowedBy' singleQuoted; inline} <|> apostrophe)
+ return $ Quoted SingleQuote $ normalizeSpaces result)
+
+doubleQuoted = try (do
+ result <- enclosed doubleQuoteStart doubleQuoteEnd inline
+ return $ Quoted DoubleQuote $ normalizeSpaces result)
+
+singleQuoteStart = try (do
+ char '\'' <|> char '\8216'
+ notFollowedBy' whitespace)
+
+singleQuoteEnd = try (do
+ oneOfStrings ["'", "\8217"]
+ notFollowedBy alphaNum)
+
+doubleQuoteStart = char '"' <|> char '\8220'
+
+doubleQuoteEnd = char '"' <|> char '\8221'
+
+ellipses = try (do
+ oneOfStrings ["...", " . . . ", ". . .", " . . ."]
+ return Ellipses)
+
+dash = enDash <|> emDash
+
+enDash = try (do
+ char '-'
+ followedBy' (many1 digit)
+ return EnDash)
+
+emDash = try (do
+ skipSpaces
+ oneOfStrings ["---", "--"]
+ skipSpaces
+ option ' ' newline
+ return EmDash)
+
whitespace = do
many1 (oneOf spaceChars) <?> "whitespace"
return Space