diff options
author | John MacFarlane <jgm@berkeley.edu> | 2010-12-06 20:36:58 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2010-12-06 20:36:58 -0800 |
commit | 7864f30717767f89ee33532b59819b51ef2e14d4 (patch) | |
tree | c1a19f3b6c416cde72d929f76c453a43a7fe5c2c | |
parent | 5a4609584c84114e8d148f558bed86353c7f0146 (diff) | |
download | pandoc-7864f30717767f89ee33532b59819b51ef2e14d4.tar.gz |
Markdown reader: handle curly quotes better.
Previously, curly quotes were just parsed literally, leading
to problems in some output formats. Now they are parsed as
Quoted inlines when --smart is specified.
Resolves Issue #270.
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 29 | ||||
-rw-r--r-- | tests/RunTests.hs | 2 | ||||
-rw-r--r-- | tests/markdown-reader-more.native | 5 | ||||
-rw-r--r-- | tests/markdown-reader-more.txt | 6 |
4 files changed, 25 insertions, 17 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index feee31ec5..90e8e19bb 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -71,7 +71,7 @@ setextHChars = "=-" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\[]*_~`<>$!^-.&@'\";" +specialChars = "\\[]*_~`<>$!^-.&@'\";\8216\8217\8220\8221" -- -- auxiliary functions @@ -1076,10 +1076,11 @@ singleQuoted = try $ do return . Quoted SingleQuote . normalizeSpaces doubleQuoted :: GenParser Char ParserState Inline -doubleQuoted = try $ do +doubleQuoted = try $ do doubleQuoteStart - withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= - return . Quoted DoubleQuote . normalizeSpaces + withQuoteContext InDoubleQuote $ do + contents <- manyTill inline doubleQuoteEnd + return . Quoted DoubleQuote . normalizeSpaces $ contents failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () failIfInQuoteContext context = do @@ -1088,31 +1089,29 @@ failIfInQuoteContext context = do then fail "already inside quotes" else return () -singleQuoteStart :: GenParser Char ParserState Char +singleQuoteStart :: GenParser Char ParserState () singleQuoteStart = do failIfInQuoteContext InSingleQuote - try $ do char '\'' + try $ do oneOf "'\8216" notFollowedBy (oneOf ")!],.;:-? \t\n") notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> satisfy (not . 
isAlphaNum))) -- possess/contraction - return '\'' + return () -singleQuoteEnd :: GenParser Char st Char +singleQuoteEnd :: GenParser Char st () singleQuoteEnd = try $ do - char '\'' + oneOf "'\8217" notFollowedBy alphaNum - return '\'' -doubleQuoteStart :: GenParser Char ParserState Char +doubleQuoteStart :: GenParser Char ParserState () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote - try $ do char '"' + try $ do oneOf "\"\8220" notFollowedBy (oneOf " \t\n") - return '"' -doubleQuoteEnd :: GenParser Char st Char -doubleQuoteEnd = char '"' +doubleQuoteEnd :: GenParser Char st () +doubleQuoteEnd = oneOf "\"\8221" >> return () ellipses :: GenParser Char st Inline ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses diff --git a/tests/RunTests.hs b/tests/RunTests.hs index 94b56d04d..0d8a1a6a1 100644 --- a/tests/RunTests.hs +++ b/tests/RunTests.hs @@ -96,7 +96,7 @@ main = do "testsuite.txt" "testsuite.native" r7 <- runTest "markdown reader (tables)" ["-r", "markdown", "-w", "native"] "tables.txt" "tables.native" - r7a <- runTest "markdown reader (more)" ["-r", "markdown", "-w", "native"] + r7a <- runTest "markdown reader (more)" ["-r", "markdown", "-w", "native", "-S"] "markdown-reader-more.txt" "markdown-reader-more.native" r8 <- runTest "rst reader" ["-r", "rst", "-w", "native", "-s", "-S"] "rst-reader.rst" "rst-reader.native" diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index 55968af32..784b14ccc 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -45,4 +45,7 @@ Pandoc (Meta {docTitle = [Str "Title",Space,Str "spanning",Space,Str "multiple", , Header 2 [Str "Case",Str "-",Str "insensitive",Space,Str "references"] , Para [Link [Str "Fum"] ("/fum","")] , Para [Link [Str "FUM"] ("/fum","")] -, Para [Link [Str "bat"] ("/bat","")] ] +, Para [Link [Str "bat"] ("/bat","")] +, Header 2 [Str "Curly",Space,Str "smart",Space,Str "quotes"] +, Para [Quoted 
DoubleQuote [Str "Hi"]] +, Para [Quoted SingleQuote [Str "Hi"]] ] diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt index dd43a5df3..f4540d84d 100644 --- a/tests/markdown-reader-more.txt +++ b/tests/markdown-reader-more.txt @@ -116,3 +116,9 @@ $\tuple{x,y}$ [fum]: /fum [BAT]: /bat + +## Curly smart quotes + +“Hi” + +‘Hi’ |