about | summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- src/Text/Pandoc/Readers/LaTeX.hs 74
-rw-r--r-- src/Text/Pandoc/Readers/Markdown.hs 74
2 files changed, 114 insertions, 34 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index f82705bb2..9e966cc04 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -50,23 +50,12 @@ readLaTeX = readWith parseLaTeX
testString = testStringWith parseLaTeX
-- characters with special meaning
-specialChars = "\\$%&^&_~#{}\n \t|<>"
+specialChars = "\\$%&^&_~#{}\n \t|<>'\"-"
--
-- utility functions
--
--- | Change quotation marks in a string back to "basic" quotes.
-normalizeQuotes :: String -> String
-normalizeQuotes = gsub "''" "\"" . gsub "`" "'"
-
--- | Change LaTeX En dashes between digits to hyphens.
-normalizeDashes :: String -> String
-normalizeDashes = gsub "([0-9])--([0-9])" "\\1-\\2"
-
-normalizePunctuation :: String -> String
-normalizePunctuation = normalizeDashes . normalizeQuotes
-
-- | Returns text between brackets and its matching pair.
bracketedText openB closeB = try (do
char openB
@@ -132,10 +121,10 @@ anyEnvironment = try (do
--
-- | Process LaTeX preamble, extracting metadata.
-processLaTeXPreamble = do
+processLaTeXPreamble = try (do
manyTill (choice [bibliographic, comment, unknownCommand, nullBlock])
(try (string "\\begin{document}"))
- spaces
+ spaces)
-- | Parse LaTeX and return 'Pandoc'.
parseLaTeX = do
@@ -392,16 +381,13 @@ comment = try (do
-- inline
--
-inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots,
+inline = choice [ strong, emph, ref, lab, code, linebreak, math, ellipses,
+ emDash, enDash, hyphen, quoted, apostrophe,
accentedChar, specialChar, specialInline, escapedChar,
unescapedChar, str, endline, whitespace ] <?> "inline"
-specialInline = choice [ link, image, footnote, rawLaTeXInline ] <?>
- "link, raw TeX, note, or image"
-
-ldots = try (do
- string "\\ldots"
- return (Str "..."))
+specialInline = choice [ link, image, footnote, rawLaTeXInline ]
+ <?> "link, raw TeX, note, or image"
accentedChar = normalAccentedChar <|> specialAccentedChar
@@ -526,6 +512,49 @@ emph = try (do
result <- manyTill inline (char '}')
return (Emph result))
+-- | Parse a lone @'@ character as an 'Apostrophe' inline.
+apostrophe = char '\'' >> return Apostrophe
+
+-- | Parse quoted text, trying the double-quoted form before the
+-- single-quoted one.
+quoted = doubleQuoted <|> singleQuoted
+
+-- | Parse inlines enclosed between 'singleQuoteStart' and 'singleQuoteEnd'
+-- and wrap them, after 'normalizeSpaces', in a single-quoted 'Quoted' inline.
+singleQuoted = try $
+  enclosed singleQuoteStart singleQuoteEnd inline >>=
+    return . Quoted SingleQuote . normalizeSpaces
+
+-- | Parse inlines enclosed between 'doubleQuoteStart' and 'doubleQuoteEnd'
+-- and wrap them, after 'normalizeSpaces', in a double-quoted 'Quoted' inline.
+doubleQuoted = try $
+  enclosed doubleQuoteStart doubleQuoteEnd inline >>=
+    return . Quoted DoubleQuote . normalizeSpaces
+
+-- | Opening single quote: a backtick.
+singleQuoteStart = char '`'
+
+-- | Closing single quote: a @'@ that is not immediately followed by an
+-- alphanumeric character.
+singleQuoteEnd = try (char '\'' >> notFollowedBy alphaNum)
+
+-- | Opening double quote: two backticks.
+doubleQuoteStart = try (string "``")
+
+-- | Closing double quote: two @'@ characters.
+doubleQuoteEnd = try (string "''")
+
+-- | Parse @\ldots@, optionally followed by an empty group @{}@, as 'Ellipses'.
+ellipses = try (do
+  string "\\ldots"
+  -- 'string' consumes input on a partial match, so the optional "{}" must be
+  -- wrapped in 'try'; otherwise a following non-empty group such as "{x}"
+  -- makes the whole parser fail instead of just skipping the optional part.
+  option "" (try (string "{}"))
+  return Ellipses)
+
+-- | Parse exactly two hyphens (not followed by a third) as 'EnDash'.
+enDash = try (string "--" >> notFollowedBy (char '-') >> return EnDash)
+
+-- | Parse three consecutive hyphens as 'EmDash'.
+emDash = try (string "---" >> return EmDash)
+
+-- | Parse a single hyphen as literal text.
+hyphen = char '-' >> return (Str "-")
+
lab = try (do
string "\\label{"
result <- manyTill anyChar (char '}')
@@ -552,7 +581,7 @@ linebreak = try (do
str = do
result <- many1 (noneOf specialChars)
- return (Str (normalizePunctuation result))
+ return (Str result)
-- endline internal to paragraph
endline = try (do
@@ -624,3 +653,4 @@ rawLaTeXInline = try (do
then fail "not an inline command"
else string ""
return (TeX ("\\" ++ name ++ star ++ argStr)))
+
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 30d6a11df..7fab2ad01 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -86,12 +86,13 @@ titleOpeners = "\"'("
setextHChars = ['=','-']
blockQuoteChar = '>'
hyphenChar = '-'
+ellipsesChar = '.'
-- treat these as potentially non-text when parsing inline:
specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd,
emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd,
autoLinkStart, mathStart, mathEnd, imageStart, noteStart,
- hyphenChar]
+ hyphenChar, ellipsesChar] ++ quoteChars
--
-- auxiliary functions
@@ -120,6 +121,11 @@ failUnlessBeginningOfLine = do
pos <- getPosition
if sourceColumn pos == 1 then return () else fail "not beginning of line"
+-- | Succeed (returning unit) only when the parser state has 'stateSmart'
+-- set, i.e. "smart typography" mode is on; otherwise fail.
+failUnlessSmart = getState >>= \st ->
+  if stateSmart st
+     then return ()
+     else fail "Smart typography feature"
+
--
-- document structure
--
@@ -519,11 +525,11 @@ rawLaTeXEnvironment' = do
-- inline
--
-text = choice [ math, strong, emph, code, str, linebreak, tabchar,
- whitespace, endline ] <?> "text"
+text = choice [ escapedChar, math, strong, emph, smartPunctuation,
+ code, ltSign, symbol,
+ str, linebreak, tabchar, whitespace, endline ] <?> "text"
-inline = choice [ rawLaTeXInline', escapedChar, special, hyphens, text,
- ltSign, symbol ] <?> "inline"
+inline = choice [ rawLaTeXInline', escapedChar, special, text ] <?> "inline"
special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline',
autoLink, image ] <?> "link, inline html, note, or image"
@@ -531,6 +537,7 @@ special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline',
escapedChar = escaped anyChar
ltSign = try (do
+ notFollowedBy (noneOf "<") -- continue only if it's a <
notFollowedBy' rawHtmlBlocks -- don't return < if it starts html
char '<'
return (Str ['<']))
@@ -541,13 +548,6 @@ symbol = do
result <- oneOf specialCharsMinusLt
return (Str [result])
-hyphens = try (do
- result <- many1 (char '-')
- if (length result) == 1
- then skipEndline -- don't want to treat endline after hyphen as a space
- else do{ string ""; return Space }
- return (Str result))
-
-- parses inline code, between n codeStarts and n codeEnds
code = try (do
starts <- many1 (char codeStart)
@@ -583,6 +583,56 @@ strong = do
(count 2 (char emphEndAlt)) inline) ]
return (Strong (normalizeSpaces result))
+-- | Parse smart punctuation (quotes, apostrophes, dashes, ellipses).
+-- Gated by 'failUnlessSmart', which runs before any alternative is tried.
+smartPunctuation =
+  failUnlessSmart >> choice [ quoted, apostrophe, dash, ellipses ]
+
+-- | Parse a straight (@'@) or curly (U+2019) apostrophe as 'Apostrophe'.
+apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
+
+-- | Parse quoted text, trying the double-quoted form before the
+-- single-quoted one.
+quoted = doubleQuoted <|> singleQuoted
+
+-- | Parse a single-quoted span into a 'Quoted' 'SingleQuote' inline.
+-- Each item inside the quotes is either an inline at a position where
+-- 'singleQuoted' itself would not match, or an apostrophe.
+singleQuoted = try $
+  enclosed singleQuoteStart singleQuoteEnd quotedItem >>=
+    return . Quoted SingleQuote . normalizeSpaces
+  where quotedItem = (notFollowedBy' singleQuoted >> inline) <|> apostrophe
+
+-- | Parse inlines enclosed between 'doubleQuoteStart' and 'doubleQuoteEnd'
+-- and wrap them, after 'normalizeSpaces', in a double-quoted 'Quoted' inline.
+doubleQuoted = try $
+  enclosed doubleQuoteStart doubleQuoteEnd inline >>=
+    return . Quoted DoubleQuote . normalizeSpaces
+
+-- | Opening single quote: @'@ or U+2018, not followed by whitespace.
+singleQuoteStart =
+  try ((char '\'' <|> char '\8216') >> notFollowedBy' whitespace)
+
+-- | Closing single quote: @'@ or U+2019, not followed by an alphanumeric
+-- character.
+singleQuoteEnd = try (oneOfStrings ["'", "\8217"] >> notFollowedBy alphaNum)
+
+-- | Opening double quote: @"@ or U+201C.
+doubleQuoteStart = char '"' <|> char '\8220'
+
+-- | Closing double quote: @"@ or U+201D.
+doubleQuoteEnd = char '"' <|> char '\8221'
+
+-- | Parse any of the listed spellings of three periods (contiguous, or
+-- separated by single spaces with optional leading/trailing space) as
+-- 'Ellipses'.
+ellipses = try $
+  oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
+
+-- | Parse a dash: an en dash if possible, otherwise an em dash.
+dash = enDash <|> emDash
+
+-- | Parse a single hyphen that is followed by one or more digits as 'EnDash'.
+-- NOTE(review): 'followedBy'' is defined elsewhere; presumably it only looks
+-- ahead and leaves the digits unconsumed -- confirm.
+enDash = try (char '-' >> followedBy' (many1 digit) >> return EnDash)
+
+-- | Parse @---@ or @--@ as 'EmDash'. 'skipSpaces' runs on both sides of the
+-- hyphens, and a single newline after them is consumed if present.
+emDash = try $
+  skipSpaces >> oneOfStrings ["---", "--"] >> skipSpaces >>
+  option ' ' newline >> return EmDash
+
whitespace = do
many1 (oneOf spaceChars) <?> "whitespace"
return Space