diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 74 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 74 |
2 files changed, 114 insertions, 34 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index f82705bb2..9e966cc04 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -50,23 +50,12 @@ readLaTeX = readWith parseLaTeX testString = testStringWith parseLaTeX -- characters with special meaning -specialChars = "\\$%&^&_~#{}\n \t|<>" +specialChars = "\\$%&^&_~#{}\n \t|<>'\"-" -- -- utility functions -- --- | Change quotation marks in a string back to "basic" quotes. -normalizeQuotes :: String -> String -normalizeQuotes = gsub "''" "\"" . gsub "`" "'" - --- | Change LaTeX En dashes between digits to hyphens. -normalizeDashes :: String -> String -normalizeDashes = gsub "([0-9])--([0-9])" "\\1-\\2" - -normalizePunctuation :: String -> String -normalizePunctuation = normalizeDashes . normalizeQuotes - -- | Returns text between brackets and its matching pair. bracketedText openB closeB = try (do char openB @@ -132,10 +121,10 @@ anyEnvironment = try (do -- -- | Process LaTeX preamble, extracting metadata. -processLaTeXPreamble = do +processLaTeXPreamble = try (do manyTill (choice [bibliographic, comment, unknownCommand, nullBlock]) (try (string "\\begin{document}")) - spaces + spaces) -- | Parse LaTeX and return 'Pandoc'. parseLaTeX = do @@ -392,16 +381,13 @@ comment = try (do -- inline -- -inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots, +inline = choice [ strong, emph, ref, lab, code, linebreak, math, ellipses, + emDash, enDash, hyphen, quoted, apostrophe, accentedChar, specialChar, specialInline, escapedChar, unescapedChar, str, endline, whitespace ] <?> "inline" -specialInline = choice [ link, image, footnote, rawLaTeXInline ] <?> - "link, raw TeX, note, or image" - -ldots = try (do - string "\\ldots" - return (Str "...")) +specialInline = choice [ link, image, footnote, rawLaTeXInline ] + <?> "link, raw TeX, note, or image" accentedChar = normalAccentedChar <|> specialAccentedChar @@ -526,6 +512,49 @@ emph = try (do result <- manyTill inline (char '}') return (Emph result)) +apostrophe = do + char '\'' + return Apostrophe + +quoted = do + doubleQuoted <|> singleQuoted + +singleQuoted = try (do + result <- enclosed singleQuoteStart singleQuoteEnd inline + return $ Quoted SingleQuote $ normalizeSpaces result) + +doubleQuoted = try (do + result <- enclosed doubleQuoteStart doubleQuoteEnd inline + return $ Quoted DoubleQuote $ normalizeSpaces result) + +singleQuoteStart = char '`' + +singleQuoteEnd = try (do + char '\'' + notFollowedBy alphaNum) + +doubleQuoteStart = try (string "``") + +doubleQuoteEnd = try (string "''") + +ellipses = try (do + string "\\ldots" + option "" (string "{}") + return Ellipses) + +enDash = try (do + string "--" + notFollowedBy (char '-') + return EnDash) + +emDash = try (do + string "---" + return EmDash) + +hyphen = do + char '-' + return (Str "-") + lab = try (do string "\\label{" result <- manyTill anyChar (char '}') @@ -552,7 +581,7 @@ linebreak = try (do str = do result <- many1 (noneOf specialChars) - return (Str (normalizePunctuation result)) + return (Str result) -- endline internal to paragraph endline = try (do @@ -624,3 +653,4 @@ rawLaTeXInline = try (do then fail "not an inline command" else string "" return (TeX ("\\" ++ name ++ star ++ argStr))) + diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 30d6a11df..7fab2ad01 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -86,12 +86,13 @@ titleOpeners = "\"'(" setextHChars = ['=','-'] blockQuoteChar = '>' hyphenChar = '-' +ellipsesChar = '.' -- treat these as potentially non-text when parsing inline: specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd, emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd, autoLinkStart, mathStart, mathEnd, imageStart, noteStart, - hyphenChar] + hyphenChar, ellipsesChar] ++ quoteChars -- -- auxiliary functions @@ -120,6 +121,11 @@ failUnlessBeginningOfLine = do pos <- getPosition if sourceColumn pos == 1 then return () else fail "not beginning of line" +-- | Fail unless we're in "smart typography" mode. +failUnlessSmart = do + state <- getState + if stateSmart state then return () else fail "Smart typography feature" + -- -- document structure -- @@ -519,11 +525,11 @@ rawLaTeXEnvironment' = do -- inline -- -text = choice [ math, strong, emph, code, str, linebreak, tabchar, - whitespace, endline ] <?> "text" +text = choice [ escapedChar, math, strong, emph, smartPunctuation, + code, ltSign, symbol, + str, linebreak, tabchar, whitespace, endline ] <?> "text" -inline = choice [ rawLaTeXInline', escapedChar, special, hyphens, text, - ltSign, symbol ] <?> "inline" +inline = choice [ rawLaTeXInline', escapedChar, special, text ] <?> "inline" special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline', autoLink, image ] <?> "link, inline html, note, or image" @@ -531,6 +537,7 @@ special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline', escapedChar = escaped anyChar ltSign = try (do + notFollowedBy (noneOf "<") -- continue only if it's a < notFollowedBy' rawHtmlBlocks -- don't return < if it starts html char '<' return (Str ['<'])) @@ -541,13 +548,6 @@ symbol = do result <- oneOf specialCharsMinusLt return (Str [result]) -hyphens = try (do - result <- many1 (char '-') - if (length result) == 1 - then skipEndline -- don't want to treat endline after hyphen as a space - else do{ string ""; return Space } - return (Str result)) - -- parses inline code, between n codeStarts and n codeEnds code = try (do starts <- many1 (char codeStart) @@ -583,6 +583,56 @@ strong = do (count 2 (char emphEndAlt)) inline) ] return (Strong (normalizeSpaces result)) +smartPunctuation = do + failUnlessSmart + choice [ quoted, apostrophe, dash, ellipses ] + +apostrophe = do + char '\'' <|> char '\8217' + return Apostrophe + +quoted = do + doubleQuoted <|> singleQuoted + +singleQuoted = try (do + result <- enclosed singleQuoteStart singleQuoteEnd + (do{notFollowedBy' singleQuoted; inline} <|> apostrophe) + return $ Quoted SingleQuote $ normalizeSpaces result) + +doubleQuoted = try (do + result <- enclosed doubleQuoteStart doubleQuoteEnd inline + return $ Quoted DoubleQuote $ normalizeSpaces result) + +singleQuoteStart = try (do + char '\'' <|> char '\8216' + notFollowedBy' whitespace) + +singleQuoteEnd = try (do + oneOfStrings ["'", "\8217"] + notFollowedBy alphaNum) + +doubleQuoteStart = char '"' <|> char '\8220' + +doubleQuoteEnd = char '"' <|> char '\8221' + +ellipses = try (do + oneOfStrings ["...", " . . . ", ". . .", " . . ."] + return Ellipses) + +dash = enDash <|> emDash + +enDash = try (do + char '-' + followedBy' (many1 digit) + return EnDash) + +emDash = try (do + skipSpaces + oneOfStrings ["---", "--"] + skipSpaces + option ' ' newline + return EmDash) + whitespace = do many1 (oneOf spaceChars) <?> "whitespace" return Space |