diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-06 09:54:58 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-06 09:54:58 +0000 |
commit | bb8478e4e24b431ca81ee7f669d517eb11a47500 (patch) | |
tree | 3dedfbceaf88404e531b81e37f07b8f026f07ebb /src/Text/Pandoc/Readers | |
parent | 06e6107f535ae921f4b1fec2e7de7dd98b793435 (diff) | |
download | pandoc-bb8478e4e24b431ca81ee7f669d517eb11a47500.tar.gz |
Merged changes from 'quotes' branch since r431. Smart typography
is now handled in the Markdown and LaTeX readers, rather than in
the writers. The HTML writer has been rewritten to use the
prettyprinting library.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@436 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 74 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 74 |
2 files changed, 114 insertions, 34 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index f82705bb2..9e966cc04 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -50,23 +50,12 @@ readLaTeX = readWith parseLaTeX testString = testStringWith parseLaTeX -- characters with special meaning -specialChars = "\\$%&^&_~#{}\n \t|<>" +specialChars = "\\$%&^&_~#{}\n \t|<>'\"-" -- -- utility functions -- --- | Change quotation marks in a string back to "basic" quotes. -normalizeQuotes :: String -> String -normalizeQuotes = gsub "''" "\"" . gsub "`" "'" - --- | Change LaTeX En dashes between digits to hyphens. -normalizeDashes :: String -> String -normalizeDashes = gsub "([0-9])--([0-9])" "\\1-\\2" - -normalizePunctuation :: String -> String -normalizePunctuation = normalizeDashes . normalizeQuotes - -- | Returns text between brackets and its matching pair. bracketedText openB closeB = try (do char openB @@ -132,10 +121,10 @@ anyEnvironment = try (do -- -- | Process LaTeX preamble, extracting metadata. -processLaTeXPreamble = do +processLaTeXPreamble = try (do manyTill (choice [bibliographic, comment, unknownCommand, nullBlock]) (try (string "\\begin{document}")) - spaces + spaces) -- | Parse LaTeX and return 'Pandoc'. parseLaTeX = do @@ -392,16 +381,13 @@ comment = try (do -- inline -- -inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots, +inline = choice [ strong, emph, ref, lab, code, linebreak, math, ellipses, + emDash, enDash, hyphen, quoted, apostrophe, accentedChar, specialChar, specialInline, escapedChar, unescapedChar, str, endline, whitespace ] <?> "inline" -specialInline = choice [ link, image, footnote, rawLaTeXInline ] <?> - "link, raw TeX, note, or image" - -ldots = try (do - string "\\ldots" - return (Str "...")) +specialInline = choice [ link, image, footnote, rawLaTeXInline ] + <?> "link, raw TeX, note, or image" accentedChar = normalAccentedChar <|> specialAccentedChar @@ -526,6 +512,49 @@ emph = try (do result <- manyTill inline (char '}') return (Emph result)) +apostrophe = do + char '\'' + return Apostrophe + +quoted = do + doubleQuoted <|> singleQuoted + +singleQuoted = try (do + result <- enclosed singleQuoteStart singleQuoteEnd inline + return $ Quoted SingleQuote $ normalizeSpaces result) + +doubleQuoted = try (do + result <- enclosed doubleQuoteStart doubleQuoteEnd inline + return $ Quoted DoubleQuote $ normalizeSpaces result) + +singleQuoteStart = char '`' + +singleQuoteEnd = try (do + char '\'' + notFollowedBy alphaNum) + +doubleQuoteStart = try (string "``") + +doubleQuoteEnd = try (string "''") + +ellipses = try (do + string "\\ldots" + option "" (string "{}") + return Ellipses) + +enDash = try (do + string "--" + notFollowedBy (char '-') + return EnDash) + +emDash = try (do + string "---" + return EmDash) + +hyphen = do + char '-' + return (Str "-") + lab = try (do string "\\label{" result <- manyTill anyChar (char '}') @@ -552,7 +581,7 @@ linebreak = try (do str = do result <- many1 (noneOf specialChars) - return (Str (normalizePunctuation result)) + return (Str result) -- endline internal to paragraph endline = try (do @@ -624,3 +653,4 @@ rawLaTeXInline = try (do then fail "not an inline command" else string "" return (TeX ("\\" ++ name ++ star ++ argStr))) + diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 30d6a11df..7fab2ad01 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -86,12 +86,13 @@ titleOpeners = "\"'(" setextHChars = ['=','-'] blockQuoteChar = '>' hyphenChar = '-' +ellipsesChar = '.' -- treat these as potentially non-text when parsing inline: specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd, emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd, autoLinkStart, mathStart, mathEnd, imageStart, noteStart, - hyphenChar] + hyphenChar, ellipsesChar] ++ quoteChars -- -- auxiliary functions @@ -120,6 +121,11 @@ failUnlessBeginningOfLine = do pos <- getPosition if sourceColumn pos == 1 then return () else fail "not beginning of line" +-- | Fail unless we're in "smart typography" mode. +failUnlessSmart = do + state <- getState + if stateSmart state then return () else fail "Smart typography feature" + -- -- document structure -- @@ -519,11 +525,11 @@ rawLaTeXEnvironment' = do -- inline -- -text = choice [ math, strong, emph, code, str, linebreak, tabchar, - whitespace, endline ] <?> "text" +text = choice [ escapedChar, math, strong, emph, smartPunctuation, + code, ltSign, symbol, + str, linebreak, tabchar, whitespace, endline ] <?> "text" -inline = choice [ rawLaTeXInline', escapedChar, special, hyphens, text, - ltSign, symbol ] <?> "inline" +inline = choice [ rawLaTeXInline', escapedChar, special, text ] <?> "inline" special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline', autoLink, image ] <?> "link, inline html, note, or image" @@ -531,6 +537,7 @@ special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline', escapedChar = escaped anyChar ltSign = try (do + notFollowedBy (noneOf "<") -- continue only if it's a < notFollowedBy' rawHtmlBlocks -- don't return < if it starts html char '<' return (Str ['<'])) @@ -541,13 +548,6 @@ symbol = do result <- oneOf specialCharsMinusLt return (Str [result]) -hyphens = try (do - result <- many1 (char '-') - if (length result) == 1 - then skipEndline -- don't want to treat endline after hyphen as a space - else do{ string ""; return Space } - return (Str result)) - -- parses inline code, between n codeStarts and n codeEnds code = try (do starts <- many1 (char codeStart) @@ -583,6 +583,56 @@ strong = do (count 2 (char emphEndAlt)) inline) ] return (Strong (normalizeSpaces result)) +smartPunctuation = do + failUnlessSmart + choice [ quoted, apostrophe, dash, ellipses ] + +apostrophe = do + char '\'' <|> char '\8217' + return Apostrophe + +quoted = do + doubleQuoted <|> singleQuoted + +singleQuoted = try (do + result <- enclosed singleQuoteStart singleQuoteEnd + (do{notFollowedBy' singleQuoted; inline} <|> apostrophe) + return $ Quoted SingleQuote $ normalizeSpaces result) + +doubleQuoted = try (do + result <- enclosed doubleQuoteStart doubleQuoteEnd inline + return $ Quoted DoubleQuote $ normalizeSpaces result) + +singleQuoteStart = try (do + char '\'' <|> char '\8216' + notFollowedBy' whitespace) + +singleQuoteEnd = try (do + oneOfStrings ["'", "\8217"] + notFollowedBy alphaNum) + +doubleQuoteStart = char '"' <|> char '\8220' + +doubleQuoteEnd = char '"' <|> char '\8221' + +ellipses = try (do + oneOfStrings ["...", " . . . ", ". . .", " . . ."] + return Ellipses) + +dash = enDash <|> emDash + +enDash = try (do + char '-' + followedBy' (many1 digit) + return EnDash) + +emDash = try (do + skipSpaces + oneOfStrings ["---", "--"] + skipSpaces + option ' ' newline + return EmDash) + whitespace = do many1 (oneOf spaceChars) <?> "whitespace" return Space |