From 50ca61ef4977217889b386f84abd0ba190fac3f2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Dec 2010 19:03:08 -0800 Subject: Moved smartPunctuation from Markdown to Parsing. + Parameterized smartPunctuation on an inline parser. + Handle smartPunctuation in Textile reader. --- src/Text/Pandoc/Parsing.hs | 95 +++++++++++++++++++++++++++++++++++-- src/Text/Pandoc/Readers/Markdown.hs | 91 +---------------------------------- src/Text/Pandoc/Readers/Textile.hs | 15 ++---- 3 files changed, 99 insertions(+), 102 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index ecb3dd262..d63fcd0a7 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -67,17 +67,18 @@ module Text.Pandoc.Parsing ( (>>~), Key, toKey, fromKey, - lookupKeySrc ) + lookupKeySrc, + smartPunctuation ) where import Text.Pandoc.Definition import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.ParserCombinators.Parsec import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isAscii ) +import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) -import Control.Monad ( join, liftM ) +import Control.Monad ( join, liftM, guard ) import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Macros (Macro) @@ -678,3 +679,91 @@ lookupKeySrc table key = case M.lookup key table of Nothing -> Nothing Just src -> Just src +-- | Fail unless we're in "smart typography" mode. +failUnlessSmart :: GenParser tok ParserState () +failUnlessSmart = getState >>= guard . stateSmart + +smartPunctuation :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +smartPunctuation inlineParser = do + failUnlessSmart + choice [ quoted inlineParser, apostrophe, dash, ellipses ] + +apostrophe :: GenParser Char ParserState Inline +apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe + +quoted :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser + +withQuoteContext :: QuoteContext + -> (GenParser Char ParserState Inline) + -> GenParser Char ParserState Inline +withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = stateQuoteContext oldState + setState oldState { stateQuoteContext = context } + result <- parser + newState <- getState + setState newState { stateQuoteContext = oldQuoteContext } + return result + +singleQuoted :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +singleQuoted inlineParser = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= + return . Quoted SingleQuote . normalizeSpaces + +doubleQuoted :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +doubleQuoted inlineParser = try $ do + doubleQuoteStart + withQuoteContext InDoubleQuote $ do + contents <- manyTill inlineParser doubleQuoteEnd + return . Quoted DoubleQuote . normalizeSpaces $ contents + +failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () +failIfInQuoteContext context = do + st <- getState + if stateQuoteContext st == context + then fail "already inside quotes" + else return () + +singleQuoteStart :: GenParser Char ParserState () +singleQuoteStart = do + failIfInQuoteContext InSingleQuote + try $ do oneOf "'\8216" + notFollowedBy (oneOf ")!],.;:-? \t\n") + notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> + satisfy (not . isAlphaNum))) + -- possess/contraction + return () + +singleQuoteEnd :: GenParser Char st () +singleQuoteEnd = try $ do + oneOf "'\8217" + notFollowedBy alphaNum + +doubleQuoteStart :: GenParser Char ParserState () +doubleQuoteStart = do + failIfInQuoteContext InDoubleQuote + try $ do oneOf "\"\8220" + notFollowedBy (oneOf " \t\n") + +doubleQuoteEnd :: GenParser Char st () +doubleQuoteEnd = oneOf "\"\8221" >> return () + +ellipses :: GenParser Char st Inline +ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses + +dash :: GenParser Char st Inline +dash = enDash <|> emDash + +enDash :: GenParser Char st Inline +enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash + +emDash :: GenParser Char st Inline +emDash = oneOfStrings ["---", "--"] >> return EmDash + + diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ad7d2a0cc..accb4cdc4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -27,10 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of markdown-formatted plain text to 'Pandoc' document. -} -module Text.Pandoc.Readers.Markdown ( - readMarkdown, - smartPunctuation - ) where +module Text.Pandoc.Readers.Markdown ( readMarkdown ) where import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) import qualified Data.Map as M @@ -108,12 +105,6 @@ failUnlessBeginningOfLine = do pos <- getPosition if sourceColumn pos == 1 then return () else fail "not beginning of line" --- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: GenParser tok ParserState () -failUnlessSmart = do - state <- getState - if stateSmart state then return () else pzero - -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. inlinesInBalancedBrackets :: GenParser Char ParserState Inline @@ -906,7 +897,7 @@ inline = choice inlineParsers "inline" inlineParsers :: [GenParser Char ParserState Inline] inlineParsers = [ str - , smartPunctuation + , smartPunctuation inline , whitespace , endline , code @@ -1047,84 +1038,6 @@ subscript = failIfStrict >> enclosed (char '~') (char '~') (notFollowedBy spaceChar >> inline) >>= -- may not contain Space return . Subscript -smartPunctuation :: GenParser Char ParserState Inline -smartPunctuation = failUnlessSmart >> - choice [ quoted, apostrophe, dash, ellipses ] - -apostrophe :: GenParser Char ParserState Inline -apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe - -quoted :: GenParser Char ParserState Inline -quoted = doubleQuoted <|> singleQuoted - -withQuoteContext :: QuoteContext - -> (GenParser Char ParserState Inline) - -> GenParser Char ParserState Inline -withQuoteContext context parser = do - oldState <- getState - let oldQuoteContext = stateQuoteContext oldState - setState oldState { stateQuoteContext = context } - result <- parser - newState <- getState - setState newState { stateQuoteContext = oldQuoteContext } - return result - -singleQuoted :: GenParser Char ParserState Inline -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= - return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted :: GenParser Char ParserState Inline -doubleQuoted = try $ do - doubleQuoteStart - withQuoteContext InDoubleQuote $ do - contents <- manyTill inline doubleQuoteEnd - return . Quoted DoubleQuote . normalizeSpaces $ contents - -failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () -failIfInQuoteContext context = do - st <- getState - if stateQuoteContext st == context - then fail "already inside quotes" - else return () - -singleQuoteStart :: GenParser Char ParserState () -singleQuoteStart = do - failIfInQuoteContext InSingleQuote - try $ do oneOf "'\8216" - notFollowedBy (oneOf ")!],.;:-? \t\n") - notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> - satisfy (not . isAlphaNum))) - -- possess/contraction - return () - -singleQuoteEnd :: GenParser Char st () -singleQuoteEnd = try $ do - oneOf "'\8217" - notFollowedBy alphaNum - -doubleQuoteStart :: GenParser Char ParserState () -doubleQuoteStart = do - failIfInQuoteContext InDoubleQuote - try $ do oneOf "\"\8220" - notFollowedBy (oneOf " \t\n") - -doubleQuoteEnd :: GenParser Char st () -doubleQuoteEnd = oneOf "\"\8221" >> return () - -ellipses :: GenParser Char st Inline -ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses - -dash :: GenParser Char st Inline -dash = enDash <|> emDash - -enDash :: GenParser Char st Inline -enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash - -emDash :: GenParser Char st Inline -emDash = oneOfStrings ["---", "--"] >> return EmDash - whitespace :: GenParser Char ParserState Inline whitespace = spaceChar >> ( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak)) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index e1d608eed..4c655691a 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -34,21 +34,17 @@ Implemented and parsed: - Lists - blockquote - Inlines : strong, emph, cite, code, deleted, superscript, - subscript, links, smart punctuation + subscript, links Implemented but discarded: - HTML-specific and CSS-specific attributes Left to be implemented: - - Pandoc Meta Information (title, author, date) - footnotes - dimension sign - - uppercase + - all caps - definition lists - continued blocks (ex bq..) - - - - TODO : refactor common patterns across readers : - autolink @@ -58,9 +54,8 @@ TODO : refactor common patterns across readers : -} -module Text.Pandoc.Readers.Textile ( - readTextile - ) where +module Text.Pandoc.Readers.Textile ( readTextile) where + import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing @@ -313,7 +308,6 @@ inlineParsers = [ autoLink , mark , str , htmlSpan --- , smartPunctuation -- from markdown reader , whitespace , endline , rawHtmlInline @@ -328,6 +322,7 @@ inlineParsers = [ autoLink , simpleInline (char '~') Subscript , link , image + , smartPunctuation inline , symbol ] -- cgit v1.2.3