diff options
author John MacFarlane <jgm@berkeley.edu> 2010-12-07 19:03:08 -0800
committer John MacFarlane <jgm@berkeley.edu> 2010-12-07 19:03:08 -0800
commit 50ca61ef4977217889b386f84abd0ba190fac3f2 (patch)
parent f917b46500377df2c5b33b9c9a7827e00650a29f (diff)
Moved smartPunctuation from Markdown to Parsing.
+ Parameterized smartPunctuation on an inline parser.
+ Handle smartPunctuation in Textile reader.
4 files changed, 107 insertions, 110 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index ecb3dd262..d63fcd0a7 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -67,17 +67,18 @@ module Text.Pandoc.Parsing ( (>>~),
- lookupKeySrc )
+ lookupKeySrc,
+ smartPunctuation )
import Text.Pandoc.Definition
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.ParserCombinators.Parsec
import Text.Pandoc.CharacterReferences ( characterReference )
-import Data.Char ( toLower, toUpper, ord, isAscii )
+import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum )
import Data.List ( intercalate, transpose )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
-import Control.Monad ( join, liftM )
+import Control.Monad ( join, liftM, guard )
import Text.Pandoc.Shared
import qualified Data.Map as M
import Text.TeXMath.Macros (Macro)
@@ -678,3 +679,91 @@ lookupKeySrc table key = case M.lookup key table of
Nothing -> Nothing
Just src -> Just src
+-- | Fail unless we're in "smart typography" mode.
+failUnlessSmart :: GenParser tok ParserState ()
+failUnlessSmart = getState >>= guard . stateSmart
+smartPunctuation :: GenParser Char ParserState Inline
+ -> GenParser Char ParserState Inline
+smartPunctuation inlineParser = do
+ failUnlessSmart
+ choice [ quoted inlineParser, apostrophe, dash, ellipses ]
+apostrophe :: GenParser Char ParserState Inline
+apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
+quoted :: GenParser Char ParserState Inline
+ -> GenParser Char ParserState Inline
+quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
+withQuoteContext :: QuoteContext
+ -> (GenParser Char ParserState Inline)
+ -> GenParser Char ParserState Inline
+withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = stateQuoteContext oldState
+ setState oldState { stateQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { stateQuoteContext = oldQuoteContext }
+ return result
+singleQuoted :: GenParser Char ParserState Inline
+ -> GenParser Char ParserState Inline
+singleQuoted inlineParser = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
+ return . Quoted SingleQuote . normalizeSpaces
+doubleQuoted :: GenParser Char ParserState Inline
+ -> GenParser Char ParserState Inline
+doubleQuoted inlineParser = try $ do
+ doubleQuoteStart
+ withQuoteContext InDoubleQuote $ do
+ contents <- manyTill inlineParser doubleQuoteEnd
+ return . Quoted DoubleQuote . normalizeSpaces $ contents
+failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState ()
+failIfInQuoteContext context = do
+ st <- getState
+ if stateQuoteContext st == context
+ then fail "already inside quotes"
+ else return ()
+singleQuoteStart :: GenParser Char ParserState ()
+singleQuoteStart = do
+ failIfInQuoteContext InSingleQuote
+ try $ do oneOf "'\8216"
+ notFollowedBy (oneOf ")!],.;:-? \t\n")
+ notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+ satisfy (not . isAlphaNum)))
+ -- possess/contraction
+ return ()
+singleQuoteEnd :: GenParser Char st ()
+singleQuoteEnd = try $ do
+ oneOf "'\8217"
+ notFollowedBy alphaNum
+doubleQuoteStart :: GenParser Char ParserState ()
+doubleQuoteStart = do
+ failIfInQuoteContext InDoubleQuote
+ try $ do oneOf "\"\8220"
+ notFollowedBy (oneOf " \t\n")
+doubleQuoteEnd :: GenParser Char st ()
+doubleQuoteEnd = oneOf "\"\8221" >> return ()
+ellipses :: GenParser Char st Inline
+ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
+dash :: GenParser Char st Inline
+dash = enDash <|> emDash
+enDash :: GenParser Char st Inline
+enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
+emDash :: GenParser Char st Inline
+emDash = oneOfStrings ["---", "--"] >> return EmDash
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index ad7d2a0cc..accb4cdc4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -27,10 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of markdown-formatted plain text to 'Pandoc' document.
-module Text.Pandoc.Readers.Markdown (
- readMarkdown,
- smartPunctuation
- ) where
+module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate )
import qualified Data.Map as M
@@ -108,12 +105,6 @@ failUnlessBeginningOfLine = do
pos <- getPosition
if sourceColumn pos == 1 then return () else fail "not beginning of line"
--- | Fail unless we're in "smart typography" mode.
-failUnlessSmart :: GenParser tok ParserState ()
-failUnlessSmart = do
- state <- getState
- if stateSmart state then return () else pzero
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
inlinesInBalancedBrackets :: GenParser Char ParserState Inline
@@ -906,7 +897,7 @@ inline = choice inlineParsers <?> "inline"
inlineParsers :: [GenParser Char ParserState Inline]
inlineParsers = [ str
- , smartPunctuation
+ , smartPunctuation inline
, whitespace
, endline
, code
@@ -1047,84 +1038,6 @@ subscript = failIfStrict >> enclosed (char '~') (char '~')
(notFollowedBy spaceChar >> inline) >>= -- may not contain Space
return . Subscript
-smartPunctuation :: GenParser Char ParserState Inline
-smartPunctuation = failUnlessSmart >>
- choice [ quoted, apostrophe, dash, ellipses ]
-apostrophe :: GenParser Char ParserState Inline
-apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
-quoted :: GenParser Char ParserState Inline
-quoted = doubleQuoted <|> singleQuoted
-withQuoteContext :: QuoteContext
- -> (GenParser Char ParserState Inline)
- -> GenParser Char ParserState Inline
-withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = stateQuoteContext oldState
- setState oldState { stateQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { stateQuoteContext = oldQuoteContext }
- return result
-singleQuoted :: GenParser Char ParserState Inline
-singleQuoted = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
- return . Quoted SingleQuote . normalizeSpaces
-doubleQuoted :: GenParser Char ParserState Inline
-doubleQuoted = try $ do
- doubleQuoteStart
- withQuoteContext InDoubleQuote $ do
- contents <- manyTill inline doubleQuoteEnd
- return . Quoted DoubleQuote . normalizeSpaces $ contents
-failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState ()
-failIfInQuoteContext context = do
- st <- getState
- if stateQuoteContext st == context
- then fail "already inside quotes"
- else return ()
-singleQuoteStart :: GenParser Char ParserState ()
-singleQuoteStart = do
- failIfInQuoteContext InSingleQuote
- try $ do oneOf "'\8216"
- notFollowedBy (oneOf ")!],.;:-? \t\n")
- notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
- satisfy (not . isAlphaNum)))
- -- possess/contraction
- return ()
-singleQuoteEnd :: GenParser Char st ()
-singleQuoteEnd = try $ do
- oneOf "'\8217"
- notFollowedBy alphaNum
-doubleQuoteStart :: GenParser Char ParserState ()
-doubleQuoteStart = do
- failIfInQuoteContext InDoubleQuote
- try $ do oneOf "\"\8220"
- notFollowedBy (oneOf " \t\n")
-doubleQuoteEnd :: GenParser Char st ()
-doubleQuoteEnd = oneOf "\"\8221" >> return ()
-ellipses :: GenParser Char st Inline
-ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
-dash :: GenParser Char st Inline
-dash = enDash <|> emDash
-enDash :: GenParser Char st Inline
-enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
-emDash :: GenParser Char st Inline
-emDash = oneOfStrings ["---", "--"] >> return EmDash
whitespace :: GenParser Char ParserState Inline
whitespace = spaceChar >>
( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak))
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index e1d608eed..4c655691a 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -34,21 +34,17 @@ Implemented and parsed:
- Lists
- blockquote
- Inlines : strong, emph, cite, code, deleted, superscript,
- subscript, links, smart punctuation
+ subscript, links
Implemented but discarded:
- HTML-specific and CSS-specific attributes
Left to be implemented:
- - Pandoc Meta Information (title, author, date)
- footnotes
- dimension sign
- - uppercase
+ - all caps
- definition lists
- continued blocks (ex bq..)
- -
TODO : refactor common patterns across readers :
- autolink
@@ -58,9 +54,8 @@ TODO : refactor common patterns across readers :
-module Text.Pandoc.Readers.Textile (
- readTextile
- ) where
+module Text.Pandoc.Readers.Textile ( readTextile) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
@@ -313,7 +308,6 @@ inlineParsers = [ autoLink
, mark
, str
, htmlSpan
--- , smartPunctuation -- from markdown reader
, whitespace
, endline
, rawHtmlInline
@@ -328,6 +322,7 @@ inlineParsers = [ autoLink
, simpleInline (char '~') Subscript
, link
, image
+ , smartPunctuation inline
, symbol
diff --git a/tests/textile-reader.native b/tests/textile-reader.native
index 613c6ac6b..f2024c4d8 100644
--- a/tests/textile-reader.native
+++ b/tests/textile-reader.native
@@ -1,5 +1,5 @@
Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
-[ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."]
+[ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Apostrophe,Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."]
, HorizontalRule
, Header 1 [Str "Headers"]
, Header 2 [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embeded",Space,Str "link"] ("http://www.example.com","")]
@@ -8,9 +8,9 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
, Header 5 [Str "Level",Space,Str "5"]
, Header 6 [Str "Level",Space,Str "6"]
, Header 1 [Str "Paragraphs"]
-, Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph",Str "."]
+, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph",Str "."]
, Para [Str "Line",Space,Str "breaks",Space,Str "are",Space,Str "preserved",Space,Str "in",Space,Str "textile",Str ",",Space,Str "so",Space,Str "you",Space,Str "can",Space,Str "not",Space,Str "wrap",Space,Str "your",Space,Str "very",LineBreak,Str "long",Space,Str "paragraph",Space,Str "with",Space,Str "your",Space,Str "favourite",Space,Str "text",Space,Str "editor",Space,Str "and",Space,Str "have",Space,Str "it",Space,Str "rendered",LineBreak,Str "with",Space,Str "no",Space,Str "break",Str "."]
-, Para [Str "Here",Str "'",Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet",Str "."]
+, Para [Str "Here",Apostrophe,Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet",Str "."]
, BulletList
[ [ Plain [Str "criminey",Str "."] ]
@@ -56,9 +56,9 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
, Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str ".",LineBreak,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str ".",LineBreak,Str "A",Space,Link [Strong [Str "strong",Space,Str "link"]] ("http://www.foobar.com",""),Str "."]
, Para [Emph [Strong [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em",Str "."]],LineBreak,Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Space,Str "and",Space,Emph [Strong [Str "that",Space,Str "one"]],Str ".",LineBreak,Strikeout [Str "This",Space,Str "is",Space,Str "strikeout",Space,Str "and",Space,Strong [Str "strong"]]]
, Para [Str "Superscripts",Str ":",Space,Str "a",Superscript [Str "bc"],Str "d",Space,Str "a",Superscript [Strong [Str "hello"]],Space,Str "a",Superscript [Str "hello",Space,Str "there"],Str ".",LineBreak,Str "Subscripts",Str ":",Space,Str "H",Subscript [Str "2"],Str "O",Str ",",Space,Str "H",Subscript [Str "23"],Str "O",Str ",",Space,Str "H",Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O",Str "."]
-, Para [Str "Dashes",Space,Str ":",Space,Str "How",Space,Str "cool",Space,Str "-",Str "-",Space,Str "automatic",Space,Str "dashes",Str "."]
-, Para [Str "Elipses",Space,Str ":",Space,Str "He",Space,Str "thought",Space,Str "and",Space,Str "thought",Space,Str ".",Str ".",Str ".",Space,Str "and",Space,Str "then",Space,Str "thought",Space,Str "some",Space,Str "more",Str "."]
-, Para [Str "Quotes",Space,Str "and",Space,Str "apostrophes",Space,Str ":",Space,Str "\"",Str "I",Str "'",Str "d",Space,Str "like",Space,Str "to",Space,Str "thank",Space,Str "you",Str "\"",Space,Str "for",Space,Str "example",Str "."]
+, Para [Str "Dashes",Space,Str ":",Space,Str "How",Space,Str "cool",Space,EmDash,Space,Str "automatic",Space,Str "dashes",Str "."]
+, Para [Str "Elipses",Space,Str ":",Space,Str "He",Space,Str "thought",Space,Str "and",Space,Str "thought",Space,Ellipses,Space,Str "and",Space,Str "then",Space,Str "thought",Space,Str "some",Space,Str "more",Str "."]
+, Para [Str "Quotes",Space,Str "and",Space,Str "apostrophes",Space,Str ":",Space,Quoted DoubleQuote [Str "I",Apostrophe,Str "d",Space,Str "like",Space,Str "to",Space,Str "thank",Space,Str "you"],Space,Str "for",Space,Str "example",Str "."]
, Header 1 [Str "Links"]
, Header 2 [Str "Explicit"]
, Para [Str "Just",Space,Str "a",Space,Link [Str "url"] ("http://www.url.com","")]
@@ -82,7 +82,7 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
[ [ Plain [Str "bella"] ]
, [ Plain [Str "45"] ]
, [ Plain [Str "f"] ] ] ]
-, Para [Str "and",Space,Str "some",Space,Str "text",Space,Str "following",Space,Str ".",Str ".",Str "."]
+, Para [Str "and",Space,Str "some",Space,Str "text",Space,Str "following",Space,Ellipses]
, Header 2 [Str "With",Space,Str "headers"]
, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[ [ Plain [Str "name"] ]
@@ -123,7 +123,7 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
, RawHtml "</div>"
, Para [Str "as",Space,Str "well",Str "."]
, BulletList
- [ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "'",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ]
+ [ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Apostrophe,Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ]
, [ Plain [Str "but",Space,Str "this",Space,HtmlInline "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,HtmlInline "</strong>"] ] ]
, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]
, Header 1 [Str "Acronyms",Space,Str "and",Space,Str "marks"]