diff options
| -rw-r--r-- | src/Text/Pandoc/Parsing.hs | 95 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 91 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 15 | ||||
| -rw-r--r-- | tests/textile-reader.native | 16 | 
4 files changed, 107 insertions, 110 deletions
| diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index ecb3dd262..d63fcd0a7 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -67,17 +67,18 @@ module Text.Pandoc.Parsing ( (>>~),                               Key,                               toKey,                               fromKey, -                             lookupKeySrc ) +                             lookupKeySrc, +                             smartPunctuation )  where  import Text.Pandoc.Definition  import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)  import Text.ParserCombinators.Parsec  import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isAscii ) +import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum )  import Data.List ( intercalate, transpose )  import Network.URI ( parseURI, URI (..), isAllowedInURI ) -import Control.Monad ( join, liftM ) +import Control.Monad ( join, liftM, guard )  import Text.Pandoc.Shared  import qualified Data.Map as M  import Text.TeXMath.Macros (Macro) @@ -678,3 +679,91 @@ lookupKeySrc table key = case M.lookup key table of                             Nothing  -> Nothing                             Just src -> Just src +-- | Fail unless we're in "smart typography" mode. +failUnlessSmart :: GenParser tok ParserState () +failUnlessSmart = getState >>= guard . stateSmart + +smartPunctuation :: GenParser Char ParserState Inline +                 -> GenParser Char ParserState Inline +smartPunctuation inlineParser = do +  failUnlessSmart +  choice [ quoted inlineParser, apostrophe, dash, ellipses ] + +apostrophe :: GenParser Char ParserState Inline +apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe + +quoted :: GenParser Char ParserState Inline +       -> GenParser Char ParserState Inline +quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser + +withQuoteContext :: QuoteContext +                 -> (GenParser Char ParserState Inline) +                 -> GenParser Char ParserState Inline +withQuoteContext context parser = do +  oldState <- getState +  let oldQuoteContext = stateQuoteContext oldState +  setState oldState { stateQuoteContext = context } +  result <- parser +  newState <- getState +  setState newState { stateQuoteContext = oldQuoteContext } +  return result + +singleQuoted :: GenParser Char ParserState Inline +             -> GenParser Char ParserState Inline +singleQuoted inlineParser = try $ do +  singleQuoteStart +  withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= +    return . Quoted SingleQuote . normalizeSpaces + +doubleQuoted :: GenParser Char ParserState Inline +             -> GenParser Char ParserState Inline +doubleQuoted inlineParser = try $ do +  doubleQuoteStart +  withQuoteContext InDoubleQuote $ do +    contents <- manyTill inlineParser doubleQuoteEnd +    return . Quoted DoubleQuote . normalizeSpaces $ contents + +failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () +failIfInQuoteContext context = do +  st <- getState +  if stateQuoteContext st == context +     then fail "already inside quotes" +     else return () + +singleQuoteStart :: GenParser Char ParserState () +singleQuoteStart = do  +  failIfInQuoteContext InSingleQuote +  try $ do oneOf "'\8216" +           notFollowedBy (oneOf ")!],.;:-? \t\n") +           notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> +                               satisfy (not . isAlphaNum)))  +                               -- possess/contraction +           return () + +singleQuoteEnd :: GenParser Char st () +singleQuoteEnd = try $ do +  oneOf "'\8217" +  notFollowedBy alphaNum + +doubleQuoteStart :: GenParser Char ParserState () +doubleQuoteStart = do +  failIfInQuoteContext InDoubleQuote +  try $ do oneOf "\"\8220" +           notFollowedBy (oneOf " \t\n") + +doubleQuoteEnd :: GenParser Char st () +doubleQuoteEnd = oneOf "\"\8221" >> return () + +ellipses :: GenParser Char st Inline +ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses + +dash :: GenParser Char st Inline +dash = enDash <|> emDash + +enDash :: GenParser Char st Inline +enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash + +emDash :: GenParser Char st Inline +emDash = oneOfStrings ["---", "--"] >> return EmDash + + diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ad7d2a0cc..accb4cdc4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -27,10 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  Conversion of markdown-formatted plain text to 'Pandoc' document.  -} -module Text.Pandoc.Readers.Markdown (  -                                     readMarkdown, -                                     smartPunctuation -                                    ) where +module Text.Pandoc.Readers.Markdown ( readMarkdown ) where  import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate )  import qualified Data.Map as M @@ -108,12 +105,6 @@ failUnlessBeginningOfLine = do    pos <- getPosition    if sourceColumn pos == 1 then return () else fail "not beginning of line" --- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: GenParser tok ParserState () -failUnlessSmart = do -  state <- getState -  if stateSmart state then return () else pzero -  -- | Parse a sequence of inline elements between square brackets,  -- including inlines between balanced pairs of square brackets.  inlinesInBalancedBrackets :: GenParser Char ParserState Inline @@ -906,7 +897,7 @@ inline = choice inlineParsers <?> "inline"  inlineParsers :: [GenParser Char ParserState Inline]  inlineParsers = [ str -                , smartPunctuation +                , smartPunctuation inline                  , whitespace                  , endline                  , code @@ -1047,84 +1038,6 @@ subscript = failIfStrict >> enclosed (char '~') (char '~')              (notFollowedBy spaceChar >> inline) >>=  -- may not contain Space              return . Subscript  -smartPunctuation :: GenParser Char ParserState Inline -smartPunctuation = failUnlessSmart >>  -                   choice [ quoted, apostrophe, dash, ellipses ] - -apostrophe :: GenParser Char ParserState Inline -apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe - -quoted :: GenParser Char ParserState Inline -quoted = doubleQuoted <|> singleQuoted  - -withQuoteContext :: QuoteContext -                 -> (GenParser Char ParserState Inline) -                 -> GenParser Char ParserState Inline -withQuoteContext context parser = do -  oldState <- getState -  let oldQuoteContext = stateQuoteContext oldState -  setState oldState { stateQuoteContext = context } -  result <- parser -  newState <- getState -  setState newState { stateQuoteContext = oldQuoteContext } -  return result - -singleQuoted :: GenParser Char ParserState Inline -singleQuoted = try $ do -  singleQuoteStart -  withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= -    return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted :: GenParser Char ParserState Inline -doubleQuoted = try $ do -  doubleQuoteStart -  withQuoteContext InDoubleQuote $ do -    contents <- manyTill inline doubleQuoteEnd -    return . Quoted DoubleQuote . normalizeSpaces $ contents - -failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () -failIfInQuoteContext context = do -  st <- getState -  if stateQuoteContext st == context -     then fail "already inside quotes" -     else return () - -singleQuoteStart :: GenParser Char ParserState () -singleQuoteStart = do  -  failIfInQuoteContext InSingleQuote -  try $ do oneOf "'\8216" -           notFollowedBy (oneOf ")!],.;:-? \t\n") -           notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> -                               satisfy (not . isAlphaNum)))  -                               -- possess/contraction -           return () - -singleQuoteEnd :: GenParser Char st () -singleQuoteEnd = try $ do -  oneOf "'\8217" -  notFollowedBy alphaNum - -doubleQuoteStart :: GenParser Char ParserState () -doubleQuoteStart = do -  failIfInQuoteContext InDoubleQuote -  try $ do oneOf "\"\8220" -           notFollowedBy (oneOf " \t\n") - -doubleQuoteEnd :: GenParser Char st () -doubleQuoteEnd = oneOf "\"\8221" >> return () - -ellipses :: GenParser Char st Inline -ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses - -dash :: GenParser Char st Inline -dash = enDash <|> emDash - -enDash :: GenParser Char st Inline -enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash - -emDash :: GenParser Char st Inline -emDash = oneOfStrings ["---", "--"] >> return EmDash -  whitespace :: GenParser Char ParserState Inline  whitespace = spaceChar >>    (   (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak)) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index e1d608eed..4c655691a 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -34,21 +34,17 @@ Implemented and parsed:   - Lists   - blockquote   - Inlines : strong, emph, cite, code, deleted, superscript, -   subscript, links, smart punctuation +   subscript, links  Implemented but discarded:   - HTML-specific and CSS-specific attributes  Left to be implemented: - - Pandoc Meta Information (title, author, date)   - footnotes   - dimension sign - - uppercase + - all caps   - definition lists   - continued blocks (ex bq..) - -  - -  TODO : refactor common patterns across readers :   - autolink @@ -58,9 +54,8 @@ TODO : refactor common patterns across readers :  -} -module Text.Pandoc.Readers.Textile (  -                                readTextile -                               ) where +module Text.Pandoc.Readers.Textile ( readTextile) where +  import Text.Pandoc.Definition  import Text.Pandoc.Shared   import Text.Pandoc.Parsing @@ -313,7 +308,6 @@ inlineParsers = [ autoLink                  , mark                  , str                  , htmlSpan ---                , smartPunctuation -- from markdown reader                  , whitespace                  , endline                  , rawHtmlInline @@ -328,6 +322,7 @@ inlineParsers = [ autoLink                  , simpleInline (char '~') Subscript                  , link                  , image +                , smartPunctuation inline                  , symbol                  ] diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 613c6ac6b..f2024c4d8 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -1,5 +1,5 @@  Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]}) -[ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] +[ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Apostrophe,Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."]  , HorizontalRule  , Header 1 [Str "Headers"]  , Header 2 [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embeded",Space,Str "link"] ("http://www.example.com","")] @@ -8,9 +8,9 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})  , Header 5 [Str "Level",Space,Str "5"]  , Header 6 [Str "Level",Space,Str "6"]  , Header 1 [Str "Paragraphs"] -, Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph",Str "."] +, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph",Str "."]  , Para [Str "Line",Space,Str "breaks",Space,Str "are",Space,Str "preserved",Space,Str "in",Space,Str "textile",Str ",",Space,Str "so",Space,Str "you",Space,Str "can",Space,Str "not",Space,Str "wrap",Space,Str "your",Space,Str "very",LineBreak,Str "long",Space,Str "paragraph",Space,Str "with",Space,Str "your",Space,Str "favourite",Space,Str "text",Space,Str "editor",Space,Str "and",Space,Str "have",Space,Str "it",Space,Str "rendered",LineBreak,Str "with",Space,Str "no",Space,Str "break",Str "."] -, Para [Str "Here",Str "'",Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet",Str "."] +, Para [Str "Here",Apostrophe,Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet",Str "."]  , BulletList    [ [ Plain [Str "criminey",Str "."] ]   ] @@ -56,9 +56,9 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})  , Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str ".",LineBreak,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str ".",LineBreak,Str "A",Space,Link [Strong [Str "strong",Space,Str "link"]] ("http://www.foobar.com",""),Str "."]  , Para [Emph [Strong [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em",Str "."]],LineBreak,Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Space,Str "and",Space,Emph [Strong [Str "that",Space,Str "one"]],Str ".",LineBreak,Strikeout [Str "This",Space,Str "is",Space,Str "strikeout",Space,Str "and",Space,Strong [Str "strong"]]]  , Para [Str "Superscripts",Str ":",Space,Str "a",Superscript [Str "bc"],Str "d",Space,Str "a",Superscript [Strong [Str "hello"]],Space,Str "a",Superscript [Str "hello",Space,Str "there"],Str ".",LineBreak,Str "Subscripts",Str ":",Space,Str "H",Subscript [Str "2"],Str "O",Str ",",Space,Str "H",Subscript [Str "23"],Str "O",Str ",",Space,Str "H",Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O",Str "."] -, Para [Str "Dashes",Space,Str ":",Space,Str "How",Space,Str "cool",Space,Str "-",Str "-",Space,Str "automatic",Space,Str "dashes",Str "."] -, Para [Str "Elipses",Space,Str ":",Space,Str "He",Space,Str "thought",Space,Str "and",Space,Str "thought",Space,Str ".",Str ".",Str ".",Space,Str "and",Space,Str "then",Space,Str "thought",Space,Str "some",Space,Str "more",Str "."] -, Para [Str "Quotes",Space,Str "and",Space,Str "apostrophes",Space,Str ":",Space,Str "\"",Str "I",Str "'",Str "d",Space,Str "like",Space,Str "to",Space,Str "thank",Space,Str "you",Str "\"",Space,Str "for",Space,Str "example",Str "."] +, Para [Str "Dashes",Space,Str ":",Space,Str "How",Space,Str "cool",Space,EmDash,Space,Str "automatic",Space,Str "dashes",Str "."] +, Para [Str "Elipses",Space,Str ":",Space,Str "He",Space,Str "thought",Space,Str "and",Space,Str "thought",Space,Ellipses,Space,Str "and",Space,Str "then",Space,Str "thought",Space,Str "some",Space,Str "more",Str "."] +, Para [Str "Quotes",Space,Str "and",Space,Str "apostrophes",Space,Str ":",Space,Quoted DoubleQuote [Str "I",Apostrophe,Str "d",Space,Str "like",Space,Str "to",Space,Str "thank",Space,Str "you"],Space,Str "for",Space,Str "example",Str "."]  , Header 1 [Str "Links"]  , Header 2 [Str "Explicit"]  , Para [Str "Just",Space,Str "a",Space,Link [Str "url"] ("http://www.url.com","")] @@ -82,7 +82,7 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})    [ [ Plain [Str "bella"] ]    , [ Plain [Str "45"] ]    , [ Plain [Str "f"] ] ] ] -, Para [Str "and",Space,Str "some",Space,Str "text",Space,Str "following",Space,Str ".",Str ".",Str "."] +, Para [Str "and",Space,Str "some",Space,Str "text",Space,Str "following",Space,Ellipses]  , Header 2 [Str "With",Space,Str "headers"]  , Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]    [ [ Plain [Str "name"] ] @@ -123,7 +123,7 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})  , RawHtml "</div>"  , Para [Str "as",Space,Str "well",Str "."]  , BulletList -  [ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "'",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ] +  [ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Apostrophe,Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ]    , [ Plain [Str "but",Space,Str "this",Space,HtmlInline "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,HtmlInline "</strong>"] ] ]  , Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]  , Header 1 [Str "Acronyms",Space,Str "and",Space,Str "marks"] | 
