From 0ccca94b4cb22e846289c004deaa023245e14981 Mon Sep 17 00:00:00 2001
From: Matthew Pickering <matthewtpickering@gmail.com>
Date: Tue, 25 Mar 2014 18:22:23 +0000
Subject: Bugfix for #1175 and convert textile reader to use builder.

The reader did not correctly parse inline markup. The behavoir is now as follows.

(a) The markup must start at the start of a line, be inside previous
inline markup or be preceeded by whitespace.
(b) The markup can not span across paragraphs (delimited by \n\n)
(c) The markup can not be followed by a alphanumeric character.
(d) Square brackets can be placed around the markup to avoid having
to have white space before it.

In order to make these changes it was either necessary to convert the parser to return a list of inlines or to convert the whole reader to use the builder. The latter approach whilst more work makes a bit more sense as it becomes easy to arbitarily append and prepend elements without changing the type.

Tests are accordingly updated in a later commit to reflect the different normalisation behavoir specified by the builder monoid.
---
 src/Text/Pandoc/Readers/Textile.hs | 301 ++++++++++++++++++++-----------------
 1 file changed, 167 insertions(+), 134 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 93658cdea..ede50c6de 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -50,20 +50,20 @@ TODO : refactor common patterns across readers :
 
 
 module Text.Pandoc.Readers.Textile ( readTextile) where
-
 import Text.Pandoc.Definition
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
 import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Shared
 import Text.Pandoc.Options
-import Text.Pandoc.Parsing
+import Text.Pandoc.Parsing 
 import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
 import Text.HTML.TagSoup.Match
 import Data.List ( intercalate )
-import Data.Char ( digitToInt, isUpper )
+import Data.Char ( digitToInt, isUpper)
 import Control.Monad ( guard, liftM )
 import Control.Applicative ((<$>), (*>), (<*))
+import Data.Monoid
 
 -- | Parse a Textile text and return a Pandoc document.
 readTextile :: ReaderOptions -- ^ Reader options
@@ -95,7 +95,7 @@ parseTextile = do
   updateState $ \s -> s { stateNotes = reverse reversedNotes }
   -- now parse it for real...
   blocks <- parseBlocks
-  return $ Pandoc nullMeta blocks -- FIXME
+  return $ Pandoc nullMeta (B.toList blocks) -- FIXME
 
 noteMarker :: Parser [Char] ParserState [Char]
 noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
@@ -115,11 +115,11 @@ noteBlock = try $ do
   return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
 
 -- | Parse document blocks
-parseBlocks :: Parser [Char] ParserState [Block]
-parseBlocks = manyTill block eof
+parseBlocks :: Parser [Char] ParserState Blocks
+parseBlocks = mconcat <$> manyTill block eof
 
 -- | Block parsers list tried in definition order
-blockParsers :: [Parser [Char] ParserState Block]
+blockParsers :: [Parser [Char] ParserState Blocks]
 blockParsers = [ codeBlock
                , header
                , blockQuote
@@ -130,29 +130,32 @@ blockParsers = [ codeBlock
                , rawLaTeXBlock'
                , maybeExplicitBlock "table" table
                , maybeExplicitBlock "p" para
+               , endBlock
                ]
 
+endBlock :: Parser [Char] ParserState Blocks
+endBlock = string "\n\n" >> return mempty
 -- | Any block in the order of definition of blockParsers
-block :: Parser [Char] ParserState Block
+block :: Parser [Char] ParserState Blocks
 block = choice blockParsers <?> "block"
 
-commentBlock :: Parser [Char] ParserState Block
+commentBlock :: Parser [Char] ParserState Blocks
 commentBlock = try $ do
   string "###."
   manyTill anyLine blanklines
-  return Null
+  return mempty
 
-codeBlock :: Parser [Char] ParserState Block
+codeBlock :: Parser [Char] ParserState Blocks
 codeBlock = codeBlockBc <|> codeBlockPre
 
-codeBlockBc :: Parser [Char] ParserState Block
+codeBlockBc :: Parser [Char] ParserState Blocks
 codeBlockBc = try $ do
   string "bc. "
   contents <- manyTill anyLine blanklines
-  return $ CodeBlock ("",[],[]) $ unlines contents
+  return $ B.codeBlock (unlines contents)
 
 -- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockPre :: Parser [Char] ParserState Block
+codeBlockPre :: Parser [Char] ParserState Blocks
 codeBlockPre = try $ do
   (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
   result' <- (innerText . parseTags) `fmap` -- remove internal tags
@@ -169,29 +172,29 @@ codeBlockPre = try $ do
   let classes = words $ fromAttrib "class" t
   let ident = fromAttrib "id" t
   let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-  return $ CodeBlock (ident,classes,kvs) result'''
+  return $ B.codeBlockWith (ident,classes,kvs) result'''
 
 -- | Header of the form "hN. content" with N in 1..6
-header :: Parser [Char] ParserState Block
+header :: Parser [Char] ParserState Blocks
 header = try $ do
   char 'h'
   level <- digitToInt <$> oneOf "123456"
   attr <- attributes
   char '.'
-  whitespace
-  name <- normalizeSpaces <$> manyTill inline blockBreak
-  attr' <- registerHeader attr (B.fromList name)
-  return $ Header level attr' name
+  lookAhead whitespace
+  name <- trimInlines . mconcat <$> manyTill inline blockBreak
+  attr' <- registerHeader attr name
+  return $ B.headerWith attr' level name
 
 -- | Blockquote of the form "bq. content"
-blockQuote :: Parser [Char] ParserState Block
+blockQuote :: Parser [Char] ParserState Blocks
 blockQuote = try $ do
   string "bq" >> attributes >> char '.' >> whitespace
-  BlockQuote . singleton <$> para
+  B.blockQuote <$> para
 
 -- Horizontal rule
 
-hrule :: Parser [Char] st Block
+hrule :: Parser [Char] st Blocks
 hrule = try $ do
   skipSpaces
   start <- oneOf "-*"
@@ -199,62 +202,62 @@ hrule = try $ do
   skipMany (spaceChar <|> char start)
   newline
   optional blanklines
-  return HorizontalRule
+  return B.horizontalRule
 
 -- Lists handling
 
 -- | Can be a bullet list or an ordered list. This implementation is
 -- strict in the nesting, sublist must start at exactly "parent depth
 -- plus one"
-anyList :: Parser [Char] ParserState Block
+anyList :: Parser [Char] ParserState Blocks
 anyList = try $ anyListAtDepth 1 <* blanklines
 
 -- | This allow one type of list to be nested into an other type,
 -- provided correct nesting
-anyListAtDepth :: Int -> Parser [Char] ParserState Block
+anyListAtDepth :: Int -> Parser [Char] ParserState Blocks
 anyListAtDepth depth = choice [ bulletListAtDepth depth,
                                 orderedListAtDepth depth,
                                 definitionList ]
 
 -- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: Int -> Parser [Char] ParserState Block
-bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth)
+bulletListAtDepth :: Int -> Parser [Char] ParserState Blocks
+bulletListAtDepth depth = try $ B.bulletList  <$> many1 (bulletListItemAtDepth depth)
 
 -- | Bullet List Item of given depth, depth being the number of
 -- leading '*'
-bulletListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
+bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks 
 bulletListItemAtDepth = genericListItemAtDepth '*'
 
 -- | Ordered List of given depth, depth being the number of
 -- leading '#'
-orderedListAtDepth :: Int -> Parser [Char] ParserState Block
+orderedListAtDepth :: Int -> Parser [Char] ParserState Blocks
 orderedListAtDepth depth = try $ do
   items <- many1 (orderedListItemAtDepth depth)
-  return (OrderedList (1, DefaultStyle, DefaultDelim) items)
+  return $ B.orderedList items
 
 -- | Ordered List Item of given depth, depth being the number of
 -- leading '#'
-orderedListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
+orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks 
 orderedListItemAtDepth = genericListItemAtDepth '#'
 
 -- | Common implementation of list items
-genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState [Block]
+genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState Blocks
 genericListItemAtDepth c depth = try $ do
   count depth (char c) >> attributes >> whitespace
-  p <- many listInline
+  p <- mconcat <$> many listInline
   newline
-  sublist <- option [] (singleton <$> anyListAtDepth (depth + 1))
-  return (Plain p : sublist)
+  sublist <- option mempty (anyListAtDepth (depth + 1))
+  return $ (B.plain p) <> sublist
 
 -- | A definition list is a set of consecutive definition items
-definitionList :: Parser [Char] ParserState Block
-definitionList = try $ DefinitionList <$> many1 definitionListItem
+definitionList :: Parser [Char] ParserState Blocks
+definitionList = try $ B.definitionList <$> many1 definitionListItem
 
 -- | List start character.
 listStart :: Parser [Char] st Char
 listStart = oneOf "*#-"
 
-listInline :: Parser [Char] ParserState Inline
+listInline :: Parser [Char] ParserState Inlines
 listInline = try (notFollowedBy newline >> inline)
          <|> try (endline <* notFollowedBy listStart)
 
@@ -262,16 +265,16 @@ listInline = try (notFollowedBy newline >> inline)
 -- the term defined, then spaces and ":=". The definition follows, on
 -- the same single line, or spaned on multiple line, after a line
 -- break.
-definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]])
+definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks])
 definitionListItem = try $ do
   string "- "
-  term <- many1Till inline (try (whitespace >> string ":="))
+  term <- mconcat <$> many1Till inline (try (whitespace >> string ":="))
   def' <- multilineDef <|> inlineDef
   return (term, def')
-  where inlineDef :: Parser [Char] ParserState [[Block]]
-        inlineDef = liftM (\d -> [[Plain d]])
-                    $ optional whitespace >> many listInline <* newline
-        multilineDef :: Parser [Char] ParserState [[Block]]
+  where inlineDef :: Parser [Char] ParserState [Blocks]
+        inlineDef = liftM (\d -> [B.plain d])
+                    $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline
+        multilineDef :: Parser [Char] ParserState [Blocks] 
         multilineDef = try $ do
           optional whitespace >> newline
           s <- many1Till anyChar (try (string "=:" >> newline))
@@ -288,59 +291,59 @@ blockBreak = try (newline >> blanklines >> return ()) <|>
 -- raw content
 
 -- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: Parser [Char] ParserState Block
+rawHtmlBlock :: Parser [Char] ParserState Blocks
 rawHtmlBlock = try $ do
   (_,b) <- htmlTag isBlockTag
   optional blanklines
-  return $ RawBlock (Format "html") b
+  return $ B.rawBlock "html" b
 
 -- | Raw block of LaTeX content
-rawLaTeXBlock' :: Parser [Char] ParserState Block
+rawLaTeXBlock' :: Parser [Char] ParserState Blocks
 rawLaTeXBlock' = do
   guardEnabled Ext_raw_tex
-  RawBlock (Format "latex") <$> (rawLaTeXBlock <* spaces)
+  B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
 
 
 -- | In textile, paragraphs are separated by blank lines.
-para :: Parser [Char] ParserState Block
-para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak
-
+para :: Parser [Char] ParserState Blocks
+para = do
+    a <- manyTill inline blockBreak 
+    return $ (B.para . trimInlines . mconcat) a
 
 -- Tables
 
 -- | A table cell spans until a pipe |
-tableCell :: Parser [Char] ParserState TableCell
+tableCell :: Parser [Char] ParserState Blocks
 tableCell = do
   c <- many1 (noneOf "|\n")
-  content <- parseFromString (many1 inline) c
-  return $ [ Plain $ normalizeSpaces content ]
+  content <- trimInlines . mconcat <$> parseFromString (many1 inline) c
+  return $ B.plain content
 
 -- | A table row is made of many table cells
-tableRow :: Parser [Char] ParserState [TableCell]
+tableRow :: Parser [Char] ParserState [Blocks]
 tableRow = try $ ( char '|' *>
   (endBy1 tableCell (optional blankline *> char '|')) <* newline)
 
 -- | Many table rows
-tableRows :: Parser [Char] ParserState [[TableCell]]
+tableRows :: Parser [Char] ParserState [[Blocks]]
 tableRows = many1 tableRow
 
 -- | Table headers are made of cells separated by a tag "|_."
-tableHeaders :: Parser [Char] ParserState [TableCell]
+tableHeaders :: Parser [Char] ParserState [Blocks]
 tableHeaders = let separator = (try $ string "|_.") in
   try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline )
 
 -- | A table with an optional header. Current implementation can
 -- handle tables with and without header, but will parse cells
 -- alignment attributes as content.
-table :: Parser [Char] ParserState Block
+table :: Parser [Char] ParserState Blocks
 table = try $ do
-  headers <- option [] tableHeaders
+  headers <- option mempty tableHeaders
   rows <- tableRows
   blanklines
   let nbOfCols = max (length headers) (length $ head rows)
-  return $ Table []
-    (replicate nbOfCols AlignDefault)
-    (replicate nbOfCols 0.0)
+  return $ B.table mempty
+    (zip (replicate nbOfCols AlignDefault) (replicate nbOfCols 0.0))
     headers
     rows
 
@@ -348,8 +351,8 @@ table = try $ do
 -- | Blocks like 'p' and 'table' do not need explicit block tag.
 -- However, they can be used to set HTML/CSS attributes when needed.
 maybeExplicitBlock :: String  -- ^ block tag name
-                    -> Parser [Char] ParserState Block -- ^ implicit block
-                    -> Parser [Char] ParserState Block
+                    -> Parser [Char] ParserState Blocks -- ^ implicit block
+                    -> Parser [Char] ParserState Blocks
 maybeExplicitBlock name blk = try $ do
   optional $ try $ string name >> attributes >> char '.' >>
     optional whitespace >> optional endline
@@ -363,12 +366,14 @@ maybeExplicitBlock name blk = try $ do
 
 
 -- | Any inline element
-inline :: Parser [Char] ParserState Inline
-inline = choice inlineParsers <?> "inline"
+inline :: Parser [Char] ParserState Inlines
+inline = do
+    choice inlineParsers <?> "inline"
 
 -- | Inline parsers tried in order
-inlineParsers :: [Parser [Char] ParserState Inline]
-inlineParsers = [ str
+inlineParsers :: [Parser [Char] ParserState Inlines]
+inlineParsers = [ inlineMarkup
+                , str
                 , whitespace
                 , endline
                 , code
@@ -378,58 +383,57 @@ inlineParsers = [ str
                 , rawLaTeXInline'
                 , note
                 , try $ (char '[' *> inlineMarkup <* char ']')
-                , inlineMarkup
                 , link
                 , image
                 , mark
-                , (Str . (:[])) <$> characterReference
+                , (B.str . (:[])) <$> characterReference
                 , smartPunctuation inline
                 , symbol
                 ]
 
 -- | Inline markups
-inlineMarkup :: Parser [Char] ParserState Inline
-inlineMarkup = choice [ simpleInline (string "??") (Cite [])
-                      , simpleInline (string "**") Strong
-                      , simpleInline (string "__") Emph
-                      , simpleInline (char '*') Strong
-                      , simpleInline (char '_') Emph
-                      , simpleInline (char '+') Emph  -- approximates underline
-                      , simpleInline (char '-' <* notFollowedBy (char '-')) Strikeout
-                      , simpleInline (char '^') Superscript
-                      , simpleInline (char '~') Subscript
+inlineMarkup :: Parser [Char] ParserState Inlines
+inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
+                      , simpleInline (string "**") B.strong
+                      , simpleInline (string "__") B.emph
+                      , simpleInline (char '*') B.strong
+                      , simpleInline (char '_') B.emph
+                      , simpleInline (char '+') B.emph  -- approximates underline
+                      , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout
+                      , simpleInline (char '^') B.superscript
+                      , simpleInline (char '~') B.subscript
                       ]
 
 -- | Trademark, registered, copyright
-mark :: Parser [Char] st Inline
+mark :: Parser [Char] st Inlines
 mark = try $ char '(' >> (try tm <|> try reg <|> copy)
 
-reg :: Parser [Char] st Inline
+reg :: Parser [Char] st Inlines
 reg = do
   oneOf "Rr"
   char ')'
-  return $ Str "\174"
+  return $ B.str "\174"
 
-tm :: Parser [Char] st Inline
+tm :: Parser [Char] st Inlines
 tm = do
   oneOf "Tt"
   oneOf "Mm"
   char ')'
-  return $ Str "\8482"
+  return $ B.str "\8482"
 
-copy :: Parser [Char] st Inline
+copy :: Parser [Char] st Inlines
 copy = do
   oneOf "Cc"
   char ')'
-  return $ Str "\169"
+  return $ B.str "\169"
 
-note :: Parser [Char] ParserState Inline
+note :: Parser [Char] ParserState Inlines
 note = try $ do
   ref <- (char '[' *> many1 digit <* char ']')
   notes <- stateNotes <$> getState
   case lookup ref notes of
     Nothing   -> fail "note not found"
-    Just raw  -> liftM Note $ parseFromString parseBlocks raw
+    Just raw  -> B.note <$> parseFromString parseBlocks raw
 
 -- | Special chars
 markupChars :: [Char]
@@ -450,7 +454,7 @@ wordBoundaries = markupChars ++ stringBreakers
 hyphenedWords :: Parser [Char] ParserState String
 hyphenedWords = do
   x <- wordChunk
-  xs <- many (try $ char '-' >> wordChunk)
+  xs <-  many (try $ char '-' >> wordChunk)
   return $ intercalate "-" (x:xs)
 
 wordChunk :: Parser [Char] ParserState String
@@ -462,7 +466,7 @@ wordChunk = try $ do
   return $ hd:tl
 
 -- | Any string
-str :: Parser [Char] ParserState Inline
+str :: Parser [Char] ParserState Inlines
 str = do
   baseStr <- hyphenedWords
   -- RedCloth compliance : if parsed word is uppercase and immediatly
@@ -472,89 +476,89 @@ str = do
     acro <- enclosed (char '(') (char ')') anyChar
     return $ concat [baseStr, " (", acro, ")"]
   updateLastStrPos
-  return $ Str fullStr
+  return $ B.str fullStr
 
 -- | Textile allows HTML span infos, we discard them
-htmlSpan :: Parser [Char] ParserState Inline
-htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') )
+htmlSpan :: Parser [Char] ParserState Inlines
+htmlSpan = try $ B.str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') )
 
 -- | Some number of space chars
-whitespace :: Parser [Char] ParserState Inline
-whitespace = many1 spaceChar >> return Space <?> "whitespace"
+whitespace :: Parser [Char] ParserState Inlines
+whitespace = many1 spaceChar >> return B.space <?> "whitespace"
 
 -- | In Textile, an isolated endline character is a line break
-endline :: Parser [Char] ParserState Inline
+endline :: Parser [Char] ParserState Inlines
 endline = try $ do
   newline >> notFollowedBy blankline
-  return LineBreak
+  return B.linebreak
 
-rawHtmlInline :: Parser [Char] ParserState Inline
-rawHtmlInline = RawInline (Format "html") . snd <$> htmlTag isInlineTag
+rawHtmlInline :: Parser [Char] ParserState Inlines
+rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
 
 -- | Raw LaTeX Inline
-rawLaTeXInline' :: Parser [Char] ParserState Inline
+rawLaTeXInline' :: Parser [Char] ParserState Inlines
 rawLaTeXInline' = try $ do
   guardEnabled Ext_raw_tex
-  rawLaTeXInline
+  B.singleton <$> rawLaTeXInline
 
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
-link :: Parser [Char] ParserState Inline
+link :: Parser [Char] ParserState Inlines
 link = linkB <|> linkNoB
 
-linkNoB :: Parser [Char] ParserState Inline
+linkNoB :: Parser [Char] ParserState Inlines
 linkNoB = try $ do
-  name <- surrounded (char '"') inline
+  name <- mconcat <$> surrounded (char '"') (withQuoteContext InDoubleQuote inline)
   char ':'
   let stopChars = "!.,;:"
   url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline)))
-  let name' = if name == [Str "$"] then [Str url] else name
-  return $ Link name' (url, "")
+  let name' = if B.toList name == [Str "$"] then B.str url else name
+  return $ B.link url "" name'
 
-linkB :: Parser [Char] ParserState Inline
+linkB :: Parser [Char] ParserState Inlines
 linkB = try $ do
   char '['
-  name <- surrounded (char '"') inline
+  name <- mconcat <$> surrounded (char '"') inline
   char ':'
   url <- manyTill nonspaceChar (char ']')
-  let name' = if name == [Str "$"] then [Str url] else name
-  return $ Link name' (url, "")
+  let name' = if B.toList name == [Str "$"] then B.str url else name
+  return $ B.link url "" name'
 
 -- | image embedding
-image :: Parser [Char] ParserState Inline
+image :: Parser [Char] ParserState Inlines
 image = try $ do
   char '!' >> notFollowedBy space
   src <- manyTill anyChar (lookAhead $ oneOf "!(")
   alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')')))
   char '!'
-  return $ Image [Str alt] (src, alt)
+  return $ B.image src alt (B.str alt)
 
-escapedInline :: Parser [Char] ParserState Inline
+escapedInline :: Parser [Char] ParserState Inlines
 escapedInline = escapedEqs <|> escapedTag
 
-escapedEqs :: Parser [Char] ParserState Inline
-escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "=="))
+escapedEqs :: Parser [Char] ParserState Inlines
+escapedEqs = B.str <$> (try $ string "==" *> manyTill anyChar (try $ string "=="))
 
 -- | literal text escaped btw <notextile> tags
-escapedTag :: Parser [Char] ParserState Inline
-escapedTag = Str <$>
+escapedTag :: Parser [Char] ParserState Inlines
+escapedTag = B.str <$>
   (try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>"))
 
 -- | Any special symbol defined in wordBoundaries
-symbol :: Parser [Char] ParserState Inline
-symbol = Str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars)
+symbol :: Parser [Char] ParserState Inlines
+symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars)
 
 -- | Inline code
-code :: Parser [Char] ParserState Inline
+code :: Parser [Char] ParserState Inlines
 code = code1 <|> code2
 
-code1 :: Parser [Char] ParserState Inline
-code1 = Code nullAttr <$> surrounded (char '@') anyChar
+code1 :: Parser [Char] ParserState Inlines
+code1 = B.code <$> surrounded (char '@') anyChar
 
-code2 :: Parser [Char] ParserState Inline
+code2 :: Parser [Char] ParserState Inlines
 code2 = do
   htmlTag (tagOpen (=="tt") null)
-  Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
+  B.code <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
 
 -- | Html / CSS attributes
 attributes :: Parser [Char] ParserState Attr
@@ -581,7 +585,7 @@ styleAttr = do
 
 langAttr :: Parser [Char] ParserState (Attr -> Attr)
 langAttr = do
-  lang <- try $ enclosed (char '[') (char ']') anyChar
+  lang <- try $ enclosed (char '[') (char ']') alphaNum
   return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals)
 
 -- | Parses material surrounded by a parser.
@@ -590,14 +594,43 @@ surrounded :: Parser [Char] st t   -- ^ surrounding parser
 	    -> Parser [Char] st [a]
 surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
 
--- | Inlines are most of the time of the same form
+
 simpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
-                -> ([Inline] -> Inline)       -- ^ Inline constructor
-                -> Parser [Char] ParserState Inline   -- ^ content parser (to be used repeatedly)
-simpleInline border construct = surrounded border inlineWithAttribute >>=
-                                return . construct . normalizeSpaces
-  where inlineWithAttribute = (try $ optional attributes) >> inline
+                -> (Inlines -> Inlines)       -- ^ Inline constructor
+                -> Parser [Char] ParserState Inlines   -- ^ content parser (to be used repeatedly)
+simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct 
+
+ungroupedSimpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
+                -> (Inlines -> Inlines)       -- ^ Inline constructor
+                -> Parser [Char] ParserState Inlines   -- ^ content parser (to be used repeatedly)
+ungroupedSimpleInline border construct = try $ do
+  st <- getState
+  pos <- getPosition
+  isWhitespace <- option False (whitespace >> return True)
+  guard $ (stateQuoteContext st /= NoQuote) 
+       || (sourceColumn pos == 1) 
+       || isWhitespace
+  body <- surrounded border inlineWithAttribute
+  lookAhead (notFollowedBy alphaNum)
+  let result = construct $ mconcat body 
+  return $ if isWhitespace then B.space <> result 
+                           else result
+  where 
+    inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n") 
+        >> (withQuoteContext InSingleQuote inline)
+
+
+groupedSimpleInline :: Parser [Char] ParserState t 
+                  -> (Inlines -> Inlines)
+                  -> Parser [Char] ParserState Inlines 
+groupedSimpleInline border construct = try $ do
+    char '['  
+    withQuoteContext InSingleQuote (simpleInline border construct) >>~ char ']'
+
+    
+
 
 -- | Create a singleton list
 singleton :: a -> [a]
 singleton x = [x]
+
-- 
cgit v1.2.3


From 9b5d474e79c0b508ac0da9943b9bb385671aad85 Mon Sep 17 00:00:00 2001
From: Matthew Pickering <matthewtpickering@gmail.com>
Date: Thu, 27 Mar 2014 19:53:32 +0000
Subject: Converted HTML reader to use builder. Fixes #1162.

---
 src/Text/Pandoc/Readers/HTML.hs | 235 +++++++++++++++++++++-------------------
 1 file changed, 126 insertions(+), 109 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index d1e4d0024..4fab251bb 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -40,6 +40,7 @@ import Text.HTML.TagSoup
 import Text.HTML.TagSoup.Match
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
 import Text.Pandoc.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
@@ -48,6 +49,8 @@ import Data.List ( intercalate )
 import Data.Char ( isDigit )
 import Control.Monad ( liftM, guard, when, mzero )
 import Control.Applicative ( (<$>), (<$), (<*) )
+import Data.Monoid
+import Data.Sequence (ViewL(..), ViewR(..), viewr, viewl)
 
 isSpace :: Char -> Bool
 isSpace ' '  = True
@@ -66,30 +69,30 @@ readHtml opts inp =
     where tags = canonicalizeTags $
                    parseTagsOptions parseOptions{ optTagPosition = True } inp
           parseDoc = do
-             blocks <- (fixPlains False . concat) <$> manyTill block eof
+             blocks <- (fixPlains False) . mconcat <$> manyTill block eof
              meta <- stateMeta <$> getState
-             return $ Pandoc meta blocks
+             return $ Pandoc meta (B.toList blocks)
 
 type TagParser = Parser [Tag String] ParserState
 
-pBody :: TagParser [Block]
+pBody :: TagParser Blocks
 pBody = pInTags "body" block
 
-pHead :: TagParser [Block]
-pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag)
-  where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces
-        setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t))
+pHead :: TagParser Blocks
+pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag)
+  where pTitle = pInTags "title" inline >>= setTitle . trimInlines
+        setTitle t = mempty <$ (updateState $ B.setMeta "title" t)
         pMetaTag = do
           mt <- pSatisfy (~== TagOpen "meta" [])
           let name = fromAttrib "name" mt
           if null name
-             then return []
+             then return mempty
              else do
                let content = fromAttrib "content" mt
                updateState $ B.setMeta name (B.text content)
-               return []
+               return mempty
 
-block :: TagParser [Block]
+block :: TagParser Blocks
 block = choice
             [ pPara
             , pHeader
@@ -105,10 +108,10 @@ block = choice
             , pRawHtmlBlock
             ]
 
-pList :: TagParser [Block]
+pList :: TagParser Blocks
 pList = pBulletList <|> pOrderedList <|> pDefinitionList
 
-pBulletList :: TagParser [Block]
+pBulletList :: TagParser Blocks
 pBulletList = try $ do
   pSatisfy (~== TagOpen "ul" [])
   let nonItem = pSatisfy (\t ->
@@ -118,9 +121,9 @@ pBulletList = try $ do
   -- treat it as a list item, though it's not valid xhtml...
   skipMany nonItem
   items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul")
-  return [BulletList $ map (fixPlains True) items]
+  return $ B.bulletList $ map (fixPlains True) items
 
-pOrderedList :: TagParser [Block]
+pOrderedList :: TagParser Blocks
 pOrderedList = try $ do
   TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
   let (start, style) = (sta', sty')
@@ -146,27 +149,27 @@ pOrderedList = try $ do
   -- treat it as a list item, though it's not valid xhtml...
   skipMany nonItem
   items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol")
-  return [OrderedList (start, style, DefaultDelim) $ map (fixPlains True) items]
+  return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items
 
-pDefinitionList :: TagParser [Block]
+pDefinitionList :: TagParser Blocks
 pDefinitionList = try $ do
   pSatisfy (~== TagOpen "dl" [])
   items <- manyTill pDefListItem (pCloses "dl")
-  return [DefinitionList items]
+  return $ B.definitionList items
 
-pDefListItem :: TagParser ([Inline],[[Block]])
+pDefListItem :: TagParser (Inlines, [Blocks])
 pDefListItem = try $ do
   let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) &&
                   not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl"))
   terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
   defs  <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
   skipMany nonItem
-  let term = intercalate [LineBreak] terms
+  let term = foldl1 (\x y ->  x <> B.linebreak <> y) terms
   return (term, map (fixPlains True) defs)
 
-fixPlains :: Bool -> [Block] -> [Block]
-fixPlains inList bs = if any isParaish bs
-                         then map plainToPara bs
+fixPlains :: Bool -> Blocks -> Blocks
+fixPlains inList bs = if any isParaish bs'
+                         then B.fromList $ map plainToPara bs'
                          else bs
   where isParaish (Para _) = True
         isParaish (CodeBlock _ _) = True
@@ -178,6 +181,7 @@ fixPlains inList bs = if any isParaish bs
         isParaish _        = False
         plainToPara (Plain xs) = Para xs
         plainToPara x = x
+        bs' = B.toList bs
 
 pRawTag :: TagParser String
 pRawTag = do
@@ -187,20 +191,20 @@ pRawTag = do
      then return []
      else return $ renderTags' [tag]
 
-pDiv :: TagParser [Block]
+pDiv :: TagParser Blocks
 pDiv = try $ do
   getOption readerParseRaw >>= guard
   TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True)
   contents <- pInTags "div" block
-  return [Div (mkAttr attr) contents]
+  return $ B.divWith (mkAttr attr) contents
 
-pRawHtmlBlock :: TagParser [Block]
+pRawHtmlBlock :: TagParser Blocks
 pRawHtmlBlock = do
   raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
   parseRaw <- getOption readerParseRaw
   if parseRaw && not (null raw)
-     then return [RawBlock (Format "html") raw]
-     else return []
+     then return $ B.rawBlock "html" raw
+     else return mempty
 
 pHtmlBlock :: String -> TagParser String
 pHtmlBlock t = try $ do
@@ -208,35 +212,34 @@ pHtmlBlock t = try $ do
   contents <- manyTill pAnyTag (pSatisfy (~== TagClose t))
   return $ renderTags' $ [open] ++ contents ++ [TagClose t]
 
-pHeader :: TagParser [Block]
+pHeader :: TagParser Blocks
 pHeader = try $ do
   TagOpen tagtype attr <- pSatisfy $
                            tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
                            (const True)
   let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
   let level = read (drop 1 tagtype)
-  contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof)
+  contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
   let ident = fromMaybe "" $ lookup "id" attr
   let classes = maybe [] words $ lookup "class" attr
   let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
   return $ if bodyTitle
-              then []  -- skip a representation of the title in the body
-              else [Header level (ident, classes, keyvals) $
-                    normalizeSpaces contents]
+              then mempty  -- skip a representation of the title in the body
+              else B.headerWith (ident, classes, keyvals) level contents
 
-pHrule :: TagParser [Block]
+pHrule :: TagParser Blocks
 pHrule = do
   pSelfClosing (=="hr") (const True)
-  return [HorizontalRule]
+  return B.horizontalRule
 
-pTable :: TagParser [Block]
+pTable :: TagParser Blocks
 pTable = try $ do
   TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
   skipMany pBlank
-  caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank
+  caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank
   -- TODO actually read these and take width information from them
   widths' <- pColgroup <|> many pCol
-  head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
+  head' <- option mempty $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
   skipMany pBlank
   rows <- pOptInTag "tbody"
           $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
@@ -245,19 +248,21 @@ pTable = try $ do
   let isSinglePlain []        = True
       isSinglePlain [Plain _] = True
       isSinglePlain _         = False
-  let isSimple = all isSinglePlain $ concat (head':rows)
-  let cols = length $ if null head'
-                         then head rows
-                         else head'
+  let lHead = B.toList head'
+  let lRows = map B.toList rows
+  let isSimple = all isSinglePlain (lHead:lRows)
+  let cols = length $ if null lHead
+                         then head lRows
+                         else lHead
   -- fail if there are colspans or rowspans
-  guard $ all (\r -> length r == cols) rows
+  guard $ all (\r -> length r == cols) lRows
   let aligns = replicate cols AlignLeft
   let widths = if null widths'
                   then if isSimple
                        then replicate cols 0
                        else replicate cols (1.0 / fromIntegral cols)
                   else widths'
-  return [Table caption aligns widths head' rows]
+  return $ B.table caption (zip aligns widths) [head'] [rows]
 
 pCol :: TagParser Double
 pCol = try $ do
@@ -275,31 +280,31 @@ pColgroup = try $ do
   skipMany pBlank
   manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
 
-pCell :: String -> TagParser [TableCell]
+pCell :: String -> TagParser Blocks
 pCell celltype = try $ do
   skipMany pBlank
   res <- pInTags celltype block
   skipMany pBlank
-  return [res]
+  return res
 
-pBlockQuote :: TagParser [Block]
+pBlockQuote :: TagParser Blocks
 pBlockQuote = do
   contents <- pInTags "blockquote" block
-  return [BlockQuote $ fixPlains False contents]
+  return $ B.blockQuote $ fixPlains False contents
 
-pPlain :: TagParser [Block]
+pPlain :: TagParser Blocks
 pPlain = do
-  contents <- liftM (normalizeSpaces . concat) $ many1 inline
-  if null contents
-     then return []
-     else return [Plain contents]
+  contents <- trimInlines . mconcat <$> many1 inline
+  if B.isNull contents
+     then return mempty
+     else return $ B.plain contents
 
-pPara :: TagParser [Block]
+pPara :: TagParser Blocks
 pPara = do
-  contents <- pInTags "p" inline
-  return [Para $ normalizeSpaces contents]
+  contents <- trimInlines <$> pInTags "p" inline
+  return $ B.para contents
 
-pCodeBlock :: TagParser [Block]
+pCodeBlock :: TagParser Blocks
 pCodeBlock = try $ do
   TagOpen _ attr <- pSatisfy (~== TagOpen "pre" [])
   contents <- manyTill pAnyTag (pCloses "pre" <|> eof)
@@ -312,9 +317,9 @@ pCodeBlock = try $ do
   let result = case reverse result' of
                     '\n':_   -> init result'
                     _        -> result'
-  return [CodeBlock (mkAttr attr) result]
+  return $ B.codeBlockWith (mkAttr attr) result
 
-inline :: TagParser [Inline]
+inline :: TagParser Inlines
 inline = choice
            [ pTagText
            , pQ
@@ -354,7 +359,7 @@ pSelfClosing f g = do
   optional $ pSatisfy (tagClose f)
   return open
 
-pQ :: TagParser [Inline]
+pQ :: TagParser Inlines
 pQ = do
   quoteContext <- stateQuoteContext `fmap` getState
   let quoteType = case quoteContext of
@@ -363,82 +368,93 @@ pQ = do
   let innerQuoteContext = if quoteType == SingleQuote
                              then InSingleQuote
                              else InDoubleQuote
-  withQuoteContext innerQuoteContext $ pInlinesInTags "q" (Quoted quoteType)
+  let constructor = case quoteType of
+                            SingleQuote -> B.singleQuoted 
+                            DoubleQuote -> B.doubleQuoted
+  withQuoteContext innerQuoteContext $ 
+    pInlinesInTags "q" constructor
 
-pEmph :: TagParser [Inline]
-pEmph = pInlinesInTags "em" Emph <|> pInlinesInTags "i" Emph
+pEmph :: TagParser Inlines
+pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
 
-pStrong :: TagParser [Inline]
-pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong
+pStrong :: TagParser Inlines
+pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong
 
-pSuperscript :: TagParser [Inline]
-pSuperscript = pInlinesInTags "sup" Superscript
+pSuperscript :: TagParser Inlines
+pSuperscript = pInlinesInTags "sup" B.superscript
 
-pSubscript :: TagParser [Inline]
-pSubscript = pInlinesInTags "sub" Subscript
+pSubscript :: TagParser Inlines
+pSubscript = pInlinesInTags "sub" B.subscript
 
-pStrikeout :: TagParser [Inline]
+pStrikeout :: TagParser Inlines
 pStrikeout = do
-  pInlinesInTags "s" Strikeout <|>
-    pInlinesInTags "strike" Strikeout <|>
-    pInlinesInTags "del" Strikeout <|>
+  pInlinesInTags "s" B.strikeout <|>
+    pInlinesInTags "strike" B.strikeout <|>
+    pInlinesInTags "del" B.strikeout <|>
     try (do pSatisfy (~== TagOpen "span" [("class","strikeout")])
-            contents <- liftM concat $ manyTill inline (pCloses "span")
-            return [Strikeout contents])
+            contents <- mconcat <$> manyTill inline (pCloses "span")
+            return $ B.strikeout contents)
 
-pLineBreak :: TagParser [Inline]
+pLineBreak :: TagParser Inlines
 pLineBreak = do
   pSelfClosing (=="br") (const True)
-  return [LineBreak]
+  return B.linebreak
 
-pLink :: TagParser [Inline]
+pLink :: TagParser Inlines
 pLink = try $ do
   tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href"))
   let url = fromAttrib "href" tag
   let title = fromAttrib "title" tag
-  lab <- liftM concat $ manyTill inline (pCloses "a")
-  return [Link (normalizeSpaces lab) (escapeURI url, title)]
+  lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
+  return $ B.link (escapeURI url) title lab 
 
-pImage :: TagParser [Inline]
+pImage :: TagParser Inlines
 pImage = do
   tag <- pSelfClosing (=="img") (isJust . lookup "src")
   let url = fromAttrib "src" tag
   let title = fromAttrib "title" tag
   let alt = fromAttrib "alt" tag
-  return [Image (B.toList $ B.text alt) (escapeURI url, title)]
+  return $ B.image (escapeURI url) title (B.text alt)
 
-pCode :: TagParser [Inline]
+pCode :: TagParser Inlines
 pCode = try $ do
   (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
   result <- manyTill pAnyTag (pCloses open)
-  return [Code (mkAttr attr) $ intercalate " " $ lines $ innerText result]
+  return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result
 
-pSpan :: TagParser [Inline]
+pSpan :: TagParser Inlines
 pSpan = try $ do
   getOption readerParseRaw >>= guard
   TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
   contents <- pInTags "span" inline
-  return [Span (mkAttr attr) contents]
+  return $ B.spanWith (mkAttr attr) contents
 
-pRawHtmlInline :: TagParser [Inline]
+pRawHtmlInline :: TagParser Inlines
 pRawHtmlInline = do
   result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag
   parseRaw <- getOption readerParseRaw
   if parseRaw
-     then return [RawInline (Format "html") $ renderTags' [result]]
-     else return []
+     then return $ B.rawInline "html" $ renderTags' [result]
+     else return mempty
 
-pInlinesInTags :: String -> ([Inline] -> Inline)
-               -> TagParser [Inline]
+pInlinesInTags :: String -> (Inlines -> Inlines)
+               -> TagParser Inlines
 pInlinesInTags tagtype f = do
-  contents <- pInTags tagtype inline
-  return [f $ normalizeSpaces contents]
-
-pInTags :: String -> TagParser [a]
-        -> TagParser [a]
+  contents <- B.unMany <$> pInTags tagtype inline
+  let left  = case viewl contents of
+                    EmptyL -> mempty
+                    (a :< _) -> padSpace a
+  let right = case viewr contents of
+                    EmptyR   -> mempty
+                    (_ :> a) -> padSpace a
+  return (left <> f (trimInlines . B.Many $ contents) <> right)
+  where padSpace a = if a == Space then B.space else mempty
+
+pInTags :: (Monoid a) => String -> TagParser a
+        -> TagParser a
 pInTags tagtype parser = try $ do
   pSatisfy (~== TagOpen tagtype [])
-  liftM concat $ manyTill parser (pCloses tagtype <|> eof)
+  mconcat <$> manyTill parser (pCloses tagtype <|> eof)
 
 pOptInTag :: String -> TagParser a
           -> TagParser a
@@ -461,36 +477,36 @@ pCloses tagtype = try $ do
        (TagClose "dl") | tagtype == "li" -> return ()
        _ -> mzero
 
-pTagText :: TagParser [Inline]
+pTagText :: TagParser Inlines
 pTagText = try $ do
   (TagText str) <- pSatisfy isTagText
   st <- getState
   case runParser (many pTagContents) st "text" str of
        Left _        -> fail $ "Could not parse `" ++ str ++ "'"
-       Right result  -> return result
+       Right result  -> return $ mconcat result
 
 pBlank :: TagParser ()
 pBlank = try $ do
   (TagText str) <- pSatisfy isTagText
   guard $ all isSpace str
 
-pTagContents :: Parser [Char] ParserState Inline
+pTagContents :: Parser [Char] ParserState Inlines
 pTagContents =
-      Math DisplayMath `fmap` mathDisplay
-  <|> Math InlineMath  `fmap` mathInline
+      B.displayMath <$> mathDisplay
+  <|> B.math        <$> mathInline
   <|> pStr
   <|> pSpace
   <|> smartPunctuation pTagContents
   <|> pSymbol
   <|> pBad
 
-pStr :: Parser [Char] ParserState Inline
+pStr :: Parser [Char] ParserState Inlines
 pStr = do
   result <- many1 $ satisfy $ \c ->
                      not (isSpace c) && not (isSpecial c) && not (isBad c)
   pos <- getPosition
   updateState $ \s -> s{ stateLastStrPos = Just pos }
-  return $ Str result
+  return $ B.str result
 
 isSpecial :: Char -> Bool
 isSpecial '"' = True
@@ -504,13 +520,13 @@ isSpecial '\8220' = True
 isSpecial '\8221' = True
 isSpecial _ = False
 
-pSymbol :: Parser [Char] ParserState Inline
-pSymbol = satisfy isSpecial >>= return . Str . (:[])
+pSymbol :: Parser [Char] ParserState Inlines
+pSymbol = satisfy isSpecial >>= return . B.str . (:[])
 
 isBad :: Char -> Bool
 isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
 
-pBad :: Parser [Char] ParserState Inline
+pBad :: Parser [Char] ParserState Inlines
 pBad = do
   c <- satisfy isBad
   let c' = case c of
@@ -542,10 +558,10 @@ pBad = do
                 '\158' -> '\382'
                 '\159' -> '\376'
                 _      -> '?'
-  return $ Str [c']
+  return $ B.str [c']
 
-pSpace :: Parser [Char] ParserState Inline
-pSpace = many1 (satisfy isSpace) >> return Space
+pSpace :: Parser [Char] ParserState Inlines
+pSpace = many1 (satisfy isSpace) >> return B.space
 
 --
 -- Constants
@@ -679,3 +695,4 @@ mkAttr attr = (attribsId, attribsClasses, attribsKV)
         attribsClasses = words $ fromMaybe "" $ lookup "class" attr
         attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
 
+
-- 
cgit v1.2.3


From 5a51a67abda59c177f3a6d0f6cba59d41e866287 Mon Sep 17 00:00:00 2001
From: Matthew Pickering <matthewtpickering@gmail.com>
Date: Thu, 27 Mar 2014 19:56:47 +0000
Subject: Changed the smart punctuation parser to return Inlines rather than an
 Inline element and updated files accordingly

---
 src/Text/Pandoc/Parsing.hs          | 43 ++++++++++++++++++-------------------
 src/Text/Pandoc/Readers/Markdown.hs |  2 +-
 src/Text/Pandoc/Readers/RST.hs      |  2 +-
 src/Text/Pandoc/Readers/Textile.hs  |  4 +---
 4 files changed, 24 insertions(+), 27 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 68d4605ee..a9009eaa2 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -997,17 +997,17 @@ registerHeader (ident,classes,kvs) header' = do
 failUnlessSmart :: HasReaderOptions st => Parser s st ()
 failUnlessSmart = getOption readerSmart >>= guard
 
-smartPunctuation :: Parser [Char] ParserState Inline
-                 -> Parser [Char] ParserState Inline
+smartPunctuation :: Parser [Char] ParserState Inlines
+                 -> Parser [Char] ParserState Inlines
 smartPunctuation inlineParser = do
   failUnlessSmart
   choice [ quoted inlineParser, apostrophe, dash, ellipses ]
 
-apostrophe :: Parser [Char] ParserState Inline
-apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019")
+apostrophe :: Parser [Char] ParserState Inlines
+apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
 
-quoted :: Parser [Char] ParserState Inline
-       -> Parser [Char] ParserState Inline
+quoted :: Parser [Char] ParserState Inlines
+       -> Parser [Char] ParserState Inlines
 quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
 
 withQuoteContext :: QuoteContext
@@ -1022,20 +1022,19 @@ withQuoteContext context parser = do
   setState newState { stateQuoteContext = oldQuoteContext }
   return result
 
-singleQuoted :: Parser [Char] ParserState Inline
-             -> Parser [Char] ParserState Inline
+singleQuoted :: Parser [Char] ParserState Inlines
+             -> Parser [Char] ParserState Inlines
 singleQuoted inlineParser = try $ do
   singleQuoteStart
   withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
-    return . Quoted SingleQuote . normalizeSpaces
+    return . B.singleQuoted . mconcat
 
-doubleQuoted :: Parser [Char] ParserState Inline
-             -> Parser [Char] ParserState Inline
+doubleQuoted :: Parser [Char] ParserState Inlines
+             -> Parser [Char] ParserState Inlines
 doubleQuoted inlineParser = try $ do
   doubleQuoteStart
-  withQuoteContext InDoubleQuote $ do
-    contents <- manyTill inlineParser doubleQuoteEnd
-    return . Quoted DoubleQuote . normalizeSpaces $ contents
+  withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
+    return . B.doubleQuoted . mconcat
 
 failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState ()
 failIfInQuoteContext context = do
@@ -1079,17 +1078,17 @@ doubleQuoteEnd = do
   charOrRef "\"\8221\148"
   return ()
 
-ellipses :: Parser [Char] st Inline
+ellipses :: Parser [Char] st Inlines
 ellipses = do
   try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
-  return (Str "\8230")
+  return (B.str "\8230")
 
-dash :: Parser [Char] ParserState Inline
+dash :: Parser [Char] ParserState Inlines
 dash = do
   oldDashes <- getOption readerOldDashes
   if oldDashes
      then emDashOld <|> enDashOld
-     else Str `fmap` (hyphenDash <|> emDash <|> enDash)
+     else B.str `fmap` (hyphenDash <|> emDash <|> enDash)
 
 -- Two hyphens = en-dash, three = em-dash
 hyphenDash :: Parser [Char] st String
@@ -1107,16 +1106,16 @@ enDash = do
   try (charOrRef "\8212\151")
   return "\8211"
 
-enDashOld :: Parser [Char] st Inline
+enDashOld :: Parser [Char] st Inlines
 enDashOld = do
   try (charOrRef "\8211\150") <|>
     try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
-  return (Str "\8211")
+  return (B.str "\8211")
 
-emDashOld :: Parser [Char] st Inline
+emDashOld :: Parser [Char] st Inlines
 emDashOld = do
   try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
-  return (Str "\8212")
+  return (B.str "\8212")
 
 -- This is used to prevent exponential blowups for things like:
 -- a**a*a**a*a**a*a**a*a**a*a**a*a**
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index aa0252266..57e1ca560 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1873,7 +1873,7 @@ smart :: MarkdownParser (F Inlines)
 smart = do
   getOption readerSmart >>= guard
   doubleQuoted <|> singleQuoted <|>
-    choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses])
+    choice (map (return <$>) [apostrophe, dash, ellipses])
 
 singleQuoted :: MarkdownParser (F Inlines)
 singleQuoted = try $ do
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 127eae167..a574f343a 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1140,7 +1140,7 @@ smart :: RSTParser Inlines
 smart = do
   getOption readerSmart >>= guard
   doubleQuoted <|> singleQuoted <|>
-    choice (map (B.singleton <$>) [apostrophe, dash, ellipses])
+    choice [apostrophe, dash, ellipses]
 
 singleQuoted :: RSTParser Inlines
 singleQuoted = try $ do
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index ede50c6de..c6f992275 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -306,9 +306,7 @@ rawLaTeXBlock' = do
 
 -- | In textile, paragraphs are separated by blank lines.
 para :: Parser [Char] ParserState Blocks
-para = do
-    a <- manyTill inline blockBreak 
-    return $ (B.para . trimInlines . mconcat) a
+para = B.para . trimInlines . mconcat <$> manyTill inline blockBreak
 
 -- Tables
 
-- 
cgit v1.2.3