RST reader: Use Text.Pandoc.Builder.

This will give us more flexibility in the future. It also gives built-in normalization. Performance slightly better.
author: John MacFarlane <jgm@berkeley.edu> 2012-09-27 16:45:06 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2012-09-27 17:52:48 -0700
commit: ab17faf49709bca8a864d1b80f8c8456865fef0c (patch)
tree: c2e85a41cf0eef84907e59c2ed5b81261e78a501 /src/Text
parent: 5c06322ab2cc6707ec1e00f9b6c17283cd0fb347 (diff)
download: pandoc-ab17faf49709bca8a864d1b80f8c8456865fef0c.tar.gz
1 files changed, 180 insertions, 174 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 90f222aa4..0ef2dbc4f 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -35,11 +35,14 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Parsing
 import Text.Pandoc.Options
 import Control.Monad ( when, liftM, guard, mzero )
-import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy )
+import Data.List ( findIndex, intersperse, intercalate, transpose, sort, deleteFirstsBy )
 import qualified Data.Map as M
 import Text.Printf ( printf )
 import Data.Maybe ( catMaybes )
-import Control.Applicative ((<$>))
+import Control.Applicative ((<$>), (<$), (<*), (*>))
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import qualified Text.Pandoc.Builder as B
+import Data.Monoid (mconcat, mempty)
 
 -- | Parse reStructuredText string and return Pandoc document.
 readRST :: ReaderOptions -- ^ Reader options
@@ -104,26 +107,25 @@ parseRST = do
   let reversedNotes = stateNotes st'
   updateState $ \s -> s { stateNotes = reverse reversedNotes }
   -- now parse it for real...
-  blocks <- parseBlocks
-  let blocks' = filter (/= Null) blocks
+  blocks <- B.toList <$> parseBlocks
   standalone <- getOption readerStandalone
-  let (blocks'', title) = if standalone
-                              then titleTransform blocks'
-                              else (blocks', [])
+  let (blocks', title) = if standalone
+                            then titleTransform blocks
+                            else (blocks, [])
   state <- getState
   let authors = stateAuthors state
   let date = stateDate state
-  let title' = if (null title) then (stateTitle state) else title
-  return $ Pandoc (Meta title' authors date) blocks''
+  let title' = if null title then stateTitle state else title
+  return $ Pandoc (Meta title' authors date) blocks'
 
 --
 -- parsing blocks
 --
 
-parseBlocks :: Parser [Char] ParserState [Block]
-parseBlocks = manyTill block eof
+parseBlocks :: Parser [Char] ParserState Blocks
+parseBlocks = mconcat <$> manyTill block eof
 
-block :: Parser [Char] ParserState Block
+block :: Parser [Char] ParserState Blocks
 block = choice [ codeBlock
                , rawBlock
                , blockQuote
@@ -142,7 +144,7 @@ block = choice [ codeBlock
                , lhsCodeBlock
                , para
                , plain
-               , nullBlock ] <?> "block"
+               ] <?> "block"
 
 --
 -- field list
@@ -161,13 +163,13 @@ rawFieldListItem indent = try $ do
   return (name, raw)
 
 fieldListItem :: String
-              -> Parser [Char] ParserState (Maybe ([Inline], [[Block]]))
+              -> Parser [Char] ParserState (Maybe (Inlines, [Blocks]))
 fieldListItem indent = try $ do
   (name, raw) <- rawFieldListItem indent
-  let term = [Str name]
-  contents <- parseFromString (many block) raw
+  let term = B.str name
+  contents <- parseFromString parseBlocks raw
   optional blanklines
-  case (name, contents) of
+  case (name, B.toList contents) of
        ("Author", x) -> do
            updateState $ \st ->
              st{ stateAuthors = stateAuthors st ++ [extractContents x] }
@@ -188,19 +190,19 @@ extractContents [Plain auth] = auth
 extractContents [Para auth]  = auth
 extractContents _            = []
 
-fieldList :: Parser [Char] ParserState Block
+fieldList :: Parser [Char] ParserState Blocks
 fieldList = try $ do
   indent <- lookAhead $ many spaceChar
   items <- many1 $ fieldListItem indent
   if null items
-     then return Null
-     else return $ DefinitionList $ catMaybes items
+     then return mempty
+     else return $ B.definitionList $ catMaybes items
 
 --
 -- line block
 --
 
-lineBlockLine :: Parser [Char] ParserState [Inline]
+lineBlockLine :: Parser [Char] ParserState Inlines
 lineBlockLine = try $ do
   char '|'
   char ' ' <|> lookAhead (char '\n')
@@ -208,87 +210,74 @@ lineBlockLine = try $ do
   line <- many $ (notFollowedBy newline >> inline) <|> (try $ endline >>~ char ' ')
   optional endline
   return $ if null white
-              then normalizeSpaces line
-              else Str white : normalizeSpaces line
+              then mconcat line
+              else B.str white <> mconcat line
 
-lineBlock :: Parser [Char] ParserState Block
+lineBlock :: Parser [Char] ParserState Blocks
 lineBlock = try $ do
   lines' <- many1 lineBlockLine
   blanklines
-  return $ Para (intercalate [LineBreak] lines')
+  return $ B.para (mconcat $ intersperse B.linebreak lines')
 
 --
 -- paragraph block
 --
 
-para :: Parser [Char] ParserState Block
-para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
-
 codeBlockStart :: Parser [Char] st Char
 codeBlockStart = string "::" >> blankline >> blankline
 
--- paragraph that ends in a :: starting a code block
-paraBeforeCodeBlock :: Parser [Char] ParserState Block
-paraBeforeCodeBlock = try $ do
-  result <- many1 (notFollowedBy' codeBlockStart >> inline)
-  lookAhead (string "::")
-  return $ Para $ if last result == Space
-                     then normalizeSpaces result
-                     else (normalizeSpaces result) ++ [Str ":"]
-
--- regular paragraph
-paraNormal :: Parser [Char] ParserState Block
-paraNormal = try $ do
-  result <- many1 inline
-  newline
-  blanklines
-  return $ Para $ normalizeSpaces result
+-- note: paragraph can end in a :: starting a code block
+para :: Parser [Char] ParserState Blocks
+para = try $ do
+  result <- trimInlines . mconcat <$>
+                many1 (notFollowedBy' codeBlockStart >> inline)
+  (lookAhead codeBlockStart >> return (B.para $ result <> B.str ":"))
+    <|> (newline >> blanklines >> return (B.para result))
 
-plain :: Parser [Char] ParserState Block
-plain = many1 inline >>= return . Plain . normalizeSpaces
+plain :: Parser [Char] ParserState Blocks
+plain = B.plain . trimInlines . mconcat <$> many1 inline
 
 --
 -- image block
 --
 
-imageBlock :: Parser [Char] ParserState Block
+imageBlock :: Parser [Char] ParserState Blocks
 imageBlock = try $ do
   string ".. "
-  res <- imageDef [Str "image"]
-  return $ Para [res]
+  res <- imageDef (B.str "image")
+  return $ B.para res
 
-imageDef :: [Inline] -> Parser [Char] ParserState Inline
+imageDef :: Inlines -> Parser [Char] ParserState Inlines
 imageDef defaultAlt = try $ do
   string "image:: "
   src <- escapeURI . removeLeadingTrailingSpace <$> manyTill anyChar newline
   fields <- try $ do indent <- lookAhead $ many (oneOf " /t")
                      many $ rawFieldListItem indent
   optional blanklines
-  let alt = maybe defaultAlt (\x -> [Str $ removeTrailingSpace x])
+  let alt = maybe defaultAlt (\x -> B.str $ removeTrailingSpace x)
             $ lookup "alt" fields
-  let img = Image alt (src,"")
+  let img = B.image src "" alt
   return $ case lookup "target" fields of
-                 Just t  -> Link [img]
-                            (escapeURI $ removeLeadingTrailingSpace t,"")
+                 Just t  -> B.link (escapeURI $ removeLeadingTrailingSpace t)
+                              "" img
                  Nothing -> img
 
-
 --
 -- header blocks
 --
 
-header :: Parser [Char] ParserState Block
+header :: Parser [Char] ParserState Blocks
 header = doubleHeader <|> singleHeader <?> "header"
 
 -- a header with lines on top and bottom
-doubleHeader :: Parser [Char] ParserState Block
+doubleHeader :: Parser [Char] ParserState Blocks
 doubleHeader = try $ do
   c <- oneOf underlineChars
   rest <- many (char c)  -- the top line
   let lenTop = length (c:rest)
   skipSpaces
   newline
-  txt <- many1 (notFollowedBy blankline >> inline)
+  txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline)
   pos <- getPosition
   let len = (sourceColumn pos) - 1
   if (len > lenTop) then fail "title longer than border" else return ()
@@ -303,13 +292,13 @@ doubleHeader = try $ do
         Just ind -> (headerTable, ind + 1)
         Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
   setState (state { stateHeaderTable = headerTable' })
-  return $ Header level (normalizeSpaces txt)
+  return $ B.header level txt
 
 -- a header with line on the bottom only
-singleHeader :: Parser [Char] ParserState Block
+singleHeader :: Parser [Char] ParserState Blocks
 singleHeader = try $ do
   notFollowedBy' whitespace
-  txt <- many1 (do {notFollowedBy blankline; inline})
+  txt <- trimInlines . mconcat <$> many1 (do {notFollowedBy blankline; inline})
   pos <- getPosition
   let len = (sourceColumn pos) - 1
   blankline
@@ -323,20 +312,20 @@ singleHeader = try $ do
         Just ind -> (headerTable, ind + 1)
         Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
   setState (state { stateHeaderTable = headerTable' })
-  return $ Header level (normalizeSpaces txt)
+  return $ B.header level txt
 
 --
 -- hrule block
 --
 
-hrule :: Parser [Char] st Block
+hrule :: Parser [Char] st Blocks
 hrule = try $ do
   chr <- oneOf underlineChars
   count 3 (char chr)
   skipMany (char chr)
   blankline
   blanklines
-  return HorizontalRule
+  return B.horizontalRule
 
 --
 -- code blocks
@@ -359,49 +348,49 @@ indentedBlock = try $ do
   optional blanklines
   return $ unlines lns
 
-codeBlock :: Parser [Char] st Block
+codeBlock :: Parser [Char] st Blocks
 codeBlock = try $ do
   codeBlockStart
   result <- indentedBlock
-  return $ CodeBlock ("",[],[]) $ stripTrailingNewlines result
+  return $ B.codeBlock $ stripTrailingNewlines result
 
 -- | The 'code-block' directive (from Sphinx) that allows a language to be
 -- specified.
-customCodeBlock :: Parser [Char] st Block
+customCodeBlock :: Parser [Char] st Blocks
 customCodeBlock = try $ do
   string ".. code-block:: "
   language <- manyTill anyChar newline
   blanklines
   result <- indentedBlock
-  return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result
-
+  return $ B.codeBlockWith ("", ["sourceCode", language], [])
+         $ stripTrailingNewlines result
 
-figureBlock :: Parser [Char] ParserState Block
+figureBlock :: Parser [Char] ParserState Blocks
 figureBlock = try $ do
   string ".. figure::"
-  src <- removeLeadingTrailingSpace `fmap` manyTill anyChar newline
+  src <- escapeURI . removeLeadingTrailingSpace <$> manyTill anyChar newline
   body <- indentedBlock
   caption <- parseFromString extractCaption body
-  return $ Para [Image caption (src,"")]
+  return $ B.para $ B.image src "" caption
 
-extractCaption :: Parser [Char] ParserState [Inline]
+extractCaption :: Parser [Char] ParserState Inlines
 extractCaption = try $ do
   manyTill anyLine blanklines
-  many inline
+  trimInlines . mconcat <$> many inline
 
 -- | The 'math' directive (from Sphinx) for display math.
-mathBlock :: Parser [Char] st Block
+mathBlock :: Parser [Char] st Blocks
 mathBlock = try $ do
   string ".. math::"
   mathBlockMultiline <|> mathBlockOneLine
 
-mathBlockOneLine :: Parser [Char] st Block
+mathBlockOneLine :: Parser [Char] st Blocks
 mathBlockOneLine = try $ do
   result <- manyTill anyChar newline
   blanklines
-  return $ Para [Math DisplayMath $ removeLeadingTrailingSpace result]
+  return $ B.para $ B.displayMath $ removeLeadingTrailingSpace result
 
-mathBlockMultiline :: Parser [Char] st Block
+mathBlockMultiline :: Parser [Char] st Blocks
 mathBlockMultiline = try $ do
   blanklines
   result <- indentedBlock
@@ -414,9 +403,9 @@ mathBlockMultiline = try $ do
   let lns' = dropWhile startsWithColon lns
   let eqs = map (removeLeadingTrailingSpace . unlines)
             $ filter (not . null) $ splitBy null lns'
-  return $ Para $ map (Math DisplayMath) eqs
+  return $ B.para $ mconcat $ map B.displayMath eqs
 
-lhsCodeBlock :: Parser [Char] ParserState Block
+lhsCodeBlock :: Parser [Char] ParserState Blocks
 lhsCodeBlock = try $ do
   guardEnabled Ext_literate_haskell
   optional codeBlockStart
@@ -428,55 +417,54 @@ lhsCodeBlock = try $ do
                 then map (drop 1) lns
                 else lns
   blanklines
-  return $ CodeBlock ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns'
+  return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], [])
+         $ intercalate "\n" lns'
 
 birdTrackLine :: Parser [Char] st [Char]
-birdTrackLine = do
-  char '>'
-  manyTill anyChar newline
+birdTrackLine = char '>' >> manyTill anyChar newline
 
 --
 -- raw html/latex/etc
 --
 
-rawBlock :: Parser [Char] st Block
+rawBlock :: Parser [Char] st Blocks
 rawBlock = try $ do
   string ".. raw:: "
   lang <- many1 (letter <|> digit)
   blanklines
   result <- indentedBlock
-  return $ RawBlock lang result
+  return $ B.rawBlock lang result
 
 --
 -- block quotes
 --
 
-blockQuote :: Parser [Char] ParserState Block
+blockQuote :: Parser [Char] ParserState Blocks
 blockQuote = do
   raw <- indentedBlock
   -- parse the extracted block, which may contain various block elements:
   contents <- parseFromString parseBlocks $ raw ++ "\n\n"
-  return $ BlockQuote contents
+  return $ B.blockQuote contents
 
 --
 -- list blocks
 --
 
-list :: Parser [Char] ParserState Block
+list :: Parser [Char] ParserState Blocks
 list = choice [ bulletList, orderedList, definitionList ] <?> "list"
 
-definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]])
+definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks])
 definitionListItem = try $ do
   -- avoid capturing a directive or comment
   notFollowedBy (try $ char '.' >> char '.')
-  term <- many1Till inline endline
+  term <- trimInlines . mconcat <$> many1Till inline endline
   raw <- indentedBlock
   -- parse the extracted block, which may contain various block elements:
   contents <- parseFromString parseBlocks $ raw ++ "\n"
-  return (normalizeSpaces term, [contents])
+  return (term, [contents])
 
-definitionList :: Parser [Char] ParserState Block
-definitionList = many1 definitionListItem >>= return . DefinitionList
+definitionList :: Parser [Char] ParserState Blocks
+definitionList = B.definitionList <$> many1 definitionListItem
 
 -- parses bullet list start and returns its length (inc. following whitespace)
 bulletListStart :: Parser [Char] st Int
@@ -531,7 +519,7 @@ listContinuation markerLength = try $ do
   return $ blanks ++ concat result
 
 listItem :: Parser [Char] ParserState Int
-         -> Parser [Char] ParserState [Block]
+         -> Parser [Char] ParserState Blocks
 listItem start = try $ do
   (markerLength, first) <- rawListItem start
   rest <- many (listContinuation markerLength)
@@ -548,22 +536,21 @@ listItem start = try $ do
   updateState (\st -> st {stateParserContext = oldContext})
   return parsed
 
-orderedList :: Parser [Char] ParserState Block
+orderedList :: Parser [Char] ParserState Blocks
 orderedList = try $ do
   (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
   items <- many1 (listItem (orderedListStart style delim))
-  let items' = compactify items
-  return $ OrderedList (start, style, delim) items'
+  let items' = compactify' items
+  return $ B.orderedListWith (start, style, delim) items'
 
-bulletList :: Parser [Char] ParserState Block
-bulletList = many1 (listItem bulletListStart) >>=
-             return . BulletList . compactify
+bulletList :: Parser [Char] ParserState Blocks
+bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
 
 --
 -- default-role block
 --
 
-defaultRoleBlock :: Parser [Char] ParserState Block
+defaultRoleBlock :: Parser [Char] ParserState Blocks
 defaultRoleBlock = try $ do
     string ".. default-role::"
     -- doesn't enforce any restrictions on the role name; embedded spaces shouldn't be allowed, for one
@@ -574,20 +561,20 @@ defaultRoleBlock = try $ do
            else role
         }
     -- skip body of the directive if it exists
-    many $ blanklines <|> (spaceChar >> manyTill anyChar newline)
-    return Null
+    skipMany $ blanklines <|> (spaceChar >> manyTill anyChar newline)
+    return mempty
 
 --
 -- unknown directive (e.g. comment)
 --
 
-unknownDirective :: Parser [Char] st Block
+unknownDirective :: Parser [Char] st Blocks
 unknownDirective = try $ do
   string ".."
   notFollowedBy (noneOf " \t\n")
   manyTill anyChar newline
   many $ blanklines <|> (spaceChar >> manyTill anyChar newline)
-  return Null
+  return mempty
 
 ---
 --- note block
@@ -625,15 +612,15 @@ noteMarker = do
 -- reference key
 --
 
-quotedReferenceName :: Parser [Char] ParserState [Inline]
+quotedReferenceName :: Parser [Char] ParserState Inlines
 quotedReferenceName = try $ do
   char '`' >> notFollowedBy (char '`') -- `` means inline code!
-  label' <- many1Till inline (char '`')
+  label' <- trimInlines . mconcat <$> many1Till inline (char '`')
   return label'
 
-unquotedReferenceName :: Parser [Char] ParserState [Inline]
+unquotedReferenceName :: Parser [Char] ParserState Inlines
 unquotedReferenceName = try $ do
-  label' <- many1Till inline (lookAhead $ char ':')
+  label' <- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':')
   return label'
 
 -- Simple reference names are single words consisting of alphanumerics
@@ -647,12 +634,12 @@ simpleReferenceName' = do
             <|> (try $ oneOf "-_:+." >> lookAhead alphaNum)
   return (x:xs)
 
-simpleReferenceName :: Parser [Char] st [Inline]
+simpleReferenceName :: Parser [Char] st Inlines
 simpleReferenceName = do
   raw <- simpleReferenceName'
-  return [Str raw]
+  return $ B.str raw
 
-referenceName :: Parser [Char] ParserState [Inline]
+referenceName :: Parser [Char] ParserState Inlines
 referenceName = quotedReferenceName <|>
                 (try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
                 unquotedReferenceName
@@ -678,7 +665,7 @@ targetURI = do
 imageKey :: Parser [Char] ParserState ()
 imageKey = try $ do
   string ".. |"
-  (alt,ref) <- withRaw (manyTill inline (char '|'))
+  (alt,ref) <- withRaw (trimInlines . mconcat <$> manyTill inline (char '|'))
   skipSpaces
   img <- imageDef alt
   let key = toKey $ init ref
@@ -753,7 +740,7 @@ simpleTableRow indices = do
   firstLine <- simpleTableRawLine indices
   colLines  <- return [] -- TODO
   let cols = map unlines . transpose $ firstLine : colLines
-  mapM (parseFromString (many plain)) cols
+  mapM (parseFromString (B.toList . mconcat <$> many plain)) cols
 
 simpleTableSplitLine :: [Int] -> String -> [String]
 simpleTableSplitLine indices line =
@@ -775,34 +762,34 @@ simpleTableHeader headless = try $ do
   let rawHeads = if headless
                     then replicate (length dashes) ""
                     else simpleTableSplitLine indices rawContent
-  heads <- mapM (parseFromString (many plain)) $
+  heads <- mapM (parseFromString (B.toList . mconcat <$> many plain)) $
              map removeLeadingTrailingSpace rawHeads
   return (heads, aligns, indices)
 
 -- Parse a simple table.
 simpleTable :: Bool  -- ^ Headerless table
-            -> Parser [Char] ParserState Block
+            -> Parser [Char] ParserState Blocks
 simpleTable headless = do
   Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter
   -- Simple tables get 0s for relative column widths (i.e., use default)
-  return $ Table c a (replicate (length a) 0) h l
+  return $ B.singleton $ Table c a (replicate (length a) 0) h l
  where
   sep = return () -- optional (simpleTableSep '-')
 
 gridTable :: Bool -- ^ Headerless table
-          -> Parser [Char] ParserState Block
-gridTable = gridTableWith parseBlocks
+          -> Parser [Char] ParserState Blocks
+gridTable headerless = B.singleton
+  <$> gridTableWith (B.toList <$> parseBlocks) headerless
 
-table :: Parser [Char] ParserState Block
+table :: Parser [Char] ParserState Blocks
 table = gridTable False <|> simpleTable False <|>
         gridTable True  <|> simpleTable True <?> "table"
 
+--
+-- inline
+--
 
- --
- -- inline
- --
-
-inline :: Parser [Char] ParserState Inline
+inline :: Parser [Char] ParserState Inlines
 inline = choice [ whitespace
                 , link
                 , str
@@ -815,36 +802,36 @@ inline = choice [ whitespace
                 , subscript
                 , math
                 , note
-                , smartPunctuation inline
+                , smart
                 , hyphens
                 , escapedChar
                 , symbol ] <?> "inline"
 
-hyphens :: Parser [Char] ParserState Inline
+hyphens :: Parser [Char] ParserState Inlines
 hyphens = do
   result <- many1 (char '-')
-  option Space endline
+  optional endline
   -- don't want to treat endline after hyphen or dash as a space
-  return $ Str result
+  return $ B.str result
 
-escapedChar :: Parser [Char] st Inline
+escapedChar :: Parser [Char] st Inlines
 escapedChar = do c <- escaped anyChar
                  return $ if c == ' '  -- '\ ' is null in RST
-                             then Str ""
-                             else Str [c]
+                             then mempty
+                             else B.str [c]
 
-symbol :: Parser [Char] ParserState Inline
+symbol :: Parser [Char] ParserState Inlines
 symbol = do
   result <- oneOf specialChars
-  return $ Str [result]
+  return $ B.str [result]
 
 -- parses inline code, between codeStart and codeEnd
-code :: Parser [Char] ParserState Inline
+code :: Parser [Char] ParserState Inlines
 code = try $ do
   string "``"
   result <- manyTill anyChar (try (string "``"))
-  return $ Code nullAttr
-         $ removeLeadingTrailingSpace $ intercalate " " $ lines result
+  return $ B.code
+         $ removeLeadingTrailingSpace $ unwords $ lines result
 
 -- succeeds only if we're not right after a str (ie. in middle of word)
 atStart :: Parser [Char] ParserState a -> Parser [Char] ParserState a
@@ -855,13 +842,13 @@ atStart p = do
   guard $ stateLastStrPos st /= Just pos
   p
 
-emph :: Parser [Char] ParserState Inline
-emph = enclosed (atStart $ char '*') (char '*') inline >>=
-       return . Emph . normalizeSpaces
+emph :: Parser [Char] ParserState Inlines
+emph = B.emph . trimInlines . mconcat <$>
+         enclosed (atStart $ char '*') (char '*') inline
 
-strong :: Parser [Char] ParserState Inline
-strong = enclosed (atStart $ string "**") (try $ string "**") inline >>=
-         return . Strong . normalizeSpaces
+strong :: Parser [Char] ParserState Inlines
+strong = B.strong . trimInlines . mconcat <$>
+          enclosed (atStart $ string "**") (try $ string "**") inline
 
 -- Parses inline interpreted text which is required to have the given role.
 -- This decision is based on the role marker (if present),
@@ -873,8 +860,8 @@ interpreted role = try $ do
      then try markedInterpretedText <|> unmarkedInterpretedText
      else     markedInterpretedText
  where
-  markedInterpretedText = try (roleMarker >> unmarkedInterpretedText)
-                          <|> (unmarkedInterpretedText >>= (\txt -> roleMarker >> return txt))
+  markedInterpretedText = try (roleMarker *> unmarkedInterpretedText)
+                          <|> (unmarkedInterpretedText <* roleMarker)
   roleMarker = string $ ":" ++ role ++ ":"
   -- Note, this doesn't precisely implement the complex rule in
   -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
@@ -883,27 +870,27 @@ interpreted role = try $ do
       result <- enclosed (atStart $ char '`') (char '`') anyChar
       return result
 
-superscript :: Parser [Char] ParserState Inline
-superscript = interpreted "sup" >>= \x -> return (Superscript [Str x])
+superscript :: Parser [Char] ParserState Inlines
+superscript = B.superscript . B.str <$> interpreted "sup"
 
-subscript :: Parser [Char] ParserState Inline
-subscript = interpreted "sub" >>= \x -> return (Subscript [Str x])
+subscript :: Parser [Char] ParserState Inlines
+subscript = B.subscript . B.str <$> interpreted "sub"
 
-math :: Parser [Char] ParserState Inline
-math = interpreted "math" >>= \x -> return (Math InlineMath x)
+math :: Parser [Char] ParserState Inlines
+math = B.math <$> interpreted "math"
 
-whitespace :: Parser [Char] ParserState Inline
-whitespace = many1 spaceChar >> return Space <?> "whitespace"
+whitespace :: Parser [Char] ParserState Inlines
+whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
 
-str :: Parser [Char] ParserState Inline
+str :: Parser [Char] ParserState Inlines
 str = do
   let strChar = noneOf ("\t\n " ++ specialChars)
   result <- many1 strChar
   updateLastStrPos
-  return $ Str result
+  return $ B.str result
 
 -- an endline character that can be treated as a space, not a structural break
-endline :: Parser [Char] ParserState Inline
+endline :: Parser [Char] ParserState Inlines
 endline = try $ do
   newline
   notFollowedBy blankline
@@ -913,28 +900,27 @@ endline = try $ do
      then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
           notFollowedBy' bulletListStart
      else return ()
-  return Space
+  return B.space
 
 --
 -- links
 --
 
-link :: Parser [Char] ParserState Inline
+link :: Parser [Char] ParserState Inlines
 link = choice [explicitLink, referenceLink, autoLink]  <?> "link"
 
-explicitLink :: Parser [Char] ParserState Inline
+explicitLink :: Parser [Char] ParserState Inlines
 explicitLink = try $ do
   char '`'
   notFollowedBy (char '`') -- `` marks start of inline code
-  label' <- manyTill (notFollowedBy (char '`') >> inline)
-                    (try (spaces >> char '<'))
+  label' <- trimInlines . mconcat <$>
+             manyTill (notFollowedBy (char '`') >> inline) (char '<')
   src <- manyTill (noneOf ">\n") (char '>')
   skipSpaces
   string "`_"
-  return $ Link (normalizeSpaces label')
-            (escapeURI $ removeLeadingTrailingSpace src, "")
+  return $ B.link (escapeURI $ removeLeadingTrailingSpace src) "" label'
 
-referenceLink :: Parser [Char] ParserState Inline
+referenceLink :: Parser [Char] ParserState Inlines
 referenceLink = try $ do
   (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) >>~
                    char '_'
@@ -953,23 +939,23 @@ referenceLink = try $ do
                     Just target -> return target
   -- if anonymous link, remove key so it won't be used again
   when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
-  return $ Link (normalizeSpaces label') (src, tit)
+  return $ B.link src tit label'
 
-autoURI :: Parser [Char] ParserState Inline
+autoURI :: Parser [Char] ParserState Inlines
 autoURI = do
   (orig, src) <- uri
-  return $ Link [Str orig] (src, "")
+  return $ B.link src "" $ B.str orig
 
-autoEmail :: Parser [Char] ParserState Inline
+autoEmail :: Parser [Char] ParserState Inlines
 autoEmail = do
   (orig, src) <- emailAddress
-  return $ Link [Str orig] (src, "")
+  return $ B.link src "" $ B.str orig
 
-autoLink :: Parser [Char] ParserState Inline
+autoLink :: Parser [Char] ParserState Inlines
 autoLink = autoURI <|> autoEmail
 
 -- For now, we assume that all substitution references are for images.
-image :: Parser [Char] ParserState Inline
+image :: Parser [Char] ParserState Inlines
 image = try $ do
   char '|'
   (_,ref) <- withRaw (manyTill inline (char '|'))
@@ -979,7 +965,7 @@ image = try $ do
        Nothing     -> fail "no corresponding key"
        Just target -> return target
 
-note :: Parser [Char] ParserState Inline
+note :: Parser [Char] ParserState Inlines
 note = try $ do
   ref <- noteMarker
   char '_'
@@ -1000,4 +986,24 @@ note = try $ do
                         then deleteFirstsBy (==) notes [(ref,raw)]
                         else notes
       updateState $ \st -> st{ stateNotes = newnotes }
-      return $ Note contents
+      return $ B.note contents
+
+smart :: Parser [Char] ParserState Inlines
+smart = do
+  getOption readerSmart >>= guard
+  doubleQuoted <|> singleQuoted <|>
+    choice (map (B.singleton <$>) [apostrophe, dash, ellipses])
+
+singleQuoted :: Parser [Char] ParserState Inlines
+singleQuoted = try $ do
+  singleQuoteStart
+  withQuoteContext InSingleQuote $
+    B.singleQuoted . trimInlines . mconcat <$>
+      many1Till inline singleQuoteEnd
+
+doubleQuoted :: Parser [Char] ParserState Inlines
+doubleQuoted = try $ do
+  doubleQuoteStart
+  withQuoteContext InDoubleQuote $
+    B.doubleQuoted . trimInlines . mconcat <$>
+      many1Till inline doubleQuoteEnd
author	John MacFarlane <jgm@berkeley.edu>	2012-09-27 16:45:06 -0700
committer	John MacFarlane <jgm@berkeley.edu>	2012-09-27 17:52:48 -0700
commit	ab17faf49709bca8a864d1b80f8c8456865fef0c (patch)
tree	c2e85a41cf0eef84907e59c2ed5b81261e78a501 /src/Text
parent	5c06322ab2cc6707ec1e00f9b6c17283cd0fb347 (diff)
download	pandoc-ab17faf49709bca8a864d1b80f8c8456865fef0c.tar.gz