1 files changed, 160 insertions, 82 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index a6d3cd46a..cd51bff69 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -7,7 +7,7 @@
 , IncoherentInstances #-}
 
 {-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -26,7 +26,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Parsing
-   Copyright   : Copyright (C) 2006-2016 John MacFarlane
+   Copyright   : Copyright (C) 2006-2017 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -36,6 +36,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 A utility library with parsers used in pandoc readers.
 -}
 module Text.Pandoc.Parsing ( anyLine,
+                             anyLineNewline,
+                             indentWith,
                              many1Till,
                              notFollowedBy',
                              oneOfStrings,
@@ -48,6 +50,7 @@ module Text.Pandoc.Parsing ( anyLine,
                              enclosed,
                              stringAnyCase,
                              parseFromString,
+                             parseFromString',
                              lineClump,
                              charsInBalanced,
                              romanNumeral,
@@ -66,6 +69,7 @@ module Text.Pandoc.Parsing ( anyLine,
                              tableWith,
                              widthsFromIndices,
                              gridTableWith,
+                             gridTableWith',
                              readWith,
                              readWithM,
                              testStringWith,
@@ -82,6 +86,7 @@ module Text.Pandoc.Parsing ( anyLine,
                              HasMacros (..),
                              HasLogMessages (..),
                              HasLastStrPosition (..),
+                             HasIncludeFiles (..),
                              defaultParserState,
                              HeaderType (..),
                              ParserContext (..),
@@ -119,6 +124,7 @@ module Text.Pandoc.Parsing ( anyLine,
                              (<+?>),
                              extractIdClass,
                              insertIncludedFile,
+                             insertIncludedFileF,
                              -- * Re-exports from Text.Pandoc.Parsec
                              Stream,
                              runParser,
@@ -252,12 +258,28 @@ anyLine = do
          return this
        _ -> mzero
 
+-- | Parse any line, include the final newline in the output
+anyLineNewline :: Stream [Char] m Char => ParserT [Char] st m [Char]
+anyLineNewline = (++ "\n") <$> anyLine
+
+-- | Parse indent by specified number of spaces (or equiv. tabs)
+indentWith :: Stream [Char] m Char
+           => HasReaderOptions st
+           => Int -> ParserT [Char] st m [Char]
+indentWith num = do
+  tabStop <- getOption readerTabStop
+  if (num < tabStop)
+     then count num (char ' ')
+     else choice [ try (count num (char ' '))
+                 , try (char '\t' >> indentWith (num - tabStop)) ]
+
 -- | Like @manyTill@, but reads at least one item.
-many1Till :: Stream s m t
+many1Till :: (Show end, Stream s m t)
           => ParserT s st m a
           -> ParserT s st m end
           -> ParserT s st m [a]
 many1Till p end = do
+         notFollowedBy' end
          first <- p
          rest <- manyTill p end
          return (first:rest)
@@ -322,7 +344,7 @@ blanklines :: Stream s m Char => ParserT s st m [Char]
 blanklines = many1 blankline
 
 -- | Parses material enclosed between start and end parsers.
-enclosed :: Stream s  m Char => ParserT s st m t   -- ^ start parser
+enclosed :: (Show end, Stream s  m Char) => ParserT s st m t   -- ^ start parser
          -> ParserT s st m end  -- ^ end parser
          -> ParserT s st m a    -- ^ content parser (to be used repeatedly)
          -> ParserT s st m [a]
@@ -338,7 +360,10 @@ stringAnyCase (x:xs) = do
   return (firstChar:rest)
 
 -- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a
+parseFromString :: Monad m
+                => ParserT String st m a
+                -> String
+                -> ParserT String st m a
 parseFromString parser str = do
   oldPos <- getPosition
   oldInput <- getInput
@@ -350,6 +375,18 @@ parseFromString parser str = do
   setPosition oldPos
   return result
 
+-- | Like 'parseFromString' but specialized for 'ParserState'.
+-- This resets 'stateLastStrPos', which is almost always what we want.
+parseFromString' :: Monad m
+                 => ParserT String ParserState m a
+                 -> String
+                 -> ParserT String ParserState m a
+parseFromString' parser str = do
+  oldStrPos <- stateLastStrPos <$> getState
+  res <- parseFromString parser str
+  updateState $ \st -> st{ stateLastStrPos = oldStrPos }
+  return res
+
 -- | Parse raw line block up to and including blank lines.
 lineClump :: Stream [Char] m Char => ParserT [Char] st m String
 lineClump = blanklines
@@ -445,33 +482,8 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
        sepby1 p sep      = (:) <$> p <*> (many (try $ sep >> p))
 
 
--- Schemes from http://www.iana.org/assignments/uri-schemes.html plus
--- the unofficial schemes coap, doi, javascript, isbn, pmid
-schemes :: [String]
-schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
-           "crid","data","dav","dict","dns","file","ftp","geo","go","gopher",
-           "h323","http","https","iax","icap","im","imap","info","ipp","iris",
-           "iris.beep","iris.xpc","iris.xpcs","iris.lwz","ldap","mailto","mid",
-           "msrp","msrps","mtqp","mupdate","news","nfs","ni","nih","nntp",
-           "opaquelocktoken","pop","pres","rtsp","service","session","shttp","sieve",
-           "sip","sips","sms","snmp","soap.beep","soap.beeps","tag","tel","telnet",
-           "tftp","thismessage","tn3270","tip","tv","urn","vemmi","ws","wss","xcon",
-           "xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r","z39.50s",
-           "adiumxtra","afp","afs","aim","apt","attachment","aw","beshare","bitcoin",
-           "bolo","callto","chrome","chrome-extension","com-eventbrite-attendee",
-           "content", "cvs","dlna-playsingle","dlna-playcontainer","dtn","dvb",
-           "ed2k","facetime","feed","finger","fish","gg","git","gizmoproject",
-           "gtalk","hcp","icon","ipn","irc","irc6","ircs","itms","jar","jms",
-           "keyparc","lastfm","ldaps","magnet","maps","market","message","mms",
-           "ms-help","msnim","mumble","mvn","notes","oid","palm","paparazzi",
-           "platform","proxy","psyc","query","res","resource","rmi","rsync",
-           "rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify",
-           "ssh","steam","svn","teamspeak","things","udp","unreal","ut2004",
-           "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri",
-           "ymsgr", "isbn", "pmid"]
-
 uriScheme :: Stream s m Char => ParserT s st m String
-uriScheme = oneOfStringsCI schemes
+uriScheme = oneOfStringsCI (Set.toList schemes)
 
 -- | Parses a URI. Returns pair of original and URI-escaped version.
 uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
@@ -762,21 +774,36 @@ lineBlockLines = try $ do
 
 -- | Parse a table using 'headerParser', 'rowParser',
 -- 'lineParser', and 'footerParser'.
-tableWith :: Stream s m Char
-          => ParserT s ParserState m ([Blocks], [Alignment], [Int])
-          -> ([Int] -> ParserT s ParserState m [Blocks])
-          -> ParserT s ParserState m sep
-          -> ParserT s ParserState m end
-          -> ParserT s ParserState m Blocks
+tableWith :: (Stream s m Char, HasReaderOptions st,
+              Functor mf, Applicative mf, Monad mf)
+          => ParserT s st m (mf [Blocks], [Alignment], [Int])
+          -> ([Int] -> ParserT s st m (mf [Blocks]))
+          -> ParserT s st m sep
+          -> ParserT s st m end
+          -> ParserT s st m (mf Blocks)
 tableWith headerParser rowParser lineParser footerParser = try $ do
+  (aligns, widths, heads, rows) <- tableWith' headerParser rowParser
+                                                lineParser footerParser
+  return $ B.table mempty (zip aligns widths) <$> heads <*> rows
+
+type TableComponents mf = ([Alignment], [Double], mf [Blocks], mf [[Blocks]])
+
+tableWith' :: (Stream s m Char, HasReaderOptions st,
+               Functor mf, Applicative mf, Monad mf)
+           => ParserT s st m (mf [Blocks], [Alignment], [Int])
+           -> ([Int] -> ParserT s st m (mf [Blocks]))
+           -> ParserT s st m sep
+           -> ParserT s st m end
+           -> ParserT s st m (TableComponents mf)
+tableWith' headerParser rowParser lineParser footerParser = try $ do
     (heads, aligns, indices) <- headerParser
-    lines' <- rowParser indices `sepEndBy1` lineParser
+    lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser
     footerParser
     numColumns <- getOption readerColumns
     let widths = if (indices == [])
                     then replicate (length aligns) 0.0
                     else widthsFromIndices numColumns indices
-    return $ B.table mempty (zip aligns widths) heads lines'
+    return $ (aligns, widths, heads, lines')
 
 -- Calculate relative widths of table columns, based on indices
 widthsFromIndices :: Int      -- Number of columns on terminal
@@ -809,25 +836,44 @@ widthsFromIndices numColumns' indices =
 -- (which may be grid), then the rows,
 -- which may be grid, separated by blank lines, and
 -- ending with a footer (dashed line followed by blank line).
-gridTableWith :: Stream [Char] m Char
-              => ParserT [Char] ParserState m Blocks   -- ^ Block list parser
-              -> Bool                                -- ^ Headerless table
-              -> ParserT [Char] ParserState m Blocks
+gridTableWith :: (Stream [Char] m Char, HasReaderOptions st,
+                  Functor mf, Applicative mf, Monad mf)
+              => ParserT [Char] st m (mf Blocks)  -- ^ Block list parser
+              -> Bool                             -- ^ Headerless table
+              -> ParserT [Char] st m (mf Blocks)
 gridTableWith blocks headless =
   tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
             (gridTableSep '-') gridTableFooter
 
+gridTableWith' :: (Stream [Char] m Char, HasReaderOptions st,
+                   Functor mf, Applicative mf, Monad mf)
+               => ParserT [Char] st m (mf Blocks)  -- ^ Block list parser
+               -> Bool                             -- ^ Headerless table
+               -> ParserT [Char] st m (TableComponents mf)
+gridTableWith' blocks headless =
+  tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
+             (gridTableSep '-') gridTableFooter
+
 gridTableSplitLine :: [Int] -> String -> [String]
 gridTableSplitLine indices line = map removeFinalBar $ tail $
   splitStringByIndices (init indices) $ trimr line
 
-gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int)
+gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
 gridPart ch = do
+  leftColon <- option False (True <$ char ':')
   dashes <- many1 (char ch)
+  rightColon <- option False (True <$ char ':')
   char '+'
-  return (length dashes, length dashes + 1)
-
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)]
+  let lengthDashes = length dashes + (if leftColon then 1 else 0) +
+                       (if rightColon then 1 else 0)
+  let alignment = case (leftColon, rightColon) of
+                       (True, True)   -> AlignCenter
+                       (True, False)  -> AlignLeft
+                       (False, True)  -> AlignRight
+                       (False, False) -> AlignDefault
+  return ((lengthDashes, lengthDashes + 1), alignment)
+
+gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
 gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
 
 removeFinalBar :: String -> String
@@ -835,14 +881,14 @@ removeFinalBar =
   reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
 
 -- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char
+gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
 gridTableSep ch = try $ gridDashedLines ch >> return '\n'
 
 -- | Parse header for a grid table.
-gridTableHeader :: Stream [Char] m Char
+gridTableHeader :: (Stream [Char] m Char, Functor mf, Applicative mf, Monad mf)
                 => Bool -- ^ Headerless table
-                -> ParserT [Char] ParserState m Blocks
-                -> ParserT [Char] ParserState m ([Blocks], [Alignment], [Int])
+                -> ParserT [Char] st m (mf Blocks)
+                -> ParserT [Char] st m (mf [Blocks], [Alignment], [Int])
 gridTableHeader headless blocks = try $ do
   optional blanklines
   dashes <- gridDashedLines '-'
@@ -851,36 +897,40 @@ gridTableHeader headless blocks = try $ do
                     else many1
                          (notFollowedBy (gridTableSep '=') >> char '|' >>
                            many1Till anyChar newline)
-  if headless
-     then return ()
-     else gridTableSep '=' >> return ()
-  let lines'   = map snd dashes
+  underDashes <- if headless
+                    then return dashes
+                    else gridDashedLines '='
+  guard $ length dashes == length underDashes
+  let lines'   = map (snd . fst) underDashes
   let indices  = scanl (+) 0 lines'
-  let aligns   = replicate (length lines') AlignDefault
-  -- RST does not have a notion of alignments
+  let aligns   = map snd underDashes
   let rawHeads = if headless
-                    then replicate (length dashes) ""
-                    else map (intercalate " ") $ transpose
+                    then replicate (length underDashes) ""
+                    else map (unlines . map trim) $ transpose
                        $ map (gridTableSplitLine indices) rawContent
-  heads <- mapM (parseFromString blocks) $ map trim rawHeads
+  heads <- fmap sequence $ mapM (parseFromString blocks . trim) rawHeads
   return (heads, aligns, indices)
 
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String]
+gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [String]
 gridTableRawLine indices = do
   char '|'
   line <- many1Till anyChar newline
   return (gridTableSplitLine indices line)
 
 -- | Parse row of grid table.
-gridTableRow :: Stream [Char]  m Char
-             => ParserT [Char] ParserState m Blocks
+gridTableRow :: (Stream [Char]  m Char, Functor mf, Applicative mf, Monad mf)
+             => ParserT [Char] st m (mf Blocks)
              -> [Int]
-             -> ParserT [Char] ParserState m [Blocks]
+             -> ParserT [Char] st m (mf [Blocks])
 gridTableRow blocks indices = do
   colLines <- many1 (gridTableRawLine indices)
   let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
                transpose colLines
-  mapM (liftM compactifyCell . parseFromString blocks) cols
+      compactifyCell bs = case compactify [bs] of
+                            []  -> mempty
+                            x:_ -> x
+  cells <- sequence <$> mapM (parseFromString blocks) cols
+  return $ fmap (map compactifyCell) cells
 
 removeOneLeadingSpace :: [String] -> [String]
 removeOneLeadingSpace xs =
@@ -890,11 +940,8 @@ removeOneLeadingSpace xs =
    where startsWithSpace ""     = True
          startsWithSpace (y:_) = y == ' '
 
-compactifyCell :: Blocks -> Blocks
-compactifyCell bs = head $ compactify [bs]
-
 -- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char]
+gridTableFooter :: Stream s m Char => ParserT s st m [Char]
 gridTableFooter = blanklines
 
 ---
@@ -937,6 +984,7 @@ data ParserState = ParserState
       stateSubstitutions   :: SubstTable,    -- ^ List of substitution references
       stateNotes           :: NoteTable,     -- ^ List of notes (raw bodies)
       stateNotes'          :: NoteTable',    -- ^ List of notes (parsed bodies)
+      stateNoteRefs        :: Set.Set String, -- ^ List of note references used
       stateMeta            :: Meta,          -- ^ Document metadata
       stateMeta'           :: F Meta,        -- ^ Document metadata
       stateCitations       :: M.Map String String, -- ^ RST-style citations
@@ -972,6 +1020,9 @@ class HasReaderOptions st where
   -- default
   getOption  f         = (f . extractReaderOptions) <$> getState
 
+instance HasReaderOptions ParserState where
+  extractReaderOptions = stateOptions
+
 class HasQuoteContext st m where
   getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
   withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
@@ -987,9 +1038,6 @@ instance Monad m => HasQuoteContext ParserState m where
     setState newState { stateQuoteContext = oldQuoteContext }
     return result
 
-instance HasReaderOptions ParserState where
-  extractReaderOptions = stateOptions
-
 class HasHeaderMap st where
   extractHeaderMap  :: st -> M.Map Inlines String
   updateHeaderMap   :: (M.Map Inlines String -> M.Map Inlines String) ->
@@ -1031,6 +1079,16 @@ instance HasLogMessages ParserState where
   addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st }
   getLogMessages st = reverse $ stateLogMessages st
 
+class HasIncludeFiles st where
+  getIncludeFiles :: st -> [String]
+  addIncludeFile :: String -> st -> st
+  dropLatestIncludeFile :: st -> st
+
+instance HasIncludeFiles ParserState where
+  getIncludeFiles = stateContainers
+  addIncludeFile f s = s{ stateContainers = f : stateContainers s }
+  dropLatestIncludeFile s = s { stateContainers = drop 1 $ stateContainers s }
+
 defaultParserState :: ParserState
 defaultParserState =
     ParserState { stateOptions         = def,
@@ -1043,7 +1101,8 @@ defaultParserState =
                   stateHeaderKeys      = M.empty,
                   stateSubstitutions   = M.empty,
                   stateNotes           = [],
-                  stateNotes'          = [],
+                  stateNotes'          = M.empty,
+                  stateNoteRefs        = Set.empty,
                   stateMeta            = nullMeta,
                   stateMeta'           = return nullMeta,
                   stateCitations       = M.empty,
@@ -1110,7 +1169,8 @@ data QuoteContext
 
 type NoteTable = [(String, String)]
 
-type NoteTable' = [(String, F Blocks)]  -- used in markdown reader
+type NoteTable' = M.Map String (SourcePos, F Blocks)
+-- used in markdown reader
 
 newtype Key = Key String deriving (Show, Read, Eq, Ord)
 
@@ -1322,17 +1382,18 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
                Nothing -> cls
     kvs'  = filter (\(k,_) -> k /= "id" || k /= "class") kvs
 
-insertIncludedFile :: PandocMonad m
-                   => ParserT String ParserState m Blocks
-                   -> [FilePath] -> FilePath
-                   -> ParserT String ParserState m Blocks
-insertIncludedFile blocks dirs f = do
+insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st,
+                        Functor mf, Applicative mf, Monad mf)
+                    => ParserT String st m (mf Blocks)
+                    -> [FilePath] -> FilePath
+                    -> ParserT String st m (mf Blocks)
+insertIncludedFile' blocks dirs f = do
   oldPos <- getPosition
   oldInput <- getInput
-  containers <- stateContainers <$> getState
+  containers <- getIncludeFiles <$> getState
   when (f `elem` containers) $
     throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
-  updateState $ \s -> s{ stateContainers = f : stateContainers s }
+  updateState $ addIncludeFile f
   mbcontents <- readFileFromDirs dirs f
   contents <- case mbcontents of
                    Just s -> return s
@@ -1344,5 +1405,22 @@ insertIncludedFile blocks dirs f = do
   bs <- blocks
   setInput oldInput
   setPosition oldPos
-  updateState $ \s -> s{ stateContainers = tail $ stateContainers s }
+  updateState dropLatestIncludeFile
   return bs
+
+-- | Parse content of include file as blocks. Circular includes result in an
+-- @PandocParseError@.
+insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
+                   => ParserT String st m Blocks
+                   -> [FilePath] -> FilePath
+                   -> ParserT String st m Blocks
+insertIncludedFile blocks dirs f =
+  runIdentity <$> insertIncludedFile' (Identity <$> blocks) dirs f
+
+-- | Parse content of include file as future blocks. Circular includes result in
+-- an @PandocParseError@.
+insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
+                    => ParserT String st m (Future st Blocks)
+                    -> [FilePath] -> FilePath
+                    -> ParserT String st m (Future st Blocks)
+insertIncludedFileF = insertIncludedFile'