1 files changed, 62 insertions, 46 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 2dc7ddf52..b5017a433 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts    #-}
 {-# LANGUAGE TupleSections       #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE ViewPatterns        #-}
@@ -21,14 +22,14 @@ module Text.Pandoc.Readers.Markdown (
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
+import Text.DocLayout (realLength)
 import Data.List (transpose, elemIndex, sortOn, foldl')
-import Data.List.NonEmpty (NonEmpty(..))
 import qualified Data.Map as M
 import Data.Maybe
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString as BS
 import System.FilePath (addExtension, takeExtension, takeDirectory)
 import qualified System.FilePath.Windows as Windows
 import qualified System.FilePath.Posix as Posix
@@ -39,6 +40,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), report)
 import Text.Pandoc.Definition as Pandoc
 import Text.Pandoc.Emoji (emojiToInline)
 import Text.Pandoc.Error
+import Safe.Foldable (maximumBounded)
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Walk (walk)
@@ -72,14 +74,12 @@ readMarkdown opts s = do
 yamlToMeta :: PandocMonad m
            => ReaderOptions
            -> Maybe FilePath
-           -> BL.ByteString
+           -> BS.ByteString
            -> m Meta
 yamlToMeta opts mbfp bstr = do
   let parser = do
         oldPos <- getPosition
-        case mbfp of
-          Nothing -> return ()
-          Just fp -> setPosition $ initialPos fp
+        setPosition $ initialPos (fromMaybe "" mbfp)
         meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
         setPosition oldPos
         return $ runF meta defaultParserState
@@ -95,7 +95,7 @@ yamlToRefs :: PandocMonad m
            => (Text -> Bool)
            -> ReaderOptions
            -> Maybe FilePath
-           -> BL.ByteString
+           -> BS.ByteString
            -> m [MetaValue]
 yamlToRefs idpred opts mbfp bstr = do
   let parser = do
@@ -198,6 +198,7 @@ inlinesInBalancedBrackets =
         go openBrackets =
           (() <$ (escapedChar <|>
                 code <|>
+                math <|>
                 rawHtmlInline <|>
                 rawLaTeXInline') >> go openBrackets)
           <|>
@@ -326,6 +327,7 @@ referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
 referenceKey = try $ do
   pos <- getPosition
   skipNonindentSpaces
+  notFollowedBy (void cite)
   (_,raw) <- reference
   char ':'
   skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
@@ -829,7 +831,7 @@ listLineCommon :: PandocMonad m => MarkdownParser m Text
 listLineCommon = T.concat <$> manyTill
               (  many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`'])
              <|> fmap snd (withRaw code)
-             <|> fmap snd (htmlTag isCommentTag)
+             <|> fmap (renderTags . (:[]) . fst) (htmlTag isCommentTag)
              <|> countChar 1 anyChar
               ) newline
 
@@ -1013,19 +1015,18 @@ normalDefinitionList = do
 para :: PandocMonad m => MarkdownParser m (F Blocks)
 para = try $ do
   exts <- getOption readerExtensions
-  let implicitFigures x
-       | extensionEnabled Ext_implicit_figures exts = do
-         x' <- x
-         case B.toList x' of
-               [Image attr alt (src,tit)]
-                 | not (null alt) ->
-                    -- the fig: at beginning of title indicates a figure
-                    return $ B.singleton
-                           $ Image attr alt (src, "fig:" <> tit)
-               _ -> return x'
-       | otherwise = x
-  result <- implicitFigures . trimInlinesF <$> inlines1
-  option (B.plain <$> result)
+
+  result <- trimInlinesF <$> inlines1
+  let figureOr constr inlns =
+        case B.toList inlns of
+          [Image attr figCaption (src, tit)]
+            | extensionEnabled Ext_implicit_figures exts
+            , not (null figCaption) -> do
+                B.simpleFigureWith attr (B.fromList figCaption) src tit
+
+          _ -> constr inlns
+
+  option (figureOr B.plain <$> result)
     $ try $ do
             newline
             (mempty <$ blanklines)
@@ -1047,7 +1048,7 @@ para = try $ do
                      if divLevel > 0
                         then lookAhead divFenceEnd
                         else mzero
-            return $ B.para <$> result
+            return $ figureOr B.para <$> result
 
 plain :: PandocMonad m => MarkdownParser m (F Blocks)
 plain = fmap B.plain . trimInlinesF <$> inlines1
@@ -1124,7 +1125,12 @@ rawHtmlBlocks = do
   let selfClosing = "/>" `T.isSuffixOf` raw
   -- we don't want '<td>    text' to be a code block:
   skipMany spaceChar
-  indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
+  tabStop <- getOption readerTabStop
+  indentlevel <- option 0 $
+                 do blankline
+                    sum <$> many ( (1 <$ char ' ')
+                                   <|>
+                                   (tabStop <$ char '\t') )
   -- try to find closing tag
   -- we set stateInHtmlBlock so that closing tags that can be either block or
   -- inline will not be parsed as inline tags
@@ -1355,26 +1361,30 @@ pipeTable = try $ do
   nonindentSpaces
   lookAhead nonspaceChar
   (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
-  let heads' = take (length aligns) <$> heads
+  let cellContents = parseFromString' pipeTableCell . trim
+  let numcols = length aligns
+  let heads' = take numcols heads
   lines' <- many pipeTableRow
-  let lines'' = map (take (length aligns) <$>) lines'
-  let maxlength = maximum $
-       fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'')
-  numColumns <- getOption readerColumns
-  let widths = if maxlength > numColumns
+  let lines'' = map (take numcols) lines'
+  let lineWidths = map (sum . map realLength) (heads' : lines'')
+  columns <- getOption readerColumns
+  -- add numcols + 1 for the pipes themselves
+  let widths = if maximumBounded (sum seplengths : lineWidths) + (numcols + 1) > columns
                   then map (\len ->
                          fromIntegral len / fromIntegral (sum seplengths))
                          seplengths
                   else replicate (length aligns) 0.0
-  return (aligns, widths, toHeaderRow <$> heads', map toRow <$> sequence lines'')
+  (headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads'
+  (rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines''
+  return (aligns, widths, toHeaderRow <$> headCells, map toRow <$> rows)
 
 sepPipe :: PandocMonad m => MarkdownParser m ()
 sepPipe = try $ do
   char '|' <|> char '+'
   notFollowedBy blankline
 
--- parse a row, also returning probable alignments for org-table cells
-pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks])
+-- parse a row, returning raw cell contents
+pipeTableRow :: PandocMonad m => MarkdownParser m [Text]
 pipeTableRow = try $ do
   scanForPipe
   skipMany spaceChar
@@ -1382,13 +1392,11 @@ pipeTableRow = try $ do
   -- split into cells
   let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
        <|> void (noneOf "|\n\r")
-  let cellContents = withRaw (many chunk) >>=
-        parseFromString' pipeTableCell . trim . snd
-  cells <- cellContents `sepEndBy1` char '|'
+  cells <- (snd <$> withRaw (many chunk)) `sepEndBy1` char '|'
   -- surrounding pipes needed for a one-column table:
   guard $ not (length cells == 1 && not openPipe)
   blankline
-  return $ sequence cells
+  return cells
 
 pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
 pipeTableCell =
@@ -1692,21 +1700,29 @@ strikeout = fmap B.strikeout <$>
 
 superscript :: PandocMonad m => MarkdownParser m (F Inlines)
 superscript = do
-  guardEnabled Ext_superscript
   fmap B.superscript <$> try (do
     char '^'
-    mconcat <$> many1Till (do notFollowedBy spaceChar
-                              notFollowedBy newline
-                              inline) (char '^'))
+    mconcat <$> (try regularSuperscript <|> mmdShortSuperscript)) -- outer try rewinds on failure
+      where regularSuperscript = guardEnabled Ext_superscript >> -- test once, not per inline
+                                 many1Till (do notFollowedBy spaceChar
+                                               notFollowedBy newline
+                                               inline) (char '^')
+            mmdShortSuperscript = do guardEnabled Ext_short_subsuperscripts
+                                     result <- T.pack <$> many1 alphaNum
+                                     return [return (B.str result)] -- a single F Inlines
 
 subscript :: PandocMonad m => MarkdownParser m (F Inlines)
 subscript = do
-  guardEnabled Ext_subscript
   fmap B.subscript <$> try (do
     char '~'
-    mconcat <$> many1Till (do notFollowedBy spaceChar
-                              notFollowedBy newline
-                              inline) (char '~'))
+    mconcat <$> (try regularSubscript <|> mmdShortSubscript)) -- outer try rewinds on failure
+      where regularSubscript = guardEnabled Ext_subscript >> -- test once, not per inline
+                               many1Till (do notFollowedBy spaceChar
+                                             notFollowedBy newline
+                                             inline) (char '~')
+            mmdShortSubscript = do guardEnabled Ext_short_subsuperscripts
+                                   result <- T.pack <$> many1 alphaNum
+                                   return [return (B.str result)] -- a single F Inlines
 
 whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
 whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
@@ -1768,7 +1784,6 @@ endline = try $ do
 reference :: PandocMonad m => MarkdownParser m (F Inlines, Text)
 reference = do
   guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^")
-  guardDisabled Ext_citations <|> notFollowedBy' (string "[@")
   withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
 
 parenthesizedChars :: PandocMonad m => MarkdownParser m Text
@@ -2187,6 +2202,7 @@ normalCite = try $ do
   citations <- citeList
   spnl
   char ']'
+  notFollowedBy (oneOf "{([")  -- not a link or a bracketed span
   return citations
 
 suffix :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -2200,7 +2216,7 @@ suffix = try $ do
 
 prefix :: PandocMonad m => MarkdownParser m (F Inlines)
 prefix = trimInlinesF . mconcat <$>
-  manyTill inline (char ']'
+  manyTill (notFollowedBy (char ';') >> inline) (char ']'
    <|> lookAhead
          (try $ do optional (try (char ';' >> spnl))
                    citeKey True