aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Markdown.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs108
1 files changed, 62 insertions, 46 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 2dc7ddf52..b5017a433 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
@@ -21,14 +22,14 @@ module Text.Pandoc.Readers.Markdown (
import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isAlphaNum, isPunctuation, isSpace)
+import Text.DocLayout (realLength)
import Data.List (transpose, elemIndex, sortOn, foldl')
-import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.Map as M
import Data.Maybe
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
-import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString as BS
import System.FilePath (addExtension, takeExtension, takeDirectory)
import qualified System.FilePath.Windows as Windows
import qualified System.FilePath.Posix as Posix
@@ -39,6 +40,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), report)
import Text.Pandoc.Definition as Pandoc
import Text.Pandoc.Emoji (emojiToInline)
import Text.Pandoc.Error
+import Safe.Foldable (maximumBounded)
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Walk (walk)
@@ -72,14 +74,12 @@ readMarkdown opts s = do
yamlToMeta :: PandocMonad m
=> ReaderOptions
-> Maybe FilePath
- -> BL.ByteString
+ -> BS.ByteString
-> m Meta
yamlToMeta opts mbfp bstr = do
let parser = do
oldPos <- getPosition
- case mbfp of
- Nothing -> return ()
- Just fp -> setPosition $ initialPos fp
+ setPosition $ initialPos (fromMaybe "" mbfp)
meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
setPosition oldPos
return $ runF meta defaultParserState
@@ -95,7 +95,7 @@ yamlToRefs :: PandocMonad m
=> (Text -> Bool)
-> ReaderOptions
-> Maybe FilePath
- -> BL.ByteString
+ -> BS.ByteString
-> m [MetaValue]
yamlToRefs idpred opts mbfp bstr = do
let parser = do
@@ -198,6 +198,7 @@ inlinesInBalancedBrackets =
go openBrackets =
(() <$ (escapedChar <|>
code <|>
+ math <|>
rawHtmlInline <|>
rawLaTeXInline') >> go openBrackets)
<|>
@@ -326,6 +327,7 @@ referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
referenceKey = try $ do
pos <- getPosition
skipNonindentSpaces
+ notFollowedBy (void cite)
(_,raw) <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
@@ -829,7 +831,7 @@ listLineCommon :: PandocMonad m => MarkdownParser m Text
listLineCommon = T.concat <$> manyTill
( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`'])
<|> fmap snd (withRaw code)
- <|> fmap snd (htmlTag isCommentTag)
+ <|> fmap (renderTags . (:[]) . fst) (htmlTag isCommentTag)
<|> countChar 1 anyChar
) newline
@@ -1013,19 +1015,18 @@ normalDefinitionList = do
para :: PandocMonad m => MarkdownParser m (F Blocks)
para = try $ do
exts <- getOption readerExtensions
- let implicitFigures x
- | extensionEnabled Ext_implicit_figures exts = do
- x' <- x
- case B.toList x' of
- [Image attr alt (src,tit)]
- | not (null alt) ->
- -- the fig: at beginning of title indicates a figure
- return $ B.singleton
- $ Image attr alt (src, "fig:" <> tit)
- _ -> return x'
- | otherwise = x
- result <- implicitFigures . trimInlinesF <$> inlines1
- option (B.plain <$> result)
+
+ result <- trimInlinesF <$> inlines1
+ let figureOr constr inlns =
+ case B.toList inlns of
+ [Image attr figCaption (src, tit)]
+ | extensionEnabled Ext_implicit_figures exts
+ , not (null figCaption) -> do
+ B.simpleFigureWith attr (B.fromList figCaption) src tit
+
+ _ -> constr inlns
+
+ option (figureOr B.plain <$> result)
$ try $ do
newline
(mempty <$ blanklines)
@@ -1047,7 +1048,7 @@ para = try $ do
if divLevel > 0
then lookAhead divFenceEnd
else mzero
- return $ B.para <$> result
+ return $ figureOr B.para <$> result
plain :: PandocMonad m => MarkdownParser m (F Blocks)
plain = fmap B.plain . trimInlinesF <$> inlines1
@@ -1124,7 +1125,12 @@ rawHtmlBlocks = do
let selfClosing = "/>" `T.isSuffixOf` raw
-- we don't want '<td> text' to be a code block:
skipMany spaceChar
- indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
+ tabStop <- getOption readerTabStop
+ indentlevel <- option 0 $
+ do blankline
+ sum <$> many ( (1 <$ char ' ')
+ <|>
+ (tabStop <$ char '\t') )
-- try to find closing tag
-- we set stateInHtmlBlock so that closing tags that can be either block or
-- inline will not be parsed as inline tags
@@ -1355,26 +1361,30 @@ pipeTable = try $ do
nonindentSpaces
lookAhead nonspaceChar
(heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
- let heads' = take (length aligns) <$> heads
+ let cellContents = parseFromString' pipeTableCell . trim
+ let numcols = length aligns
+ let heads' = take numcols heads
lines' <- many pipeTableRow
- let lines'' = map (take (length aligns) <$>) lines'
- let maxlength = maximum $
- fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'')
- numColumns <- getOption readerColumns
- let widths = if maxlength > numColumns
+ let lines'' = map (take numcols) lines'
+ let lineWidths = map (sum . map realLength) (heads' : lines'')
+ columns <- getOption readerColumns
+ -- add numcols + 1 for the pipes themselves
+ let widths = if maximumBounded (sum seplengths : lineWidths) + (numcols + 1) > columns
then map (\len ->
fromIntegral len / fromIntegral (sum seplengths))
seplengths
else replicate (length aligns) 0.0
- return (aligns, widths, toHeaderRow <$> heads', map toRow <$> sequence lines'')
+ (headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads'
+ (rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines''
+ return (aligns, widths, toHeaderRow <$> headCells, map toRow <$> rows)
sepPipe :: PandocMonad m => MarkdownParser m ()
sepPipe = try $ do
char '|' <|> char '+'
notFollowedBy blankline
--- parse a row, also returning probable alignments for org-table cells
-pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks])
+-- parse a row, returning raw cell contents
+pipeTableRow :: PandocMonad m => MarkdownParser m [Text]
pipeTableRow = try $ do
scanForPipe
skipMany spaceChar
@@ -1382,13 +1392,11 @@ pipeTableRow = try $ do
-- split into cells
let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
<|> void (noneOf "|\n\r")
- let cellContents = withRaw (many chunk) >>=
- parseFromString' pipeTableCell . trim . snd
- cells <- cellContents `sepEndBy1` char '|'
+ cells <- (snd <$> withRaw (many chunk)) `sepEndBy1` char '|'
-- surrounding pipes needed for a one-column table:
guard $ not (length cells == 1 && not openPipe)
blankline
- return $ sequence cells
+ return cells
pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
pipeTableCell =
@@ -1692,21 +1700,29 @@ strikeout = fmap B.strikeout <$>
superscript :: PandocMonad m => MarkdownParser m (F Inlines)
superscript = do
- guardEnabled Ext_superscript
fmap B.superscript <$> try (do
char '^'
- mconcat <$> many1Till (do notFollowedBy spaceChar
- notFollowedBy newline
- inline) (char '^'))
+ mconcat <$> (try regularSuperscript <|> try mmdShortSuperscript))
+ where regularSuperscript = many1Till (do guardEnabled Ext_superscript
+ notFollowedBy spaceChar
+ notFollowedBy newline
+ inline) (char '^')
+ mmdShortSuperscript = do guardEnabled Ext_short_subsuperscripts
+ result <- T.pack <$> many1 alphaNum
+ return $ return $ return $ B.str result
subscript :: PandocMonad m => MarkdownParser m (F Inlines)
subscript = do
- guardEnabled Ext_subscript
fmap B.subscript <$> try (do
char '~'
- mconcat <$> many1Till (do notFollowedBy spaceChar
- notFollowedBy newline
- inline) (char '~'))
+ mconcat <$> (try regularSubscript <|> mmdShortSubscript))
+ where regularSubscript = many1Till (do guardEnabled Ext_subscript
+ notFollowedBy spaceChar
+ notFollowedBy newline
+ inline) (char '~')
+ mmdShortSubscript = do guardEnabled Ext_short_subsuperscripts
+ result <- T.pack <$> many1 alphaNum
+ return $ return $ return $ B.str result
whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
@@ -1768,7 +1784,6 @@ endline = try $ do
reference :: PandocMonad m => MarkdownParser m (F Inlines, Text)
reference = do
guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^")
- guardDisabled Ext_citations <|> notFollowedBy' (string "[@")
withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
parenthesizedChars :: PandocMonad m => MarkdownParser m Text
@@ -2187,6 +2202,7 @@ normalCite = try $ do
citations <- citeList
spnl
char ']'
+ notFollowedBy (oneOf "{([") -- not a link or a bracketed span
return citations
suffix :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -2200,7 +2216,7 @@ suffix = try $ do
prefix :: PandocMonad m => MarkdownParser m (F Inlines)
prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']'
+ manyTill (notFollowedBy (char ';') >> inline) (char ']'
<|> lookAhead
(try $ do optional (try (char ';' >> spnl))
citeKey True