diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 108 |
1 files changed, 62 insertions, 46 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2dc7ddf52..b5017a433 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,5 @@ {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} @@ -21,14 +22,14 @@ module Text.Pandoc.Readers.Markdown ( import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isPunctuation, isSpace) +import Text.DocLayout (realLength) import Data.List (transpose, elemIndex, sortOn, foldl') -import Data.List.NonEmpty (NonEmpty(..)) import qualified Data.Map as M import Data.Maybe import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString as BS import System.FilePath (addExtension, takeExtension, takeDirectory) import qualified System.FilePath.Windows as Windows import qualified System.FilePath.Posix as Posix @@ -39,6 +40,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), report) import Text.Pandoc.Definition as Pandoc import Text.Pandoc.Emoji (emojiToInline) import Text.Pandoc.Error +import Safe.Foldable (maximumBounded) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Walk (walk) @@ -72,14 +74,12 @@ readMarkdown opts s = do yamlToMeta :: PandocMonad m => ReaderOptions -> Maybe FilePath - -> BL.ByteString + -> BS.ByteString -> m Meta yamlToMeta opts mbfp bstr = do let parser = do oldPos <- getPosition - case mbfp of - Nothing -> return () - Just fp -> setPosition $ initialPos fp + setPosition $ initialPos (fromMaybe "" mbfp) meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr setPosition oldPos return $ runF meta defaultParserState @@ -95,7 +95,7 @@ yamlToRefs :: PandocMonad m => (Text -> Bool) -> ReaderOptions -> Maybe FilePath - -> BL.ByteString + -> BS.ByteString -> m [MetaValue] yamlToRefs idpred opts mbfp bstr = do let parser = do @@ -198,6 +198,7 @@ inlinesInBalancedBrackets = go openBrackets = (() <$ (escapedChar <|> code <|> + math <|> rawHtmlInline <|> rawLaTeXInline') >> go openBrackets) <|> @@ -326,6 +327,7 @@ referenceKey :: PandocMonad m => MarkdownParser m (F Blocks) referenceKey = try $ do pos <- getPosition skipNonindentSpaces + notFollowedBy (void cite) (_,raw) <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') @@ -829,7 +831,7 @@ listLineCommon :: PandocMonad m => MarkdownParser m Text listLineCommon = T.concat <$> manyTill ( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`']) <|> fmap snd (withRaw code) - <|> fmap snd (htmlTag isCommentTag) + <|> fmap (renderTags . (:[]) . fst) (htmlTag isCommentTag) <|> countChar 1 anyChar ) newline @@ -1013,19 +1015,18 @@ normalDefinitionList = do para :: PandocMonad m => MarkdownParser m (F Blocks) para = try $ do exts <- getOption readerExtensions - let implicitFigures x - | extensionEnabled Ext_implicit_figures exts = do - x' <- x - case B.toList x' of - [Image attr alt (src,tit)] - | not (null alt) -> - -- the fig: at beginning of title indicates a figure - return $ B.singleton - $ Image attr alt (src, "fig:" <> tit) - _ -> return x' - | otherwise = x - result <- implicitFigures . trimInlinesF <$> inlines1 - option (B.plain <$> result) + + result <- trimInlinesF <$> inlines1 + let figureOr constr inlns = + case B.toList inlns of + [Image attr figCaption (src, tit)] + | extensionEnabled Ext_implicit_figures exts + , not (null figCaption) -> do + B.simpleFigureWith attr (B.fromList figCaption) src tit + + _ -> constr inlns + + option (figureOr B.plain <$> result) $ try $ do newline (mempty <$ blanklines) @@ -1047,7 +1048,7 @@ para = try $ do if divLevel > 0 then lookAhead divFenceEnd else mzero - return $ B.para <$> result + return $ figureOr B.para <$> result plain :: PandocMonad m => MarkdownParser m (F Blocks) plain = fmap B.plain . trimInlinesF <$> inlines1 @@ -1124,7 +1125,12 @@ rawHtmlBlocks = do let selfClosing = "/>" `T.isSuffixOf` raw -- we don't want '<td> text' to be a code block: skipMany spaceChar - indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0 + tabStop <- getOption readerTabStop + indentlevel <- option 0 $ + do blankline + sum <$> many ( (1 <$ char ' ') + <|> + (tabStop <$ char '\t') ) -- try to find closing tag -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags @@ -1355,26 +1361,30 @@ pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak - let heads' = take (length aligns) <$> heads + let cellContents = parseFromString' pipeTableCell . trim + let numcols = length aligns + let heads' = take numcols heads lines' <- many pipeTableRow - let lines'' = map (take (length aligns) <$>) lines' - let maxlength = maximum $ - fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'') - numColumns <- getOption readerColumns - let widths = if maxlength > numColumns + let lines'' = map (take numcols) lines' + let lineWidths = map (sum . map realLength) (heads' : lines'') + columns <- getOption readerColumns + -- add numcols + 1 for the pipes themselves + let widths = if maximumBounded (sum seplengths : lineWidths) + (numcols + 1) > columns then map (\len -> fromIntegral len / fromIntegral (sum seplengths)) seplengths else replicate (length aligns) 0.0 - return (aligns, widths, toHeaderRow <$> heads', map toRow <$> sequence lines'') + (headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads' + (rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines'' + return (aligns, widths, toHeaderRow <$> headCells, map toRow <$> rows) sepPipe :: PandocMonad m => MarkdownParser m () sepPipe = try $ do char '|' <|> char '+' notFollowedBy blankline --- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks]) +-- parse a row, returning raw cell contents +pipeTableRow :: PandocMonad m => MarkdownParser m [Text] pipeTableRow = try $ do scanForPipe skipMany spaceChar @@ -1382,13 +1392,11 @@ pipeTableRow = try $ do -- split into cells let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') <|> void (noneOf "|\n\r") - let cellContents = withRaw (many chunk) >>= - parseFromString' pipeTableCell . trim . snd - cells <- cellContents `sepEndBy1` char '|' + cells <- (snd <$> withRaw (many chunk)) `sepEndBy1` char '|' -- surrounding pipes needed for a one-column table: guard $ not (length cells == 1 && not openPipe) blankline - return $ sequence cells + return cells pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks) pipeTableCell = @@ -1692,21 +1700,29 @@ strikeout = fmap B.strikeout <$> superscript :: PandocMonad m => MarkdownParser m (F Inlines) superscript = do - guardEnabled Ext_superscript fmap B.superscript <$> try (do char '^' - mconcat <$> many1Till (do notFollowedBy spaceChar - notFollowedBy newline - inline) (char '^')) + mconcat <$> (try regularSuperscript <|> try mmdShortSuperscript)) + where regularSuperscript = many1Till (do guardEnabled Ext_superscript + notFollowedBy spaceChar + notFollowedBy newline + inline) (char '^') + mmdShortSuperscript = do guardEnabled Ext_short_subsuperscripts + result <- T.pack <$> many1 alphaNum + return $ return $ return $ B.str result subscript :: PandocMonad m => MarkdownParser m (F Inlines) subscript = do - guardEnabled Ext_subscript fmap B.subscript <$> try (do char '~' - mconcat <$> many1Till (do notFollowedBy spaceChar - notFollowedBy newline - inline) (char '~')) + mconcat <$> (try regularSubscript <|> mmdShortSubscript)) + where regularSubscript = many1Till (do guardEnabled Ext_subscript + notFollowedBy spaceChar + notFollowedBy newline + inline) (char '~') + mmdShortSubscript = do guardEnabled Ext_short_subsuperscripts + result <- T.pack <$> many1 alphaNum + return $ return $ return $ B.str result whitespace :: PandocMonad m => MarkdownParser m (F Inlines) whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" @@ -1768,7 +1784,6 @@ endline = try $ do reference :: PandocMonad m => MarkdownParser m (F Inlines, Text) reference = do guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^") - guardDisabled Ext_citations <|> notFollowedBy' (string "[@") withRaw $ trimInlinesF <$> inlinesInBalancedBrackets parenthesizedChars :: PandocMonad m => MarkdownParser m Text @@ -2187,6 +2202,7 @@ normalCite = try $ do citations <- citeList spnl char ']' + notFollowedBy (oneOf "{([") -- not a link or a bracketed span return citations suffix :: PandocMonad m => MarkdownParser m (F Inlines) @@ -2200,7 +2216,7 @@ suffix = try $ do prefix :: PandocMonad m => MarkdownParser m (F Inlines) prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' + manyTill (notFollowedBy (char ';') >> inline) (char ']' <|> lookAhead (try $ do optional (try (char ';' >> spnl)) citeKey True |