diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 108 |
1 files changed, 69 insertions, 39 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index e1c481311..ab6a32b78 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -52,15 +52,17 @@ import System.FilePath (addExtension, takeExtension) import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..), report) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, isCommentTag, isInlineTag, isTextTag) -import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) +import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline, applyMacros, + macro) import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) @@ -74,7 +76,7 @@ readMarkdown :: PandocMonad m -> m Pandoc readMarkdown opts s = do parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -291,18 +293,22 @@ ignorable t = (T.pack "_") `T.isSuffixOf` t toMetaValue :: PandocMonad m => Text -> MarkdownParser m (F MetaValue) -toMetaValue x = toMeta <$> parseFromString' parseBlocks (T.unpack x) - where - toMeta p = do - p' <- p - return $ - case B.toList p' of - [Plain xs] -> MetaInlines xs - [Para xs] - | endsWithNewline x -> MetaBlocks [Para xs] - | otherwise -> MetaInlines xs - bs -> MetaBlocks bs - endsWithNewline t = T.pack "\n" `T.isSuffixOf` t +toMetaValue x = + parseFromString' parser' (T.unpack x) + where parser' = (asInlines <$> ((trimInlinesF . mconcat) + <$> (guard (not endsWithNewline) + *> manyTill inline eof))) + <|> (asBlocks <$> parseBlocks) + asBlocks p = do + p' <- p + return $ MetaBlocks (B.toList p') + asInlines p = do + p' <- p + return $ MetaInlines (B.toList p') + endsWithNewline = T.pack "\n" `T.isSuffixOf` x + -- Note: a standard quoted or unquoted YAML value will + -- not end in a newline, but a "block" set off with + -- `|` or `>` will. yamlToMeta :: PandocMonad m => Yaml.Value -> MarkdownParser m (F MetaValue) @@ -368,13 +374,14 @@ parseMarkdown = do -- lookup to get sourcepos case M.lookup n (stateNotes' st) of Just (pos, _) -> report (NoteDefinedButNotUsed n pos) - Nothing -> error "The impossible happened.") notesDefined + Nothing -> throwError $ + PandocShouldNeverHappenError "note not found") + notesDefined let doc = runF (do Pandoc _ bs <- B.doc <$> blocks meta <- stateMeta' st return $ Pandoc meta bs) st reportLogMessages - (do guardEnabled Ext_east_asian_line_breaks - return $ eastAsianLineBreakFilter doc) <|> return doc + return doc referenceKey :: PandocMonad m => MarkdownParser m (F Blocks) referenceKey = try $ do @@ -488,7 +495,6 @@ parseBlocks = mconcat <$> manyTill block eof block :: PandocMonad m => MarkdownParser m (F Blocks) block = do - pos <- getPosition res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock @@ -514,8 +520,7 @@ block = do , para , plain ] <?> "block" - report $ ParsingTrace - (take 60 $ show $ B.toList $ runF res defaultParserState) pos + trace (take 60 $ show $ B.toList $ runF res defaultParserState) return res -- @@ -680,19 +685,36 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) +rawAttribute :: PandocMonad m => MarkdownParser m String +rawAttribute = do + char '{' + skipMany spaceChar + char '=' + format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_") + skipMany spaceChar + char '}' + return format + codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) size <- blockDelimiter (== c) Nothing skipMany spaceChar - attr <- option ([],[],[]) $ - try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar) + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_fenced_code_attributes >> attributes) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar))) blankline - contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) + contents <- intercalate "\n" <$> + manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines - return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + return $ return $ + case rawattr of + Left syn -> B.rawBlock syn contents + Right attr -> B.codeBlockWith attr contents -- correctly handle github language identifiers toLanguageId :: String -> String @@ -1013,7 +1035,8 @@ para = try $ do result' <- result case B.toList result' of [Image attr alt (src,tit)] - | Ext_implicit_figures `extensionEnabled` exts -> + | not (null alt) && + Ext_implicit_figures `extensionEnabled` exts -> -- the fig: at beginning of title indicates a figure return $ B.para $ B.singleton $ Image attr alt (src,'f':'i':'g':':':tit) @@ -1083,10 +1106,11 @@ latexMacro = try $ do rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "latex" . concat <$> - rawLaTeXBlock `sepEndBy1` blankline) - <|> (B.rawBlock "context" . concat <$> + result <- (B.rawBlock "context" . concat <$> rawConTeXtEnvironment `sepEndBy1` blankline) + <|> (B.rawBlock "latex" . concat <$> + rawLaTeXBlock `sepEndBy1` blankline) + spaces return $ return result @@ -1515,17 +1539,24 @@ code :: PandocMonad m => MarkdownParser m (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces - result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> + result <- (trim . concat) <$> + many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " ")) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) - attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes - >> attributes) - return $ return $ B.codeWith attr $ trim $ concat result + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_inline_code_attributes >> attributes))) + return $ return $ + case rawattr of + Left syn -> B.rawInline syn result + Right attr -> B.codeWith attr result math :: PandocMonad m => MarkdownParser m (F Inlines) -math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?> +math = (return . B.displayMath <$> (mathDisplay >>= applyMacros)) + <|> (return . B.math <$> (mathInline >>= applyMacros)) <+?> (guardEnabled Ext_smart *> (return <$> apostrophe) <* notFollowedBy (space <|> satisfy isPunctuation)) @@ -1849,9 +1880,8 @@ rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex lookAhead (char '\\') notFollowedBy' rawConTeXtEnvironment - RawInline _ s <- rawLaTeXInline - return $ return $ B.rawInline "tex" s - -- "tex" because it might be context or latex + s <- rawLaTeXInline + return $ return $ B.rawInline "tex" s -- "tex" because it might be context rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String rawConTeXtEnvironment = try $ do |