diff options
Diffstat (limited to 'src/Text/Pandoc/Writers/Markdown.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 234 |
1 files changed, 144 insertions, 90 deletions
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index a67271a5d..95d4db29b 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -37,16 +37,17 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (blankline, char, space) -import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy ) +import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) +import Data.Maybe (fromMaybe) +import Data.List ( group, stripPrefix, find, intersperse, transpose, sortBy ) import Data.Char ( isSpace, isPunctuation ) import Data.Ord ( comparing ) import Text.Pandoc.Pretty import Control.Monad.State import qualified Data.Set as Set import Text.Pandoc.Writers.HTML (writeHtmlString) -import Text.Pandoc.Readers.TeXMath (readTeXMath') -import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..)) +import Text.Pandoc.Readers.TeXMath (texMathToInlines) +import Text.HTML.TagSoup (parseTags, isTagText, Tag(..)) import Network.URI (isURI) import Data.Default import Data.Yaml (Value(Object,String,Array,Bool,Number)) @@ -77,26 +78,15 @@ writePlain :: WriterOptions -> Pandoc -> String writePlain opts document = evalState (pandocToMarkdown opts{ writerExtensions = Set.delete Ext_escaped_line_breaks $ + Set.delete Ext_pipe_tables $ + Set.delete Ext_raw_html $ + Set.delete Ext_markdown_in_html_blocks $ + Set.delete Ext_raw_tex $ + Set.delete Ext_footnotes $ + Set.delete Ext_tex_math_dollars $ + Set.delete Ext_citations $ writerExtensions opts } - document') def{ stPlain = True } - where document' = plainify document - -plainify :: Pandoc -> Pandoc -plainify = walk go - where go :: Inline -> Inline - go (Emph xs) = SmallCaps xs - go (Strong xs) = SmallCaps xs - go (Strikeout xs) = SmallCaps xs - go (Superscript xs) = SmallCaps xs - go (Subscript xs) = SmallCaps xs - go (SmallCaps xs) = SmallCaps xs - go (Code _ s) = Str s - go (Math _ s) = Str s - go (RawInline _ _) = Str "" - go (Link xs _) = SmallCaps xs - go (Image xs _) = SmallCaps $ [Str "["] ++ xs ++ [Str "]"] - go (Cite _ cits) = SmallCaps cits - go x = x + document) def{ stPlain = True } pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc pandocTitleBlock tit auths dat = @@ -187,7 +177,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do then tableOfContents opts headerBlocks else empty -- Strip off final 'references' header if markdown citations enabled - let blocks' = if not isPlain && isEnabled Ext_citations opts + let blocks' = if isEnabled Ext_citations opts then case reverse blocks of (Div (_,["references"],_) _):xs -> reverse xs _ -> blocks @@ -308,12 +298,12 @@ blockToMarkdown :: WriterOptions -- ^ Options -> State WriterState Doc blockToMarkdown _ Null = return empty blockToMarkdown opts (Div attrs ils) = do - isPlain <- gets stPlain contents <- blockListToMarkdown opts ils - return $ if isPlain || not (isEnabled Ext_markdown_in_html_blocks opts) - then contents <> blankline - else tagWithAttrs "div" attrs <> blankline <> + return $ if isEnabled Ext_raw_html opts && + isEnabled Ext_markdown_in_html_blocks opts + then tagWithAttrs "div" attrs <> blankline <> contents <> blankline <> "</div>" <> blankline + else contents <> blankline blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines -- escape if para starts with ordered list marker @@ -337,21 +327,22 @@ blockToMarkdown opts (Para inlines) = (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) blockToMarkdown opts (RawBlock f str) | f == "html" = do - st <- get - if stPlain st - then return empty - else return $ if isEnabled Ext_markdown_attribute opts + plain <- gets stPlain + return $ if plain + then empty + else if isEnabled Ext_markdown_attribute opts then text (addMarkdownAttribute str) <> text "\n" else text str <> text "\n" | f `elem` ["latex", "tex", "markdown"] = do - st <- get - if stPlain st - then return empty - else return $ text str <> text "\n" + plain <- gets stPlain + return $ if plain + then empty + else text str <> text "\n" blockToMarkdown _ (RawBlock _ _) = return empty -blockToMarkdown _ HorizontalRule = - return $ blankline <> text "* * * * *" <> blankline +blockToMarkdown opts HorizontalRule = do + return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline blockToMarkdown opts (Header level attr inlines) = do + plain <- gets stPlain -- we calculate the id that would be used by auto_identifiers -- so we know whether to print an explicit identifier ids <- gets stIds @@ -366,19 +357,23 @@ blockToMarkdown opts (Header level attr inlines) = do _ | isEnabled Ext_header_attributes opts -> space <> attrsToMarkdown attr | otherwise -> empty - contents <- inlineListToMarkdown opts inlines - st <- get + contents <- inlineListToMarkdown opts $ + if level == 1 && plain + then capitalize inlines + else inlines let setext = writerSetextHeaders opts return $ nowrap $ case level of - 1 | setext -> + 1 | plain -> blanklines 3 <> contents <> blanklines 2 + | setext -> contents <> attr' <> cr <> text (replicate (offset contents) '=') <> blankline - 2 | setext -> + 2 | plain -> blanklines 2 <> contents <> blankline + | setext -> contents <> attr' <> cr <> text (replicate (offset contents) '-') <> blankline -- ghc interprets '#' characters in column 1 as linenum specifiers. - _ | stPlain st || isEnabled Ext_literate_haskell opts -> + _ | plain || isEnabled Ext_literate_haskell opts -> contents <> blankline _ -> text (replicate level '#') <> space <> contents <> attr' <> blankline blockToMarkdown opts (CodeBlock (_,classes,_) str) @@ -405,17 +400,15 @@ blockToMarkdown opts (CodeBlock attribs str) = return $ attrs = if isEnabled Ext_fenced_code_attributes opts then nowrap $ " " <> attrsToMarkdown attribs else case attribs of - (_,[cls],_) -> " " <> text cls - _ -> empty + (_,(cls:_),_) -> " " <> text cls + _ -> empty blockToMarkdown opts (BlockQuote blocks) = do - st <- get + plain <- gets stPlain -- if we're writing literate haskell, put a space before the bird tracks -- so they won't be interpreted as lhs... let leader = if isEnabled Ext_literate_haskell opts then " > " - else if stPlain st - then " " - else "> " + else if plain then " " else "> " contents <- blockListToMarkdown opts blocks return $ (prefixed leader contents) <> blankline blockToMarkdown opts t@(Table caption aligns widths headers rows) = do @@ -471,7 +464,7 @@ addMarkdownAttribute :: String -> String addMarkdownAttribute s = case span isTagText $ reverse $ parseTags s of (xs,(TagOpen t attrs:rest)) -> - renderTags $ reverse rest ++ (TagOpen t attrs' : reverse xs) + renderTags' $ reverse rest ++ (TagOpen t attrs' : reverse xs) where attrs' = ("markdown","1"):[(x,y) | (x,y) <- attrs, x /= "markdown"] _ -> s @@ -609,8 +602,19 @@ definitionListItemToMarkdown opts (label, defs) = do let sps = case writerTabStop opts - 3 of n | n > 0 -> text $ replicate n ' ' _ -> text " " - let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs' - return $ nowrap labelText <> cr <> contents <> cr + if isEnabled Ext_compact_definition_lists opts + then do + let contents = vcat $ map (\d -> hang tabStop (leader <> sps) + $ vcat d <> cr) defs' + return $ nowrap labelText <> cr <> contents <> cr + else do + let contents = vcat $ map (\d -> hang tabStop (leader <> sps) + $ vcat d <> cr) defs' + let isTight = case defs of + ((Plain _ : _): _) -> True + _ -> False + return $ blankline <> nowrap labelText <> + (if isTight then cr else blankline) <> contents <> blankline else do return $ nowrap labelText <> text " " <> cr <> vsep (map vsep defs') <> blankline @@ -625,15 +629,21 @@ blockListToMarkdown opts blocks = -- code block will be treated as a list continuation paragraph where fixBlocks (b : CodeBlock attr x : rest) | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr) - && isListBlock b = - b : RawBlock "html" "<!-- -->\n" : CodeBlock attr x : - fixBlocks rest + && isListBlock b = b : commentSep : CodeBlock attr x : + fixBlocks rest + fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) fixBlocks (x : xs) = x : fixBlocks xs fixBlocks [] = [] isListBlock (BulletList _) = True isListBlock (OrderedList _ _) = True isListBlock (DefinitionList _) = True isListBlock _ = False + commentSep = RawBlock "html" "<!-- -->\n" -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. @@ -657,7 +667,11 @@ getReference label (src, tit) = do -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc inlineListToMarkdown opts lst = - mapM (inlineToMarkdown opts) lst >>= return . cat + mapM (inlineToMarkdown opts) (avoidBadWraps lst) >>= return . cat + where avoidBadWraps [] = [] + avoidBadWraps (Space:Str (c:cs):xs) + | c `elem` "-*+>" = Str (' ':c:cs) : avoidBadWraps xs + avoidBadWraps (x:xs) = x : avoidBadWraps xs escapeSpaces :: Inline -> Inline escapeSpaces (Str s) = Str $ substitute " " "\\ " s @@ -667,52 +681,67 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do - st <- get contents <- inlineListToMarkdown opts ils - return $ if stPlain st - then contents - else tagWithAttrs "span" attrs <> contents <> text "</span>" + return $ if isEnabled Ext_raw_html opts + then tagWithAttrs "span" attrs <> contents <> text "</span>" + else contents inlineToMarkdown opts (Emph lst) = do + plain <- gets stPlain contents <- inlineListToMarkdown opts lst - return $ "*" <> contents <> "*" + return $ if plain + then "_" <> contents <> "_" + else "*" <> contents <> "*" inlineToMarkdown opts (Strong lst) = do - contents <- inlineListToMarkdown opts lst - return $ "**" <> contents <> "**" + plain <- gets stPlain + if plain + then inlineListToMarkdown opts $ capitalize lst + else do + contents <- inlineListToMarkdown opts lst + return $ "**" <> contents <> "**" inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst return $ if isEnabled Ext_strikeout opts then "~~" <> contents <> "~~" else "<s>" <> contents <> "</s>" inlineToMarkdown opts (Superscript lst) = do - let lst' = walk escapeSpaces lst - contents <- inlineListToMarkdown opts lst' + contents <- inlineListToMarkdown opts $ walk escapeSpaces lst return $ if isEnabled Ext_superscript opts then "^" <> contents <> "^" else "<sup>" <> contents <> "</sup>" inlineToMarkdown opts (Subscript lst) = do - let lst' = walk escapeSpaces lst - contents <- inlineListToMarkdown opts lst' + contents <- inlineListToMarkdown opts $ walk escapeSpaces lst return $ if isEnabled Ext_subscript opts then "~" <> contents <> "~" else "<sub>" <> contents <> "</sub>" -inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst +inlineToMarkdown opts (SmallCaps lst) = do + plain <- gets stPlain + if plain + then inlineListToMarkdown opts $ capitalize lst + else do + contents <- inlineListToMarkdown opts lst + return $ tagWithAttrs "span" + ("",[],[("style","font-variant:small-caps;")]) + <> contents <> text "</span>" inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "‘" <> contents <> "’" inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "“" <> contents <> "”" -inlineToMarkdown opts (Code attr str) = +inlineToMarkdown opts (Code attr str) = do let tickGroups = filter (\s -> '`' `elem` s) $ group str - longest = if null tickGroups + let longest = if null tickGroups then 0 else maximum $ map length tickGroups - marker = replicate (longest + 1) '`' - spacer = if (longest == 0) then "" else " " - attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr + let marker = replicate (longest + 1) '`' + let spacer = if (longest == 0) then "" else " " + let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr then attrsToMarkdown attr else empty - in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs + plain <- gets stPlain + if plain + then return $ text str + else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs inlineToMarkdown _ (Str str) = do st <- get if stPlain st @@ -725,7 +754,11 @@ inlineToMarkdown opts (Math InlineMath str) return $ "\\(" <> text str <> "\\)" | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\(" <> text str <> "\\\\)" - | otherwise = inlineListToMarkdown opts $ readTeXMath' InlineMath str + | otherwise = do + plain <- gets stPlain + inlineListToMarkdown opts $ + (if plain then makeMathPlainer else id) $ + texMathToInlines InlineMath str inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_dollars opts = return $ "$$" <> text str <> "$$" @@ -734,16 +767,23 @@ inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\[" <> text str <> "\\\\]" | otherwise = (\x -> cr <> x <> cr) `fmap` - inlineListToMarkdown opts (readTeXMath' DisplayMath str) -inlineToMarkdown opts (RawInline f str) - | f == "html" || f == "markdown" || - (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) = - return $ text str -inlineToMarkdown _ (RawInline _ _) = return empty -inlineToMarkdown opts (LineBreak) - | isEnabled Ext_hard_line_breaks opts = return cr - | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr - | otherwise = return $ " " <> cr + inlineListToMarkdown opts (texMathToInlines DisplayMath str) +inlineToMarkdown opts (RawInline f str) = do + plain <- gets stPlain + if not plain && + ( f == "markdown" || + (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) || + (isEnabled Ext_raw_html opts && f == "html") ) + then return $ text str + else return empty +inlineToMarkdown opts (LineBreak) = do + plain <- gets stPlain + if plain || isEnabled Ext_hard_line_breaks opts + then return cr + else return $ + if isEnabled Ext_escaped_line_breaks opts + then "\\" <> cr + else " " <> cr inlineToMarkdown _ Space = return space inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Cite (c:cs) lst) @@ -776,11 +816,12 @@ inlineToMarkdown opts (Cite (c:cs) lst) modekey SuppressAuthor = "-" modekey _ = "" inlineToMarkdown opts (Link txt (src, tit)) = do + plain <- gets stPlain linktext <- inlineListToMarkdown opts txt let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) let useAuto = isURI src && case txt of [Str s] | escapeURI s == srcSuffix -> True @@ -789,22 +830,29 @@ inlineToMarkdown opts (Link txt (src, tit)) = do ref <- if useRefLinks then getReference txt (src, tit) else return [] reftext <- inlineListToMarkdown opts ref return $ if useAuto - then "<" <> text srcSuffix <> ">" + then if plain + then text srcSuffix + else "<" <> text srcSuffix <> ">" else if useRefLinks then let first = "[" <> linktext <> "]" second = if txt == ref then "[]" else "[" <> reftext <> "]" in first <> second - else "[" <> linktext <> "](" <> - text src <> linktitle <> ")" + else if plain + then linktext + else "[" <> linktext <> "](" <> + text src <> linktitle <> ")" inlineToMarkdown opts (Image alternate (source, tit)) = do + plain <- gets stPlain let txt = if null alternate || alternate == [Str source] -- to prevent autolinks then [Str ""] else alternate linkPart <- inlineToMarkdown opts (Link txt (source, tit)) - return $ "!" <> linkPart + return $ if plain + then "[" <> linkPart <> "]" + else "!" <> linkPart inlineToMarkdown opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get @@ -812,3 +860,9 @@ inlineToMarkdown opts (Note contents) = do if isEnabled Ext_footnotes opts then return $ "[^" <> ref <> "]" else return $ "[" <> ref <> "]" + +makeMathPlainer :: [Inline] -> [Inline] +makeMathPlainer = walk go + where + go (Emph xs) = Span nullAttr xs + go x = x |