diff options
Diffstat (limited to 'src/Text/Pandoc/Writers/Markdown.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 316 |
1 files changed, 199 insertions, 117 deletions
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 278e5cc9d..f06f1d6cc 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} {- -Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2013 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -37,16 +37,17 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (blankline, char, space) -import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy ) +import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) +import Data.Maybe (fromMaybe) +import Data.List ( group, stripPrefix, find, intersperse, transpose, sortBy ) import Data.Char ( isSpace, isPunctuation ) import Data.Ord ( comparing ) import Text.Pandoc.Pretty import Control.Monad.State import qualified Data.Set as Set import Text.Pandoc.Writers.HTML (writeHtmlString) -import Text.Pandoc.Readers.TeXMath (readTeXMath') -import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..)) +import Text.Pandoc.Readers.TeXMath (texMathToInlines) +import Text.HTML.TagSoup (parseTags, isTagText, Tag(..)) import Network.URI (isURI) import Data.Default import Data.Yaml (Value(Object,String,Array,Bool,Number)) @@ -77,26 +78,15 @@ writePlain :: WriterOptions -> Pandoc -> String writePlain opts document = evalState (pandocToMarkdown opts{ writerExtensions = Set.delete Ext_escaped_line_breaks $ + Set.delete Ext_pipe_tables $ + Set.delete Ext_raw_html $ + Set.delete Ext_markdown_in_html_blocks $ + Set.delete Ext_raw_tex $ + Set.delete Ext_footnotes $ + Set.delete Ext_tex_math_dollars $ + Set.delete Ext_citations $ writerExtensions opts } - document') def{ stPlain = True } - where document' = plainify document - -plainify :: Pandoc -> Pandoc -plainify = walk go - where go :: Inline -> Inline - go (Emph xs) = SmallCaps xs - go (Strong xs) = SmallCaps xs - go (Strikeout xs) = SmallCaps xs - go (Superscript xs) = SmallCaps xs - go (Subscript xs) = SmallCaps xs - go (SmallCaps xs) = SmallCaps xs - go (Code _ s) = Str s - go (Math _ s) = Str s - go (RawInline _ _) = Str "" - go (Link xs _) = SmallCaps xs - go (Image xs _) = SmallCaps $ [Str "["] ++ xs ++ [Str "]"] - go (Cite _ cits) = SmallCaps cits - go x = x + document) def{ stPlain = True } pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc pandocTitleBlock tit auths dat = @@ -187,7 +177,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do then tableOfContents opts headerBlocks else empty -- Strip off final 'references' header if markdown citations enabled - let blocks' = if not isPlain && isEnabled Ext_citations opts + let blocks' = if isEnabled Ext_citations opts then case reverse blocks of (Div (_,["references"],_) _):xs -> reverse xs _ -> blocks @@ -251,9 +241,20 @@ noteToMarkdown opts num blocks = do else marker <> spacer <> contents -- | Escape special characters for Markdown. -escapeString :: String -> String -escapeString = escapeStringUsing markdownEscapes - where markdownEscapes = backslashEscapes "\\`*_$<>#~^" +escapeString :: WriterOptions -> String -> String +escapeString opts = escapeStringUsing markdownEscapes + where markdownEscapes = backslashEscapes specialChars + specialChars = + (if isEnabled Ext_superscript opts + then ('^':) + else id) . + (if isEnabled Ext_subscript opts + then ('~':) + else id) . + (if isEnabled Ext_tex_math_dollars opts + then ('$':) + else id) $ + "\\`*_<>#" -- | Construct table of contents from list of header blocks. tableOfContents :: WriterOptions -> [Block] -> Doc @@ -308,45 +309,51 @@ blockToMarkdown :: WriterOptions -- ^ Options -> State WriterState Doc blockToMarkdown _ Null = return empty blockToMarkdown opts (Div attrs ils) = do - isPlain <- gets stPlain contents <- blockListToMarkdown opts ils - return $ if isPlain || not (isEnabled Ext_markdown_in_html_blocks opts) - then contents <> blankline - else tagWithAttrs "div" attrs <> blankline <> + return $ if isEnabled Ext_raw_html opts && + isEnabled Ext_markdown_in_html_blocks opts + then tagWithAttrs "div" attrs <> blankline <> contents <> blankline <> "</div>" <> blankline + else contents <> blankline blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines - return $ contents <> cr + -- escape if para starts with ordered list marker + st <- get + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + let rendered = render colwidth contents + let escapeDelimiter (x:xs) | x `elem` ".()" = '\\':x:xs + | otherwise = x : escapeDelimiter xs + escapeDelimiter [] = [] + let contents' = if isEnabled Ext_all_symbols_escapable opts && + not (stPlain st) && beginsWithOrderedListMarker rendered + then text $ escapeDelimiter rendered + else contents + return $ contents' <> cr -- title beginning with fig: indicates figure blockToMarkdown opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = blockToMarkdown opts (Para [Image alt (src,tit)]) -blockToMarkdown opts (Para inlines) = do - contents <- inlineListToMarkdown opts inlines - -- escape if para starts with ordered list marker - st <- get - let esc = if isEnabled Ext_all_symbols_escapable opts && - not (stPlain st) && - beginsWithOrderedListMarker (render Nothing contents) - then text "\x200B" -- zero-width space, a hack - else empty - return $ esc <> contents <> blankline +blockToMarkdown opts (Para inlines) = + (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) blockToMarkdown opts (RawBlock f str) | f == "html" = do - st <- get - if stPlain st - then return empty - else return $ if isEnabled Ext_markdown_attribute opts + plain <- gets stPlain + return $ if plain + then empty + else if isEnabled Ext_markdown_attribute opts then text (addMarkdownAttribute str) <> text "\n" else text str <> text "\n" | f `elem` ["latex", "tex", "markdown"] = do - st <- get - if stPlain st - then return empty - else return $ text str <> text "\n" + plain <- gets stPlain + return $ if plain + then empty + else text str <> text "\n" blockToMarkdown _ (RawBlock _ _) = return empty -blockToMarkdown _ HorizontalRule = - return $ blankline <> text "* * * * *" <> blankline +blockToMarkdown opts HorizontalRule = do + return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline blockToMarkdown opts (Header level attr inlines) = do + plain <- gets stPlain -- we calculate the id that would be used by auto_identifiers -- so we know whether to print an explicit identifier ids <- gets stIds @@ -361,19 +368,23 @@ blockToMarkdown opts (Header level attr inlines) = do _ | isEnabled Ext_header_attributes opts -> space <> attrsToMarkdown attr | otherwise -> empty - contents <- inlineListToMarkdown opts inlines - st <- get + contents <- inlineListToMarkdown opts $ + if level == 1 && plain + then capitalize inlines + else inlines let setext = writerSetextHeaders opts return $ nowrap $ case level of - 1 | setext -> + 1 | plain -> blanklines 3 <> contents <> blanklines 2 + | setext -> contents <> attr' <> cr <> text (replicate (offset contents) '=') <> blankline - 2 | setext -> + 2 | plain -> blanklines 2 <> contents <> blankline + | setext -> contents <> attr' <> cr <> text (replicate (offset contents) '-') <> blankline -- ghc interprets '#' characters in column 1 as linenum specifiers. - _ | stPlain st || isEnabled Ext_literate_haskell opts -> + _ | plain || isEnabled Ext_literate_haskell opts -> contents <> blankline _ -> text (replicate level '#') <> space <> contents <> attr' <> blankline blockToMarkdown opts (CodeBlock (_,classes,_) str) @@ -392,21 +403,23 @@ blockToMarkdown opts (CodeBlock attribs str) = return $ xs -> case maximum $ map length xs of n | n < 3 -> "~~~~" | otherwise -> replicate (n+1) '~' - backticks = text "```" + backticks = text $ case [ln | ln <- lines str, all (=='`') ln] of + [] -> "```" + xs -> case maximum $ map length xs of + n | n < 3 -> "```" + | otherwise -> replicate (n+1) '`' attrs = if isEnabled Ext_fenced_code_attributes opts then nowrap $ " " <> attrsToMarkdown attribs else case attribs of - (_,[cls],_) -> " " <> text cls - _ -> empty + (_,(cls:_),_) -> " " <> text cls + _ -> empty blockToMarkdown opts (BlockQuote blocks) = do - st <- get + plain <- gets stPlain -- if we're writing literate haskell, put a space before the bird tracks -- so they won't be interpreted as lhs... let leader = if isEnabled Ext_literate_haskell opts then " > " - else if stPlain st - then " " - else "> " + else if plain then " " else "> " contents <- blockListToMarkdown opts blocks return $ (prefixed leader contents) <> blankline blockToMarkdown opts t@(Table caption aligns widths headers rows) = do @@ -462,23 +475,31 @@ addMarkdownAttribute :: String -> String addMarkdownAttribute s = case span isTagText $ reverse $ parseTags s of (xs,(TagOpen t attrs:rest)) -> - renderTags $ reverse rest ++ (TagOpen t attrs' : reverse xs) + renderTags' $ reverse rest ++ (TagOpen t attrs' : reverse xs) where attrs' = ("markdown","1"):[(x,y) | (x,y) <- attrs, x /= "markdown"] _ -> s pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> State WriterState Doc pipeTable headless aligns rawHeaders rawRows = do + let sp = text " " + let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor AlignCenter x y = cblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor AlignRight x y = rblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor _ x y = lblock (x + 2) (sp <> y) <> lblock 0 empty + let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows) let torow cs = nowrap $ text "|" <> - hcat (intersperse (text "|") $ map chomp cs) <> text "|" - let toborder (a, h) = let wid = max (offset h) 3 - in text $ case a of - AlignLeft -> ':':replicate (wid - 1) '-' - AlignCenter -> ':':replicate (wid - 2) '-' ++ ":" - AlignRight -> replicate (wid - 1) '-' ++ ":" - AlignDefault -> replicate wid '-' + hcat (intersperse (text "|") $ + zipWith3 blockFor aligns widths (map chomp cs)) + <> text "|" + let toborder (a, w) = text $ case a of + AlignLeft -> ':':replicate (w + 1) '-' + AlignCenter -> ':':replicate w '-' ++ ":" + AlignRight -> replicate (w + 1) '-' ++ ":" + AlignDefault -> replicate (w + 2) '-' let header = if headless then empty else torow rawHeaders - let border = torow $ map toborder $ zip aligns rawHeaders + let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ + map toborder $ zip aligns widths) <> text "|" let body = vcat $ map torow rawRows return $ header $$ border $$ body @@ -592,8 +613,19 @@ definitionListItemToMarkdown opts (label, defs) = do let sps = case writerTabStop opts - 3 of n | n > 0 -> text $ replicate n ' ' _ -> text " " - let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs' - return $ nowrap labelText <> cr <> contents <> cr + if isEnabled Ext_compact_definition_lists opts + then do + let contents = vcat $ map (\d -> hang tabStop (leader <> sps) + $ vcat d <> cr) defs' + return $ nowrap labelText <> cr <> contents <> cr + else do + let contents = vcat $ map (\d -> hang tabStop (leader <> sps) + $ vcat d <> cr) defs' + let isTight = case defs of + ((Plain _ : _): _) -> True + _ -> False + return $ blankline <> nowrap labelText <> + (if isTight then cr else blankline) <> contents <> blankline else do return $ nowrap labelText <> text " " <> cr <> vsep (map vsep defs') <> blankline @@ -608,15 +640,21 @@ blockListToMarkdown opts blocks = -- code block will be treated as a list continuation paragraph where fixBlocks (b : CodeBlock attr x : rest) | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr) - && isListBlock b = - b : RawBlock "html" "<!-- -->\n" : CodeBlock attr x : - fixBlocks rest + && isListBlock b = b : commentSep : CodeBlock attr x : + fixBlocks rest + fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) fixBlocks (x : xs) = x : fixBlocks xs fixBlocks [] = [] isListBlock (BulletList _) = True isListBlock (OrderedList _ _) = True isListBlock (DefinitionList _) = True isListBlock _ = False + commentSep = RawBlock "html" "<!-- -->\n" -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. @@ -640,7 +678,11 @@ getReference label (src, tit) = do -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc inlineListToMarkdown opts lst = - mapM (inlineToMarkdown opts) lst >>= return . cat + mapM (inlineToMarkdown opts) (avoidBadWraps lst) >>= return . cat + where avoidBadWraps [] = [] + avoidBadWraps (Space:Str (c:cs):xs) + | c `elem` "-*+>" = Str (' ':c:cs) : avoidBadWraps xs + avoidBadWraps (x:xs) = x : avoidBadWraps xs escapeSpaces :: Inline -> Inline escapeSpaces (Str s) = Str $ substitute " " "\\ " s @@ -650,57 +692,72 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do - st <- get contents <- inlineListToMarkdown opts ils - return $ if stPlain st - then contents - else tagWithAttrs "span" attrs <> contents <> text "</span>" + return $ if isEnabled Ext_raw_html opts + then tagWithAttrs "span" attrs <> contents <> text "</span>" + else contents inlineToMarkdown opts (Emph lst) = do + plain <- gets stPlain contents <- inlineListToMarkdown opts lst - return $ "*" <> contents <> "*" + return $ if plain + then "_" <> contents <> "_" + else "*" <> contents <> "*" inlineToMarkdown opts (Strong lst) = do - contents <- inlineListToMarkdown opts lst - return $ "**" <> contents <> "**" + plain <- gets stPlain + if plain + then inlineListToMarkdown opts $ capitalize lst + else do + contents <- inlineListToMarkdown opts lst + return $ "**" <> contents <> "**" inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst return $ if isEnabled Ext_strikeout opts then "~~" <> contents <> "~~" else "<s>" <> contents <> "</s>" inlineToMarkdown opts (Superscript lst) = do - let lst' = walk escapeSpaces lst - contents <- inlineListToMarkdown opts lst' + contents <- inlineListToMarkdown opts $ walk escapeSpaces lst return $ if isEnabled Ext_superscript opts then "^" <> contents <> "^" else "<sup>" <> contents <> "</sup>" inlineToMarkdown opts (Subscript lst) = do - let lst' = walk escapeSpaces lst - contents <- inlineListToMarkdown opts lst' + contents <- inlineListToMarkdown opts $ walk escapeSpaces lst return $ if isEnabled Ext_subscript opts then "~" <> contents <> "~" else "<sub>" <> contents <> "</sub>" -inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst +inlineToMarkdown opts (SmallCaps lst) = do + plain <- gets stPlain + if plain + then inlineListToMarkdown opts $ capitalize lst + else do + contents <- inlineListToMarkdown opts lst + return $ tagWithAttrs "span" + ("",[],[("style","font-variant:small-caps;")]) + <> contents <> text "</span>" inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "‘" <> contents <> "’" inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "“" <> contents <> "”" -inlineToMarkdown opts (Code attr str) = +inlineToMarkdown opts (Code attr str) = do let tickGroups = filter (\s -> '`' `elem` s) $ group str - longest = if null tickGroups + let longest = if null tickGroups then 0 else maximum $ map length tickGroups - marker = replicate (longest + 1) '`' - spacer = if (longest == 0) then "" else " " - attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr + let marker = replicate (longest + 1) '`' + let spacer = if (longest == 0) then "" else " " + let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr then attrsToMarkdown attr else empty - in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs -inlineToMarkdown _ (Str str) = do + plain <- gets stPlain + if plain + then return $ text str + else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs +inlineToMarkdown opts (Str str) = do st <- get if stPlain st then return $ text str - else return $ text $ escapeString str + else return $ text $ escapeString opts str inlineToMarkdown opts (Math InlineMath str) | isEnabled Ext_tex_math_dollars opts = return $ "$" <> text str <> "$" @@ -708,7 +765,11 @@ inlineToMarkdown opts (Math InlineMath str) return $ "\\(" <> text str <> "\\)" | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\(" <> text str <> "\\\\)" - | otherwise = inlineListToMarkdown opts $ readTeXMath' InlineMath str + | otherwise = do + plain <- gets stPlain + inlineListToMarkdown opts $ + (if plain then makeMathPlainer else id) $ + texMathToInlines InlineMath str inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_dollars opts = return $ "$$" <> text str <> "$$" @@ -717,16 +778,23 @@ inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\[" <> text str <> "\\\\]" | otherwise = (\x -> cr <> x <> cr) `fmap` - inlineListToMarkdown opts (readTeXMath' DisplayMath str) -inlineToMarkdown opts (RawInline f str) - | f == "html" || f == "markdown" || - (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) = - return $ text str -inlineToMarkdown _ (RawInline _ _) = return empty -inlineToMarkdown opts (LineBreak) - | isEnabled Ext_hard_line_breaks opts = return cr - | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr - | otherwise = return $ " " <> cr + inlineListToMarkdown opts (texMathToInlines DisplayMath str) +inlineToMarkdown opts (RawInline f str) = do + plain <- gets stPlain + if not plain && + ( f == "markdown" || + (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) || + (isEnabled Ext_raw_html opts && f == "html") ) + then return $ text str + else return empty +inlineToMarkdown opts (LineBreak) = do + plain <- gets stPlain + if plain || isEnabled Ext_hard_line_breaks opts + then return cr + else return $ + if isEnabled Ext_escaped_line_breaks opts + then "\\" <> cr + else " " <> cr inlineToMarkdown _ Space = return space inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Cite (c:cs) lst) @@ -759,11 +827,12 @@ inlineToMarkdown opts (Cite (c:cs) lst) modekey SuppressAuthor = "-" modekey _ = "" inlineToMarkdown opts (Link txt (src, tit)) = do + plain <- gets stPlain linktext <- inlineListToMarkdown opts txt let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) let useAuto = isURI src && case txt of [Str s] | escapeURI s == srcSuffix -> True @@ -772,22 +841,29 @@ inlineToMarkdown opts (Link txt (src, tit)) = do ref <- if useRefLinks then getReference txt (src, tit) else return [] reftext <- inlineListToMarkdown opts ref return $ if useAuto - then "<" <> text srcSuffix <> ">" + then if plain + then text srcSuffix + else "<" <> text srcSuffix <> ">" else if useRefLinks then let first = "[" <> linktext <> "]" second = if txt == ref then "[]" else "[" <> reftext <> "]" in first <> second - else "[" <> linktext <> "](" <> - text src <> linktitle <> ")" + else if plain + then linktext + else "[" <> linktext <> "](" <> + text src <> linktitle <> ")" inlineToMarkdown opts (Image alternate (source, tit)) = do + plain <- gets stPlain let txt = if null alternate || alternate == [Str source] -- to prevent autolinks then [Str ""] else alternate linkPart <- inlineToMarkdown opts (Link txt (source, tit)) - return $ "!" <> linkPart + return $ if plain + then "[" <> linkPart <> "]" + else "!" <> linkPart inlineToMarkdown opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get @@ -795,3 +871,9 @@ inlineToMarkdown opts (Note contents) = do if isEnabled Ext_footnotes opts then return $ "[^" <> ref <> "]" else return $ "[" <> ref <> "]" + +makeMathPlainer :: [Inline] -> [Inline] +makeMathPlainer = walk go + where + go (Emph xs) = Span nullAttr xs + go x = x |