diff options
author | John MacFarlane <jgm@berkeley.edu> | 2020-07-18 15:17:06 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2020-07-19 22:51:59 -0700 |
commit | d6b7b1dc772249e9a1b56bcdd4ae816cc54edb51 (patch) | |
tree | 59c29c5daecd6897d8756a1b04b53bcf382aff6d /src/Text/Pandoc/Writers/CommonMark.hs | |
parent | a63105fffffeea18bb258f31f6fdf2e2d3730eaa (diff) | |
download | pandoc-d6b7b1dc772249e9a1b56bcdd4ae816cc54edb51.tar.gz |
Remove use of cmark-gfm for commonmark/gfm rendering.
Instead rely on the markdown writer with appropriate extensions.
Export writeCommonMark variant from Markdown writer.
This changes a few small things in rendering markdown,
e.g. w/r/t requiring backslashes before spaces inside
super/subscripts.
Diffstat (limited to 'src/Text/Pandoc/Writers/CommonMark.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/CommonMark.hs | 350 |
1 files changed, 1 insertions, 349 deletions
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index ca6a4e334..e991cd384 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -15,353 +15,5 @@ CommonMark: <http://commonmark.org> -} module Text.Pandoc.Writers.CommonMark (writeCommonMark) where -import CMarkGFM -import Control.Monad.State.Strict (State, get, modify, runState) -import Data.Char (isAscii) -import Data.Foldable (foldrM) -import Data.List (transpose) -import Data.Text (Text) -import qualified Data.Text as T -import Network.HTTP (urlEncode) -import Text.Pandoc.Class.PandocMonad (PandocMonad) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared (capitalize, isTightList, - linesToPara, onlySimpleTableCells, taskListItemToAscii, tshow) -import Text.Pandoc.Templates (renderTemplate) -import Text.Pandoc.Walk (walk, walkM) -import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes) -import Text.Pandoc.Writers.Shared -import Text.Pandoc.XML (toHtml5Entities) -import Text.DocLayout (literal, render) +import Text.Pandoc.Writers.Markdown (writeCommonMark) --- | Convert Pandoc to CommonMark. -writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text -writeCommonMark opts (Pandoc meta blocks) = do - toc <- if writerTableOfContents opts - then blocksToCommonMark opts [ toTableOfContents opts blocks ] - else return mempty - - let (blocks', notes) = runState (walkM processNotes blocks) [] - notes' = [OrderedList (1, Decimal, Period) $ reverse notes | not (null notes)] - main <- blocksToCommonMark opts (blocks' ++ notes') - metadata <- metaToContext opts - (fmap (literal . T.stripEnd) . blocksToCommonMark opts) - (fmap (literal . T.stripEnd) . inlinesToCommonMark opts) - meta - let context = - -- for backwards compatibility we populate toc - -- with the contents of the toc, rather than a boolean: - defField "toc" toc - $ defField "table-of-contents" toc - $ defField "body" main metadata - return $ - case writerTemplate opts of - Nothing -> main - Just tpl -> render Nothing $ renderTemplate tpl context - -softBreakToSpace :: Inline -> Inline -softBreakToSpace SoftBreak = Space -softBreakToSpace x = x - -processNotes :: Inline -> State [[Block]] Inline -processNotes (Note bs) = do - modify (bs :) - notes <- get - return $ Str $ "[" <> tshow (length notes) <> "]" -processNotes x = return x - -node :: NodeType -> [Node] -> Node -node = Node Nothing - -blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m Text -blocksToCommonMark opts bs = do - let cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] - colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - nodes <- blocksToNodes opts bs - return $ T.stripEnd $ - nodeToCommonmark cmarkOpts colwidth $ - node DOCUMENT nodes - -inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m Text -inlinesToCommonMark opts ils = return $ - nodeToCommonmark cmarkOpts colwidth $ - node PARAGRAPH (inlinesToNodes opts ils) - where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] - colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - -blocksToNodes :: PandocMonad m => WriterOptions -> [Block] -> m [Node] -blocksToNodes opts = foldrM (blockToNodes opts) [] - -blockToNodes :: PandocMonad m => WriterOptions -> Block -> [Node] -> m [Node] -blockToNodes opts (Plain xs) ns = - return (node PARAGRAPH (inlinesToNodes opts xs) : ns) -blockToNodes opts (Para xs) ns = - return (node PARAGRAPH (inlinesToNodes opts xs) : ns) -blockToNodes opts (LineBlock lns) ns = blockToNodes opts (linesToPara lns) ns -blockToNodes _ (CodeBlock (_,classes,_) xs) ns = return - (node (CODE_BLOCK (T.unwords classes) xs) [] : ns) -blockToNodes opts (RawBlock (Format f) xs) ns - | f == "html" && isEnabled Ext_raw_html opts - = return (node (HTML_BLOCK xs) [] : ns) - | (f == "latex" || f == "tex") && isEnabled Ext_raw_tex opts - = return (node (CUSTOM_BLOCK xs T.empty) [] : ns) - | f == "markdown" - = return (node (CUSTOM_BLOCK xs T.empty) [] : ns) - | otherwise = return ns -blockToNodes opts (BlockQuote bs) ns = do - nodes <- blocksToNodes opts bs - return (node BLOCK_QUOTE nodes : ns) -blockToNodes opts (BulletList items) ns = do - let exts = writerExtensions opts - nodes <- mapM (blocksToNodes opts . taskListItemToAscii exts) items - return (node (LIST ListAttributes{ - listType = BULLET_LIST, - listDelim = PERIOD_DELIM, - listTight = isTightList items, - listStart = 1 }) (map (node ITEM) nodes) : ns) -blockToNodes opts (OrderedList (start, _sty, delim) items) ns = do - let exts = writerExtensions opts - nodes <- mapM (blocksToNodes opts . taskListItemToAscii exts) items - return (node (LIST ListAttributes{ - listType = ORDERED_LIST, - listDelim = case delim of - OneParen -> PAREN_DELIM - TwoParens -> PAREN_DELIM - _ -> PERIOD_DELIM, - listTight = isTightList items, - listStart = start }) (map (node ITEM) nodes) : ns) -blockToNodes _ HorizontalRule ns = return (node THEMATIC_BREAK [] : ns) -blockToNodes opts (Header lev _ ils) ns = - return (node (HEADING lev) (inlinesToNodes opts ils) : ns) -blockToNodes opts (Div attr bs) ns = do - nodes <- blocksToNodes opts bs - let op = tagWithAttributes opts True False "div" attr - if isEnabled Ext_raw_html opts - then return (node (HTML_BLOCK op) [] : nodes ++ - [node (HTML_BLOCK (T.pack "</div>")) []] ++ ns) - else return (nodes ++ ns) -blockToNodes opts (DefinitionList items) ns = - blockToNodes opts (BulletList items') ns - where items' = map dlToBullet items - dlToBullet (term, (Para xs : ys) : zs) = - Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs - dlToBullet (term, (Plain xs : ys) : zs) = - Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs - dlToBullet (term, xs) = - Para term : concat xs -blockToNodes opts t@(Table _ blkCapt specs thead tbody tfoot) ns = - let (capt, aligns, _widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot - in if isEnabled Ext_pipe_tables opts && onlySimpleTableCells (headers : rows) - then do - -- We construct a table manually as a CUSTOM_BLOCK, for - -- two reasons: (1) cmark-gfm currently doesn't support - -- rendering TABLE nodes; (2) we can align the column sides; - -- (3) we can render the caption as a regular paragraph. - let capt' = node PARAGRAPH (inlinesToNodes opts capt) - -- backslash | in code and raw: - let fixPipe (Code attr xs) = - Code attr (T.replace "|" "\\|" xs) - fixPipe (RawInline format xs) = - RawInline format (T.replace "|" "\\|" xs) - fixPipe x = x - let toCell [Plain ils] = T.strip - $ nodeToCommonmark [] Nothing - $ node (CUSTOM_INLINE mempty mempty) - $ inlinesToNodes opts - $ walk (fixPipe . softBreakToSpace) ils - toCell [Para ils] = T.strip - $ nodeToCommonmark [] Nothing - $ node (CUSTOM_INLINE mempty mempty) - $ inlinesToNodes opts - $ walk (fixPipe . softBreakToSpace) ils - toCell [] = "" - toCell xs = error $ "toCell encountered " ++ show xs - let separator = " | " - let starter = "| " - let ender = " |" - let rawheaders = map toCell headers - let rawrows = map (map toCell) rows - let maximum' [] = 0 - maximum' xs = maximum xs - let colwidths = map (maximum' . map T.length) $ - transpose (rawheaders:rawrows) - let toHeaderLine len AlignDefault = T.replicate len "-" - toHeaderLine len AlignLeft = ":" <> - T.replicate (max (len - 1) 1) "-" - toHeaderLine len AlignRight = - T.replicate (max (len - 1) 1) "-" <> ":" - toHeaderLine len AlignCenter = ":" <> - T.replicate (max (len - 2) 1) (T.pack "-") <> ":" - let rawheaderlines = zipWith toHeaderLine colwidths aligns - let headerlines = starter <> T.intercalate separator rawheaderlines <> - ender - let padContent (align, w) t' = - let padding = w - T.length t' - halfpadding = padding `div` 2 - in case align of - AlignRight -> T.replicate padding " " <> t' - AlignCenter -> T.replicate halfpadding " " <> t' <> - T.replicate (padding - halfpadding) " " - _ -> t' <> T.replicate padding " " - let toRow xs = starter <> T.intercalate separator - (zipWith padContent (zip aligns colwidths) xs) <> - ender - let table' = toRow rawheaders <> "\n" <> headerlines <> "\n" <> - T.intercalate "\n" (map toRow rawrows) - return (node (CUSTOM_BLOCK table' mempty) [] : - if null capt - then ns - else capt' : ns) - else do -- fall back to raw HTML - s <- writeHtml5String def $! Pandoc nullMeta [t] - return (node (HTML_BLOCK s) [] : ns) -blockToNodes _ Null ns = return ns - -inlinesToNodes :: WriterOptions -> [Inline] -> [Node] -inlinesToNodes opts = foldr (inlineToNodes opts) [] - -inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node] -inlineToNodes opts (Str s) = stringToNodes opts s' - where s' = if isEnabled Ext_smart opts - then unsmartify opts s - else s -inlineToNodes _ Space = (node (TEXT (T.pack " ")) [] :) -inlineToNodes _ LineBreak = (node LINEBREAK [] :) -inlineToNodes opts SoftBreak - | isEnabled Ext_hard_line_breaks opts = (node (TEXT " ") [] :) - | writerWrapText opts == WrapNone = (node (TEXT " ") [] :) - | otherwise = (node SOFTBREAK [] :) -inlineToNodes opts (Emph xs) = (node EMPH (inlinesToNodes opts xs) :) -inlineToNodes opts (Underline xs) - | isEnabled Ext_raw_html opts = - ((node (HTML_INLINE (T.pack "<u>")) [] : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</u>")) []]) ++ ) - | otherwise = (node EMPH (inlinesToNodes opts xs) :) -inlineToNodes opts (Strong xs) = (node STRONG (inlinesToNodes opts xs) :) -inlineToNodes opts (Strikeout xs) - | isEnabled Ext_strikeout opts = (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :) - | isEnabled Ext_raw_html opts = ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) - | otherwise = (inlinesToNodes opts xs ++) -inlineToNodes opts (Superscript xs) = - if isEnabled Ext_raw_html opts - then ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</sup>")) []]) ++ ) - else case traverse toSuperscriptInline xs of - Just xs' | not (writerPreferAscii opts) - -> (inlinesToNodes opts xs' ++) - _ -> - ((node (TEXT (T.pack "^(")) [] : inlinesToNodes opts xs ++ - [node (TEXT (T.pack ")")) []]) ++ ) -inlineToNodes opts (Subscript xs) = - if isEnabled Ext_raw_html opts - then ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</sub>")) []]) ++ ) - else case traverse toSubscriptInline xs of - Just xs' | not (writerPreferAscii opts) - -> (inlinesToNodes opts xs' ++) - _ -> - ((node (TEXT (T.pack "_(")) [] : inlinesToNodes opts xs ++ - [node (TEXT (T.pack ")")) []]) ++ ) -inlineToNodes opts (SmallCaps xs) = - if isEnabled Ext_raw_html opts - then ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) [] - : inlinesToNodes opts xs ++ - [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) - else (inlinesToNodes opts (capitalize xs) ++) -inlineToNodes opts (Link _ ils (url,tit)) = - (node (LINK url tit) (inlinesToNodes opts ils) :) --- title beginning with fig: indicates implicit figure -inlineToNodes opts (Image alt ils (url,T.stripPrefix "fig:" -> Just tit)) = - inlineToNodes opts (Image alt ils (url,tit)) -inlineToNodes opts (Image _ ils (url,tit)) = - (node (IMAGE url tit) (inlinesToNodes opts ils) :) -inlineToNodes opts (RawInline (Format f) xs) - | f == "html" && isEnabled Ext_raw_html opts - = (node (HTML_INLINE xs) [] :) - | (f == "latex" || f == "tex") && isEnabled Ext_raw_tex opts - = (node (CUSTOM_INLINE xs T.empty) [] :) - | f == "markdown" - = (node (CUSTOM_INLINE xs T.empty) [] :) - | otherwise = id -inlineToNodes opts (Quoted qt ils) = - ((node (HTML_INLINE start) [] : - inlinesToNodes opts ils ++ [node (HTML_INLINE end) []]) ++) - where (start, end) = case qt of - SingleQuote - | isEnabled Ext_smart opts -> ("'","'") - | writerPreferAscii opts -> - ("‘", "’") - | otherwise -> ("‘", "’") - DoubleQuote - | isEnabled Ext_smart opts -> ("\"", "\"") - | writerPreferAscii opts -> - ("“", "”") - | otherwise -> ("“", "”") -inlineToNodes _ (Code _ str) = (node (CODE str) [] :) -inlineToNodes opts (Math mt str) = - case writerHTMLMathMethod opts of - WebTeX url -> - let core = inlineToNodes opts - (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str)) - sep = if mt == DisplayMath - then (node LINEBREAK [] :) - else id - in (sep . core . sep) - _ -> - case mt of - InlineMath -> - (node (HTML_INLINE ("\\(" <> str <> "\\)")) [] :) - DisplayMath -> - (node (HTML_INLINE ("\\[" <> str <> "\\]")) [] :) -inlineToNodes opts (Span ("",["emoji"],kvs) [Str s]) = - case lookup "data-emoji" kvs of - Just emojiname | isEnabled Ext_emoji opts -> - (node (TEXT (":" <> emojiname <> ":")) [] :) - _ -> (node (TEXT s) [] :) -inlineToNodes opts (Span attr ils) = - let nodes = inlinesToNodes opts ils - op = tagWithAttributes opts True False "span" attr - in if isEnabled Ext_raw_html opts - then ((node (HTML_INLINE op) [] : nodes ++ - [node (HTML_INLINE (T.pack "</span>")) []]) ++) - else (nodes ++) -inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++) -inlineToNodes _ (Note _) = id -- should not occur --- we remove Note elements in preprocessing - -stringToNodes :: WriterOptions -> Text -> [Node] -> [Node] -stringToNodes opts s - | not (writerPreferAscii opts) = (node (TEXT s) [] :) - | otherwise = step s - where - step input = - let (ascii, rest) = T.span isAscii input - this = node (TEXT ascii) [] - nodes = case T.uncons rest of - Nothing -> id - Just (nonAscii, rest') -> - let escaped = toHtml5Entities (T.singleton nonAscii) - in (node (HTML_INLINE escaped) [] :) . step rest' - in (this :) . nodes - -toSubscriptInline :: Inline -> Maybe Inline -toSubscriptInline Space = Just Space -toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils -toSubscriptInline (Str s) = Str . T.pack <$> traverse toSubscript (T.unpack s) -toSubscriptInline LineBreak = Just LineBreak -toSubscriptInline SoftBreak = Just SoftBreak -toSubscriptInline _ = Nothing - -toSuperscriptInline :: Inline -> Maybe Inline -toSuperscriptInline Space = Just Space -toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils -toSuperscriptInline (Str s) = Str . T.pack <$> traverse toSuperscript (T.unpack s) -toSuperscriptInline LineBreak = Just LineBreak -toSuperscriptInline SoftBreak = Just SoftBreak -toSuperscriptInline _ = Nothing |