diff options
| -rw-r--r-- | README | 83 | ||||
| -rw-r--r-- | pandoc.cabal | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc.hs | 3 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/CommonMark.hs | 178 | 
4 files changed, 225 insertions, 40 deletions
| @@ -158,22 +158,22 @@ General options  :   Specify input format.  *FORMAT* can be `native` (native Haskell),      `json` (JSON version of native AST), `markdown` (pandoc's -    extended markdown), `markdown_strict` (original unextended markdown), -    `markdown_phpextra` (PHP Markdown Extra extended markdown), -    `markdown_github` (github extended markdown), -    `commonmark` (CommonMark markdown), -    `textile` (Textile), `rst` (reStructuredText), `html` (HTML), -    `docbook` (DocBook), `t2t` (txt2tags), `docx` (docx), `epub` (EPUB), -    `opml` (OPML), `org` (Emacs Org-mode), `mediawiki` (MediaWiki markup), -    `twiki` (TWiki markup), `haddock` (Haddock markup), or `latex` (LaTeX). -    If `+lhs` is appended to `markdown`, `rst`, -    `latex`, or `html`, the input will be treated as literate Haskell -    source: see [Literate Haskell support](#literate-haskell-support), -    below. Markdown syntax extensions can be individually enabled or -    disabled by appending `+EXTENSION` or `-EXTENSION` to the format -    name. So, for example, `markdown_strict+footnotes+definition_lists` -    is strict markdown with footnotes and definition lists enabled, -    and `markdown-pipe_tables+hard_line_breaks` is pandoc's markdown +    extended markdown), `markdown_strict` (original unextended +    markdown), `markdown_phpextra` (PHP Markdown Extra extended +    markdown), `markdown_github` (github extended markdown), +    `commonmark` (CommonMark markdown), `textile` (Textile), `rst` +    (reStructuredText), `html` (HTML), `docbook` (DocBook), `t2t` +    (txt2tags), `docx` (docx), `epub` (EPUB), `opml` (OPML), `org` +    (Emacs Org-mode), `mediawiki` (MediaWiki markup), `twiki` (TWiki +    markup), `haddock` (Haddock markup), or `latex` (LaTeX).  If +    `+lhs` is appended to `markdown`, `rst`, `latex`, or `html`, the +    input will be treated as literate Haskell source: see [Literate +    Haskell support](#literate-haskell-support), below. Markdown +    syntax extensions can be individually enabled or disabled by +    appending `+EXTENSION` or `-EXTENSION` to the format name. So, for +    example, `markdown_strict+footnotes+definition_lists` is strict +    markdown with footnotes and definition lists enabled, and +    `markdown-pipe_tables+hard_line_breaks` is pandoc's markdown      without pipe tables and with hard line breaks. See [Pandoc's      markdown](#pandocs-markdown), below, for a list of extensions and      their names. @@ -182,30 +182,33 @@ General options  :   Specify output format.  *FORMAT* can be `native` (native Haskell),      `json` (JSON version of native AST), `plain` (plain text), -    `markdown` (pandoc's extended markdown), `markdown_strict` (original -    unextended markdown), `markdown_phpextra` (PHP Markdown extra -    extended markdown), `markdown_github` (github extended markdown), -    `rst` (reStructuredText), `html` (XHTML 1), `html5` (HTML 5), -    `latex` (LaTeX), `beamer` (LaTeX beamer slide show), -    `context` (ConTeXt), `man` (groff man), `mediawiki` (MediaWiki markup), -    `dokuwiki` (DokuWiki markup), -    `textile` (Textile), `org` (Emacs Org-Mode), `texinfo` (GNU Texinfo), -    `opml` (OPML), `docbook` (DocBook), `opendocument` (OpenDocument), `odt` -    (OpenOffice text document), `docx` (Word docx), `haddock` (Haddock -    markup), `rtf` (rich text format), `epub` (EPUB v2 book), `epub3` -    (EPUB v3), `fb2` (FictionBook2 e-book), `asciidoc` (AsciiDoc), -    `icml` (InDesign ICML), `slidy` (Slidy HTML and javascript slide show), -    `slideous` (Slideous HTML and javascript slide show), `dzslides` -    (DZSlides HTML5 + javascript slide show), `revealjs` (reveal.js -    HTML5 + javascript slide show), `s5` (S5 HTML and javascript slide show), -    or the path of a custom lua writer (see [Custom writers](#custom-writers), -    below). Note that `odt`, `epub`, and `epub3` output will not be directed -    to *stdout*; an output filename must be specified using the `-o/--output` -    option. If `+lhs` is appended to `markdown`, `rst`, `latex`, `beamer`, -    `html`, or `html5`, the output will be rendered as literate Haskell -    source: see [Literate Haskell support](#literate-haskell-support), below. -    Markdown syntax extensions can be individually enabled or disabled by -    appending `+EXTENSION` or `-EXTENSION` to the format name, as described +    `markdown` (pandoc's extended markdown), `markdown_strict` +    (original unextended markdown), `markdown_phpextra` (PHP Markdown +    extra extended markdown), `markdown_github` (github extended +    markdown), `commonmark` (CommonMark markdown), `rst` +    (reStructuredText), `html` (XHTML 1), `html5` (HTML 5), `latex` +    (LaTeX), `beamer` (LaTeX beamer slide show), `context` (ConTeXt), +    `man` (groff man), `mediawiki` (MediaWiki markup), `dokuwiki` +    (DokuWiki markup), `textile` (Textile), `org` (Emacs Org-Mode), +    `texinfo` (GNU Texinfo), `opml` (OPML), `docbook` (DocBook), +    `opendocument` (OpenDocument), `odt` (OpenOffice text document), +    `docx` (Word docx), `haddock` (Haddock markup), `rtf` (rich text +    format), `epub` (EPUB v2 book), `epub3` (EPUB v3), `fb2` +    (FictionBook2 e-book), `asciidoc` (AsciiDoc), `icml` (InDesign +    ICML), `slidy` (Slidy HTML and javascript slide show), `slideous` +    (Slideous HTML and javascript slide show), `dzslides` (DZSlides +    HTML5 + javascript slide show), `revealjs` (reveal.js HTML5 + +    javascript slide show), `s5` (S5 HTML and javascript slide show), +    or the path of a custom lua writer (see [Custom +    writers](#custom-writers), below). Note that `odt`, `epub`, and +    `epub3` output will not be directed to *stdout*; an output +    filename must be specified using the `-o/--output` option. If +    `+lhs` is appended to `markdown`, `rst`, `latex`, `beamer`, +    `html`, or `html5`, the output will be rendered as literate +    Haskell source: see [Literate Haskell +    support](#literate-haskell-support), below.  Markdown syntax +    extensions can be individually enabled or disabled by appending +    `+EXTENSION` or `-EXTENSION` to the format name, as described      above under `-f`.  `-o` *FILE*, `--output=`*FILE* diff --git a/pandoc.cabal b/pandoc.cabal index 7bbdd9f0f..9b1001ace 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -342,6 +342,7 @@ Library                     Text.Pandoc.Writers.Texinfo,                     Text.Pandoc.Writers.Man,                     Text.Pandoc.Writers.Markdown, +                   Text.Pandoc.Writers.CommonMark,                     Text.Pandoc.Writers.Haddock,                     Text.Pandoc.Writers.RST,                     Text.Pandoc.Writers.Org, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 3387a7d64..dd361f8d7 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -112,6 +112,7 @@ module Text.Pandoc                 , writeOrg                 , writeAsciiDoc                 , writeHaddock +               , writeCommonMark                 , writeCustom                 -- * Rendering templates and default templates                 , module Text.Pandoc.Templates @@ -165,6 +166,7 @@ import Text.Pandoc.Writers.Textile  import Text.Pandoc.Writers.Org  import Text.Pandoc.Writers.AsciiDoc  import Text.Pandoc.Writers.Haddock +import Text.Pandoc.Writers.CommonMark  import Text.Pandoc.Writers.Custom  import Text.Pandoc.Templates  import Text.Pandoc.Options @@ -305,6 +307,7 @@ writers = [    ,("org"          , PureStringWriter writeOrg)    ,("asciidoc"     , PureStringWriter writeAsciiDoc)    ,("haddock"      , PureStringWriter writeHaddock) +  ,("commonmark"   , PureStringWriter writeCommonMark)    ]  getDefaultExtensions :: String -> Set Extension diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs new file mode 100644 index 000000000..706b27175 --- /dev/null +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -0,0 +1,178 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA +-} + +{- | +   Module      : Text.Pandoc.Writers.CommonMark +   Copyright   : Copyright (C) 2015 John MacFarlane +   License     : GNU GPL, version 2 or above + +   Maintainer  : John MacFarlane <jgm@berkeley.edu> +   Stability   : alpha +   Portability : portable + +Conversion of 'Pandoc' documents to CommonMark. + +CommonMark:  <http://commonmark.org> +-} +module Text.Pandoc.Writers.CommonMark (writeCommonMark) where + +import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Definition +import Text.Pandoc.Shared (isTightList) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Options +import CMark +import qualified Data.Text as T +import Control.Monad.Identity (runIdentity, Identity) +import Control.Monad.State (runState, State, modify, get) +import Text.Pandoc.Walk (walkM) + +-- | Convert Pandoc to CommonMark. +writeCommonMark :: WriterOptions -> Pandoc -> String +writeCommonMark opts (Pandoc meta blocks) = rendered +  where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes') +        (blocks', notes) = runState (walkM processNotes blocks) [] +        notes' = if null notes +                    then [] +                    else [OrderedList (1, Decimal, Period) $ reverse notes] +        metadata = runIdentity $ metaToJSON opts +                     (blocksToCommonMark opts) +                     (inlinesToCommonMark opts) +                     meta +        context = defField "body" main $ metadata +        rendered = if writerStandalone opts +                      then renderTemplate' (writerTemplate opts) context +                      else main + +processNotes :: Inline -> State [[Block]] Inline +processNotes (Note bs) = do +  modify (bs :) +  notes <- get +  return $ Str $ "[" ++ show (length notes) ++ "]" +processNotes x = return x + +node :: NodeType -> [Node] -> Node +node = Node Nothing + +blocksToCommonMark :: WriterOptions -> [Block] -> Identity String +blocksToCommonMark opts bs = return $ +  T.unpack $ nodeToCommonmark cmarkOpts colwidth +           $ node DOCUMENT (blocksToNodes bs) +   where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] +         colwidth = if writerWrapText opts +                       then writerColumns opts +                       else 0 + +inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String +inlinesToCommonMark opts ils = return $ +  T.unpack $ nodeToCommonmark cmarkOpts colwidth +           $ node PARAGRAPH (inlinesToNodes ils) +   where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] +         colwidth = if writerWrapText opts +                       then writerColumns opts +                       else 0 + +blocksToNodes :: [Block] -> [Node] +blocksToNodes = foldr blockToNodes [] + +blockToNodes :: Block -> [Node] -> [Node] +blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (CodeBlock (_,classes,_) xs) = +  (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :) +blockToNodes (RawBlock fmt xs) +  | fmt == Format "html" = (node (HTML (T.pack xs)) [] :) +  | otherwise = id +blockToNodes (BlockQuote bs) = +  (node BLOCK_QUOTE (blocksToNodes bs) :) +blockToNodes (BulletList items) = +  (node (LIST ListAttributes{ +               listType = BULLET_LIST, +               listDelim = PERIOD_DELIM, +               listTight = isTightList items, +               listStart = 1 }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes (OrderedList (start, _sty, delim) items) = +  (node (LIST ListAttributes{ +               listType = ORDERED_LIST, +               listDelim = case delim of +                                OneParen  -> PAREN_DELIM +                                TwoParens -> PAREN_DELIM +                                _         -> PERIOD_DELIM, +               listTight = isTightList items, +               listStart = start }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes HorizontalRule = (node HRULE [] :) +blockToNodes (Header lev _ ils) = (node (HEADER lev) (inlinesToNodes ils) :) +blockToNodes (Div _ bs) = (blocksToNodes bs ++) +blockToNodes (DefinitionList items) = blockToNodes (BulletList items') +  where items' = map dlToBullet items +        dlToBullet (term, ((Para xs : ys) : zs))  = +          Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs +        dlToBullet (term, ((Plain xs : ys) : zs)) = +          Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs +        dlToBullet (term, xs) = +          Para term : concat xs +blockToNodes t@(Table _ _ _ _ _) = +  (node (HTML (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :) +blockToNodes Null = id + +inlinesToNodes :: [Inline] -> [Node] +inlinesToNodes  = foldr inlineToNodes [] + +inlineToNodes :: Inline -> [Node] -> [Node] +inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :) +inlineToNodes Space   = (node (TEXT (T.pack " ")) [] :) +inlineToNodes LineBreak = (node LINEBREAK [] :) +inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :) +inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :) +inlineToNodes (Strikeout xs) = +  ((node (INLINE_HTML (T.pack "<s>")) [] : inlinesToNodes xs ++ +   [node (INLINE_HTML (T.pack "</s>")) []]) ++ ) +inlineToNodes (Superscript xs) = +  ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++ +   [node (INLINE_HTML (T.pack "</sub>")) []]) ++ ) +inlineToNodes (Subscript xs) = +  ((node (INLINE_HTML (T.pack "<sup>")) [] : inlinesToNodes xs ++ +   [node (INLINE_HTML (T.pack "</sup>")) []]) ++ ) +inlineToNodes (SmallCaps xs) = +  ((node (INLINE_HTML (T.pack "<span style=\"font-variant:small-caps;\">")) [] +    : inlinesToNodes xs ++ +    [node (INLINE_HTML (T.pack "</span>")) []]) ++ ) +inlineToNodes (Link ils (url,tit)) = +  (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (Image ils (url,tit)) = +  (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (RawInline fmt xs) +  | fmt == Format "html" = (node (INLINE_HTML (T.pack xs)) [] :) +  | otherwise = id +inlineToNodes (Quoted qt ils) = +  ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++) +  where (start, end) = case qt of +                          SingleQuote -> (T.pack "‘", T.pack "’") +                          DoubleQuote -> (T.pack "“", T.pack "”") +inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :) +inlineToNodes (Math mt str) = +  case mt of +    InlineMath  -> +      (node (INLINE_HTML (T.pack ("\\(" ++ str ++ "\\)"))) [] :) +    DisplayMath -> +      (node (INLINE_HTML (T.pack ("\\[" ++ str ++ "\\]"))) [] :) +inlineToNodes (Span _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Note _) = id -- should not occur +-- we remove Note elements in preprocessing | 
