diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Readers.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/CommonMark.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/GFM.hs | 185 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/GFM.hs | 189 |
5 files changed, 382 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 78a2038a4..996412d48 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -45,6 +45,7 @@ module Text.Pandoc.Readers , readOdt , readMarkdown , readCommonMark + , readGFM , readMediaWiki , readVimwiki , readRST @@ -76,6 +77,7 @@ import Text.Pandoc.Error import Text.Pandoc.Extensions import Text.Pandoc.Options import Text.Pandoc.Readers.CommonMark +import Text.Pandoc.Readers.GFM import Text.Pandoc.Readers.DocBook import Text.Pandoc.Readers.Docx import Text.Pandoc.Readers.EPUB @@ -117,6 +119,7 @@ readers = [ ("native" , TextReader readNative) ,("markdown_github" , TextReader readMarkdown) ,("markdown_mmd", TextReader readMarkdown) ,("commonmark" , TextReader readCommonMark) + ,("gfm" , TextReader readGFM) ,("rst" , TextReader readRST) ,("mediawiki" , TextReader readMediaWiki) ,("vimwiki" , TextReader readVimwiki) diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 3c62f8db5..a0ea9f3e8 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -44,8 +44,8 @@ readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = return $ nodeToPandoc $ commonmarkToNode opts' s where opts' = if extensionEnabled Ext_smart (readerExtensions opts) - then [optNormalize, optSmart] - else [optNormalize] + then [optSmart] + else [] nodeToPandoc :: Node -> Pandoc nodeToPandoc (Node _ DOCUMENT nodes) = diff --git a/src/Text/Pandoc/Readers/GFM.hs b/src/Text/Pandoc/Readers/GFM.hs new file mode 100644 index 000000000..1cdc47b98 --- /dev/null +++ b/src/Text/Pandoc/Readers/GFM.hs @@ -0,0 +1,185 @@ +{- +Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.GFM + Copyright : Copyright (C) 2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of GitHub flavored CommonMark text to 'Pandoc' document. + +CommonMark is a strongly specified variant of Markdown: http://commonmark.org. +-} +module Text.Pandoc.Readers.GFM (readGFM) +where + +import CMarkGFM +import Data.List (groupBy) +import Data.Text (Text, unpack) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Options + +-- | Parse a CommonMark formatted string into a 'Pandoc' structure. +readGFM :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readGFM opts s = return $ + nodeToPandoc $ commonmarkToNode opts' exts s + where opts' = [optSmart | enabled Ext_smart] + exts = [extStrikethrough, extTable, extAutolink] + enabled x = extensionEnabled x (readerExtensions opts) + +nodeToPandoc :: Node -> Pandoc +nodeToPandoc (Node _ DOCUMENT nodes) = + Pandoc nullMeta $ foldr addBlock [] nodes +nodeToPandoc n = -- shouldn't happen + Pandoc nullMeta $ foldr addBlock [] [n] + +addBlocks :: [Node] -> [Block] +addBlocks = foldr addBlock [] + +addBlock :: Node -> [Block] -> [Block] +addBlock (Node _ PARAGRAPH nodes) = + (Para (addInlines nodes) :) +addBlock (Node _ THEMATIC_BREAK _) = + (HorizontalRule :) +addBlock (Node _ BLOCK_QUOTE nodes) = + (BlockQuote (addBlocks nodes) :) +addBlock (Node _ (HTML_BLOCK t) _) = + (RawBlock (Format "html") (unpack t) :) +-- Note: the cmark parser will never generate CUSTOM_BLOCK, +-- so we don't need to handle it: +addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) = + id +addBlock (Node _ (CODE_BLOCK info t) _) = + (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) +addBlock (Node _ (HEADING lev) nodes) = + (Header lev ("",[],[]) (addInlines nodes) :) +addBlock (Node _ (LIST listAttrs) nodes) = + (constructor (map (setTightness . addBlocks . children) nodes) :) + where constructor = case listType listAttrs of + BULLET_LIST -> BulletList + ORDERED_LIST -> OrderedList + (start, DefaultStyle, delim) + start = listStart listAttrs + setTightness = if listTight listAttrs + then map paraToPlain + else id + paraToPlain (Para xs) = Plain (xs) + paraToPlain x = x + delim = case listDelim listAttrs of + PERIOD_DELIM -> Period + PAREN_DELIM -> OneParen +addBlock (Node _ (TABLE alignments) nodes) = do + (Table [] aligns widths headers rows :) + where aligns = map fromTableCellAlignment alignments + fromTableCellAlignment NoAlignment = AlignDefault + fromTableCellAlignment LeftAligned = AlignLeft + fromTableCellAlignment RightAligned = AlignRight + fromTableCellAlignment CenterAligned = AlignCenter + widths = replicate numcols 0.0 + numcols = if null rows' + then 0 + else maximum $ map length rows' + rows' = map toRow $ filter isRow nodes + (headers, rows) = case rows' of + (h:rs) -> (h, rs) + [] -> ([], []) + isRow (Node _ TABLE_ROW _) = True + isRow _ = False + isCell (Node _ TABLE_CELL _) = True + isCell _ = False + toRow (Node _ TABLE_ROW ns) = map toCell $ filter isCell ns + toRow (Node _ t _) = error $ "toRow encountered non-row " ++ show t + toCell (Node _ TABLE_CELL []) = [] + toCell (Node _ TABLE_CELL (n:ns)) + | isBlockNode n = addBlocks (n:ns) + | otherwise = [Plain (addInlines (n:ns))] + toCell (Node _ t _) = error $ "toCell encountered non-cell " ++ show t +addBlock (Node _ TABLE_ROW _) = id -- handled in TABLE +addBlock (Node _ TABLE_CELL _) = id -- handled in TABLE +addBlock _ = id + +isBlockNode :: Node -> Bool +isBlockNode (Node _ nodetype _) = + case nodetype of + DOCUMENT -> True + THEMATIC_BREAK -> True + PARAGRAPH -> True + BLOCK_QUOTE -> True + HTML_BLOCK _ -> True + CUSTOM_BLOCK _ _ -> True + CODE_BLOCK _ _ -> True + HEADING _ -> True + LIST _ -> True + ITEM -> True + TEXT _ -> False + SOFTBREAK -> False + LINEBREAK -> False + HTML_INLINE _ -> False + CUSTOM_INLINE _ _ -> False + CODE _ -> False + EMPH -> False + STRONG -> False + LINK _ _ -> False + IMAGE _ _ -> False + STRIKETHROUGH -> False + TABLE _ -> False + TABLE_ROW -> False + TABLE_CELL -> False + +children :: Node -> [Node] +children (Node _ _ ns) = ns + +addInlines :: [Node] -> [Inline] +addInlines = foldr addInline [] + +addInline :: Node -> [Inline] -> [Inline] +addInline (Node _ (TEXT t) _) = (map toinl clumps ++) + where raw = unpack t + clumps = groupBy samekind raw + samekind ' ' ' ' = True + samekind ' ' _ = False + samekind _ ' ' = False + samekind _ _ = True + toinl (' ':_) = Space + toinl xs = Str xs +addInline (Node _ LINEBREAK _) = (LineBreak :) +addInline (Node _ SOFTBREAK _) = (SoftBreak :) +addInline (Node _ (HTML_INLINE t) _) = + (RawInline (Format "html") (unpack t) :) +-- Note: the cmark parser will never generate CUSTOM_BLOCK, +-- so we don't need to handle it: +addInline (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) = + id +addInline (Node _ (CODE t) _) = + (Code ("",[],[]) (unpack t) :) +addInline (Node _ EMPH nodes) = + (Emph (addInlines nodes) :) +addInline (Node _ STRONG nodes) = + (Strong (addInlines nodes) :) +addInline (Node _ STRIKETHROUGH nodes) = + (Strikeout (addInlines nodes) :) +addInline (Node _ (LINK url title) nodes) = + (Link nullAttr (addInlines nodes) (unpack url, unpack title) :) +addInline (Node _ (IMAGE url title) nodes) = + (Image nullAttr (addInlines nodes) (unpack url, unpack title) :) +addInline _ = id diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index 6dfc1a7b3..c10d9149b 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -38,6 +38,7 @@ module Text.Pandoc.Writers , writeAsciiDoc , writeBeamer , writeCommonMark + , writeGFM , writeConTeXt , writeCustom , writeDZSlides @@ -89,6 +90,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Writers.CommonMark +import Text.Pandoc.Writers.GFM import Text.Pandoc.Writers.ConTeXt import Text.Pandoc.Writers.Custom import Text.Pandoc.Writers.Docbook @@ -172,6 +174,7 @@ writers = [ ,("asciidoc" , TextWriter writeAsciiDoc) ,("haddock" , TextWriter writeHaddock) ,("commonmark" , TextWriter writeCommonMark) + ,("gfm" , TextWriter writeGFM) ,("tei" , TextWriter writeTEI) ,("muse" , TextWriter writeMuse) ] diff --git a/src/Text/Pandoc/Writers/GFM.hs b/src/Text/Pandoc/Writers/GFM.hs new file mode 100644 index 000000000..d9806e2fa --- /dev/null +++ b/src/Text/Pandoc/Writers/GFM.hs @@ -0,0 +1,189 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.GFM + Copyright : Copyright (C) 2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to CommonMark. + +CommonMark: <http://commonmark.org> +-} +module Text.Pandoc.Writers.GFM (writeGFM) where + +import CMarkGFM +import Control.Monad.State.Strict (State, get, modify, runState) +import Data.Foldable (foldrM) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Shared (isTightList, linesToPara) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Walk (walkM) +import Text.Pandoc.Writers.HTML (writeHtml5String) +import Text.Pandoc.Writers.Shared + +-- | Convert Pandoc to GitHub flavored CommonMark. +writeGFM :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeGFM opts (Pandoc meta blocks) = do + let (blocks', notes) = runState (walkM processNotes blocks) [] + notes' = if null notes + then [] + else [OrderedList (1, Decimal, Period) $ reverse notes] + main <- blocksToCommonMark opts (blocks' ++ notes') + metadata <- metaToJSON opts + (blocksToCommonMark opts) + (inlinesToCommonMark opts) + meta + let context = defField "body" main $ metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context + +processNotes :: Inline -> State [[Block]] Inline +processNotes (Note bs) = do + modify (bs :) + notes <- get + return $ Str $ "[" ++ show (length notes) ++ "]" +processNotes x = return x + +node :: NodeType -> [Node] -> Node +node = Node Nothing + +blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m Text +blocksToCommonMark opts bs = do + let cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + nodes <- blocksToNodes bs + return $ + nodeToCommonmark cmarkOpts colwidth $ + node DOCUMENT nodes + +inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m Text +inlinesToCommonMark opts ils = return $ + nodeToCommonmark cmarkOpts colwidth $ node PARAGRAPH (inlinesToNodes ils) + where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + +blocksToNodes :: PandocMonad m => [Block] -> m [Node] +blocksToNodes = foldrM blockToNodes [] + +blockToNodes :: PandocMonad m => Block -> [Node] -> m [Node] +blockToNodes (Plain xs) ns = return (node PARAGRAPH (inlinesToNodes xs) : ns) +blockToNodes (Para xs) ns = return (node PARAGRAPH (inlinesToNodes xs) : ns) +blockToNodes (LineBlock lns) ns = blockToNodes (linesToPara lns) ns +blockToNodes (CodeBlock (_,classes,_) xs) ns = return $ + (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] : ns) +blockToNodes (RawBlock fmt xs) ns + | fmt == Format "html" = return (node (HTML_BLOCK (T.pack xs)) [] : ns) + | otherwise = return (node (CUSTOM_BLOCK (T.pack xs) (T.empty)) [] : ns) +blockToNodes (BlockQuote bs) ns = do + nodes <- blocksToNodes bs + return (node BLOCK_QUOTE nodes : ns) +blockToNodes (BulletList items) ns = do + nodes <- mapM blocksToNodes items + return (node (LIST ListAttributes{ + listType = BULLET_LIST, + listDelim = PERIOD_DELIM, + listTight = isTightList items, + listStart = 1 }) (map (node ITEM) nodes) : ns) +blockToNodes (OrderedList (start, _sty, delim) items) ns = do + nodes <- mapM blocksToNodes items + return (node (LIST ListAttributes{ + listType = ORDERED_LIST, + listDelim = case delim of + OneParen -> PAREN_DELIM + TwoParens -> PAREN_DELIM + _ -> PERIOD_DELIM, + listTight = isTightList items, + listStart = start }) (map (node ITEM) nodes) : ns) +blockToNodes HorizontalRule ns = return (node THEMATIC_BREAK [] : ns) +blockToNodes (Header lev _ ils) ns = return (node (HEADING lev) (inlinesToNodes ils) : ns) +blockToNodes (Div _ bs) ns = do + nodes <- blocksToNodes bs + return (nodes ++ ns) +blockToNodes (DefinitionList items) ns = blockToNodes (BulletList items') ns + where items' = map dlToBullet items + dlToBullet (term, ((Para xs : ys) : zs)) = + Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, ((Plain xs : ys) : zs)) = + Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, xs) = + Para term : concat xs +blockToNodes t@(Table _ _ _ _ _) ns = do + s <- writeHtml5String def $! Pandoc nullMeta [t] + return (node (HTML_BLOCK s) [] : ns) +blockToNodes Null ns = return ns + +inlinesToNodes :: [Inline] -> [Node] +inlinesToNodes = foldr inlineToNodes [] + +inlineToNodes :: Inline -> [Node] -> [Node] +inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :) +inlineToNodes Space = (node (TEXT (T.pack " ")) [] :) +inlineToNodes LineBreak = (node LINEBREAK [] :) +inlineToNodes SoftBreak = (node SOFTBREAK [] :) +inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :) +inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :) +inlineToNodes (Strikeout xs) = + ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes xs ++ + [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) +inlineToNodes (Superscript xs) = + ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes xs ++ + [node (HTML_INLINE (T.pack "</sup>")) []]) ++ ) +inlineToNodes (Subscript xs) = + ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes xs ++ + [node (HTML_INLINE (T.pack "</sub>")) []]) ++ ) +inlineToNodes (SmallCaps xs) = + ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) [] + : inlinesToNodes xs ++ + [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) +inlineToNodes (Link _ ils (url,tit)) = + (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (Image _ ils (url,tit)) = + (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (RawInline fmt xs) + | fmt == Format "html" = (node (HTML_INLINE (T.pack xs)) [] :) + | otherwise = (node (CUSTOM_INLINE (T.pack xs) (T.empty)) [] :) +inlineToNodes (Quoted qt ils) = + ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++) + where (start, end) = case qt of + SingleQuote -> (T.pack "‘", T.pack "’") + DoubleQuote -> (T.pack "“", T.pack "”") +inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :) +inlineToNodes (Math mt str) = + case mt of + InlineMath -> + (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :) + DisplayMath -> + (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :) +inlineToNodes (Span _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Note _) = id -- should not occur +-- we remove Note elements in preprocessing |