diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-08-07 23:10:17 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-08-07 23:11:14 -0700 |
commit | 91c989d6221991a47b1f0a9d180ccf3ce96b3f02 (patch) | |
tree | 0ae78b62dee006d93821ac909eaca4c12159c302 /src/Text/Pandoc/Readers | |
parent | 6a9db1fde3b592a843b5fa6ce843a46cb3163968 (diff) | |
download | pandoc-91c989d6221991a47b1f0a9d180ccf3ce96b3f02.tar.gz |
Remove GFM modules; use CMarkGFM for both gfm and commonmark.
We no longer have a separate readGFM and writeGFM;
instead, we'll use readCommonMark and writeCommonMark
with githubExtensions.
It remains to implement these extensions conditionally.
Closes #3841.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/CommonMark.hs | 69 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/GFM.hs | 185 |
2 files changed, 63 insertions, 191 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index a0ea9f3e8..9a67c8597 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -32,7 +32,7 @@ CommonMark is a strongly specified variant of Markdown: http://commonmark.org. module Text.Pandoc.Readers.CommonMark (readCommonMark) where -import CMark +import CMarkGFM import Data.List (groupBy) import Data.Text (Text, unpack) import Text.Pandoc.Class (PandocMonad) @@ -42,10 +42,10 @@ import Text.Pandoc.Options -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = return $ - nodeToPandoc $ commonmarkToNode opts' s - where opts' = if extensionEnabled Ext_smart (readerExtensions opts) - then [optSmart] - else [] + nodeToPandoc $ commonmarkToNode opts' exts s + where opts' = [optSmart | enabled Ext_smart] + exts = [extStrikethrough, extTable, extAutolink] + enabled x = extensionEnabled x (readerExtensions opts) nodeToPandoc :: Node -> Pandoc nodeToPandoc (Node _ DOCUMENT nodes) = @@ -88,9 +88,64 @@ addBlock (Node _ (LIST listAttrs) nodes) = delim = case listDelim listAttrs of PERIOD_DELIM -> Period PAREN_DELIM -> OneParen -addBlock (Node _ ITEM _) = id -- handled in LIST +addBlock (Node _ (TABLE alignments) nodes) = do + (Table [] aligns widths headers rows :) + where aligns = map fromTableCellAlignment alignments + fromTableCellAlignment NoAlignment = AlignDefault + fromTableCellAlignment LeftAligned = AlignLeft + fromTableCellAlignment RightAligned = AlignRight + fromTableCellAlignment CenterAligned = AlignCenter + widths = replicate numcols 0.0 + numcols = if null rows' + then 0 + else maximum $ map length rows' + rows' = map toRow $ filter isRow nodes + (headers, rows) = case rows' of + (h:rs) -> (h, rs) + [] -> ([], []) + isRow (Node _ TABLE_ROW _) = True + isRow _ = False + isCell (Node _ TABLE_CELL _) = True + isCell _ = False + toRow (Node _ TABLE_ROW ns) = map toCell $ filter isCell ns + toRow (Node _ t _) = error $ "toRow encountered non-row " ++ show t + toCell (Node _ TABLE_CELL []) = [] + toCell (Node _ TABLE_CELL (n:ns)) + | isBlockNode n = addBlocks (n:ns) + | otherwise = [Plain (addInlines (n:ns))] + toCell (Node _ t _) = error $ "toCell encountered non-cell " ++ show t +addBlock (Node _ TABLE_ROW _) = id -- handled in TABLE +addBlock (Node _ TABLE_CELL _) = id -- handled in TABLE addBlock _ = id +isBlockNode :: Node -> Bool +isBlockNode (Node _ nodetype _) = + case nodetype of + DOCUMENT -> True + THEMATIC_BREAK -> True + PARAGRAPH -> True + BLOCK_QUOTE -> True + HTML_BLOCK _ -> True + CUSTOM_BLOCK _ _ -> True + CODE_BLOCK _ _ -> True + HEADING _ -> True + LIST _ -> True + ITEM -> True + TEXT _ -> False + SOFTBREAK -> False + LINEBREAK -> False + HTML_INLINE _ -> False + CUSTOM_INLINE _ _ -> False + CODE _ -> False + EMPH -> False + STRONG -> False + LINK _ _ -> False + IMAGE _ _ -> False + STRIKETHROUGH -> False + TABLE _ -> False + TABLE_ROW -> False + TABLE_CELL -> False + children :: Node -> [Node] children (Node _ _ ns) = ns @@ -121,6 +176,8 @@ addInline (Node _ EMPH nodes) = (Emph (addInlines nodes) :) addInline (Node _ STRONG nodes) = (Strong (addInlines nodes) :) +addInline (Node _ STRIKETHROUGH nodes) = + (Strikeout (addInlines nodes) :) addInline (Node _ (LINK url title) nodes) = (Link nullAttr (addInlines nodes) (unpack url, unpack title) :) addInline (Node _ (IMAGE url title) nodes) = diff --git a/src/Text/Pandoc/Readers/GFM.hs b/src/Text/Pandoc/Readers/GFM.hs deleted file mode 100644 index 1cdc47b98..000000000 --- a/src/Text/Pandoc/Readers/GFM.hs +++ /dev/null @@ -1,185 +0,0 @@ -{- -Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.GFM - Copyright : Copyright (C) 2017 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of GitHub flavored CommonMark text to 'Pandoc' document. - -CommonMark is a strongly specified variant of Markdown: http://commonmark.org. --} -module Text.Pandoc.Readers.GFM (readGFM) -where - -import CMarkGFM -import Data.List (groupBy) -import Data.Text (Text, unpack) -import Text.Pandoc.Class (PandocMonad) -import Text.Pandoc.Definition -import Text.Pandoc.Options - --- | Parse a CommonMark formatted string into a 'Pandoc' structure. -readGFM :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readGFM opts s = return $ - nodeToPandoc $ commonmarkToNode opts' exts s - where opts' = [optSmart | enabled Ext_smart] - exts = [extStrikethrough, extTable, extAutolink] - enabled x = extensionEnabled x (readerExtensions opts) - -nodeToPandoc :: Node -> Pandoc -nodeToPandoc (Node _ DOCUMENT nodes) = - Pandoc nullMeta $ foldr addBlock [] nodes -nodeToPandoc n = -- shouldn't happen - Pandoc nullMeta $ foldr addBlock [] [n] - -addBlocks :: [Node] -> [Block] -addBlocks = foldr addBlock [] - -addBlock :: Node -> [Block] -> [Block] -addBlock (Node _ PARAGRAPH nodes) = - (Para (addInlines nodes) :) -addBlock (Node _ THEMATIC_BREAK _) = - (HorizontalRule :) -addBlock (Node _ BLOCK_QUOTE nodes) = - (BlockQuote (addBlocks nodes) :) -addBlock (Node _ (HTML_BLOCK t) _) = - (RawBlock (Format "html") (unpack t) :) --- Note: the cmark parser will never generate CUSTOM_BLOCK, --- so we don't need to handle it: -addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) = - id -addBlock (Node _ (CODE_BLOCK info t) _) = - (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) -addBlock (Node _ (HEADING lev) nodes) = - (Header lev ("",[],[]) (addInlines nodes) :) -addBlock (Node _ (LIST listAttrs) nodes) = - (constructor (map (setTightness . addBlocks . children) nodes) :) - where constructor = case listType listAttrs of - BULLET_LIST -> BulletList - ORDERED_LIST -> OrderedList - (start, DefaultStyle, delim) - start = listStart listAttrs - setTightness = if listTight listAttrs - then map paraToPlain - else id - paraToPlain (Para xs) = Plain (xs) - paraToPlain x = x - delim = case listDelim listAttrs of - PERIOD_DELIM -> Period - PAREN_DELIM -> OneParen -addBlock (Node _ (TABLE alignments) nodes) = do - (Table [] aligns widths headers rows :) - where aligns = map fromTableCellAlignment alignments - fromTableCellAlignment NoAlignment = AlignDefault - fromTableCellAlignment LeftAligned = AlignLeft - fromTableCellAlignment RightAligned = AlignRight - fromTableCellAlignment CenterAligned = AlignCenter - widths = replicate numcols 0.0 - numcols = if null rows' - then 0 - else maximum $ map length rows' - rows' = map toRow $ filter isRow nodes - (headers, rows) = case rows' of - (h:rs) -> (h, rs) - [] -> ([], []) - isRow (Node _ TABLE_ROW _) = True - isRow _ = False - isCell (Node _ TABLE_CELL _) = True - isCell _ = False - toRow (Node _ TABLE_ROW ns) = map toCell $ filter isCell ns - toRow (Node _ t _) = error $ "toRow encountered non-row " ++ show t - toCell (Node _ TABLE_CELL []) = [] - toCell (Node _ TABLE_CELL (n:ns)) - | isBlockNode n = addBlocks (n:ns) - | otherwise = [Plain (addInlines (n:ns))] - toCell (Node _ t _) = error $ "toCell encountered non-cell " ++ show t -addBlock (Node _ TABLE_ROW _) = id -- handled in TABLE -addBlock (Node _ TABLE_CELL _) = id -- handled in TABLE -addBlock _ = id - -isBlockNode :: Node -> Bool -isBlockNode (Node _ nodetype _) = - case nodetype of - DOCUMENT -> True - THEMATIC_BREAK -> True - PARAGRAPH -> True - BLOCK_QUOTE -> True - HTML_BLOCK _ -> True - CUSTOM_BLOCK _ _ -> True - CODE_BLOCK _ _ -> True - HEADING _ -> True - LIST _ -> True - ITEM -> True - TEXT _ -> False - SOFTBREAK -> False - LINEBREAK -> False - HTML_INLINE _ -> False - CUSTOM_INLINE _ _ -> False - CODE _ -> False - EMPH -> False - STRONG -> False - LINK _ _ -> False - IMAGE _ _ -> False - STRIKETHROUGH -> False - TABLE _ -> False - TABLE_ROW -> False - TABLE_CELL -> False - -children :: Node -> [Node] -children (Node _ _ ns) = ns - -addInlines :: [Node] -> [Inline] -addInlines = foldr addInline [] - -addInline :: Node -> [Inline] -> [Inline] -addInline (Node _ (TEXT t) _) = (map toinl clumps ++) - where raw = unpack t - clumps = groupBy samekind raw - samekind ' ' ' ' = True - samekind ' ' _ = False - samekind _ ' ' = False - samekind _ _ = True - toinl (' ':_) = Space - toinl xs = Str xs -addInline (Node _ LINEBREAK _) = (LineBreak :) -addInline (Node _ SOFTBREAK _) = (SoftBreak :) -addInline (Node _ (HTML_INLINE t) _) = - (RawInline (Format "html") (unpack t) :) --- Note: the cmark parser will never generate CUSTOM_BLOCK, --- so we don't need to handle it: -addInline (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) = - id -addInline (Node _ (CODE t) _) = - (Code ("",[],[]) (unpack t) :) -addInline (Node _ EMPH nodes) = - (Emph (addInlines nodes) :) -addInline (Node _ STRONG nodes) = - (Strong (addInlines nodes) :) -addInline (Node _ STRIKETHROUGH nodes) = - (Strikeout (addInlines nodes) :) -addInline (Node _ (LINK url title) nodes) = - (Link nullAttr (addInlines nodes) (unpack url, unpack title) :) -addInline (Node _ (IMAGE url title) nodes) = - (Image nullAttr (addInlines nodes) (unpack url, unpack title) :) -addInline _ = id |