aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs4
-rw-r--r--src/Text/Pandoc/Readers/GFM.hs185
2 files changed, 187 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 3c62f8db5..a0ea9f3e8 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -44,8 +44,8 @@ readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readCommonMark opts s = return $
nodeToPandoc $ commonmarkToNode opts' s
where opts' = if extensionEnabled Ext_smart (readerExtensions opts)
- then [optNormalize, optSmart]
- else [optNormalize]
+ then [optSmart]
+ else []
nodeToPandoc :: Node -> Pandoc
nodeToPandoc (Node _ DOCUMENT nodes) =
diff --git a/src/Text/Pandoc/Readers/GFM.hs b/src/Text/Pandoc/Readers/GFM.hs
new file mode 100644
index 000000000..1cdc47b98
--- /dev/null
+++ b/src/Text/Pandoc/Readers/GFM.hs
@@ -0,0 +1,185 @@
+{-
+Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.GFM
+ Copyright : Copyright (C) 2017 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of GitHub flavored CommonMark text to 'Pandoc' document.
+
+CommonMark is a strongly specified variant of Markdown: http://commonmark.org.
+-}
+module Text.Pandoc.Readers.GFM (readGFM)
+where
+
+import CMarkGFM
+import Data.List (groupBy)
+import Data.Text (Text, unpack)
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+
+-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
+readGFM :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readGFM opts s = return $
+ nodeToPandoc $ commonmarkToNode opts' exts s
+ where opts' = [optSmart | enabled Ext_smart]
+ exts = [extStrikethrough, extTable, extAutolink]
+ enabled x = extensionEnabled x (readerExtensions opts)
+
+nodeToPandoc :: Node -> Pandoc
+nodeToPandoc (Node _ DOCUMENT nodes) =
+ Pandoc nullMeta $ foldr addBlock [] nodes
+nodeToPandoc n = -- shouldn't happen
+ Pandoc nullMeta $ foldr addBlock [] [n]
+
+addBlocks :: [Node] -> [Block]
+addBlocks = foldr addBlock []
+
+addBlock :: Node -> [Block] -> [Block]
+addBlock (Node _ PARAGRAPH nodes) =
+ (Para (addInlines nodes) :)
+addBlock (Node _ THEMATIC_BREAK _) =
+ (HorizontalRule :)
+addBlock (Node _ BLOCK_QUOTE nodes) =
+ (BlockQuote (addBlocks nodes) :)
+addBlock (Node _ (HTML_BLOCK t) _) =
+ (RawBlock (Format "html") (unpack t) :)
+-- Note: the cmark parser will never generate CUSTOM_BLOCK,
+-- so we don't need to handle it:
+addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
+ id
+addBlock (Node _ (CODE_BLOCK info t) _) =
+ (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
+addBlock (Node _ (HEADING lev) nodes) =
+ (Header lev ("",[],[]) (addInlines nodes) :)
+addBlock (Node _ (LIST listAttrs) nodes) =
+ (constructor (map (setTightness . addBlocks . children) nodes) :)
+ where constructor = case listType listAttrs of
+ BULLET_LIST -> BulletList
+ ORDERED_LIST -> OrderedList
+ (start, DefaultStyle, delim)
+ start = listStart listAttrs
+ setTightness = if listTight listAttrs
+ then map paraToPlain
+ else id
+ paraToPlain (Para xs) = Plain (xs)
+ paraToPlain x = x
+ delim = case listDelim listAttrs of
+ PERIOD_DELIM -> Period
+ PAREN_DELIM -> OneParen
+addBlock (Node _ (TABLE alignments) nodes) = do
+ (Table [] aligns widths headers rows :)
+ where aligns = map fromTableCellAlignment alignments
+ fromTableCellAlignment NoAlignment = AlignDefault
+ fromTableCellAlignment LeftAligned = AlignLeft
+ fromTableCellAlignment RightAligned = AlignRight
+ fromTableCellAlignment CenterAligned = AlignCenter
+ widths = replicate numcols 0.0
+ numcols = if null rows'
+ then 0
+ else maximum $ map length rows'
+ rows' = map toRow $ filter isRow nodes
+ (headers, rows) = case rows' of
+ (h:rs) -> (h, rs)
+ [] -> ([], [])
+ isRow (Node _ TABLE_ROW _) = True
+ isRow _ = False
+ isCell (Node _ TABLE_CELL _) = True
+ isCell _ = False
+ toRow (Node _ TABLE_ROW ns) = map toCell $ filter isCell ns
+ toRow (Node _ t _) = error $ "toRow encountered non-row " ++ show t
+ toCell (Node _ TABLE_CELL []) = []
+ toCell (Node _ TABLE_CELL (n:ns))
+ | isBlockNode n = addBlocks (n:ns)
+ | otherwise = [Plain (addInlines (n:ns))]
+ toCell (Node _ t _) = error $ "toCell encountered non-cell " ++ show t
+addBlock (Node _ TABLE_ROW _) = id -- handled in TABLE
+addBlock (Node _ TABLE_CELL _) = id -- handled in TABLE
+addBlock _ = id
+
+isBlockNode :: Node -> Bool
+isBlockNode (Node _ nodetype _) =
+ case nodetype of
+ DOCUMENT -> True
+ THEMATIC_BREAK -> True
+ PARAGRAPH -> True
+ BLOCK_QUOTE -> True
+ HTML_BLOCK _ -> True
+ CUSTOM_BLOCK _ _ -> True
+ CODE_BLOCK _ _ -> True
+ HEADING _ -> True
+ LIST _ -> True
+ ITEM -> True
+ TEXT _ -> False
+ SOFTBREAK -> False
+ LINEBREAK -> False
+ HTML_INLINE _ -> False
+ CUSTOM_INLINE _ _ -> False
+ CODE _ -> False
+ EMPH -> False
+ STRONG -> False
+ LINK _ _ -> False
+ IMAGE _ _ -> False
+ STRIKETHROUGH -> False
+ TABLE _ -> False
+ TABLE_ROW -> False
+ TABLE_CELL -> False
+
+children :: Node -> [Node]
+children (Node _ _ ns) = ns
+
+addInlines :: [Node] -> [Inline]
+addInlines = foldr addInline []
+
+addInline :: Node -> [Inline] -> [Inline]
+addInline (Node _ (TEXT t) _) = (map toinl clumps ++)
+ where raw = unpack t
+ clumps = groupBy samekind raw
+ samekind ' ' ' ' = True
+ samekind ' ' _ = False
+ samekind _ ' ' = False
+ samekind _ _ = True
+ toinl (' ':_) = Space
+ toinl xs = Str xs
+addInline (Node _ LINEBREAK _) = (LineBreak :)
+addInline (Node _ SOFTBREAK _) = (SoftBreak :)
+addInline (Node _ (HTML_INLINE t) _) =
+ (RawInline (Format "html") (unpack t) :)
+-- Note: the cmark parser will never generate CUSTOM_BLOCK,
+-- so we don't need to handle it:
+addInline (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) =
+ id
+addInline (Node _ (CODE t) _) =
+ (Code ("",[],[]) (unpack t) :)
+addInline (Node _ EMPH nodes) =
+ (Emph (addInlines nodes) :)
+addInline (Node _ STRONG nodes) =
+ (Strong (addInlines nodes) :)
+addInline (Node _ STRIKETHROUGH nodes) =
+ (Strikeout (addInlines nodes) :)
+addInline (Node _ (LINK url title) nodes) =
+ (Link nullAttr (addInlines nodes) (unpack url, unpack title) :)
+addInline (Node _ (IMAGE url title) nodes) =
+ (Image nullAttr (addInlines nodes) (unpack url, unpack title) :)
+addInline _ = id