From 3752298d917f101ac0279b7fc057c38d765f1770 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 8 Aug 2017 13:55:19 -0700 Subject: Thread options through CommonMark reader. This is more efficient than doing AST traversals for emojis and hard breaks. Also make behavior sensitive to `raw_html` extension. --- src/Text/Pandoc/Readers/CommonMark.hs | 158 +++++++++++++++++----------------- 1 file changed, 77 insertions(+), 81 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 14be946e6..2dba18c9f 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -43,44 +43,34 @@ import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) import Text.Pandoc.Options import Text.Pandoc.Shared (stringify) -import Text.Pandoc.Walk (walkM, walk) +import Text.Pandoc.Walk (walkM) -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = return $ - (if enabled Ext_gfm_auto_identifiers + (if enabled Ext_gfm_auto_identifiers opts then addHeaderIdentifiers else id) $ - (if enabled Ext_emoji - then addEmojis - else id) $ - (if enabled Ext_hard_line_breaks - then walk softToHardBreaks - else id) $ - nodeToPandoc $ commonmarkToNode opts' exts s - where opts' = [ optSmart | enabled Ext_smart ] - exts = [ extStrikethrough | enabled Ext_strikeout ] ++ - [ extTable | enabled Ext_pipe_tables ] ++ - [ extAutolink | enabled Ext_autolink_bare_uris ] - enabled x = extensionEnabled x (readerExtensions opts) - -softToHardBreaks :: Inline -> Inline -softToHardBreaks SoftBreak = LineBreak -softToHardBreaks x = x - -addEmojis :: Pandoc -> Pandoc -addEmojis = walk go - where go (Str xs) = Str (convertEmojis xs) - go x = x - convertEmojis (':':xs) = - case break (==':') xs of - (ys,':':zs) -> - case Map.lookup ys emojis of - Just s -> s ++ convertEmojis zs - Nothing -> ':' : ys ++ convertEmojis (':':zs) - _ -> ':':xs - convertEmojis (x:xs) = x : convertEmojis xs - convertEmojis [] = [] + nodeToPandoc opts $ commonmarkToNode opts' exts s + where opts' = [ optSmart | enabled Ext_smart opts ] + exts = [ extStrikethrough | enabled Ext_strikeout opts ] ++ + [ extTable | enabled Ext_pipe_tables opts ] ++ + [ extAutolink | enabled Ext_autolink_bare_uris opts ] + +-- | Returns True if the given extension is enabled. +enabled :: Extension -> ReaderOptions -> Bool +enabled ext opts = ext `extensionEnabled` (readerExtensions opts) + +convertEmojis :: String -> String +convertEmojis (':':xs) = + case break (==':') xs of + (ys,':':zs) -> + case Map.lookup ys emojis of + Just s -> s ++ convertEmojis zs + Nothing -> ':' : ys ++ convertEmojis (':':zs) + _ -> ':':xs +convertEmojis (x:xs) = x : convertEmojis xs +convertEmojis [] = [] addHeaderIdentifiers :: Pandoc -> Pandoc addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty @@ -105,34 +95,35 @@ toIdent = map (\c -> if isSpace c then '-' else c) c == '_' || c == '-') . map toLower . stringify -nodeToPandoc :: Node -> Pandoc -nodeToPandoc (Node _ DOCUMENT nodes) = - Pandoc nullMeta $ foldr addBlock [] nodes -nodeToPandoc n = -- shouldn't happen - Pandoc nullMeta $ foldr addBlock [] [n] +nodeToPandoc :: ReaderOptions -> Node -> Pandoc +nodeToPandoc opts (Node _ DOCUMENT nodes) = + Pandoc nullMeta $ foldr (addBlock opts) [] nodes +nodeToPandoc opts n = -- shouldn't happen + Pandoc nullMeta $ foldr (addBlock opts) [] [n] -addBlocks :: [Node] -> [Block] -addBlocks = foldr addBlock [] +addBlocks :: ReaderOptions -> [Node] -> [Block] +addBlocks opts = foldr (addBlock opts) [] -addBlock :: Node -> [Block] -> [Block] -addBlock (Node _ PARAGRAPH nodes) = - (Para (addInlines nodes) :) -addBlock (Node _ THEMATIC_BREAK _) = +addBlock :: ReaderOptions -> Node -> [Block] -> [Block] +addBlock opts (Node _ PARAGRAPH nodes) = + (Para (addInlines opts nodes) :) +addBlock _ (Node _ THEMATIC_BREAK _) = (HorizontalRule :) -addBlock (Node _ BLOCK_QUOTE nodes) = - (BlockQuote (addBlocks nodes) :) -addBlock (Node _ (HTML_BLOCK t) _) = - (RawBlock (Format "html") (unpack t) :) +addBlock opts (Node _ BLOCK_QUOTE nodes) = + (BlockQuote (addBlocks opts nodes) :) +addBlock opts (Node _ (HTML_BLOCK t) _) + | enabled Ext_raw_html opts = (RawBlock (Format "html") (unpack t) :) + | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: -addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) = +addBlock _ (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) = id -addBlock (Node _ (CODE_BLOCK info t) _) = +addBlock _ (Node _ (CODE_BLOCK info t) _) = (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) -addBlock (Node _ (HEADING lev) nodes) = - (Header lev ("",[],[]) (addInlines nodes) :) -addBlock (Node _ (LIST listAttrs) nodes) = - (constructor (map (setTightness . addBlocks . children) nodes) :) +addBlock opts (Node _ (HEADING lev) nodes) = + (Header lev ("",[],[]) (addInlines opts nodes) :) +addBlock opts (Node _ (LIST listAttrs) nodes) = + (constructor (map (setTightness . addBlocks opts . children) nodes) :) where constructor = case listType listAttrs of BULLET_LIST -> BulletList ORDERED_LIST -> OrderedList @@ -146,7 +137,7 @@ addBlock (Node _ (LIST listAttrs) nodes) = delim = case listDelim listAttrs of PERIOD_DELIM -> Period PAREN_DELIM -> OneParen -addBlock (Node _ (TABLE alignments) nodes) = do +addBlock opts (Node _ (TABLE alignments) nodes) = do (Table [] aligns widths headers rows :) where aligns = map fromTableCellAlignment alignments fromTableCellAlignment NoAlignment = AlignDefault @@ -169,12 +160,12 @@ addBlock (Node _ (TABLE alignments) nodes) = do toRow (Node _ t _) = error $ "toRow encountered non-row " ++ show t toCell (Node _ TABLE_CELL []) = [] toCell (Node _ TABLE_CELL (n:ns)) - | isBlockNode n = addBlocks (n:ns) - | otherwise = [Plain (addInlines (n:ns))] + | isBlockNode n = addBlocks opts (n:ns) + | otherwise = [Plain (addInlines opts (n:ns))] toCell (Node _ t _) = error $ "toCell encountered non-cell " ++ show t -addBlock (Node _ TABLE_ROW _) = id -- handled in TABLE -addBlock (Node _ TABLE_CELL _) = id -- handled in TABLE -addBlock _ = id +addBlock _ (Node _ TABLE_ROW _) = id -- handled in TABLE +addBlock _ (Node _ TABLE_CELL _) = id -- handled in TABLE +addBlock _ _ = id isBlockNode :: Node -> Bool isBlockNode (Node _ nodetype _) = @@ -207,11 +198,11 @@ isBlockNode (Node _ nodetype _) = children :: Node -> [Node] children (Node _ _ ns) = ns -addInlines :: [Node] -> [Inline] -addInlines = foldr addInline [] +addInlines :: ReaderOptions -> [Node] -> [Inline] +addInlines opts = foldr (addInline opts) [] -addInline :: Node -> [Inline] -> [Inline] -addInline (Node _ (TEXT t) _) = (map toinl clumps ++) +addInline :: ReaderOptions -> Node -> [Inline] -> [Inline] +addInline opts (Node _ (TEXT t) _) = (map toinl clumps ++) where raw = unpack t clumps = groupBy samekind raw samekind ' ' ' ' = True @@ -219,25 +210,30 @@ addInline (Node _ (TEXT t) _) = (map toinl clumps ++) samekind _ ' ' = False samekind _ _ = True toinl (' ':_) = Space - toinl xs = Str xs -addInline (Node _ LINEBREAK _) = (LineBreak :) -addInline (Node _ SOFTBREAK _) = (SoftBreak :) -addInline (Node _ (HTML_INLINE t) _) = - (RawInline (Format "html") (unpack t) :) + toinl xs = Str $ if enabled Ext_emoji opts + then convertEmojis xs + else xs +addInline _ (Node _ LINEBREAK _) = (LineBreak :) +addInline opts (Node _ SOFTBREAK _) + | enabled Ext_hard_line_breaks opts = (LineBreak :) + | otherwise = (SoftBreak :) +addInline opts (Node _ (HTML_INLINE t) _) + | enabled Ext_raw_html opts = (RawInline (Format "html") (unpack t) :) + | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: -addInline (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) = +addInline _ (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) = id -addInline (Node _ (CODE t) _) = +addInline _ (Node _ (CODE t) _) = (Code ("",[],[]) (unpack t) :) -addInline (Node _ EMPH nodes) = - (Emph (addInlines nodes) :) -addInline (Node _ STRONG nodes) = - (Strong (addInlines nodes) :) -addInline (Node _ STRIKETHROUGH nodes) = - (Strikeout (addInlines nodes) :) -addInline (Node _ (LINK url title) nodes) = - (Link nullAttr (addInlines nodes) (unpack url, unpack title) :) -addInline (Node _ (IMAGE url title) nodes) = - (Image nullAttr (addInlines nodes) (unpack url, unpack title) :) -addInline _ = id +addInline opts (Node _ EMPH nodes) = + (Emph (addInlines opts nodes) :) +addInline opts (Node _ STRONG nodes) = + (Strong (addInlines opts nodes) :) +addInline opts (Node _ STRIKETHROUGH nodes) = + (Strikeout (addInlines opts nodes) :) +addInline opts (Node _ (LINK url title) nodes) = + (Link nullAttr (addInlines opts nodes) (unpack url, unpack title) :) +addInline opts (Node _ (IMAGE url title) nodes) = + (Image nullAttr (addInlines opts nodes) (unpack url, unpack title) :) +addInline _ _ = id -- cgit v1.2.3