aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-08-08 13:55:19 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2017-08-08 13:55:19 -0700
commit3752298d917f101ac0279b7fc057c38d765f1770 (patch)
tree7ee943723573be7c58ff0c7f78966577a674dc5a /src
parent54658b923a6660336e7f6583d5416f786a54473a (diff)
downloadpandoc-3752298d917f101ac0279b7fc057c38d765f1770.tar.gz
Thread options through CommonMark reader.
This is more efficient than doing AST traversals for emojis and hard breaks. Also make behavior sensitive to `raw_html` extension.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs158
1 files changed, 77 insertions, 81 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 14be946e6..2dba18c9f 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -43,44 +43,34 @@ import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojis)
import Text.Pandoc.Options
import Text.Pandoc.Shared (stringify)
-import Text.Pandoc.Walk (walkM, walk)
+import Text.Pandoc.Walk (walkM)
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readCommonMark opts s = return $
- (if enabled Ext_gfm_auto_identifiers
+ (if enabled Ext_gfm_auto_identifiers opts
then addHeaderIdentifiers
else id) $
- (if enabled Ext_emoji
- then addEmojis
- else id) $
- (if enabled Ext_hard_line_breaks
- then walk softToHardBreaks
- else id) $
- nodeToPandoc $ commonmarkToNode opts' exts s
- where opts' = [ optSmart | enabled Ext_smart ]
- exts = [ extStrikethrough | enabled Ext_strikeout ] ++
- [ extTable | enabled Ext_pipe_tables ] ++
- [ extAutolink | enabled Ext_autolink_bare_uris ]
- enabled x = extensionEnabled x (readerExtensions opts)
-
-softToHardBreaks :: Inline -> Inline
-softToHardBreaks SoftBreak = LineBreak
-softToHardBreaks x = x
-
-addEmojis :: Pandoc -> Pandoc
-addEmojis = walk go
- where go (Str xs) = Str (convertEmojis xs)
- go x = x
- convertEmojis (':':xs) =
- case break (==':') xs of
- (ys,':':zs) ->
- case Map.lookup ys emojis of
- Just s -> s ++ convertEmojis zs
- Nothing -> ':' : ys ++ convertEmojis (':':zs)
- _ -> ':':xs
- convertEmojis (x:xs) = x : convertEmojis xs
- convertEmojis [] = []
+ nodeToPandoc opts $ commonmarkToNode opts' exts s
+ where opts' = [ optSmart | enabled Ext_smart opts ]
+ exts = [ extStrikethrough | enabled Ext_strikeout opts ] ++
+ [ extTable | enabled Ext_pipe_tables opts ] ++
+ [ extAutolink | enabled Ext_autolink_bare_uris opts ]
+
+-- | Returns True if the given extension is enabled.
+enabled :: Extension -> ReaderOptions -> Bool
+enabled ext opts = ext `extensionEnabled` (readerExtensions opts)
+
+convertEmojis :: String -> String
+convertEmojis (':':xs) =
+ case break (==':') xs of
+ (ys,':':zs) ->
+ case Map.lookup ys emojis of
+ Just s -> s ++ convertEmojis zs
+ Nothing -> ':' : ys ++ convertEmojis (':':zs)
+ _ -> ':':xs
+convertEmojis (x:xs) = x : convertEmojis xs
+convertEmojis [] = []
addHeaderIdentifiers :: Pandoc -> Pandoc
addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty
@@ -105,34 +95,35 @@ toIdent = map (\c -> if isSpace c then '-' else c)
c == '_' || c == '-')
. map toLower . stringify
-nodeToPandoc :: Node -> Pandoc
-nodeToPandoc (Node _ DOCUMENT nodes) =
- Pandoc nullMeta $ foldr addBlock [] nodes
-nodeToPandoc n = -- shouldn't happen
- Pandoc nullMeta $ foldr addBlock [] [n]
+nodeToPandoc :: ReaderOptions -> Node -> Pandoc
+nodeToPandoc opts (Node _ DOCUMENT nodes) =
+ Pandoc nullMeta $ foldr (addBlock opts) [] nodes
+nodeToPandoc opts n = -- shouldn't happen
+ Pandoc nullMeta $ foldr (addBlock opts) [] [n]
-addBlocks :: [Node] -> [Block]
-addBlocks = foldr addBlock []
+addBlocks :: ReaderOptions -> [Node] -> [Block]
+addBlocks opts = foldr (addBlock opts) []
-addBlock :: Node -> [Block] -> [Block]
-addBlock (Node _ PARAGRAPH nodes) =
- (Para (addInlines nodes) :)
-addBlock (Node _ THEMATIC_BREAK _) =
+addBlock :: ReaderOptions -> Node -> [Block] -> [Block]
+addBlock opts (Node _ PARAGRAPH nodes) =
+ (Para (addInlines opts nodes) :)
+addBlock _ (Node _ THEMATIC_BREAK _) =
(HorizontalRule :)
-addBlock (Node _ BLOCK_QUOTE nodes) =
- (BlockQuote (addBlocks nodes) :)
-addBlock (Node _ (HTML_BLOCK t) _) =
- (RawBlock (Format "html") (unpack t) :)
+addBlock opts (Node _ BLOCK_QUOTE nodes) =
+ (BlockQuote (addBlocks opts nodes) :)
+addBlock opts (Node _ (HTML_BLOCK t) _)
+ | enabled Ext_raw_html opts = (RawBlock (Format "html") (unpack t) :)
+ | otherwise = id
-- Note: the cmark parser will never generate CUSTOM_BLOCK,
-- so we don't need to handle it:
-addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
+addBlock _ (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
id
-addBlock (Node _ (CODE_BLOCK info t) _) =
+addBlock _ (Node _ (CODE_BLOCK info t) _) =
(CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
-addBlock (Node _ (HEADING lev) nodes) =
- (Header lev ("",[],[]) (addInlines nodes) :)
-addBlock (Node _ (LIST listAttrs) nodes) =
- (constructor (map (setTightness . addBlocks . children) nodes) :)
+addBlock opts (Node _ (HEADING lev) nodes) =
+ (Header lev ("",[],[]) (addInlines opts nodes) :)
+addBlock opts (Node _ (LIST listAttrs) nodes) =
+ (constructor (map (setTightness . addBlocks opts . children) nodes) :)
where constructor = case listType listAttrs of
BULLET_LIST -> BulletList
ORDERED_LIST -> OrderedList
@@ -146,7 +137,7 @@ addBlock (Node _ (LIST listAttrs) nodes) =
delim = case listDelim listAttrs of
PERIOD_DELIM -> Period
PAREN_DELIM -> OneParen
-addBlock (Node _ (TABLE alignments) nodes) = do
+addBlock opts (Node _ (TABLE alignments) nodes) = do
(Table [] aligns widths headers rows :)
where aligns = map fromTableCellAlignment alignments
fromTableCellAlignment NoAlignment = AlignDefault
@@ -169,12 +160,12 @@ addBlock (Node _ (TABLE alignments) nodes) = do
toRow (Node _ t _) = error $ "toRow encountered non-row " ++ show t
toCell (Node _ TABLE_CELL []) = []
toCell (Node _ TABLE_CELL (n:ns))
- | isBlockNode n = addBlocks (n:ns)
- | otherwise = [Plain (addInlines (n:ns))]
+ | isBlockNode n = addBlocks opts (n:ns)
+ | otherwise = [Plain (addInlines opts (n:ns))]
toCell (Node _ t _) = error $ "toCell encountered non-cell " ++ show t
-addBlock (Node _ TABLE_ROW _) = id -- handled in TABLE
-addBlock (Node _ TABLE_CELL _) = id -- handled in TABLE
-addBlock _ = id
+addBlock _ (Node _ TABLE_ROW _) = id -- handled in TABLE
+addBlock _ (Node _ TABLE_CELL _) = id -- handled in TABLE
+addBlock _ _ = id
isBlockNode :: Node -> Bool
isBlockNode (Node _ nodetype _) =
@@ -207,11 +198,11 @@ isBlockNode (Node _ nodetype _) =
children :: Node -> [Node]
children (Node _ _ ns) = ns
-addInlines :: [Node] -> [Inline]
-addInlines = foldr addInline []
+addInlines :: ReaderOptions -> [Node] -> [Inline]
+addInlines opts = foldr (addInline opts) []
-addInline :: Node -> [Inline] -> [Inline]
-addInline (Node _ (TEXT t) _) = (map toinl clumps ++)
+addInline :: ReaderOptions -> Node -> [Inline] -> [Inline]
+addInline opts (Node _ (TEXT t) _) = (map toinl clumps ++)
where raw = unpack t
clumps = groupBy samekind raw
samekind ' ' ' ' = True
@@ -219,25 +210,30 @@ addInline (Node _ (TEXT t) _) = (map toinl clumps ++)
samekind _ ' ' = False
samekind _ _ = True
toinl (' ':_) = Space
- toinl xs = Str xs
-addInline (Node _ LINEBREAK _) = (LineBreak :)
-addInline (Node _ SOFTBREAK _) = (SoftBreak :)
-addInline (Node _ (HTML_INLINE t) _) =
- (RawInline (Format "html") (unpack t) :)
+ toinl xs = Str $ if enabled Ext_emoji opts
+ then convertEmojis xs
+ else xs
+addInline _ (Node _ LINEBREAK _) = (LineBreak :)
+addInline opts (Node _ SOFTBREAK _)
+ | enabled Ext_hard_line_breaks opts = (LineBreak :)
+ | otherwise = (SoftBreak :)
+addInline opts (Node _ (HTML_INLINE t) _)
+ | enabled Ext_raw_html opts = (RawInline (Format "html") (unpack t) :)
+ | otherwise = id
-- Note: the cmark parser will never generate CUSTOM_BLOCK,
-- so we don't need to handle it:
-addInline (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) =
+addInline _ (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) =
id
-addInline (Node _ (CODE t) _) =
+addInline _ (Node _ (CODE t) _) =
(Code ("",[],[]) (unpack t) :)
-addInline (Node _ EMPH nodes) =
- (Emph (addInlines nodes) :)
-addInline (Node _ STRONG nodes) =
- (Strong (addInlines nodes) :)
-addInline (Node _ STRIKETHROUGH nodes) =
- (Strikeout (addInlines nodes) :)
-addInline (Node _ (LINK url title) nodes) =
- (Link nullAttr (addInlines nodes) (unpack url, unpack title) :)
-addInline (Node _ (IMAGE url title) nodes) =
- (Image nullAttr (addInlines nodes) (unpack url, unpack title) :)
-addInline _ = id
+addInline opts (Node _ EMPH nodes) =
+ (Emph (addInlines opts nodes) :)
+addInline opts (Node _ STRONG nodes) =
+ (Strong (addInlines opts nodes) :)
+addInline opts (Node _ STRIKETHROUGH nodes) =
+ (Strikeout (addInlines opts nodes) :)
+addInline opts (Node _ (LINK url title) nodes) =
+ (Link nullAttr (addInlines opts nodes) (unpack url, unpack title) :)
+addInline opts (Node _ (IMAGE url title) nodes) =
+ (Image nullAttr (addInlines opts nodes) (unpack url, unpack title) :)
+addInline _ _ = id