From 73caf92871a2fb40b6f092e90a86d366eca630cb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 8 Aug 2017 11:33:13 -0700 Subject: CommonMark reader: support `gfm_auto_identifiers`. Added `Ext_gfm_auto_identifiers`: new constructor for `Extension` in `Text.Pandoc.Extensions` [API change]. Use this in githubExtensions. Closes #2821. --- src/Text/Pandoc/Extensions.hs | 4 +++- src/Text/Pandoc/Readers/CommonMark.hs | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 9e49c5907..e6a3ca044 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -133,6 +133,8 @@ data Extension = | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions | Ext_emoji -- ^ Support emoji like :smile: | Ext_auto_identifiers -- ^ Automatic identifiers for headers + | Ext_gfm_auto_identifiers -- ^ Automatic identifiers for headers, + -- using GitHub's method for generating identifiers | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] @@ -237,7 +239,7 @@ githubMarkdownExtensions = extensionsFromList , Ext_pipe_tables , Ext_raw_html , Ext_fenced_code_blocks - , Ext_auto_identifiers + , Ext_gfm_auto_identifiers , Ext_ascii_identifiers , Ext_backtick_code_blocks , Ext_autolink_bare_uris diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 4b24ea374..ea9d696cb 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -33,15 +33,23 @@ module Text.Pandoc.Readers.CommonMark (readCommonMark) where import CMarkGFM +import Control.Monad.State +import Data.Char (isLetter, isAlphaNum, isSpace, toLower) import Data.List (groupBy) import Data.Text (Text, unpack) +import qualified Data.Map as Map import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options +import Text.Pandoc.Shared (stringify) +import Text.Pandoc.Walk (walkM) -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = return $ + (if enabled Ext_gfm_auto_identifiers + then addHeaderIdentifiers + else id) $ nodeToPandoc $ commonmarkToNode opts' exts s where opts' = [ optSmart | enabled Ext_smart ] exts = [ extStrikethrough | enabled Ext_strikeout ] ++ @@ -49,6 +57,29 @@ readCommonMark opts s = return $ [ extAutolink | enabled Ext_autolink_bare_uris ] enabled x = extensionEnabled x (readerExtensions opts) +addHeaderIdentifiers :: Pandoc -> Pandoc +addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty + +addHeaderId :: Block -> State (Map.Map String Int) Block +addHeaderId (Header lev (_,classes,kvs) ils) = do + idmap <- get + let ident = toIdent ils + ident' <- case Map.lookup ident idmap of + Nothing -> do + put (Map.insert ident 1 idmap) + return ident + Just i -> do + put (Map.adjust (+ 1) ident idmap) + return (ident ++ "-" ++ show i) + return $ Header lev (ident',classes,kvs) ils +addHeaderId x = return x + +toIdent :: [Inline] -> String +toIdent = map (\c -> if isSpace c then '-' else c) + . filter (\c -> isLetter c || isAlphaNum c || isSpace c || + c == '_' || c == '-') + . map toLower . stringify + nodeToPandoc :: Node -> Pandoc nodeToPandoc (Node _ DOCUMENT nodes) = Pandoc nullMeta $ foldr addBlock [] nodes -- cgit v1.2.3