From 6b4b7a4ba3fc07036185c08c1f3cc8c3ca30c456 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sun, 11 Nov 2018 10:33:04 -0800
Subject: Clean up toIdent in CommonMark reader.

This partially addresses #5057, fixing a bad interaction between
the `ascii_identifiers` extension and the `gfm_auto_identifiers`
extension, and creating identifiers that match the ones GitHub produces.

This code still needs to be put somewhere common, so the
`gfm_auto_identifiers` extension will work with other formats.
---
 src/Text/Pandoc/Readers/CommonMark.hs | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 9c4f7a8ac..5a2e5784a 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -36,7 +36,7 @@ where
 import Prelude
 import CMarkGFM
 import Control.Monad.State
-import Data.Char (isAlphaNum, isLetter, isSpace, toLower)
+import Data.Char (isAlphaNum, isSpace, toLower)
 import Data.List (groupBy)
 import qualified Data.Map as Map
 import Data.Maybe (mapMaybe)
@@ -93,13 +93,14 @@ addHeaderId opts (Header lev (_,classes,kvs) ils) = do
 addHeaderId _ x = return x
 
 toIdent :: ReaderOptions -> [Inline] -> String
-toIdent opts = map (\c -> if isSpace c then '-' else c)
-             . filterer
-             . map toLower . stringify
-  where filterer = if isEnabled Ext_ascii_identifiers opts
-                      then mapMaybe toAsciiChar
-                      else filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
-                                         c == '_' || c == '-')
+toIdent opts =
+  filterAscii . filterPunct . spaceToDash . map toLower. stringify
+  where
+    filterAscii = if isEnabled Ext_ascii_identifiers opts
+                     then mapMaybe toAsciiChar
+                     else id
+    filterPunct = filter (\c -> isAlphaNum c || c == '_' || c == '-')
+    spaceToDash = map (\c -> if isSpace c then '-' else c)
 
 nodeToPandoc :: ReaderOptions -> Node -> Pandoc
 nodeToPandoc opts (Node _ DOCUMENT nodes) =
--
cgit v1.2.3