From 904924d172d2fced32a96aa1d022d47a0fb59cd6 Mon Sep 17 00:00:00 2001 From: Anders Waldenborg Date: Fri, 29 Jun 2018 10:41:26 +0200 Subject: CommonMark reader: Handle ascii_identifiers extension (#4733) Non-ascii characters were not stripped from identifiers even if the `ascii_identifiers` extension was enabled (which is is by default for gfm). Closes #4742 --- src/Text/Pandoc/Readers/CommonMark.hs | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 79a4abbc2..a742ca666 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -39,7 +39,9 @@ import Control.Monad.State import Data.Char (isAlphaNum, isLetter, isSpace, toLower) import Data.List (groupBy) import qualified Data.Map as Map +import Data.Maybe (mapMaybe) import Data.Text (Text, unpack) +import Text.Pandoc.Asciify (toAsciiChar) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) @@ -51,7 +53,7 @@ import Text.Pandoc.Walk (walkM) readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = return $ (if isEnabled Ext_gfm_auto_identifiers opts - then addHeaderIdentifiers + then addHeaderIdentifiers opts else id) $ nodeToPandoc opts $ commonmarkToNode opts' exts s where opts' = [ optSmart | isEnabled Ext_smart opts ] @@ -70,13 +72,13 @@ convertEmojis (':':xs) = convertEmojis (x:xs) = x : convertEmojis xs convertEmojis [] = [] -addHeaderIdentifiers :: Pandoc -> Pandoc -addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty +addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc +addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty -addHeaderId :: Block -> State (Map.Map String Int) Block -addHeaderId (Header lev (_,classes,kvs) ils) = do +addHeaderId :: ReaderOptions -> Block -> State (Map.Map String Int) Block +addHeaderId opts (Header lev (_,classes,kvs) ils) = do idmap <- get - let ident = toIdent ils + let ident = toIdent opts ils ident' <- case Map.lookup ident idmap of Nothing -> do put (Map.insert ident 1 idmap) @@ -85,13 +87,16 @@ addHeaderId (Header lev (_,classes,kvs) ils) = do put (Map.adjust (+ 1) ident idmap) return (ident ++ "-" ++ show i) return $ Header lev (ident',classes,kvs) ils -addHeaderId x = return x - -toIdent :: [Inline] -> String -toIdent = map (\c -> if isSpace c then '-' else c) - . filter (\c -> isLetter c || isAlphaNum c || isSpace c || - c == '_' || c == '-') - . map toLower . stringify +addHeaderId _ x = return x + +toIdent :: ReaderOptions -> [Inline] -> String +toIdent opts = map (\c -> if isSpace c then '-' else c) + . filterer + . map toLower . stringify + where filterer = if isEnabled Ext_ascii_identifiers opts + then mapMaybe toAsciiChar + else filter (\c -> isLetter c || isAlphaNum c || isSpace c || + c == '_' || c == '-') nodeToPandoc :: ReaderOptions -> Node -> Pandoc nodeToPandoc opts (Node _ DOCUMENT nodes) = -- cgit v1.2.3