CommonMark reader: Handle ascii_identifiers extension (#4733)

Non-ascii characters were not stripped from identifiers even if the `ascii_identifiers` extension was enabled (which is is by default for gfm). Closes #4742
author: Anders Waldenborg <anders@0x63.nu> 2018-06-29 10:41:26 +0200
committer: John MacFarlane <jgm@berkeley.edu> 2018-06-29 10:41:26 +0200
commit: 904924d172d2fced32a96aa1d022d47a0fb59cd6 (patch)
tree: 6d331bb9ea93612b88071e0430a36e66b8fbf4c8 /src/Text/Pandoc
parent: bb5a2464d54b76e97e42a015676042244bd09970 (diff)
download: pandoc-904924d172d2fced32a96aa1d022d47a0fb59cd6.tar.gz
1 files changed, 18 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 79a4abbc2..a742ca666 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -39,7 +39,9 @@ import Control.Monad.State
 import Data.Char (isAlphaNum, isLetter, isSpace, toLower)
 import Data.List (groupBy)
 import qualified Data.Map as Map
+import Data.Maybe (mapMaybe)
 import Data.Text (Text, unpack)
+import Text.Pandoc.Asciify (toAsciiChar)
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Emoji (emojis)
@@ -51,7 +53,7 @@ import Text.Pandoc.Walk (walkM)
 readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readCommonMark opts s = return $
   (if isEnabled Ext_gfm_auto_identifiers opts
-      then addHeaderIdentifiers
+      then addHeaderIdentifiers opts
       else id) $
   nodeToPandoc opts $ commonmarkToNode opts' exts s
   where opts' = [ optSmart | isEnabled Ext_smart opts ]
@@ -70,13 +72,13 @@ convertEmojis (':':xs) =
 convertEmojis (x:xs) = x : convertEmojis xs
 convertEmojis [] = []
 
-addHeaderIdentifiers :: Pandoc -> Pandoc
-addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty
+addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc
+addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty
 
-addHeaderId :: Block -> State (Map.Map String Int) Block
-addHeaderId (Header lev (_,classes,kvs) ils) = do
+addHeaderId :: ReaderOptions -> Block -> State (Map.Map String Int) Block
+addHeaderId opts (Header lev (_,classes,kvs) ils) = do
   idmap <- get
-  let ident = toIdent ils
+  let ident = toIdent opts ils
   ident' <- case Map.lookup ident idmap of
                  Nothing -> do
                    put (Map.insert ident 1 idmap)
@@ -85,13 +87,16 @@ addHeaderId (Header lev (_,classes,kvs) ils) = do
                    put (Map.adjust (+ 1) ident idmap)
                    return (ident ++ "-" ++ show i)
   return $ Header lev (ident',classes,kvs) ils
-addHeaderId x = return x
-
-toIdent :: [Inline] -> String
-toIdent =   map (\c -> if isSpace c then '-' else c)
-          . filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
-                           c == '_' || c == '-')
-          . map toLower . stringify
+addHeaderId _ x = return x
+
+toIdent :: ReaderOptions -> [Inline] -> String
+toIdent opts = map (\c -> if isSpace c then '-' else c)
+               . filterer
+               . map toLower . stringify
+  where filterer = if isEnabled Ext_ascii_identifiers opts
+                   then mapMaybe toAsciiChar
+                   else filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
+                                      c == '_' || c == '-')
 
 nodeToPandoc :: ReaderOptions -> Node -> Pandoc
 nodeToPandoc opts (Node _ DOCUMENT nodes) =
author	Anders Waldenborg <anders@0x63.nu>	2018-06-29 10:41:26 +0200
committer	John MacFarlane <jgm@berkeley.edu>	2018-06-29 10:41:26 +0200
commit	904924d172d2fced32a96aa1d022d47a0fb59cd6 (patch)
tree	6d331bb9ea93612b88071e0430a36e66b8fbf4c8 /src/Text/Pandoc
parent	bb5a2464d54b76e97e42a015676042244bd09970 (diff)
download	pandoc-904924d172d2fced32a96aa1d022d47a0fb59cd6.tar.gz