aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
author Anders Waldenborg <anders@0x63.nu> 2018-06-29 10:41:26 +0200
committer John MacFarlane <jgm@berkeley.edu> 2018-06-29 10:41:26 +0200
commit 904924d172d2fced32a96aa1d022d47a0fb59cd6 (patch)
tree 6d331bb9ea93612b88071e0430a36e66b8fbf4c8 /src/Text
parent bb5a2464d54b76e97e42a015676042244bd09970 (diff)
download pandoc-904924d172d2fced32a96aa1d022d47a0fb59cd6.tar.gz
CommonMark reader: Handle ascii_identifiers extension (#4733)
Non-ASCII characters were not stripped from identifiers even if the `ascii_identifiers` extension was enabled (which it is by default for gfm). Closes #4742.
Diffstat (limited to 'src/Text')
-rw-r--r-- src/Text/Pandoc/Readers/CommonMark.hs 31
1 files changed, 18 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 79a4abbc2..a742ca666 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -39,7 +39,9 @@ import Control.Monad.State
import Data.Char (isAlphaNum, isLetter, isSpace, toLower)
import Data.List (groupBy)
import qualified Data.Map as Map
+import Data.Maybe (mapMaybe)
import Data.Text (Text, unpack)
+import Text.Pandoc.Asciify (toAsciiChar)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojis)
@@ -51,7 +53,7 @@ import Text.Pandoc.Walk (walkM)
readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readCommonMark opts s = return $
(if isEnabled Ext_gfm_auto_identifiers opts
- then addHeaderIdentifiers
+ then addHeaderIdentifiers opts
else id) $
nodeToPandoc opts $ commonmarkToNode opts' exts s
where opts' = [ optSmart | isEnabled Ext_smart opts ]
@@ -70,13 +72,13 @@ convertEmojis (':':xs) =
convertEmojis (x:xs) = x : convertEmojis xs
convertEmojis [] = []
-addHeaderIdentifiers :: Pandoc -> Pandoc
-addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty
+addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc
+addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty
-addHeaderId :: Block -> State (Map.Map String Int) Block
-addHeaderId (Header lev (_,classes,kvs) ils) = do
+addHeaderId :: ReaderOptions -> Block -> State (Map.Map String Int) Block
+addHeaderId opts (Header lev (_,classes,kvs) ils) = do
idmap <- get
- let ident = toIdent ils
+ let ident = toIdent opts ils
ident' <- case Map.lookup ident idmap of
Nothing -> do
put (Map.insert ident 1 idmap)
@@ -85,13 +87,16 @@ addHeaderId (Header lev (_,classes,kvs) ils) = do
put (Map.adjust (+ 1) ident idmap)
return (ident ++ "-" ++ show i)
return $ Header lev (ident',classes,kvs) ils
-addHeaderId x = return x
-
-toIdent :: [Inline] -> String
-toIdent = map (\c -> if isSpace c then '-' else c)
- . filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
- c == '_' || c == '-')
- . map toLower . stringify
+addHeaderId _ x = return x
+
+toIdent :: ReaderOptions -> [Inline] -> String
+toIdent opts = map (\c -> if isSpace c then '-' else c)
+ . filterer
+ . map toLower . stringify
+ where filterer = if isEnabled Ext_ascii_identifiers opts
+ then mapMaybe toAsciiChar
+ else filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
+ c == '_' || c == '-')
nodeToPandoc :: ReaderOptions -> Node -> Pandoc
nodeToPandoc opts (Node _ DOCUMENT nodes) =