diff options
author | Anders Waldenborg <anders@0x63.nu> | 2018-06-29 10:41:26 +0200 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-06-29 10:41:26 +0200 |
commit | 904924d172d2fced32a96aa1d022d47a0fb59cd6 (patch) | |
tree | 6d331bb9ea93612b88071e0430a36e66b8fbf4c8 /src/Text/Pandoc | |
parent | bb5a2464d54b76e97e42a015676042244bd09970 (diff) | |
download | pandoc-904924d172d2fced32a96aa1d022d47a0fb59cd6.tar.gz |
CommonMark reader: Handle ascii_identifiers extension (#4733)
Non-ascii characters were not stripped from identifiers even if the
`ascii_identifiers` extension was enabled (which is is by default for
gfm).
Closes #4742
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Readers/CommonMark.hs | 31 |
1 files changed, 18 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 79a4abbc2..a742ca666 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -39,7 +39,9 @@ import Control.Monad.State import Data.Char (isAlphaNum, isLetter, isSpace, toLower) import Data.List (groupBy) import qualified Data.Map as Map +import Data.Maybe (mapMaybe) import Data.Text (Text, unpack) +import Text.Pandoc.Asciify (toAsciiChar) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) @@ -51,7 +53,7 @@ import Text.Pandoc.Walk (walkM) readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = return $ (if isEnabled Ext_gfm_auto_identifiers opts - then addHeaderIdentifiers + then addHeaderIdentifiers opts else id) $ nodeToPandoc opts $ commonmarkToNode opts' exts s where opts' = [ optSmart | isEnabled Ext_smart opts ] @@ -70,13 +72,13 @@ convertEmojis (':':xs) = convertEmojis (x:xs) = x : convertEmojis xs convertEmojis [] = [] -addHeaderIdentifiers :: Pandoc -> Pandoc -addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty +addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc +addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty -addHeaderId :: Block -> State (Map.Map String Int) Block -addHeaderId (Header lev (_,classes,kvs) ils) = do +addHeaderId :: ReaderOptions -> Block -> State (Map.Map String Int) Block +addHeaderId opts (Header lev (_,classes,kvs) ils) = do idmap <- get - let ident = toIdent ils + let ident = toIdent opts ils ident' <- case Map.lookup ident idmap of Nothing -> do put (Map.insert ident 1 idmap) @@ -85,13 +87,16 @@ addHeaderId (Header lev (_,classes,kvs) ils) = do put (Map.adjust (+ 1) ident idmap) return (ident ++ "-" ++ show i) return $ Header lev (ident',classes,kvs) ils -addHeaderId x = return x - -toIdent :: [Inline] -> String -toIdent = map (\c -> if isSpace c then '-' else c) - . filter (\c -> isLetter c || isAlphaNum c || isSpace c || - c == '_' || c == '-') - . map toLower . stringify +addHeaderId _ x = return x + +toIdent :: ReaderOptions -> [Inline] -> String +toIdent opts = map (\c -> if isSpace c then '-' else c) + . filterer + . map toLower . stringify + where filterer = if isEnabled Ext_ascii_identifiers opts + then mapMaybe toAsciiChar + else filter (\c -> isLetter c || isAlphaNum c || isSpace c || + c == '_' || c == '-') nodeToPandoc :: ReaderOptions -> Node -> Pandoc nodeToPandoc opts (Node _ DOCUMENT nodes) = |