From 5bc38a741bdd5a0470b92c4cd62769bb8dd4ddf2 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sun, 11 Nov 2018 20:45:38 -0800
Subject: Exactly match GitHub's identifier generating algorithm.

See #5057.
---
 src/Text/Pandoc/Shared.hs | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 0b29347a3..0bb9b20d7 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -116,7 +116,9 @@ import Control.Monad (MonadPlus (..), msum, unless)
 import qualified Control.Monad.State.Strict as S
 import qualified Data.ByteString.Lazy as BL
 import qualified Data.Bifunctor as Bifunctor
-import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum)
+import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
+                  generalCategory, GeneralCategory(NonSpacingMark,
+                  SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
 import Data.Data (Data, Typeable)
 import Data.List (find, intercalate, intersperse, stripPrefix)
 import qualified Data.Map as M
@@ -504,7 +506,10 @@ inlineListToIdentifier exts =
       | otherwise = intercalate "-" . words . filterPunct . map toLower
     filterPunct = filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c)
     isAllowedPunct c
-      | extensionEnabled Ext_gfm_auto_identifiers exts = c == '_' || c == '-'
+      | extensionEnabled Ext_gfm_auto_identifiers exts
+        = c == '-' || c == '_' ||
+          generalCategory c `elem` [NonSpacingMark, SpacingCombiningMark,
+                  EnclosingMark, ConnectorPunctuation]
       | otherwise = c == '_' || c == '-' || c == '.'
     spaceToDash = map (\c -> if isSpace c then '-' else c)
 
-- 
cgit v1.2.3