aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Text/Pandoc/Shared.hs9
-rw-r--r--test/command/4742.md4
2 files changed, 11 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 0b29347a3..0bb9b20d7 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -116,7 +116,9 @@ import Control.Monad (MonadPlus (..), msum, unless)
import qualified Control.Monad.State.Strict as S
import qualified Data.ByteString.Lazy as BL
import qualified Data.Bifunctor as Bifunctor
-import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum)
+import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
+ generalCategory, GeneralCategory(NonSpacingMark,
+ SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
import Data.Data (Data, Typeable)
import Data.List (find, intercalate, intersperse, stripPrefix)
import qualified Data.Map as M
@@ -504,7 +506,10 @@ inlineListToIdentifier exts =
| otherwise = intercalate "-" . words . filterPunct . map toLower
filterPunct = filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c)
isAllowedPunct c
- | extensionEnabled Ext_gfm_auto_identifiers exts = c == '_' || c == '-'
+ | extensionEnabled Ext_gfm_auto_identifiers exts
+ = c == '-' || c == '_' ||
+ generalCategory c `elem` [NonSpacingMark, SpacingCombiningMark,
+ EnclosingMark, ConnectorPunctuation]
| otherwise = c == '_' || c == '-' || c == '.'
spaceToDash = map (\c -> if isSpace c then '-' else c)
diff --git a/test/command/4742.md b/test/command/4742.md
index f97314b10..600880e90 100644
--- a/test/command/4742.md
+++ b/test/command/4742.md
@@ -8,6 +8,10 @@ extension properly.
[Header 1 ("non-ascii--raksmorgas",[],[]) [Str "non",Space,Str "ascii",Space,Str "\9888\65039",Space,Str "r\228ksm\246rg\229s"]]
```
+Note that the emoji here is actually a two-code-point sequence:
+\9888 (U+26A0 WARNING SIGN) followed by \65039 (U+FE0F VARIATION SELECTOR-16).
+The latter is a nonspacing mark, so it survives in the generated identifier.
+
```
% pandoc -f commonmark+auto_identifiers+gfm_auto_identifiers-ascii_identifiers -t native
# non ascii ⚠️ räksmörgås