aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Anders Waldenborg <anders@0x63.nu> 2018-07-16 00:14:40 +0200
committer: John MacFarlane <jgm@berkeley.edu> 2018-07-15 15:14:40 -0700
commit: ec30fb37c12fc5d1a248971831414891cf6dcbe7 (patch)
tree: 9b41353a8f3678381a09cdcfdf15ea0190461ca6
parent: ef07db6cefb34b5a6d89fc7d40e0144b6d6440b6 (diff)
download: pandoc-ec30fb37c12fc5d1a248971831414891cf6dcbe7.tar.gz
Wrap emojis in span nodes (#4759)
Text.Pandoc.Emoji now exports `emojiToInline`, which returns a Span inline containing the emoji character and some attributes with metadata (class `emoji`, attribute `data-emoji` with the emoji name). Previously, emojis (as supported in the Markdown and CommonMark readers, e.g. ":smile:") were simply translated into the corresponding Unicode code points. By wrapping them in Span nodes, we make it possible to do special handling, such as giving them a special font in HTML output. We also open up the possibility of treating them differently when the `--ascii` option is selected (though that is not part of this commit). Closes #4743.
-rw-r--r-- src/Text/Pandoc/Emoji.hs 7
-rw-r--r-- src/Text/Pandoc/Readers/CommonMark.hs 31
-rw-r--r-- src/Text/Pandoc/Readers/Markdown.hs 8
-rw-r--r-- test/Tests/Readers/Markdown.hs 4
-rw-r--r-- test/command/4743.md 25
-rw-r--r-- test/command/gfm.md 2
6 files changed, 56 insertions, 21 deletions
diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs
index 5cc965153..7d0af1a72 100644
--- a/src/Text/Pandoc/Emoji.hs
+++ b/src/Text/Pandoc/Emoji.hs
@@ -28,9 +28,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Emoji symbol lookup from canonical string identifier.
-}
-module Text.Pandoc.Emoji ( emojis ) where
+module Text.Pandoc.Emoji ( emojis, emojiToInline ) where
import Prelude
import qualified Data.Map as M
+import Text.Pandoc.Definition (Inline (Span, Str))
emojis :: M.Map String String
emojis = M.fromList
@@ -905,3 +906,7 @@ emojis = M.fromList
,("zero","0\65039\8419")
,("zzz","\128164")
]
+
+emojiToInline :: String -> Maybe Inline
+emojiToInline emojikey = makeSpan <$> M.lookup emojikey emojis
+ where makeSpan = Span ("", ["emoji"], [("data-emoji", emojikey)]) . (:[]) . Str
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index a742ca666..9c4f7a8ac 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -44,7 +44,7 @@ import Data.Text (Text, unpack)
import Text.Pandoc.Asciify (toAsciiChar)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
-import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Emoji (emojiToInline)
import Text.Pandoc.Options
import Text.Pandoc.Shared (stringify)
import Text.Pandoc.Walk (walkM)
@@ -61,16 +61,19 @@ readCommonMark opts s = return $
[ extTable | isEnabled Ext_pipe_tables opts ] ++
[ extAutolink | isEnabled Ext_autolink_bare_uris opts ]
-convertEmojis :: String -> String
-convertEmojis (':':xs) =
+convertEmojis :: String -> [Inline]
+convertEmojis s@(':':xs) =
case break (==':') xs of
(ys,':':zs) ->
- case Map.lookup ys emojis of
- Just s -> s ++ convertEmojis zs
- Nothing -> ':' : ys ++ convertEmojis (':':zs)
- _ -> ':':xs
-convertEmojis (x:xs) = x : convertEmojis xs
-convertEmojis [] = []
+ case emojiToInline ys of
+ Just em -> em : convertEmojis zs
+ Nothing -> Str (':' : ys) : convertEmojis (':':zs)
+ _ -> [Str s]
+convertEmojis s =
+ case break (==':') s of
+ ("","") -> []
+ (_,"") -> [Str s]
+ (xs,ys) -> Str xs:convertEmojis ys
addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc
addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty
@@ -205,17 +208,17 @@ addInlines :: ReaderOptions -> [Node] -> [Inline]
addInlines opts = foldr (addInline opts) []
addInline :: ReaderOptions -> Node -> [Inline] -> [Inline]
-addInline opts (Node _ (TEXT t) _) = (map toinl clumps ++)
+addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++)
where raw = unpack t
clumps = groupBy samekind raw
samekind ' ' ' ' = True
samekind ' ' _ = False
samekind _ ' ' = False
samekind _ _ = True
- toinl (' ':_) = Space
- toinl xs = Str $ if isEnabled Ext_emoji opts
- then convertEmojis xs
- else xs
+ toinl (' ':_) = [Space]
+ toinl xs = if isEnabled Ext_emoji opts
+ then convertEmojis xs
+ else [Str xs]
addInline _ (Node _ LINEBREAK _) = (LineBreak :)
addInline opts (Node _ SOFTBREAK _)
| isEnabled Ext_hard_line_breaks opts = (LineBreak :)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 68f076e35..e491f6276 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -51,7 +51,7 @@ import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..), report)
import Text.Pandoc.Definition
-import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Emoji (emojiToInline)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
@@ -2027,9 +2027,9 @@ emoji = try $ do
char ':'
emojikey <- many1 (oneOf emojiChars)
char ':'
- case M.lookup emojikey emojis of
- Just s -> return (return (B.str s))
- Nothing -> mzero
+ case emojiToInline emojikey of
+ Just i -> return (return $ B.singleton i)
+ Nothing -> mzero
-- Citations
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index e44c7fc19..bc8e55615 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -199,7 +199,9 @@ tests = [ testGroup "inline code"
]
, testGroup "emoji"
[ test markdownGH "emoji symbols" $
- ":smile: and :+1:" =?> para (text "😄 and 👍")
+ ":smile: and :+1:" =?> para (spanWith ("", ["emoji"], [("data-emoji", "smile")]) "😄" <>
+ space <> str "and" <> space <>
+ spanWith ("", ["emoji"], [("data-emoji", "+1")]) "👍")
]
, "unbalanced brackets" =:
"[[[[[[[[[[[[hi" =?> para (text "[[[[[[[[[[[[hi")
diff --git a/test/command/4743.md b/test/command/4743.md
new file mode 100644
index 000000000..49b4b6d59
--- /dev/null
+++ b/test/command/4743.md
@@ -0,0 +1,25 @@
+Test that emojis are wrapped in Span
+
+```
+% pandoc -f commonmark+emoji -t native
+My:thumbsup:emoji:heart:
+^D
+[Para [Str "My",Span ("",["emoji"],[("data-emoji","thumbsup")]) [Str "\128077"],Str "emoji",Span ("",["emoji"],[("data-emoji","heart")]) [Str "\10084\65039"]]]
+```
+
+```
+% pandoc -f markdown+emoji -t native
+My:thumbsup:emoji:heart:
+^D
+[Para [Str "My",Span ("",["emoji"],[("data-emoji","thumbsup")]) [Str "\128077"],Str "emoji",Span ("",["emoji"],[("data-emoji","heart")]) [Str "\10084\65039"]]]
+```
+
+```
+% pandoc -f commonmark+emoji -t html
+:zero: header
+=============
+My:thumbsup:emoji:heart:x :hearts: xyz
+^D
+<h1><span class="emoji" data-emoji="zero">0️⃣</span> header</h1>
+<p>My<span class="emoji" data-emoji="thumbsup">👍</span>emoji<span class="emoji" data-emoji="heart">❤️</span>x <span class="emoji" data-emoji="hearts">♥️</span> xyz</p>
+```
diff --git a/test/command/gfm.md b/test/command/gfm.md
index 670f3cd6e..7a7098989 100644
--- a/test/command/gfm.md
+++ b/test/command/gfm.md
@@ -38,7 +38,7 @@ gfm tests:
% pandoc -f gfm -t native
My:thumbsup:emoji:heart:
^D
-[Para [Str "My\128077emoji\10084\65039"]]
+[Para [Str "My",Span ("",["emoji"],[("data-emoji","thumbsup")]) [Str "\128077"],Str "emoji",Span ("",["emoji"],[("data-emoji","heart")]) [Str "\10084\65039"]]]
```
```