diff options
author | Anders Waldenborg <anders@0x63.nu> | 2018-07-16 00:14:40 +0200 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-07-15 15:14:40 -0700 |
commit | ec30fb37c12fc5d1a248971831414891cf6dcbe7 (patch) | |
tree | 9b41353a8f3678381a09cdcfdf15ea0190461ca6 | |
parent | ef07db6cefb34b5a6d89fc7d40e0144b6d6440b6 (diff) | |
download | pandoc-ec30fb37c12fc5d1a248971831414891cf6dcbe7.tar.gz |
Wrap emojis in span nodes (#4759)
Text.Pandoc.Emoji now exports `emojiToInline`, which returns a Span inline containing the emoji character and some attributes with metadata (class `emoji`, attribute `data-emoji` with emoji name). Previously, emojis (as supported in Markdown and CommonMark readers, e.g ":smile:")
were simply translated into the corresponding unicode code point. By wrapping them in Span
nodes, we make it possible to do special handling such as giving them a special font
in HTML output. We also open up the possibility of treating them differently when the
`--ascii` option is selected (though that is not part of this commit).
Closes #4743.
-rw-r--r-- | src/Text/Pandoc/Emoji.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/CommonMark.hs | 31 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 8 | ||||
-rw-r--r-- | test/Tests/Readers/Markdown.hs | 4 | ||||
-rw-r--r-- | test/command/4743.md | 25 | ||||
-rw-r--r-- | test/command/gfm.md | 2 |
6 files changed, 56 insertions, 21 deletions
diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs index 5cc965153..7d0af1a72 100644 --- a/src/Text/Pandoc/Emoji.hs +++ b/src/Text/Pandoc/Emoji.hs @@ -28,9 +28,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Emoji symbol lookup from canonical string identifier. -} -module Text.Pandoc.Emoji ( emojis ) where +module Text.Pandoc.Emoji ( emojis, emojiToInline ) where import Prelude import qualified Data.Map as M +import Text.Pandoc.Definition (Inline (Span, Str)) emojis :: M.Map String String emojis = M.fromList @@ -905,3 +906,7 @@ emojis = M.fromList ,("zero","0\65039\8419") ,("zzz","\128164") ] + +emojiToInline :: String -> Maybe Inline +emojiToInline emojikey = makeSpan <$> M.lookup emojikey emojis + where makeSpan = Span ("", ["emoji"], [("data-emoji", emojikey)]) . (:[]) . Str diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index a742ca666..9c4f7a8ac 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -44,7 +44,7 @@ import Data.Text (Text, unpack) import Text.Pandoc.Asciify (toAsciiChar) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition -import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Emoji (emojiToInline) import Text.Pandoc.Options import Text.Pandoc.Shared (stringify) import Text.Pandoc.Walk (walkM) @@ -61,16 +61,19 @@ readCommonMark opts s = return $ [ extTable | isEnabled Ext_pipe_tables opts ] ++ [ extAutolink | isEnabled Ext_autolink_bare_uris opts ] -convertEmojis :: String -> String -convertEmojis (':':xs) = +convertEmojis :: String -> [Inline] +convertEmojis s@(':':xs) = case break (==':') xs of (ys,':':zs) -> - case Map.lookup ys emojis of - Just s -> s ++ convertEmojis zs - Nothing -> ':' : ys ++ convertEmojis (':':zs) - _ -> ':':xs -convertEmojis (x:xs) = x : convertEmojis xs -convertEmojis [] = [] + case emojiToInline ys of + Just em -> em : convertEmojis zs + Nothing -> Str (':' : ys) : convertEmojis (':':zs) + _ -> [Str s] +convertEmojis s = + case break (==':') s of + ("","") -> [] + (_,"") -> [Str s] + (xs,ys) -> Str xs:convertEmojis ys addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty @@ -205,17 +208,17 @@ addInlines :: ReaderOptions -> [Node] -> [Inline] addInlines opts = foldr (addInline opts) [] addInline :: ReaderOptions -> Node -> [Inline] -> [Inline] -addInline opts (Node _ (TEXT t) _) = (map toinl clumps ++) +addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++) where raw = unpack t clumps = groupBy samekind raw samekind ' ' ' ' = True samekind ' ' _ = False samekind _ ' ' = False samekind _ _ = True - toinl (' ':_) = Space - toinl xs = Str $ if isEnabled Ext_emoji opts - then convertEmojis xs - else xs + toinl (' ':_) = [Space] + toinl xs = if isEnabled Ext_emoji opts + then convertEmojis xs + else [Str xs] addInline _ (Node _ LINEBREAK _) = (LineBreak :) addInline opts (Node _ SOFTBREAK _) | isEnabled Ext_hard_line_breaks opts = (LineBreak :) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 68f076e35..e491f6276 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -51,7 +51,7 @@ import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..), report) import Text.Pandoc.Definition -import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Emoji (emojiToInline) import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options @@ -2027,9 +2027,9 @@ emoji = try $ do char ':' emojikey <- many1 (oneOf emojiChars) char ':' - case M.lookup emojikey emojis of - Just s -> return (return (B.str s)) - Nothing -> mzero + case emojiToInline emojikey of + Just i -> return (return $ B.singleton i) + Nothing -> mzero -- Citations diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index e44c7fc19..bc8e55615 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -199,7 +199,9 @@ tests = [ testGroup "inline code" ] , testGroup "emoji" [ test markdownGH "emoji symbols" $ - ":smile: and :+1:" =?> para (text "😄 and 👍") + ":smile: and :+1:" =?> para (spanWith ("", ["emoji"], [("data-emoji", "smile")]) "😄" <> + space <> str "and" <> space <> + spanWith ("", ["emoji"], [("data-emoji", "+1")]) "👍") ] , "unbalanced brackets" =: "[[[[[[[[[[[[hi" =?> para (text "[[[[[[[[[[[[hi") diff --git a/test/command/4743.md b/test/command/4743.md new file mode 100644 index 000000000..49b4b6d59 --- /dev/null +++ b/test/command/4743.md @@ -0,0 +1,25 @@ +Test that emojis are wrapped in Span + +``` +% pandoc -f commonmark+emoji -t native +My:thumbsup:emoji:heart: +^D +[Para [Str "My",Span ("",["emoji"],[("data-emoji","thumbsup")]) [Str "\128077"],Str "emoji",Span ("",["emoji"],[("data-emoji","heart")]) [Str "\10084\65039"]]] +``` + +``` +% pandoc -f markdown+emoji -t native +My:thumbsup:emoji:heart: +^D +[Para [Str "My",Span ("",["emoji"],[("data-emoji","thumbsup")]) [Str "\128077"],Str "emoji",Span ("",["emoji"],[("data-emoji","heart")]) [Str "\10084\65039"]]] +``` + +``` +% pandoc -f commonmark+emoji -t html +:zero: header +============= +My:thumbsup:emoji:heart:x :hearts: xyz +^D +<h1><span class="emoji" data-emoji="zero">0️⃣</span> header</h1> +<p>My<span class="emoji" data-emoji="thumbsup">👍</span>emoji<span class="emoji" data-emoji="heart">❤️</span>x <span class="emoji" data-emoji="hearts">♥️</span> xyz</p> +``` diff --git a/test/command/gfm.md b/test/command/gfm.md index 670f3cd6e..7a7098989 100644 --- a/test/command/gfm.md +++ b/test/command/gfm.md @@ -38,7 +38,7 @@ gfm tests: % pandoc -f gfm -t native My:thumbsup:emoji:heart: ^D -[Para [Str "My\128077emoji\10084\65039"]] +[Para [Str "My",Span ("",["emoji"],[("data-emoji","thumbsup")]) [Str "\128077"],Str "emoji",Span ("",["emoji"],[("data-emoji","heart")]) [Str "\10084\65039"]]] ``` ``` |