diff options
-rw-r--r-- | README | 6 | ||||
-rw-r--r-- | changelog | 9 | ||||
-rw-r--r-- | pandoc.cabal | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Emoji.hs | 905 | ||||
-rw-r--r-- | src/Text/Pandoc/Options.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 17 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 13 | ||||
-rw-r--r-- | tests/Tests/Readers/Markdown.hs | 4 | ||||
-rw-r--r-- | tests/Tests/Readers/Org.hs | 9 |
10 files changed, 963 insertions, 6 deletions
@@ -3146,6 +3146,10 @@ treated as spaces or as hard line breaks. This option is intended for use with East Asian languages where spaces are not used between words, but text is divided into lines for readability. +#### Extension: `emoji` #### + +Parses textual emojis like `:smile:` as Unicode emoticons. + #### Extension: `tex_math_single_backslash` #### Causes anything between `\(` and `\)` to be interpreted as inline @@ -3256,7 +3260,7 @@ variants are supported: : `pipe_tables`, `raw_html`, `tex_math_single_backslash`, `fenced_code_blocks`, `auto_identifiers`, `ascii_identifiers`, `backtick_code_blocks`, `autolink_bare_uris`, - `intraword_underscores`, `strikeout`, `hard_line_breaks`, + `intraword_underscores`, `strikeout`, `hard_line_breaks`, `emoji`, `shortcut_reference_links`. `markdown_mmd` (MultiMarkdown) @@ -5,6 +5,10 @@ pandoc (1.15.2) * Fixed omitted `url(...)` in CSS data-uri with `--self-contained` (#2489). + * Added `emoji` Markdown extension, enabled by default in `markdown_github` + (#2523). Added `Ext_emoji` to `Extension` in `Text.Pandoc.Options` + (API change). + * `Text.Pandoc.Readers.HTML.parseTags`: Fixed over-eager raw HTML inline parsing (#2469). Tightened up the inline HTML parser so it disallows TagWarnings. @@ -27,6 +31,9 @@ pandoc (1.15.2) can start need to be marked explicitly by changing the parser state. This wasn't done for headers. The proper function to update the state is now called at the beginning of the header parser, fixing this issue. + + Fix emphasis rules for smart parsing (Albert Krewinkel, #2513). + Smart quotes, ellipses, and dashes should behave like normal quotes, + single dashes, and dots with respect to text markup parsing. * Markdown reader: @@ -41,6 +48,8 @@ pandoc (1.15.2) permit this. + Improved parser for `mmd_title_block`. We now allow blank metadata fields. These were explicitly disallowed before. + + Citation keys can now contain `://`, so URLs and DOIs can be used + as citation keys (jgm/pandoc-citeproc#166). 
* Beamer template: fix incompatibility of section slides with natbib. Natbib (and presumably biblatex) bibliography commands create diff --git a/pandoc.cabal b/pandoc.cabal index 7c2666edf..40f994ef1 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -391,6 +391,7 @@ Library Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, Text.Pandoc.MIME, + Text.Pandoc.Emoji, Text.Pandoc.Parsing, Text.Pandoc.UUID, Text.Pandoc.ImageSize, diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs new file mode 100644 index 000000000..2ae7962cb --- /dev/null +++ b/src/Text/Pandoc/Emoji.hs @@ -0,0 +1,905 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Emoji + Copyright : Copyright (C) 2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Emoji symbol lookup from canonical string identifier. 
+-} +module Text.Pandoc.Emoji ( emojis ) where +import qualified Data.Map as M + +emojis :: M.Map String String +emojis = M.fromList + [ ("100", "💯") + , ("1234", "🔢") + , ("smile", "😄") + , ("smiley", "😃") + , ("grinning", "😀") + , ("blush", "😊") + , ("relaxed", "☺️") + , ("wink", "😉") + , ("heart_eyes", "😍") + , ("kissing_heart", "😘") + , ("kissing_closed_eyes", "😚") + , ("kissing", "😗") + , ("kissing_smiling_eyes", "😙") + , ("stuck_out_tongue_winking_eye", "😜") + , ("stuck_out_tongue_closed_eyes", "😝") + , ("stuck_out_tongue", "😛") + , ("flushed", "😳") + , ("grin", "😁") + , ("pensive", "😔") + , ("relieved", "😌") + , ("unamused", "😒") + , ("disappointed", "😞") + , ("persevere", "😣") + , ("cry", "😢") + , ("joy", "😂") + , ("sob", "😭") + , ("sleepy", "😪") + , ("disappointed_relieved", "😥") + , ("cold_sweat", "😰") + , ("sweat_smile", "😅")
+ , ("sweat", "😓") + , ("weary", "😩") + , ("tired_face", "😫") + , ("fearful", "😨") + , ("scream", "😱") + , ("angry", "😠") + , ("rage", "😡") + , ("triumph", "😤") + , ("confounded", "😖") + , ("laughing", "😆") + , ("satisfied", "😆") + , ("yum", "😋") + , ("mask", "😷") + , ("sunglasses", "😎") + , ("sleeping", "😴") + , ("dizzy_face", "😵") + , ("astonished", "😲") + , ("worried", "😟") + , ("frowning", "😦") + , ("anguished", "😧") + , ("smiling_imp", "😈") + , ("imp", "👿") + , ("open_mouth", "😮") + , ("grimacing", "😬") + , ("neutral_face", "😐") + , ("confused", "😕") + , ("hushed", "😯") + , ("no_mouth", "😶") + , ("innocent", "😇") + , ("smirk", "😏") + , ("expressionless", "😑") + , ("man_with_gua_pi_mao", "👲") + , ("man_with_turban", "👳") + , ("cop", "👮") + , ("construction_worker", "👷") + , ("guardsman", "💂") + , ("baby", "👶") + , ("boy", "👦") + , ("girl", "👧") + , ("man", "👨") + , ("woman", "👩") + , ("older_man", "👴") + , ("older_woman", "👵") + , ("person_with_blond_hair", "👱") + , ("angel", "👼") + , ("princess", "👸") + , ("smiley_cat", "😺") + , ("smile_cat", "😸") + , ("heart_eyes_cat", "😻") + , ("kissing_cat", "😽") + , ("smirk_cat", "😼") + , ("scream_cat", "🙀") + , ("crying_cat_face", "😿") + , ("joy_cat", "😹") + , ("pouting_cat", "😾") + , ("japanese_ogre", "👹") + , ("japanese_goblin", "👺") + , ("see_no_evil", "🙈") + , ("hear_no_evil", "🙉") + , ("speak_no_evil", "🙊") + , ("skull", "💀") + , ("alien", "👽") + , ("hankey", "💩") + , ("poop", "💩") + , ("shit", "💩") + , ("fire", "🔥") + , ("sparkles", "✨") + , ("star2", "🌟") + , ("dizzy", "💫") + , ("boom", "💥") + , ("collision", "💥") + , ("anger", "💢") + , ("sweat_drops", "💦") + , ("droplet", "💧") + , ("zzz", "💤") + , ("dash", "💨") + , ("ear", "👂") + , ("eyes", "👀") + , ("nose", "👃") + , ("tongue", "👅")
+ , ("lips", "👄") + , ("+1", "👍") + , ("thumbsup", "👍") + , ("-1", "👎") + , ("thumbsdown", "👎") + , ("ok_hand", "👌") + , ("facepunch", "👊") + , ("punch", "👊") + , ("fist", "✊") + , ("v", "✌️") + , ("wave", "👋") + , ("hand", "✋") + , ("raised_hand", "✋") + , ("open_hands", "👐") + , ("point_up_2", "👆") + , ("point_down", "👇") + , ("point_right", "👉") + , ("point_left", "👈") + , ("raised_hands", "🙌") + , ("pray", "🙏") + , ("point_up", "☝️") + , ("clap", "👏") + , ("muscle", "💪") + , ("walking", "🚶") + , ("runner", "🏃") + , ("running", "🏃") + , ("dancer", "💃") + , ("couple", "👫") + , ("family", "👪") + , ("two_men_holding_hands", "👬") + , ("two_women_holding_hands", "👭") + , ("couplekiss", "💏") + , ("couple_with_heart", "💑") + , ("dancers", "👯") + , ("ok_woman", "🙆") + , ("no_good", "🙅")
+ , ("information_desk_person", "💁") + , ("raising_hand", "🙋") + , ("massage", "💆") + , ("haircut", "💇") + , ("nail_care", "💅")
+ , ("bride_with_veil", "👰") + , ("person_with_pouting_face", "🙎") + , ("person_frowning", "🙍") + , ("bow", "🙇") + , ("tophat", "🎩") + , ("crown", "👑") + , ("womans_hat", "👒") + , ("athletic_shoe", "👟") + , ("mans_shoe", "👞") + , ("shoe", "👞") + , ("sandal", "👡") + , ("high_heel", "👠") + , ("boot", "👢") + , ("shirt", "👕") + , ("tshirt", "👕") + , ("necktie", "👔") + , ("womans_clothes", "👚") + , ("dress", "👗") + , ("running_shirt_with_sash", "🎽") + , ("jeans", "👖") + , ("kimono", "👘") + , ("bikini", "👙") + , ("briefcase", "💼") + , ("handbag", "👜") + , ("pouch", "👝") + , ("purse", "👛") + , ("eyeglasses", "👓") + , ("ribbon", "🎀") + , ("closed_umbrella", "🌂") + , ("lipstick", "💄") + , ("yellow_heart", "💛") + , ("blue_heart", "💙") + , ("purple_heart", "💜") + , ("green_heart", "💚") + , ("heart", "❤️") + , ("broken_heart", "💔") + , ("heartpulse", "💗") + , ("heartbeat", "💓") + , ("two_hearts", "💕") + , ("sparkling_heart", "💖") + , ("revolving_hearts", "💞") + , ("cupid", "💘") + , ("love_letter", "💌") + , ("kiss", "💋") + , ("ring", "💍") + , ("gem", "💎") + , ("bust_in_silhouette", "👤") + , ("busts_in_silhouette", "👥") + , ("speech_balloon", "💬") + , ("footprints", "👣") + , ("thought_balloon", "💭") + , ("dog", "🐶") + , ("wolf", "🐺") + , ("cat", "🐱") + , ("mouse", "🐭") + , ("hamster", "🐹") + , ("rabbit", "🐰") + , ("frog", "🐸") + , ("tiger", "🐯") + , ("koala", "🐨") + , ("bear", "🐻") + , ("pig", "🐷") + , ("pig_nose", "🐽") + , ("cow", "🐮") + , ("boar", "🐗") + , ("monkey_face", "🐵") + , ("monkey", "🐒") + , ("horse", "🐴") + , ("sheep", "🐑") + , ("elephant", "🐘") + , ("panda_face", "🐼") + , ("penguin", "🐧") + , ("bird", "🐦") + , ("baby_chick", "🐤") + , ("hatched_chick", "🐥") + , ("hatching_chick", "🐣") + , ("chicken", "🐔") + , ("snake", "🐍") + , ("turtle", "🐢") + , ("bug", "🐛") + , ("bee", "🐝") + , ("honeybee", "🐝") + , ("ant", "🐜") + , ("beetle", "🐞") + , ("snail", "🐌") + , ("octopus", "🐙") + , ("shell", "🐚") + , ("tropical_fish", "🐠") + , 
("fish", "🐟") + , ("dolphin", "🐬") + , ("flipper", "🐬") + , ("whale", "🐳") + , ("whale2", "🐋") + , ("cow2", "🐄") + , ("ram", "🐏") + , ("rat", "🐀") + , ("water_buffalo", "🐃") + , ("tiger2", "🐅")
+ , ("rabbit2", "🐇") + , ("dragon", "🐉") + , ("racehorse", "🐎") + , ("goat", "🐐") + , ("rooster", "🐓") + , ("dog2", "🐕") + , ("pig2", "🐖") + , ("mouse2", "🐁") + , ("ox", "🐂") + , ("dragon_face", "🐲") + , ("blowfish", "🐡") + , ("crocodile", "🐊") + , ("camel", "🐫") + , ("dromedary_camel", "🐪") + , ("leopard", "🐆") + , ("cat2", "🐈") + , ("poodle", "🐩") + , ("feet", "🐾") + , ("paw_prints", "🐾") + , ("bouquet", "💐") + , ("cherry_blossom", "🌸") + , ("tulip", "🌷") + , ("four_leaf_clover", "🍀") + , ("rose", "🌹") + , ("sunflower", "🌻") + , ("hibiscus", "🌺") + , ("maple_leaf", "🍁") + , ("leaves", "🍃") + , ("fallen_leaf", "🍂") + , ("herb", "🌿") + , ("ear_of_rice", "🌾") + , ("mushroom", "🍄") + , ("cactus", "🌵") + , ("palm_tree", "🌴") + , ("evergreen_tree", "🌲") + , ("deciduous_tree", "🌳") + , ("chestnut", "🌰") + , ("seedling", "🌱") + , ("blossom", "🌼") + , ("globe_with_meridians", "🌐") + , ("sun_with_face", "🌞") + , ("full_moon_with_face", "🌝") + , ("new_moon_with_face", "🌚") + , ("new_moon", "🌑") + , ("waxing_crescent_moon", "🌒") + , ("first_quarter_moon", "🌓") + , ("moon", "🌔") + , ("waxing_gibbous_moon", "🌔") + , ("full_moon", "🌕") + , ("waning_gibbous_moon", "🌖") + , ("last_quarter_moon", "🌗") + , ("waning_crescent_moon", "🌘") + , ("last_quarter_moon_with_face", "🌜") + , ("first_quarter_moon_with_face", "🌛") + , ("crescent_moon", "🌙") + , ("earth_africa", "🌍") + , ("earth_americas", "🌎") + , ("earth_asia", "🌏") + , ("volcano", "🌋") + , ("milky_way", "🌌") + , ("stars", "🌠") + , ("star", "⭐") + , ("sunny", "☀️") + , ("partly_sunny", "⛅")
+ , ("cloud", "☁️") + , ("zap", "⚡") + , ("umbrella", "☔") + , ("snowflake", "❄️") + , ("snowman", "⛄") + , ("cyclone", "🌀") + , ("foggy", "🌁") + , ("rainbow", "🌈") + , ("ocean", "🌊") + , ("bamboo", "🎍") + , ("gift_heart", "💝") + , ("dolls", "🎎") + , ("school_satchel", "🎒") + , ("mortar_board", "🎓") + , ("flags", "🎏") + , ("fireworks", "🎆") + , ("sparkler", "🎇") + , ("wind_chime", "🎐") + , ("rice_scene", "🎑") + , ("jack_o_lantern", "🎃") + , ("ghost", "👻") + , ("santa", "🎅")
+ , ("christmas_tree", "🎄") + , ("gift", "🎁") + , ("tanabata_tree", "🎋") + , ("tada", "🎉") + , ("confetti_ball", "🎊") + , ("balloon", "🎈") + , ("crossed_flags", "🎌") + , ("crystal_ball", "🔮") + , ("movie_camera", "🎥") + , ("camera", "📷") + , ("video_camera", "📹") + , ("vhs", "📼") + , ("cd", "💿") + , ("dvd", "📀") + , ("minidisc", "💽") + , ("floppy_disk", "💾") + , ("computer", "💻") + , ("iphone", "📱") + , ("phone", "☎️") + , ("telephone", "☎️") + , ("telephone_receiver", "📞") + , ("pager", "📟") + , ("fax", "📠") + , ("satellite", "📡") + , ("tv", "📺") + , ("radio", "📻") + , ("loud_sound", "🔊") + , ("sound", "🔉") + , ("speaker", "🔈") + , ("mute", "🔇") + , ("bell", "🔔") + , ("no_bell", "🔕") + , ("loudspeaker", "📢") + , ("mega", "📣") + , ("hourglass_flowing_sand", "⏳") + , ("hourglass", "⌛") + , ("alarm_clock", "⏰") + , ("watch", "⌚") + , ("unlock", "🔓") + , ("lock", "🔒") + , ("lock_with_ink_pen", "🔏") + , ("closed_lock_with_key", "🔐") + , ("key", "🔑") + , ("mag_right", "🔎") + , ("bulb", "💡") + , ("flashlight", "🔦") + , ("high_brightness", "🔆") + , ("low_brightness", "🔅")
+ , ("electric_plug", "🔌") + , ("battery", "🔋") + , ("mag", "🔍") + , ("bathtub", "🛁") + , ("bath", "🛀") + , ("shower", "🚿") + , ("toilet", "🚽") + , ("wrench", "🔧") + , ("nut_and_bolt", "🔩") + , ("hammer", "🔨") + , ("door", "🚪") + , ("smoking", "🚬") + , ("bomb", "💣") + , ("gun", "🔫") + , ("hocho", "🔪") + , ("knife", "🔪") + , ("pill", "💊") + , ("syringe", "💉") + , ("moneybag", "💰") + , ("yen", "💴") + , ("dollar", "💵") + , ("pound", "💷") + , ("euro", "💶") + , ("credit_card", "💳") + , ("money_with_wings", "💸") + , ("calling", "📲") + , ("e-mail", "📧") + , ("inbox_tray", "📥") + , ("outbox_tray", "📤") + , ("email", "✉️") + , ("envelope", "✉️") + , ("envelope_with_arrow", "📩") + , ("incoming_envelope", "📨") + , ("postal_horn", "📯") + , ("mailbox", "📫") + , ("mailbox_closed", "📪") + , ("mailbox_with_mail", "📬") + , ("mailbox_with_no_mail", "📭") + , ("postbox", "📮") + , ("package", "📦") + , ("memo", "📝") + , ("pencil", "📝") + , ("page_facing_up", "📄") + , ("page_with_curl", "📃") + , ("bookmark_tabs", "📑") + , ("bar_chart", "📊") + , ("chart_with_upwards_trend", "📈") + , ("chart_with_downwards_trend", "📉") + , ("scroll", "📜") + , ("clipboard", "📋") + , ("date", "📅")
+ , ("calendar", "📆") + , ("card_index", "📇") + , ("file_folder", "📁") + , ("open_file_folder", "📂") + , ("scissors", "✂️") + , ("pushpin", "📌") + , ("paperclip", "📎") + , ("black_nib", "✒️") + , ("pencil2", "✏️") + , ("straight_ruler", "📏") + , ("triangular_ruler", "📐") + , ("closed_book", "📕") + , ("green_book", "📗") + , ("blue_book", "📘") + , ("orange_book", "📙") + , ("notebook", "📓") + , ("notebook_with_decorative_cover", "📔") + , ("ledger", "📒") + , ("books", "📚") + , ("book", "📖") + , ("open_book", "📖") + , ("bookmark", "🔖") + , ("name_badge", "📛") + , ("microscope", "🔬") + , ("telescope", "🔭") + , ("newspaper", "📰") + , ("art", "🎨") + , ("clapper", "🎬") + , ("microphone", "🎤") + , ("headphones", "🎧") + , ("musical_score", "🎼") + , ("musical_note", "🎵") + , ("notes", "🎶") + , ("musical_keyboard", "🎹") + , ("violin", "🎻") + , ("trumpet", "🎺") + , ("saxophone", "🎷") + , ("guitar", "🎸") + , ("space_invader", "👾") + , ("video_game", "🎮") + , ("black_joker", "🃏") + , ("flower_playing_cards", "🎴") + , ("mahjong", "🀄") + , ("game_die", "🎲") + , ("dart", "🎯") + , ("football", "🏈") + , ("basketball", "🏀") + , ("soccer", "⚽") + , ("baseball", "⚾️") + , ("tennis", "🎾") + , ("8ball", "🎱") + , ("rugby_football", "🏉") + , ("bowling", "🎳") + , ("golf", "⛳") + , ("mountain_bicyclist", "🚵") + , ("bicyclist", "🚴") + , ("checkered_flag", "🏁") + , ("horse_racing", "🏇") + , ("trophy", "🏆") + , ("ski", "🎿") + , ("snowboarder", "🏂") + , ("swimmer", "🏊") + , ("surfer", "🏄") + , ("fishing_pole_and_fish", "🎣") + , ("coffee", "☕") + , ("tea", "🍵") + , ("sake", "🍶") + , ("baby_bottle", "🍼") + , ("beer", "🍺") + , ("beers", "🍻") + , ("cocktail", "🍸") + , ("tropical_drink", "🍹") + , ("wine_glass", "🍷") + , ("fork_and_knife", "🍴") + , ("pizza", "🍕") + , ("hamburger", "🍔") + , ("fries", "🍟") + , ("poultry_leg", "🍗") + , ("meat_on_bone", "🍖") + , ("spaghetti", "🍝") + , ("curry", "🍛") + , ("fried_shrimp", "🍤") + , ("bento", "🍱") + , ("sushi", 
"🍣") + , ("fish_cake", "🍥") + , ("rice_ball", "🍙") + , ("rice_cracker", "🍘") + , ("rice", "🍚") + , ("ramen", "🍜") + , ("stew", "🍲") + , ("oden", "🍢") + , ("dango", "🍡") + , ("egg", "🍳") + , ("bread", "🍞") + , ("doughnut", "🍩") + , ("custard", "🍮") + , ("icecream", "🍦") + , ("ice_cream", "🍨") + , ("shaved_ice", "🍧") + , ("birthday", "🎂") + , ("cake", "🍰") + , ("cookie", "🍪") + , ("chocolate_bar", "🍫") + , ("candy", "🍬") + , ("lollipop", "🍭") + , ("honey_pot", "🍯") + , ("apple", "🍎") + , ("green_apple", "🍏") + , ("tangerine", "🍊") + , ("lemon", "🍋") + , ("cherries", "🍒") + , ("grapes", "🍇") + , ("watermelon", "🍉") + , ("strawberry", "🍓") + , ("peach", "🍑") + , ("melon", "🍈") + , ("banana", "🍌") + , ("pear", "🍐") + , ("pineapple", "🍍") + , ("sweet_potato", "🍠") + , ("eggplant", "🍆") + , ("tomato", "🍅")
+ , ("corn", "🌽") + , ("house", "🏠") + , ("house_with_garden", "🏡") + , ("school", "🏫") + , ("office", "🏢") + , ("post_office", "🏣") + , ("hospital", "🏥") + , ("bank", "🏦") + , ("convenience_store", "🏪") + , ("love_hotel", "🏩") + , ("hotel", "🏨") + , ("wedding", "💒") + , ("church", "⛪") + , ("department_store", "🏬") + , ("european_post_office", "🏤") + , ("city_sunrise", "🌇") + , ("city_sunset", "🌆") + , ("japanese_castle", "🏯") + , ("european_castle", "🏰") + , ("tent", "⛺") + , ("factory", "🏭") + , ("tokyo_tower", "🗼") + , ("japan", "🗾") + , ("mount_fuji", "🗻") + , ("sunrise_over_mountains", "🌄") + , ("sunrise", "🌅")
+ , ("night_with_stars", "🌃") + , ("statue_of_liberty", "🗽") + , ("bridge_at_night", "🌉") + , ("carousel_horse", "🎠") + , ("ferris_wheel", "🎡") + , ("fountain", "⛲") + , ("roller_coaster", "🎢") + , ("ship", "🚢") + , ("boat", "⛵") + , ("sailboat", "⛵") + , ("speedboat", "🚤") + , ("rowboat", "🚣") + , ("anchor", "⚓") + , ("rocket", "🚀") + , ("airplane", "✈️") + , ("seat", "💺") + , ("helicopter", "🚁") + , ("steam_locomotive", "🚂") + , ("tram", "🚊") + , ("station", "🚉") + , ("mountain_railway", "🚞") + , ("train2", "🚆") + , ("bullettrain_side", "🚄") + , ("bullettrain_front", "🚅")
+ , ("light_rail", "🚈") + , ("metro", "🚇") + , ("monorail", "🚝") + , ("train", "🚋") + , ("railway_car", "🚃") + , ("trolleybus", "🚎") + , ("bus", "🚌") + , ("oncoming_bus", "🚍") + , ("blue_car", "🚙") + , ("oncoming_automobile", "🚘") + , ("car", "🚗") + , ("red_car", "🚗") + , ("taxi", "🚕") + , ("oncoming_taxi", "🚖") + , ("articulated_lorry", "🚛") + , ("truck", "🚚") + , ("rotating_light", "🚨") + , ("police_car", "🚓") + , ("oncoming_police_car", "🚔") + , ("fire_engine", "🚒") + , ("ambulance", "🚑") + , ("minibus", "🚐") + , ("bike", "🚲") + , ("aerial_tramway", "🚡") + , ("suspension_railway", "🚟") + , ("mountain_cableway", "🚠") + , ("tractor", "🚜") + , ("barber", "💈") + , ("busstop", "🚏") + , ("ticket", "🎫") + , ("vertical_traffic_light", "🚦") + , ("traffic_light", "🚥") + , ("warning", "⚠️") + , ("construction", "🚧") + , ("beginner", "🔰") + , ("fuelpump", "⛽") + , ("izakaya_lantern", "🏮") + , ("lantern", "🏮") + , ("slot_machine", "🎰") + , ("hotsprings", "♨️") + , ("moyai", "🗿") + , ("circus_tent", "🎪") + , ("performing_arts", "🎭") + , ("round_pushpin", "📍") + , ("triangular_flag_on_post", "🚩") + , ("jp", "🇯🇵") + , ("kr", "🇰🇷") + , ("de", "🇩🇪") + , ("cn", "🇨🇳") + , ("us", "🇺🇸") + , ("fr", "🇫🇷") + , ("es", "🇪🇸") + , ("it", "🇮🇹") + , ("ru", "🇷🇺") + , ("gb", "🇬🇧") + , ("uk", "🇬🇧") + , ("one", "1️⃣") + , ("two", "2️⃣") + , ("three", "3️⃣") + , ("four", "4️⃣") + , ("five", "5️⃣") + , ("six", "6️⃣") + , ("seven", "7️⃣") + , ("eight", "8️⃣") + , ("nine", "9️⃣") + , ("zero", "0️⃣") + , ("keycap_ten", "🔟") + , ("hash", "#️⃣") + , ("symbols", "🔣") + , ("arrow_up", "⬆️") + , ("arrow_down", "⬇️") + , ("arrow_left", "⬅️")
+ , ("arrow_right", "➡️") + , ("capital_abcd", "🔠") + , ("abcd", "🔡") + , ("abc", "🔤") + , ("arrow_upper_right", "↗️") + , ("arrow_upper_left", "↖️") + , ("arrow_lower_right", "↘️") + , ("arrow_lower_left", "↙️") + , ("left_right_arrow", "↔️") + , ("arrow_up_down", "↕️") + , ("arrows_counterclockwise", "🔄") + , ("arrow_backward", "◀️") + , ("arrow_forward", "▶️") + , ("arrow_up_small", "🔼") + , ("arrow_down_small", "🔽") + , ("leftwards_arrow_with_hook", "↩️") + , ("arrow_right_hook", "↪️") + , ("information_source", "ℹ️") + , ("rewind", "⏪") + , ("fast_forward", "⏩") + , ("arrow_double_up", "⏫") + , ("arrow_double_down", "⏬") + , ("arrow_heading_down", "⤵️") + , ("arrow_heading_up", "⤴️") + , ("ok", "🆗") + , ("twisted_rightwards_arrows", "🔀") + , ("repeat", "🔁") + , ("repeat_one", "🔂") + , ("new", "🆕") + , ("up", "🆙") + , ("cool", "🆒") + , ("free", "🆓") + , ("ng", "🆖") + , ("signal_strength", "📶") + , ("cinema", "🎦") + , ("koko", "🈁") + , ("u6307", "🈯") + , ("u7a7a", "🈳") + , ("u6e80", "🈵") + , ("u5408", "🈴") + , ("u7981", "🈲") + , ("ideograph_advantage", "🉐") + , ("u5272", "🈹") + , ("u55b6", "🈺") + , ("u6709", "🈶") + , ("u7121", "🈚") + , ("restroom", "🚻") + , ("mens", "🚹") + , ("womens", "🚺") + , ("baby_symbol", "🚼") + , ("wc", "🚾") + , ("potable_water", "🚰") + , ("put_litter_in_its_place", "🚮") + , ("parking", "🅿️")
+ , ("wheelchair", "♿") + , ("no_smoking", "🚭") + , ("u6708", "🈷️") + , ("u7533", "🈸") + , ("sa", "🈂️") + , ("m", "Ⓜ️") + , ("passport_control", "🛂") + , ("baggage_claim", "🛄") + , ("left_luggage", "🛅")
+ , ("customs", "🛃") + , ("accept", "🉑") + , ("secret", "㊙️") + , ("congratulations", "㊗️") + , ("cl", "🆑") + , ("sos", "🆘") + , ("id", "🆔") + , ("no_entry_sign", "🚫") + , ("underage", "🔞") + , ("no_mobile_phones", "📵") + , ("do_not_litter", "🚯") + , ("non-potable_water", "🚱") + , ("no_bicycles", "🚳") + , ("no_pedestrians", "🚷") + , ("children_crossing", "🚸") + , ("no_entry", "⛔") + , ("eight_spoked_asterisk", "✳️") + , ("sparkle", "❇️") + , ("negative_squared_cross_mark", "❎") + , ("white_check_mark", "✅")
+ , ("eight_pointed_black_star", "✴️") + , ("heart_decoration", "💟") + , ("vs", "🆚") + , ("vibration_mode", "📳") + , ("mobile_phone_off", "📴") + , ("a", "🅰️")
+ , ("b", "🅱️")
+ , ("ab", "🆎") + , ("o2", "🅾️")
+ , ("diamond_shape_with_a_dot_inside", "💠") + , ("loop", "➿") + , ("recycle", "♻️") + , ("aries", "♈") + , ("taurus", "♉") + , ("gemini", "♊") + , ("cancer", "♋") + , ("leo", "♌") + , ("virgo", "♍") + , ("libra", "♎") + , ("scorpius", "♏") + , ("sagittarius", "♐") + , ("capricorn", "♑") + , ("aquarius", "♒") + , ("pisces", "♓") + , ("ophiuchus", "⛎") + , ("six_pointed_star", "🔯") + , ("atm", "🏧") + , ("chart", "💹") + , ("heavy_dollar_sign", "💲") + , ("currency_exchange", "💱") + , ("copyright", "©️") + , ("registered", "®️") + , ("tm", "™️") + , ("x", "❌") + , ("bangbang", "‼️") + , ("interrobang", "⁉️") + , ("exclamation", "❗") + , ("heavy_exclamation_mark", "❗") + , ("question", "❓") + , ("grey_exclamation", "❕") + , ("grey_question", "❔") + , ("o", "⭕") + , ("top", "🔝") + , ("end", "🔚") + , ("back", "🔙") + , ("on", "🔛") + , ("soon", "🔜") + , ("arrows_clockwise", "🔃") + , ("clock12", "🕛") + , ("clock1230", "🕧") + , ("clock1", "🕐") + , ("clock130", "🕜") + , ("clock2", "🕑") + , ("clock230", "🕝") + , ("clock3", "🕒") + , ("clock330", "🕞") + , ("clock4", "🕓") + , ("clock430", "🕟") + , ("clock5", "🕔") + , ("clock530", "🕠") + , ("clock6", "🕕") + , ("clock7", "🕖") + , ("clock8", "🕗") + , ("clock9", "🕘") + , ("clock10", "🕙") + , ("clock11", "🕚") + , ("clock630", "🕡") + , ("clock730", "🕢") + , ("clock830", "🕣") + , ("clock930", "🕤") + , ("clock1030", "🕥") + , ("clock1130", "🕦") + , ("heavy_multiplication_x", "✖️") + , ("heavy_plus_sign", "➕") + , ("heavy_minus_sign", "➖") + , ("heavy_division_sign", "➗") + , ("spades", "♠️") + , ("hearts", "♥️") + , ("clubs", "♣️") + , ("diamonds", "♦️") + , ("white_flower", "💮") + , ("heavy_check_mark", "✔️") + , ("ballot_box_with_check", "☑️") + , ("radio_button", "🔘") + , ("link", "🔗") + , ("curly_loop", "➰") + , ("wavy_dash", "〰️") + , ("part_alternation_mark", "〽️") + , ("trident", "🔱") + , ("black_medium_square", "◼️") + , ("white_medium_square", "◻️") + , 
("black_medium_small_square", "◾") + , ("white_medium_small_square", "◽") + , ("black_small_square", "▪️") + , ("white_small_square", "▫️") + , ("small_red_triangle", "🔺") + , ("black_square_button", "🔲") + , ("white_square_button", "🔳") + , ("black_circle", "⚫") + , ("white_circle", "⚪") + , ("red_circle", "🔴") + , ("large_blue_circle", "🔵") + , ("small_red_triangle_down", "🔻") + , ("white_large_square", "⬜") + , ("black_large_square", "⬛") + , ("large_orange_diamond", "🔶") + , ("large_blue_diamond", "🔷") + , ("small_orange_diamond", "🔸") + , ("small_blue_diamond", "🔹") + ] diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 20c842e0d..b7d268a65 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -105,6 +105,7 @@ data Extension = | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored | Ext_literate_haskell -- ^ Enable literate Haskell conventions | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions + | Ext_emoji -- ^ Support emoji like :smile: | Ext_auto_identifiers -- ^ Automatic identifiers for headers | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} @@ -203,6 +204,7 @@ githubMarkdownExtensions = Set.fromList , Ext_intraword_underscores , Ext_strikeout , Ext_hard_line_breaks + , Ext_emoji , Ext_lists_without_preceding_blankline , Ext_shortcut_reference_links ] diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index c64860ad9..02d114e0f 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1211,7 +1211,8 @@ citeKey = try $ do firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite let regchar = satisfy (\c -> isAlphaNum c || c == '_') let internal p = try $ p <* lookAhead regchar - rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") + rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|> + (oneOf ":/" <* 
lookAhead (char '/')) let key = firstChar:rest return (suppress_author, key) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d73b92fbd..7e811a966 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -39,6 +39,7 @@ import Data.Ord ( comparing ) import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition +import Text.Pandoc.Emoji (emojis) import qualified Data.Text as T import Data.Text (Text) import qualified Data.Yaml as Yaml @@ -1467,6 +1468,7 @@ inline = choice [ whitespace , exampleRef , smart , return . B.singleton <$> charRef + , emoji , symbol , ltSign ] <?> "inline" @@ -1898,6 +1900,21 @@ rawHtmlInline = do else not . isTextTag return $ return $ B.rawInline "html" result +-- Emoji + +emojiChars :: [Char] +emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-'] + +emoji :: MarkdownParser (F Inlines) +emoji = try $ do + guardEnabled Ext_emoji + char ':' + emojikey <- many1 (oneOf emojiChars) + char ':' + case M.lookup emojikey emojis of + Just s -> return (return (B.str s)) + Nothing -> mzero + -- Citations cite :: MarkdownParser (F Inlines) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 44474a37a..3be47cfd4 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Org - Copyright : Copyright (C) 2014 Albert Krewinkel + Copyright : Copyright (C) 2014-2015 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -1588,8 +1588,11 @@ smart :: OrgParser (F Inlines) smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [orgApostrophe, dash, ellipses]) - where orgApostrophe = + choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) + where + 
orgDash = dash <* updatePositions '-' + orgEllipses = ellipses <* updatePositions '.' + orgApostrophe = (char '\'' <|> char '\8217') <* updateLastPreCharPos <* updateLastForbiddenCharPos *> return (B.str "\x2019") @@ -1597,9 +1600,10 @@ smart = do singleQuoted :: OrgParser (F Inlines) singleQuoted = try $ do singleQuoteStart + updatePositions '\'' withQuoteContext InSingleQuote $ fmap B.singleQuoted . trimInlinesF . mconcat <$> - many1Till inline singleQuoteEnd + many1Till inline (singleQuoteEnd <* updatePositions '\'') -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote @@ -1607,6 +1611,7 @@ singleQuoted = try $ do doubleQuoted :: OrgParser (F Inlines) doubleQuoted = try $ do doubleQuoteStart + updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return (fmap B.doubleQuoted . 
trimInlinesF $ contents)) diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index f6afacb34..366ab7413 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -185,6 +185,10 @@ tests = [ testGroup "inline code" "<\n\na>" =?> para (text "<") <> para (text "a>") ] + , testGroup "emoji" + [ test markdownGH "emoji symbols" $ + ":smile: and :+1:" =?> para (text "😄 and 👍") + ] , "unbalanced brackets" =: "[[[[[[[[[[[[[[[hi" =?> para (text "[[[[[[[[[[[[[[[hi") , testGroup "backslash escapes" diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index d956e89e3..9e7399aa0 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -1250,6 +1250,7 @@ tests = ] in codeBlockWith ( "", classes, params) "code body\n" ] + , testGroup "Smart punctuation" [ test orgSmart "quote before ellipses" ("'...hi'" @@ -1270,5 +1271,13 @@ tests = , test orgSmart "Dashes are allowed at the borders of emphasis'" ("/foo---/" =?> para (emph "foo—")) + + , test orgSmart "Single quotes can be followed by emphasized text" + ("Singles on the '/meat market/'" =?> + para ("Singles on the " <> (singleQuoted $ emph "meat market"))) + + , test orgSmart "Double quotes can be followed by emphasized text" + ("Double income, no kids: \"/DINK/\"" =?> + para ("Double income, no kids: " <> (doubleQuoted $ emph "DINK"))) ] ] |