diff options
Diffstat (limited to 'src/Text')
62 files changed, 2017 insertions, 487 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index f9d19d9dd..ee4284979 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -117,12 +117,11 @@ module Text.Pandoc , writeCustom -- * Rendering templates and default templates , module Text.Pandoc.Templates - -- * Version - , pandocVersion -- * Miscellaneous , getReader , getWriter , ToJsonFilter(..) + , pandocVersion ) where import Text.Pandoc.Definition @@ -172,23 +171,17 @@ import Text.Pandoc.Writers.CommonMark import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options -import Text.Pandoc.Shared (safeRead, warn, mapLeft) +import Text.Pandoc.Shared (safeRead, warn, mapLeft, pandocVersion) import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Error import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate) -import Data.Version (showVersion) import Data.Set (Set) import qualified Data.Set as Set import Text.Parsec import Text.Parsec.Error import qualified Text.Pandoc.UTF8 as UTF8 -import Paths_pandoc (version) - --- | Version number of pandoc library. -pandocVersion :: String -pandocVersion = showVersion version parseFormatSpec :: String -> Either ParseError (String, Set Extension -> Set Extension) @@ -320,14 +313,14 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "plain" = plainExtensions -getDefaultExtensions "org" = Set.fromList [Ext_citations] +getDefaultExtensions "org" = Set.fromList [Ext_citations, + Ext_auto_identifiers] getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers] getDefaultExtensions "html" = Set.fromList [Ext_auto_identifiers, Ext_native_divs, Ext_native_spans] getDefaultExtensions "html5" = getDefaultExtensions "html" -getDefaultExtensions "epub" = Set.fromList [Ext_auto_identifiers, - Ext_raw_html, +getDefaultExtensions "epub" = Set.fromList [Ext_raw_html, Ext_native_divs, Ext_native_spans, Ext_epub_html_exts] diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs new file mode 100644 index 000000000..9d0c84243 --- /dev/null +++ b/src/Text/Pandoc/CSS.hs @@ -0,0 +1,34 @@ +module Text.Pandoc.CSS ( foldOrElse, + pickStyleAttrProps + ) +where + +import Text.Pandoc.Shared (trim) +import Text.Parsec +import Text.Parsec.String + +ruleParser :: Parser (String, String) +ruleParser = do + p <- many1 (noneOf ":") <* char ':' + v <- many1 (noneOf ":;") <* char ';' <* spaces + return (trim p, trim v) + +styleAttrParser :: Parser [(String, String)] +styleAttrParser = do + p <- many1 ruleParser + return p + +orElse :: Eq a => a -> a -> a -> a +orElse v x y = if v == x then y else x + +foldOrElse :: Eq a => a -> [a] -> a +foldOrElse v xs = foldr (orElse v) v xs + +eitherToMaybe :: Either a b -> Maybe b +eitherToMaybe (Right x) = Just x +eitherToMaybe _ = Nothing + +pickStyleAttrProps :: [String] -> String -> Maybe String +pickStyleAttrProps lookupProps styleAttr = do + styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr + foldOrElse Nothing $ map (flip lookup styles) lookupProps diff --git a/src/Text/Pandoc/Compat/Locale.hs b/src/Text/Pandoc/Compat/Locale.hs deleted file mode 100644 index ac791136c..000000000 --- a/src/Text/Pandoc/Compat/Locale.hs +++ /dev/null @@ -1,9 +0,0 @@ -{-# LANGUAGE CPP #-} -module Text.Pandoc.Compat.Locale ( defaultTimeLocale ) -where - -#if MIN_VERSION_time(1,5,0) -import Data.Time.Format ( defaultTimeLocale ) -#else -import System.Locale ( defaultTimeLocale ) -#endif diff --git a/src/Text/Pandoc/Compat/Monoid.hs b/src/Text/Pandoc/Compat/Monoid.hs index cb7ea2527..4daceb8e1 100644 --- a/src/Text/Pandoc/Compat/Monoid.hs +++ b/src/Text/Pandoc/Compat/Monoid.hs @@ -1,19 +1,16 @@ {-# LANGUAGE CPP #-} -module Text.Pandoc.Compat.Monoid ( Monoid(..) - , (<>) - ) where +module Text.Pandoc.Compat.Monoid ( (<>) ) + where #if MIN_VERSION_base(4,5,0) -import Data.Monoid ((<>), Monoid(..)) -#else -import Data.Monoid (mappend, Monoid(..)) -#endif +import Data.Monoid ((<>)) -#if MIN_VERSION_base(4,5,0) #else +import Data.Monoid + infixr 6 <> --- | An infix synonym for 'mappend'. +--- | An infix synonym for 'mappend'. (<>) :: Monoid m => m -> m -> m (<>) = mappend {-# INLINE (<>) #-} diff --git a/src/Text/Pandoc/Compat/Time.hs b/src/Text/Pandoc/Compat/Time.hs new file mode 100644 index 000000000..aa08ca224 --- /dev/null +++ b/src/Text/Pandoc/Compat/Time.hs @@ -0,0 +1,18 @@ +{-# LANGUAGE CPP #-} +#if MIN_VERSION_time(1,5,0) +module Text.Pandoc.Compat.Time ( + module Data.Time +) +where +import Data.Time + +#else +module Text.Pandoc.Compat.Time ( + module Data.Time, + defaultTimeLocale +) +where +import Data.Time +import System.Locale ( defaultTimeLocale ) + +#endif diff --git a/src/Text/Pandoc/Data.hsb b/src/Text/Pandoc/Data.hsb index 3a0bf8ac4..d408bf510 100644 --- a/src/Text/Pandoc/Data.hsb +++ b/src/Text/Pandoc/Data.hsb @@ -2,6 +2,14 @@ -- to be processed using hsb2hs module Text.Pandoc.Data (dataFiles) where import qualified Data.ByteString as B +import System.FilePath (splitDirectories) +import qualified System.FilePath.Posix as Posix +-- We ensure that the data files are stored using Posix +-- path separators (/), even on Windows. dataFiles :: [(FilePath, B.ByteString)] -dataFiles = ("README", %blob "README") : %blobs "data"
\ No newline at end of file +dataFiles = map (\(fp, contents) -> + (Posix.joinPath (splitDirectories fp), contents)) dataFiles' + +dataFiles' :: [(FilePath, B.ByteString)] +dataFiles' = ("README", %blob "README") : %blobs "data" diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs new file mode 100644 index 000000000..c98b06fa1 --- /dev/null +++ b/src/Text/Pandoc/Emoji.hs @@ -0,0 +1,905 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Emoji + Copyright : Copyright (C) 2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Emoji symbol lookup from canonical string identifier. +-} +module Text.Pandoc.Emoji ( emojis ) where +import qualified Data.Map as M + +emojis :: M.Map String String +emojis = M.fromList + [ ("100", "\x1f4af") + , ("1234", "\x1f522") + , ("smile", "\x1f604") + , ("smiley", "\x1f603") + , ("grinning", "\x1f600") + , ("blush", "\x1f60a") + , ("relaxed", "\x263a\fe0f") + , ("wink", "\x1f609") + , ("heart_eyes", "\x1f60d") + , ("kissing_heart", "\x1f618") + , ("kissing_closed_eyes", "\x1f61a") + , ("kissing", "\x1f617") + , ("kissing_smiling_eyes", "\x1f619") + , ("stuck_out_tongue_winking_eye", "\x1f61c") + , ("stuck_out_tongue_closed_eyes", "\x1f61d") + , ("stuck_out_tongue", "\x1f61b") + , ("flushed", "\x1f633") + , ("grin", "\x1f601") + , ("pensive", "\x1f614") + , ("relieved", "\x1f60c") + , ("unamused", "\x1f612") + , ("disappointed", "\x1f61e") + , ("persevere", "\x1f623") + , ("cry", "\x1f622") + , ("joy", "\x1f602") + , ("sob", "\x1f62d") + , ("sleepy", "\x1f62a") + , ("disappointed_relieved", "\x1f625") + , ("cold_sweat", "\x1f630") + , ("sweat_smile", "\x1f605") + , ("sweat", "\x1f613") + , ("weary", "\x1f629") + , ("tired_face", "\x1f62b") + , ("fearful", "\x1f628") + , ("scream", "\x1f631") + , ("angry", "\x1f620") + , ("rage", "\x1f621") + , ("triumph", "\x1f624") + , ("confounded", "\x1f616") + , ("laughing", "\x1f606") + , ("satisfied", "\x1f606") + , ("yum", "\x1f60b") + , ("mask", "\x1f637") + , ("sunglasses", "\x1f60e") + , ("sleeping", "\x1f634") + , ("dizzy_face", "\x1f635") + , ("astonished", "\x1f632") + , ("worried", "\x1f61f") + , ("frowning", "\x1f626") + , ("anguished", "\x1f627") + , ("smiling_imp", "\x1f608") + , ("imp", "\x1f47f") + , ("open_mouth", "\x1f62e") + , ("grimacing", "\x1f62c") + , ("neutral_face", "\x1f610") + , ("confused", "\x1f615") + , ("hushed", "\x1f62f") + , ("no_mouth", "\x1f636") + , ("innocent", "\x1f607") + , ("smirk", "\x1f60f") + , ("expressionless", "\x1f611") + , ("man_with_gua_pi_mao", "\x1f472") + , ("man_with_turban", "\x1f473") + , ("cop", "\x1f46e") + , ("construction_worker", "\x1f477") + , ("guardsman", "\x1f482") + , ("baby", "\x1f476") + , ("boy", "\x1f466") + , ("girl", "\x1f467") + , ("man", "\x1f468") + , ("woman", "\x1f469") + , ("older_man", "\x1f474") + , ("older_woman", "\x1f475") + , ("person_with_blond_hair", "\x1f471") + , ("angel", "\x1f47c") + , ("princess", "\x1f478") + , ("smiley_cat", "\x1f63a") + , ("smile_cat", "\x1f638") + , ("heart_eyes_cat", "\x1f63b") + , ("kissing_cat", "\x1f63d") + , ("smirk_cat", "\x1f63c") + , ("scream_cat", "\x1f640") + , ("crying_cat_face", "\x1f63f") + , ("joy_cat", "\x1f639") + , ("pouting_cat", "\x1f63e") + , ("japanese_ogre", "\x1f479") + , ("japanese_goblin", "\x1f47a") + , ("see_no_evil", "\x1f648") + , ("hear_no_evil", "\x1f649") + , ("speak_no_evil", "\x1f64a") + , ("skull", "\x1f480") + , ("alien", "\x1f47d") + , ("hankey", "\x1f4a9") + , ("poop", "\x1f4a9") + , ("shit", "\x1f4a9") + , ("fire", "\x1f525") + , ("sparkles", "\x2728") + , ("star2", "\x1f31f") + , ("dizzy", "\x1f4ab") + , ("boom", "\x1f4a5") + , ("collision", "\x1f4a5") + , ("anger", "\x1f4a2") + , ("sweat_drops", "\x1f4a6") + , ("droplet", "\x1f4a7") + , ("zzz", "\x1f4a4") + , ("dash", "\x1f4a8") + , ("ear", "\x1f442") + , ("eyes", "\x1f440") + , ("nose", "\x1f443") + , ("tongue", "\x1f445") + , ("lips", "\x1f444") + , ("+1", "\x1f44d") + , ("thumbsup", "\x1f44d") + , ("-1", "\x1f44e") + , ("thumbsdown", "\x1f44e") + , ("ok_hand", "\x1f44c") + , ("facepunch", "\x1f44a") + , ("punch", "\x1f44a") + , ("fist", "\x270a") + , ("v", "\x270c\fe0f") + , ("wave", "\x1f44b") + , ("hand", "\x270b") + , ("raised_hand", "\x270b") + , ("open_hands", "\x1f450") + , ("point_up_2", "\x1f446") + , ("point_down", "\x1f447") + , ("point_right", "\x1f449") + , ("point_left", "\x1f448") + , ("raised_hands", "\x1f64c") + , ("pray", "\x1f64f") + , ("point_up", "\x261d\fe0f") + , ("clap", "\x1f44f") + , ("muscle", "\x1f4aa") + , ("walking", "\x1f6b6") + , ("runner", "\x1f3c3") + , ("running", "\x1f3c3") + , ("dancer", "\x1f483") + , ("couple", "\x1f46b") + , ("family", "\x1f46a") + , ("two_men_holding_hands", "\x1f46c") + , ("two_women_holding_hands", "\x1f46d") + , ("couplekiss", "\x1f48f") + , ("couple_with_heart", "\x1f491") + , ("dancers", "\x1f46f") + , ("ok_woman", "\x1f646") + , ("no_good", "\x1f645") + , ("information_desk_person", "\x1f481") + , ("raising_hand", "\x1f64b") + , ("massage", "\x1f486") + , ("haircut", "\x1f487") + , ("nail_care", "\x1f485") + , ("bride_with_veil", "\x1f470") + , ("person_with_pouting_face", "\x1f64e") + , ("person_frowning", "\x1f64d") + , ("bow", "\x1f647") + , ("tophat", "\x1f3a9") + , ("crown", "\x1f451") + , ("womans_hat", "\x1f452") + , ("athletic_shoe", "\x1f45f") + , ("mans_shoe", "\x1f45e") + , ("shoe", "\x1f45e") + , ("sandal", "\x1f461") + , ("high_heel", "\x1f460") + , ("boot", "\x1f462") + , ("shirt", "\x1f455") + , ("tshirt", "\x1f455") + , ("necktie", "\x1f454") + , ("womans_clothes", "\x1f45a") + , ("dress", "\x1f457") + , ("running_shirt_with_sash", "\x1f3bd") + , ("jeans", "\x1f456") + , ("kimono", "\x1f458") + , ("bikini", "\x1f459") + , ("briefcase", "\x1f4bc") + , ("handbag", "\x1f45c") + , ("pouch", "\x1f45d") + , ("purse", "\x1f45b") + , ("eyeglasses", "\x1f453") + , ("ribbon", "\x1f380") + , ("closed_umbrella", "\x1f302") + , ("lipstick", "\x1f484") + , ("yellow_heart", "\x1f49b") + , ("blue_heart", "\x1f499") + , ("purple_heart", "\x1f49c") + , ("green_heart", "\x1f49a") + , ("heart", "\x2764\fe0f") + , ("broken_heart", "\x1f494") + , ("heartpulse", "\x1f497") + , ("heartbeat", "\x1f493") + , ("two_hearts", "\x1f495") + , ("sparkling_heart", "\x1f496") + , ("revolving_hearts", "\x1f49e") + , ("cupid", "\x1f498") + , ("love_letter", "\x1f48c") + , ("kiss", "\x1f48b") + , ("ring", "\x1f48d") + , ("gem", "\x1f48e") + , ("bust_in_silhouette", "\x1f464") + , ("busts_in_silhouette", "\x1f465") + , ("speech_balloon", "\x1f4ac") + , ("footprints", "\x1f463") + , ("thought_balloon", "\x1f4ad") + , ("dog", "\x1f436") + , ("wolf", "\x1f43a") + , ("cat", "\x1f431") + , ("mouse", "\x1f42d") + , ("hamster", "\x1f439") + , ("rabbit", "\x1f430") + , ("frog", "\x1f438") + , ("tiger", "\x1f42f") + , ("koala", "\x1f428") + , ("bear", "\x1f43b") + , ("pig", "\x1f437") + , ("pig_nose", "\x1f43d") + , ("cow", "\x1f42e") + , ("boar", "\x1f417") + , ("monkey_face", "\x1f435") + , ("monkey", "\x1f412") + , ("horse", "\x1f434") + , ("sheep", "\x1f411") + , ("elephant", "\x1f418") + , ("panda_face", "\x1f43c") + , ("penguin", "\x1f427") + , ("bird", "\x1f426") + , ("baby_chick", "\x1f424") + , ("hatched_chick", "\x1f425") + , ("hatching_chick", "\x1f423") + , ("chicken", "\x1f414") + , ("snake", "\x1f40d") + , ("turtle", "\x1f422") + , ("bug", "\x1f41b") + , ("bee", "\x1f41d") + , ("honeybee", "\x1f41d") + , ("ant", "\x1f41c") + , ("beetle", "\x1f41e") + , ("snail", "\x1f40c") + , ("octopus", "\x1f419") + , ("shell", "\x1f41a") + , ("tropical_fish", "\x1f420") + , ("fish", "\x1f41f") + , ("dolphin", "\x1f42c") + , ("flipper", "\x1f42c") + , ("whale", "\x1f433") + , ("whale2", "\x1f40b") + , ("cow2", "\x1f404") + , ("ram", "\x1f40f") + , ("rat", "\x1f400") + , ("water_buffalo", "\x1f403") + , ("tiger2", "\x1f405") + , ("rabbit2", "\x1f407") + , ("dragon", "\x1f409") + , ("racehorse", "\x1f40e") + , ("goat", "\x1f410") + , ("rooster", "\x1f413") + , ("dog2", "\x1f415") + , ("pig2", "\x1f416") + , ("mouse2", "\x1f401") + , ("ox", "\x1f402") + , ("dragon_face", "\x1f432") + , ("blowfish", "\x1f421") + , ("crocodile", "\x1f40a") + , ("camel", "\x1f42b") + , ("dromedary_camel", "\x1f42a") + , ("leopard", "\x1f406") + , ("cat2", "\x1f408") + , ("poodle", "\x1f429") + , ("feet", "\x1f43e") + , ("paw_prints", "\x1f43e") + , ("bouquet", "\x1f490") + , ("cherry_blossom", "\x1f338") + , ("tulip", "\x1f337") + , ("four_leaf_clover", "\x1f340") + , ("rose", "\x1f339") + , ("sunflower", "\x1f33b") + , ("hibiscus", "\x1f33a") + , ("maple_leaf", "\x1f341") + , ("leaves", "\x1f343") + , ("fallen_leaf", "\x1f342") + , ("herb", "\x1f33f") + , ("ear_of_rice", "\x1f33e") + , ("mushroom", "\x1f344") + , ("cactus", "\x1f335") + , ("palm_tree", "\x1f334") + , ("evergreen_tree", "\x1f332") + , ("deciduous_tree", "\x1f333") + , ("chestnut", "\x1f330") + , ("seedling", "\x1f331") + , ("blossom", "\x1f33c") + , ("globe_with_meridians", "\x1f310") + , ("sun_with_face", "\x1f31e") + , ("full_moon_with_face", "\x1f31d") + , ("new_moon_with_face", "\x1f31a") + , ("new_moon", "\x1f311") + , ("waxing_crescent_moon", "\x1f312") + , ("first_quarter_moon", "\x1f313") + , ("moon", "\x1f314") + , ("waxing_gibbous_moon", "\x1f314") + , ("full_moon", "\x1f315") + , ("waning_gibbous_moon", "\x1f316") + , ("last_quarter_moon", "\x1f317") + , ("waning_crescent_moon", "\x1f318") + , ("last_quarter_moon_with_face", "\x1f31c") + , ("first_quarter_moon_with_face", "\x1f31b") + , ("crescent_moon", "\x1f319") + , ("earth_africa", "\x1f30d") + , ("earth_americas", "\x1f30e") + , ("earth_asia", "\x1f30f") + , ("volcano", "\x1f30b") + , ("milky_way", "\x1f30c") + , ("stars", "\x1f320") + , ("star", "\x2b50") + , ("sunny", "\x2600\fe0f") + , ("partly_sunny", "\x26c5") + , ("cloud", "\x2601\fe0f") + , ("zap", "\x26a1") + , ("umbrella", "\x2614") + , ("snowflake", "\x2744\fe0f") + , ("snowman", "\x26c4") + , ("cyclone", "\x1f300") + , ("foggy", "\x1f301") + , ("rainbow", "\x1f308") + , ("ocean", "\x1f30a") + , ("bamboo", "\x1f38d") + , ("gift_heart", "\x1f49d") + , ("dolls", "\x1f38e") + , ("school_satchel", "\x1f392") + , ("mortar_board", "\x1f393") + , ("flags", "\x1f38f") + , ("fireworks", "\x1f386") + , ("sparkler", "\x1f387") + , ("wind_chime", "\x1f390") + , ("rice_scene", "\x1f391") + , ("jack_o_lantern", "\x1f383") + , ("ghost", "\x1f47b") + , ("santa", "\x1f385") + , ("christmas_tree", "\x1f384") + , ("gift", "\x1f381") + , ("tanabata_tree", "\x1f38b") + , ("tada", "\x1f389") + , ("confetti_ball", "\x1f38a") + , ("balloon", "\x1f388") + , ("crossed_flags", "\x1f38c") + , ("crystal_ball", "\x1f52e") + , ("movie_camera", "\x1f3a5") + , ("camera", "\x1f4f7") + , ("video_camera", "\x1f4f9") + , ("vhs", "\x1f4fc") + , ("cd", "\x1f4bf") + , ("dvd", "\x1f4c0") + , ("minidisc", "\x1f4bd") + , ("floppy_disk", "\x1f4be") + , ("computer", "\x1f4bb") + , ("iphone", "\x1f4f1") + , ("phone", "\x260e\fe0f") + , ("telephone", "\x260e\fe0f") + , ("telephone_receiver", "\x1f4de") + , ("pager", "\x1f4df") + , ("fax", "\x1f4e0") + , ("satellite", "\x1f4e1") + , ("tv", "\x1f4fa") + , ("radio", "\x1f4fb") + , ("loud_sound", "\x1f50a") + , ("sound", "\x1f509") + , ("speaker", "\x1f508") + , ("mute", "\x1f507") + , ("bell", "\x1f514") + , ("no_bell", "\x1f515") + , ("loudspeaker", "\x1f4e2") + , ("mega", "\x1f4e3") + , ("hourglass_flowing_sand", "\x23f3") + , ("hourglass", "\x231b") + , ("alarm_clock", "\x23f0") + , ("watch", "\x231a") + , ("unlock", "\x1f513") + , ("lock", "\x1f512") + , ("lock_with_ink_pen", "\x1f50f") + , ("closed_lock_with_key", "\x1f510") + , ("key", "\x1f511") + , ("mag_right", "\x1f50e") + , ("bulb", "\x1f4a1") + , ("flashlight", "\x1f526") + , ("high_brightness", "\x1f506") + , ("low_brightness", "\x1f505") + , ("electric_plug", "\x1f50c") + , ("battery", "\x1f50b") + , ("mag", "\x1f50d") + , ("bathtub", "\x1f6c1") + , ("bath", "\x1f6c0") + , ("shower", "\x1f6bf") + , ("toilet", "\x1f6bd") + , ("wrench", "\x1f527") + , ("nut_and_bolt", "\x1f529") + , ("hammer", "\x1f528") + , ("door", "\x1f6aa") + , ("smoking", "\x1f6ac") + , ("bomb", "\x1f4a3") + , ("gun", "\x1f52b") + , ("hocho", "\x1f52a") + , ("knife", "\x1f52a") + , ("pill", "\x1f48a") + , ("syringe", "\x1f489") + , ("moneybag", "\x1f4b0") + , ("yen", "\x1f4b4") + , ("dollar", "\x1f4b5") + , ("pound", "\x1f4b7") + , ("euro", "\x1f4b6") + , ("credit_card", "\x1f4b3") + , ("money_with_wings", "\x1f4b8") + , ("calling", "\x1f4f2") + , ("e-mail", "\x1f4e7") + , ("inbox_tray", "\x1f4e5") + , ("outbox_tray", "\x1f4e4") + , ("email", "\x2709\fe0f") + , ("envelope", "\x2709\fe0f") + , ("envelope_with_arrow", "\x1f4e9") + , ("incoming_envelope", "\x1f4e8") + , ("postal_horn", "\x1f4ef") + , ("mailbox", "\x1f4eb") + , ("mailbox_closed", "\x1f4ea") + , ("mailbox_with_mail", "\x1f4ec") + , ("mailbox_with_no_mail", "\x1f4ed") + , ("postbox", "\x1f4ee") + , ("package", "\x1f4e6") + , ("memo", "\x1f4dd") + , ("pencil", "\x1f4dd") + , ("page_facing_up", "\x1f4c4") + , ("page_with_curl", "\x1f4c3") + , ("bookmark_tabs", "\x1f4d1") + , ("bar_chart", "\x1f4ca") + , ("chart_with_upwards_trend", "\x1f4c8") + , ("chart_with_downwards_trend", "\x1f4c9") + , ("scroll", "\x1f4dc") + , ("clipboard", "\x1f4cb") + , ("date", "\x1f4c5") + , ("calendar", "\x1f4c6") + , ("card_index", "\x1f4c7") + , ("file_folder", "\x1f4c1") + , ("open_file_folder", "\x1f4c2") + , ("scissors", "\x2702\fe0f") + , ("pushpin", "\x1f4cc") + , ("paperclip", "\x1f4ce") + , ("black_nib", "\x2712\fe0f") + , ("pencil2", "\x270f\fe0f") + , ("straight_ruler", "\x1f4cf") + , ("triangular_ruler", "\x1f4d0") + , ("closed_book", "\x1f4d5") + , ("green_book", "\x1f4d7") + , ("blue_book", "\x1f4d8") + , ("orange_book", "\x1f4d9") + , ("notebook", "\x1f4d3") + , ("notebook_with_decorative_cover", "\x1f4d4") + , ("ledger", "\x1f4d2") + , ("books", "\x1f4da") + , ("book", "\x1f4d6") + , ("open_book", "\x1f4d6") + , ("bookmark", "\x1f516") + , ("name_badge", "\x1f4db") + , ("microscope", "\x1f52c") + , ("telescope", "\x1f52d") + , ("newspaper", "\x1f4f0") + , ("art", "\x1f3a8") + , ("clapper", "\x1f3ac") + , ("microphone", "\x1f3a4") + , ("headphones", "\x1f3a7") + , ("musical_score", "\x1f3bc") + , ("musical_note", "\x1f3b5") + , ("notes", "\x1f3b6") + , ("musical_keyboard", "\x1f3b9") + , ("violin", "\x1f3bb") + , ("trumpet", "\x1f3ba") + , ("saxophone", "\x1f3b7") + , ("guitar", "\x1f3b8") + , ("space_invader", "\x1f47e") + , ("video_game", "\x1f3ae") + , ("black_joker", "\x1f0cf") + , ("flower_playing_cards", "\x1f3b4") + , ("mahjong", "\x1f004") + , ("game_die", "\x1f3b2") + , ("dart", "\x1f3af") + , ("football", "\x1f3c8") + , ("basketball", "\x1f3c0") + , ("soccer", "\x26bd") + , ("baseball", "\x26be\fe0f") + , ("tennis", "\x1f3be") + , ("8ball", "\x1f3b1") + , ("rugby_football", "\x1f3c9") + , ("bowling", "\x1f3b3") + , ("golf", "\x26f3") + , ("mountain_bicyclist", "\x1f6b5") + , ("bicyclist", "\x1f6b4") + , ("checkered_flag", "\x1f3c1") + , ("horse_racing", "\x1f3c7") + , ("trophy", "\x1f3c6") + , ("ski", "\x1f3bf") + , ("snowboarder", "\x1f3c2") + , ("swimmer", "\x1f3ca") + , ("surfer", "\x1f3c4") + , ("fishing_pole_and_fish", "\x1f3a3") + , ("coffee", "\x2615") + , ("tea", "\x1f375") + , ("sake", "\x1f376") + , ("baby_bottle", "\x1f37c") + , ("beer", "\x1f37a") + , ("beers", "\x1f37b") + , ("cocktail", "\x1f378") + , ("tropical_drink", "\x1f379") + , ("wine_glass", "\x1f377") + , ("fork_and_knife", "\x1f374") + , ("pizza", "\x1f355") + , ("hamburger", "\x1f354") + , ("fries", "\x1f35f") + , ("poultry_leg", "\x1f357") + , ("meat_on_bone", "\x1f356") + , ("spaghetti", "\x1f35d") + , ("curry", "\x1f35b") + , ("fried_shrimp", "\x1f364") + , ("bento", "\x1f371") + , ("sushi", "\x1f363") + , ("fish_cake", "\x1f365") + , ("rice_ball", "\x1f359") + , ("rice_cracker", "\x1f358") + , ("rice", "\x1f35a") + , ("ramen", "\x1f35c") + , ("stew", "\x1f372") + , ("oden", "\x1f362") + , ("dango", "\x1f361") + , ("egg", "\x1f373") + , ("bread", "\x1f35e") + , ("doughnut", "\x1f369") + , ("custard", "\x1f36e") + , ("icecream", "\x1f366") + , ("ice_cream", "\x1f368") + , ("shaved_ice", "\x1f367") + , ("birthday", "\x1f382") + , ("cake", "\x1f370") + , ("cookie", "\x1f36a") + , ("chocolate_bar", "\x1f36b") + , ("candy", "\x1f36c") + , ("lollipop", "\x1f36d") + , ("honey_pot", "\x1f36f") + , ("apple", "\x1f34e") + , ("green_apple", "\x1f34f") + , ("tangerine", "\x1f34a") + , ("lemon", "\x1f34b") + , ("cherries", "\x1f352") + , ("grapes", "\x1f347") + , ("watermelon", "\x1f349") + , ("strawberry", "\x1f353") + , ("peach", "\x1f351") + , ("melon", "\x1f348") + , ("banana", "\x1f34c") + , ("pear", "\x1f350") + , ("pineapple", "\x1f34d") + , ("sweet_potato", "\x1f360") + , ("eggplant", "\x1f346") + , ("tomato", "\x1f345") + , ("corn", "\x1f33d") + , ("house", "\x1f3e0") + , ("house_with_garden", "\x1f3e1") + , ("school", "\x1f3eb") + , ("office", "\x1f3e2") + , ("post_office", "\x1f3e3") + , ("hospital", "\x1f3e5") + , ("bank", "\x1f3e6") + , ("convenience_store", "\x1f3ea") + , ("love_hotel", "\x1f3e9") + , ("hotel", "\x1f3e8") + , ("wedding", "\x1f492") + , ("church", "\x26ea") + , ("department_store", "\x1f3ec") + , ("european_post_office", "\x1f3e4") + , ("city_sunrise", "\x1f307") + , ("city_sunset", "\x1f306") + , ("japanese_castle", "\x1f3ef") + , ("european_castle", "\x1f3f0") + , ("tent", "\x26fa") + , ("factory", "\x1f3ed") + , ("tokyo_tower", "\x1f5fc") + , ("japan", "\x1f5fe") + , ("mount_fuji", "\x1f5fb") + , ("sunrise_over_mountains", "\x1f304") + , ("sunrise", "\x1f305") + , ("night_with_stars", "\x1f303") + , ("statue_of_liberty", "\x1f5fd") + , ("bridge_at_night", "\x1f309") + , ("carousel_horse", "\x1f3a0") + , ("ferris_wheel", "\x1f3a1") + , ("fountain", "\x26f2") + , ("roller_coaster", "\x1f3a2") + , ("ship", "\x1f6a2") + , ("boat", "\x26f5") + , ("sailboat", "\x26f5") + , ("speedboat", "\x1f6a4") + , ("rowboat", "\x1f6a3") + , ("anchor", "\x2693") + , ("rocket", "\x1f680") + , ("airplane", "\x2708\fe0f") + , ("seat", "\x1f4ba") + , ("helicopter", "\x1f681") + , ("steam_locomotive", "\x1f682") + , ("tram", "\x1f68a") + , ("station", "\x1f689") + , ("mountain_railway", "\x1f69e") + , ("train2", "\x1f686") + , ("bullettrain_side", "\x1f684") + , ("bullettrain_front", "\x1f685") + , ("light_rail", "\x1f688") + , ("metro", "\x1f687") + , ("monorail", "\x1f69d") + , ("train", "\x1f68b") + , ("railway_car", "\x1f683") + , ("trolleybus", "\x1f68e") + , ("bus", "\x1f68c") + , ("oncoming_bus", "\x1f68d") + , ("blue_car", "\x1f699") + , ("oncoming_automobile", "\x1f698") + , ("car", "\x1f697") + , ("red_car", "\x1f697") + , ("taxi", "\x1f695") + , ("oncoming_taxi", "\x1f696") + , ("articulated_lorry", "\x1f69b") + , ("truck", "\x1f69a") + , ("rotating_light", "\x1f6a8") + , ("police_car", "\x1f693") + , ("oncoming_police_car", "\x1f694") + , ("fire_engine", "\x1f692") + , ("ambulance", "\x1f691") + , ("minibus", "\x1f690") + , ("bike", "\x1f6b2") + , ("aerial_tramway", "\x1f6a1") + , ("suspension_railway", "\x1f69f") + , ("mountain_cableway", "\x1f6a0") + , ("tractor", "\x1f69c") + , ("barber", "\x1f488") + , ("busstop", "\x1f68f") + , ("ticket", "\x1f3ab") + , ("vertical_traffic_light", "\x1f6a6") + , ("traffic_light", "\x1f6a5") + , ("warning", "\x26a0\fe0f") + , ("construction", "\x1f6a7") + , ("beginner", "\x1f530") + , ("fuelpump", "\x26fd") + , ("izakaya_lantern", "\x1f3ee") + , ("lantern", "\x1f3ee") + , ("slot_machine", "\x1f3b0") + , ("hotsprings", "\x2668\fe0f") + , ("moyai", "\x1f5ff") + , ("circus_tent", "\x1f3aa") + , ("performing_arts", "\x1f3ad") + , ("round_pushpin", "\x1f4cd") + , ("triangular_flag_on_post", "\x1f6a9") + , ("jp", "\x1f1ef\1f1f5") + , ("kr", "\x1f1f0\1f1f7") + , ("de", "\x1f1e9\1f1ea") + , ("cn", "\x1f1e8\1f1f3") + , ("us", "\x1f1fa\1f1f8") + , ("fr", "\x1f1eb\1f1f7") + , ("es", "\x1f1ea\1f1f8") + , ("it", "\x1f1ee\1f1f9") + , ("ru", "\x1f1f7\1f1fa") + , ("gb", "\x1f1ec\1f1e7") + , ("uk", "\x1f1ec\1f1e7") + , ("one", "1\fe0f\20e3") + , ("two", "2\fe0f\20e3") + , ("three", "3\fe0f\20e3") + , ("four", "4\fe0f\20e3") + , ("five", "5\fe0f\20e3") + , ("six", "6\fe0f\20e3") + , ("seven", "7\fe0f\20e3") + , ("eight", "8\fe0f\20e3") + , ("nine", "9\fe0f\20e3") + , ("zero", "0\fe0f\20e3") + , ("keycap_ten", "\x1f51f") + , ("hash", "#\fe0f\20e3") + , ("symbols", "\x1f523") + , ("arrow_up", "\x2b06\fe0f") + , ("arrow_down", "\x2b07\fe0f") + , ("arrow_left", "\x2b05\fe0f") + , ("arrow_right", "\x27a1\fe0f") + , ("capital_abcd", "\x1f520") + , ("abcd", "\x1f521") + , ("abc", "\x1f524") + , ("arrow_upper_right", "\x2197\fe0f") + , ("arrow_upper_left", "\x2196\fe0f") + , ("arrow_lower_right", "\x2198\fe0f") + , ("arrow_lower_left", "\x2199\fe0f") + , ("left_right_arrow", "\x2194\fe0f") + , ("arrow_up_down", "\x2195\fe0f") + , ("arrows_counterclockwise", "\x1f504") + , ("arrow_backward", "\x25c0\fe0f") + , ("arrow_forward", "\x25b6\fe0f") + , ("arrow_up_small", "\x1f53c") + , ("arrow_down_small", "\x1f53d") + , ("leftwards_arrow_with_hook", "\x21a9\fe0f") + , ("arrow_right_hook", "\x21aa\fe0f") + , ("information_source", "\x2139\fe0f") + , ("rewind", "\x23ea") + , ("fast_forward", "\x23e9") + , ("arrow_double_up", "\x23eb") + , ("arrow_double_down", "\x23ec") + , ("arrow_heading_down", "\x2935\fe0f") + , ("arrow_heading_up", "\x2934\fe0f") + , ("ok", "\x1f197") + , ("twisted_rightwards_arrows", "\x1f500") + , ("repeat", "\x1f501") + , ("repeat_one", "\x1f502") + , ("new", "\x1f195") + , ("up", "\x1f199") + , ("cool", "\x1f192") + , ("free", "\x1f193") + , ("ng", "\x1f196") + , ("signal_strength", "\x1f4f6") + , ("cinema", "\x1f3a6") + , ("koko", "\x1f201") + , ("u6307", "\x1f22f") + , ("u7a7a", "\x1f233") + , ("u6e80", "\x1f235") + , ("u5408", "\x1f234") + , ("u7981", "\x1f232") + , ("ideograph_advantage", "\x1f250") + , ("u5272", "\x1f239") + , ("u55b6", "\x1f23a") + , ("u6709", "\x1f236") + , ("u7121", "\x1f21a") + , ("restroom", "\x1f6bb") + , ("mens", "\x1f6b9") + , ("womens", "\x1f6ba") + , ("baby_symbol", "\x1f6bc") + , ("wc", "\x1f6be") + , ("potable_water", "\x1f6b0") + , ("put_litter_in_its_place", "\x1f6ae") + , ("parking", "\x1f17f\fe0f") + , ("wheelchair", "\x267f") + , ("no_smoking", "\x1f6ad") + , ("u6708", "\x1f237\fe0f") + , ("u7533", "\x1f238") + , ("sa", "\x1f202\fe0f") + , ("m", "\x24c2\fe0f") + , ("passport_control", "\x1f6c2") + , ("baggage_claim", "\x1f6c4") + , ("left_luggage", "\x1f6c5") + , ("customs", "\x1f6c3") + , ("accept", "\x1f251") + , ("secret", "\x3299\fe0f") + , ("congratulations", "\x3297\fe0f") + , ("cl", "\x1f191") + , ("sos", "\x1f198") + , ("id", "\x1f194") + , ("no_entry_sign", "\x1f6ab") + , ("underage", "\x1f51e") + , ("no_mobile_phones", "\x1f4f5") + , ("do_not_litter", "\x1f6af") + , ("non-potable_water", "\x1f6b1") + , ("no_bicycles", "\x1f6b3") + , ("no_pedestrians", "\x1f6b7") + , ("children_crossing", "\x1f6b8") + , ("no_entry", "\x26d4") + , ("eight_spoked_asterisk", "\x2733\fe0f") + , ("sparkle", "\x2747\fe0f") + , ("negative_squared_cross_mark", "\x274e") + , ("white_check_mark", "\x2705") + , ("eight_pointed_black_star", "\x2734\fe0f") + , ("heart_decoration", "\x1f49f") + , ("vs", "\x1f19a") + , ("vibration_mode", "\x1f4f3") + , ("mobile_phone_off", "\x1f4f4") + , ("a", "\x1f170\fe0f") + , ("b", "\x1f171\fe0f") + , ("ab", "\x1f18e") + , ("o2", "\x1f17e\fe0f") + , ("diamond_shape_with_a_dot_inside", "\x1f4a0") + , ("loop", "\x27bf") + , ("recycle", "\x267b\fe0f") + , ("aries", "\x2648") + , ("taurus", "\x2649") + , ("gemini", "\x264a") + , ("cancer", "\x264b") + , ("leo", "\x264c") + , ("virgo", "\x264d") + , ("libra", "\x264e") + , ("scorpius", "\x264f") + , ("sagittarius", "\x2650") + , ("capricorn", "\x2651") + , ("aquarius", "\x2652") + , ("pisces", "\x2653") + , ("ophiuchus", "\x26ce") + , ("six_pointed_star", "\x1f52f") + , ("atm", "\x1f3e7") + , ("chart", "\x1f4b9") + , ("heavy_dollar_sign", "\x1f4b2") + , ("currency_exchange", "\x1f4b1") + , ("copyright", "©\fe0f") + , ("registered", "®\fe0f") + , ("tm", "\x2122\fe0f") + , ("x", "\x274c") + , ("bangbang", "\x203c\fe0f") + , ("interrobang", "\x2049\fe0f") + , ("exclamation", "\x2757") + , ("heavy_exclamation_mark", "\x2757") + , ("question", "\x2753") + , ("grey_exclamation", "\x2755") + , ("grey_question", "\x2754") + , ("o", "\x2b55") + , ("top", "\x1f51d") + , ("end", "\x1f51a") + , ("back", "\x1f519") + , ("on", "\x1f51b") + , ("soon", "\x1f51c") + , ("arrows_clockwise", "\x1f503") + , ("clock12", "\x1f55b") + , ("clock1230", "\x1f567") + , ("clock1", "\x1f550") + , ("clock130", "\x1f55c") + , ("clock2", "\x1f551") + , ("clock230", "\x1f55d") + , ("clock3", "\x1f552") + , ("clock330", "\x1f55e") + , ("clock4", "\x1f553") + , ("clock430", "\x1f55f") + , ("clock5", "\x1f554") + , ("clock530", "\x1f560") + , ("clock6", "\x1f555") + , ("clock7", "\x1f556") + , ("clock8", "\x1f557") + , ("clock9", "\x1f558") + , ("clock10", "\x1f559") + , ("clock11", "\x1f55a") + , ("clock630", "\x1f561") + , ("clock730", "\x1f562") + , ("clock830", "\x1f563") + , ("clock930", "\x1f564") + , ("clock1030", "\x1f565") + , ("clock1130", "\x1f566") + , ("heavy_multiplication_x", "\x2716\fe0f") + , ("heavy_plus_sign", "\x2795") + , ("heavy_minus_sign", "\x2796") + , ("heavy_division_sign", "\x2797") + , ("spades", "\x2660\fe0f") + , ("hearts", "\x2665\fe0f") + , ("clubs", "\x2663\fe0f") + , ("diamonds", "\x2666\fe0f") + , ("white_flower", "\x1f4ae") + , ("heavy_check_mark", "\x2714\fe0f") + , ("ballot_box_with_check", "\x2611\fe0f") + , ("radio_button", "\x1f518") + , ("link", "\x1f517") + , ("curly_loop", "\x27b0") + , ("wavy_dash", "\x3030\fe0f") + , ("part_alternation_mark", "\x303d\fe0f") + , ("trident", "\x1f531") + , ("black_medium_square", "\x25fc\fe0f") + , ("white_medium_square", "\x25fb\fe0f") + , ("black_medium_small_square", "\x25fe") + , ("white_medium_small_square", "\x25fd") + , ("black_small_square", "\x25aa\fe0f") + , ("white_small_square", "\x25ab\fe0f") + , ("small_red_triangle", "\x1f53a") + , ("black_square_button", "\x1f532") + , ("white_square_button", "\x1f533") + , ("black_circle", "\x26ab") + , ("white_circle", "\x26aa") + , ("red_circle", "\x1f534") + , ("large_blue_circle", "\x1f535") + , ("small_red_triangle_down", "\x1f53b") + , ("white_large_square", "\x2b1c") + , ("black_large_square", "\x2b1b") + , ("large_orange_diamond", "\x1f536") + , ("large_blue_diamond", "\x1f537") + , ("small_orange_diamond", "\x1f538") + , ("small_blue_diamond", "\x1f539") + ] diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 73d1e8f08..0a4e08175 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} {- Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> @@ -33,6 +34,9 @@ module Text.Pandoc.Error (PandocError(..), handleError) where import Text.Parsec.Error import Text.Parsec.Pos hiding (Line) import Text.Pandoc.Compat.Except +import GHC.Generics (Generic) +import Data.Generics (Typeable) +import Control.Exception (Exception) type Input = String @@ -40,8 +44,9 @@ data PandocError = -- | Generic parse failure ParseFailure String -- | Error thrown by a Parsec parser | ParsecError Input ParseError - deriving (Show) + deriving (Show, Typeable, Generic) +instance Exception PandocError instance Error PandocError where strMsg = ParseFailure diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 7489afc8e..5c775c908 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -49,7 +49,6 @@ import Data.ByteString (ByteString, unpack) import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy as BL import Data.Char (isDigit) -import Control.Applicative import Control.Monad import Data.Bits import Data.Binary diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 1246cdc8f..eea25fadf 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -37,10 +37,10 @@ module Text.Pandoc.MediaBag ( extractMediaBag ) where import System.FilePath +import qualified System.FilePath.Posix as Posix import System.Directory (createDirectoryIfMissing) import qualified Data.Map as M import qualified Data.ByteString.Lazy as BL -import Data.Monoid (Monoid) import Control.Monad (when) import Text.Pandoc.MIME (MimeType, getMimeTypeDef) import qualified Text.Pandoc.UTF8 as UTF8 @@ -67,7 +67,7 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert (splitPath fp) (mime, contents) mediamap) + MediaBag (M.insert (splitDirectories fp) (mime, contents) mediamap) where mime = fromMaybe fallback mbMime fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp @@ -77,14 +77,14 @@ insertMedia fp mbMime contents (MediaBag mediamap) = lookupMedia :: FilePath -> MediaBag -> Maybe (MimeType, BL.ByteString) -lookupMedia fp (MediaBag mediamap) = M.lookup (splitPath fp) mediamap +lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(String, MimeType, Int)] mediaDirectory (MediaBag mediamap) = M.foldWithKey (\fp (mime,contents) -> - (((joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap + (((Posix.joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap -- | Extract contents of MediaBag to a given directory. Print informational -- messages if 'verbose' is true. @@ -95,7 +95,7 @@ extractMediaBag :: Bool extractMediaBag verbose dir (MediaBag mediamap) = do sequence_ $ M.foldWithKey (\fp (_ ,contents) -> - ((writeMedia verbose dir (joinPath fp, contents)):)) [] mediamap + ((writeMedia verbose dir (Posix.joinPath fp, contents)):)) [] mediamap writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO () writeMedia verbose dir (subpath, bs) = do diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 060fa6c05..1dc3bad3a 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} {- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> @@ -52,9 +52,9 @@ import qualified Data.Set as Set import Data.Default import Text.Pandoc.Highlighting (Style, pygments) import Text.Pandoc.MediaBag (MediaBag) -import Data.Monoid import Data.Data (Data) import Data.Typeable (Typeable) +import GHC.Generics (Generic) -- | Individually selectable syntax extensions. data Extension = @@ -107,6 +107,7 @@ data Extension = | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored | Ext_literate_haskell -- ^ Enable literate Haskell conventions | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions + | Ext_emoji -- ^ Support emoji like :smile: | Ext_auto_identifiers -- ^ Automatic identifiers for headers | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} @@ -115,7 +116,7 @@ data Extension = | Ext_line_blocks -- ^ RST style line blocks | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML | Ext_shortcut_reference_links -- ^ Shortcut reference links - deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable) + deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) pandocExtensions :: Set Extension pandocExtensions = Set.fromList @@ -207,6 +208,7 @@ githubMarkdownExtensions = Set.fromList , Ext_intraword_underscores , Ext_strikeout , Ext_hard_line_breaks + , Ext_emoji , Ext_lists_without_preceding_blankline , Ext_shortcut_reference_links ] @@ -227,6 +229,15 @@ multimarkdownExtensions = Set.fromList , Ext_implicit_header_references , Ext_auto_identifiers , Ext_mmd_header_identifiers + , Ext_implicit_figures + -- Note: MMD's syntax for superscripts and subscripts + -- is a bit more permissive than pandoc's, allowing + -- e^2 and a~1 instead of e^2^ and a~1~, so even with + -- these options we don't have full support for MMD + -- superscripts and subscripts, but there's no reason + -- not to include these: + , Ext_superscript + , Ext_subscript ] strictExtensions :: Set Extension @@ -251,7 +262,7 @@ data ReaderOptions = ReaderOptions{ , readerDefaultImageExtension :: String -- ^ Default extension for images , readerTrace :: Bool -- ^ Print debugging info , readerTrackChanges :: TrackChanges -} deriving (Show, Read, Data, Typeable) +} deriving (Show, Read, Data, Typeable, Generic) instance Default ReaderOptions where def = ReaderOptions{ @@ -273,7 +284,7 @@ instance Default ReaderOptions -- Writer options -- -data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable) +data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable, Generic) data HTMLMathMethod = PlainMath | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js @@ -283,18 +294,18 @@ data HTMLMathMethod = PlainMath | MathML (Maybe String) -- url of MathMLinHTML.js | MathJax String -- url of MathJax.js | KaTeX String String -- url of stylesheet and katex.js - deriving (Show, Read, Eq, Data, Typeable) + deriving (Show, Read, Eq, Data, Typeable, Generic) data CiteMethod = Citeproc -- use citeproc to render them | Natbib -- output natbib cite commands | Biblatex -- output biblatex cite commands - deriving (Show, Read, Eq, Data, Typeable) + deriving (Show, Read, Eq, Data, Typeable, Generic) -- | Methods for obfuscating email addresses in HTML. data ObfuscationMethod = NoObfuscation | ReferenceObfuscation | JavascriptObfuscation - deriving (Show, Read, Eq, Data, Typeable) + deriving (Show, Read, Eq, Data, Typeable, Generic) -- | Varieties of HTML slide shows. data HTMLSlideVariant = S5Slides @@ -303,13 +314,13 @@ data HTMLSlideVariant = S5Slides | DZSlides | RevealJsSlides | NoSlides - deriving (Show, Read, Eq, Data, Typeable) + deriving (Show, Read, Eq, Data, Typeable, Generic) -- | Options for accepting or rejecting MS Word track-changes. data TrackChanges = AcceptChanges | RejectChanges | AllChanges - deriving (Show, Read, Eq, Data, Typeable) + deriving (Show, Read, Eq, Data, Typeable, Generic) -- | Options for writers data WriterOptions = WriterOptions @@ -357,7 +368,7 @@ data WriterOptions = WriterOptions , writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader , writerVerbose :: Bool -- ^ Verbose debugging output , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine - } deriving (Show, Data, Typeable) + } deriving (Show, Data, Typeable, Generic) instance Default WriterOptions where def = WriterOptions { writerStandalone = False diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 25a90f08f..38e8b8a9d 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -34,6 +34,7 @@ import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as BC import qualified Data.ByteString as BS +import Text.Pandoc.Compat.Monoid ((<>)) import System.Exit (ExitCode (..)) import System.FilePath import System.IO (stderr, stdout) @@ -42,13 +43,12 @@ import Data.Digest.Pure.SHA (showDigest, sha1) import System.Environment import Control.Monad (unless, when, (<=<)) import qualified Control.Exception as E -import Control.Applicative ((<$)) import Data.List (isInfixOf) import Data.Maybe (fromMaybe) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition import Text.Pandoc.Walk (walkM) -import Text.Pandoc.Shared (fetchItem', warn, withTempDir) +import Text.Pandoc.Shared (fetchItem', warn, withTempDir, inDirectory) import Text.Pandoc.Options (WriterOptions(..)) import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) import Text.Pandoc.Process (pipeProcess) @@ -72,7 +72,9 @@ makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do doc' <- handleImages opts tmpdir doc let source = writer opts doc' args = writerLaTeXArgs opts - tex2pdf' (writerVerbose opts) args tmpdir program source + case program of + "context" -> context2pdf (writerVerbose opts) tmpdir source + _ -> tex2pdf' (writerVerbose opts) args tmpdir program source handleImages :: WriterOptions -> FilePath -- ^ temp dir to store images @@ -156,9 +158,6 @@ tex2pdf' verbose args tmpDir program source = do (ExitSuccess, Nothing) -> return $ Left "" (ExitSuccess, Just pdf) -> return $ Right pdf -(<>) :: ByteString -> ByteString -> ByteString -(<>) = B.append - -- parsing output extractMsg :: ByteString -> ByteString @@ -170,6 +169,14 @@ extractMsg log' = do then log' else BC.unlines (msg'' ++ lineno) +extractConTeXtMsg :: ByteString -> ByteString +extractConTeXtMsg log' = do + let msg' = take 1 $ + dropWhile (not . ("tex error" `BC.isPrefixOf`)) $ BC.lines log' + if null msg' + then log' + else BC.unlines msg' + -- running tex programs -- Run a TeX program on an input bytestring and return (exit code, @@ -228,3 +235,55 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do else return Nothing return (exit, out <> err, pdf) +context2pdf :: Bool -- ^ Verbose output + -> FilePath -- ^ temp directory for output + -> String -- ^ ConTeXt source + -> IO (Either ByteString ByteString) +context2pdf verbose tmpDir source = inDirectory tmpDir $ do + let file = "input.tex" + UTF8.writeFile file source +#ifdef _WINDOWS + -- note: we want / even on Windows, for TexLive + let tmpDir' = changePathSeparators tmpDir +#else + let tmpDir' = tmpDir +#endif + let programArgs = ["--batchmode"] ++ [file] + env' <- getEnvironment + let sep = searchPathSeparator:[] + let texinputs = maybe (".." ++ sep) ((".." ++ sep) ++) + $ lookup "TEXINPUTS" env' + let env'' = ("TEXINPUTS", texinputs) : + [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] + when verbose $ do + putStrLn $ "[makePDF] temp dir:" + putStrLn tmpDir' + putStrLn $ "[makePDF] Command line:" + putStrLn $ "context" ++ " " ++ unwords (map show programArgs) + putStr "\n" + putStrLn $ "[makePDF] Environment:" + mapM_ print env'' + putStr "\n" + putStrLn $ "[makePDF] Contents of " ++ file ++ ":" + B.readFile file >>= B.putStr + putStr "\n" + (exit, out, err) <- pipeProcess (Just env'') "context" programArgs BL.empty + when verbose $ do + B.hPutStr stdout out + B.hPutStr stderr err + putStr "\n" + let pdfFile = replaceExtension file ".pdf" + pdfExists <- doesFileExist pdfFile + mbPdf <- if pdfExists + -- We read PDF as a strict bytestring to make sure that the + -- temp directory is removed on Windows. + -- See https://github.com/jgm/pandoc/issues/1192. + then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + else return Nothing + let log' = out <> err + case (exit, mbPdf) of + (ExitFailure _, _) -> do + let logmsg = extractConTeXtMsg log' + return $ Left logmsg + (ExitSuccess, Nothing) -> return $ Left "" + (ExitSuccess, Just pdf) -> return $ Right pdf diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 6b9565a51..c79c8fffc 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -186,12 +186,11 @@ import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro, parseMacroDefinitions) import Text.Pandoc.Compat.TagSoupEntity ( lookupEntity ) import Text.Pandoc.Asciify (toAsciiChar) +import Text.Pandoc.Compat.Monoid ((<>)) import Data.Default import qualified Data.Set as Set import Control.Monad.Reader import Control.Monad.Identity -import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative) -import Data.Monoid import Data.Maybe (catMaybes) import Text.Pandoc.Error @@ -1213,7 +1212,8 @@ citeKey = try $ do firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite let regchar = satisfy (\c -> isAlphaNum c || c == '_') let internal p = try $ p <* lookAhead regchar - rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") + rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|> + (oneOf ":/" <* lookAhead (char '/')) let key = firstChar:rest return (suppress_author, key) diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index bb0091ca5..88b7dd09e 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -77,10 +77,10 @@ where import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex, viewl, ViewL(..)) import Data.Foldable (toList) import Data.List (intercalate) -import Data.Monoid import Data.String import Control.Monad.State import Data.Char (isSpace) +import Text.Pandoc.Compat.Monoid ((<>)) data RenderState a = RenderState{ output :: [a] -- ^ In reverse order diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index db438e26d..e8fe92e27 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -8,16 +8,15 @@ import Text.XML.Light import Text.Pandoc.Compat.TagSoupEntity (lookupEntity) import Data.Either (rights) import Data.Generics -import Data.Monoid import Data.Char (isSpace) import Control.Monad.State -import Control.Applicative ((<$>)) import Data.List (intersperse) import Data.Maybe (fromMaybe) import Text.TeXMath (readMathML, writeTeX) import Text.Pandoc.Error (PandocError) import Text.Pandoc.Compat.Except import Data.Default +import Data.Foldable (asum) {- @@ -194,7 +193,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] indexterm - A wrapper for terms to be indexed [x] info - A wrapper for information about a component or other block. (DocBook v5) [x] informalequation - A displayed mathematical equation without a title -[ ] informalexample - A displayed example without a title +[x] informalexample - A displayed example without a title [ ] informalfigure - A untitled figure [ ] informaltable - A table without a title [ ] initializer - The initializer for a FieldSynopsis @@ -498,7 +497,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] warning - An admonition set off from the text [x] wordasword - A word meant specifically as a word and not representing anything else -[ ] xref - A cross reference to another part of the document +[x] xref - A cross reference to another part of the document [ ] year - The year of publication of a document [x] ?asciidoc-br? - line break from asciidoc docbook output -} @@ -511,6 +510,7 @@ data DBState = DBState{ dbSectionLevel :: Int , dbAcceptsMeta :: Bool , dbBook :: Bool , dbFigureTitle :: Inlines + , dbContent :: [Content] } deriving Show instance Default DBState where @@ -519,13 +519,14 @@ instance Default DBState where , dbMeta = mempty , dbAcceptsMeta = False , dbBook = False - , dbFigureTitle = mempty } + , dbFigureTitle = mempty + , dbContent = [] } readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs - where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp' - inp' = handleInstructions inp + where (bs , st') = flip runState (def{ dbContent = tree }) . runExceptT . mapM parseBlock $ tree + tree = normalizeTree . parseXML . handleInstructions $ inp -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. @@ -611,6 +612,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags "important","caution","note","tip","warning","qandadiv", "question","answer","abstract","itemizedlist","orderedlist", "variablelist","article","book","table","informaltable", + "informalexample", "screen","programlisting","example","calloutlist"] isBlockElement _ = False @@ -656,7 +658,7 @@ getMediaobject e = do let (caption, title) = if isNull figTitle then (getCaption e, "") else (return figTitle, "fig:") - liftM (imageWith imageUrl title attr) caption + liftM (imageWith attr imageUrl title) caption getBlocks :: Element -> DB Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) @@ -775,6 +777,8 @@ parseBlock (Elem e) = "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e "table" -> parseTable "informaltable" -> parseTable + "informalexample" -> divWith ("", ["informalexample"], []) <$> + getBlocks e "literallayout" -> codeBlockWithLang "screen" -> codeBlockWithLang "programlisting" -> codeBlockWithLang @@ -956,7 +960,13 @@ parseInline (Elem e) = "keycombo" -> keycombo <$> (mapM parseInline $ elContent e) "menuchoice" -> menuchoice <$> (mapM parseInline $ filter isGuiMenu $ elContent e) - "xref" -> return $ str "?" -- so at least you know something is there + "xref" -> do + content <- dbContent <$> get + let linkend = attrValue "linkend" e + let title = case attrValue "endterm" e of + "" -> maybe "???" xrefTitleByElem (findElementById linkend content) + endterm -> maybe "???" strContent (findElementById endterm content) + return $ link ('#' : linkend) "" (singleton (Str title)) "email" -> return $ link ("mailto:" ++ strContent e) "" $ str $ strContent e "uri" -> return $ link (strContent e) "" $ str $ strContent e @@ -968,7 +978,7 @@ parseInline (Elem e) = _ -> ('#' : attrValue "linkend" e) let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, words $ attrValue "role" e, []) - return $ linkWith href "" attr ils' + return $ linkWith attr href "" ils' "foreignphrase" -> emph <$> innerInlines "emphasis" -> case attrValue "role" e of "bold" -> strong <$> innerInlines @@ -1018,3 +1028,26 @@ parseInline (Elem e) = isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x || named "guimenuitem" x isGuiMenu _ = False + + findElementById idString content + = asum [filterElement (\x -> attrValue "id" x == idString) el | Elem el <- content] + + -- Use the 'xreflabel' attribute for getting the title of a xref link; + -- if there's no such attribute, employ some heuristics based on what + -- docbook-xsl does. + xrefTitleByElem el + | not (null xrefLabel) = xrefLabel + | otherwise = case qName (elName el) of + "chapter" -> descendantContent "title" el + "sect1" -> descendantContent "title" el + "sect2" -> descendantContent "title" el + "sect3" -> descendantContent "title" el + "sect4" -> descendantContent "title" el + "sect5" -> descendantContent "title" el + "cmdsynopsis" -> descendantContent "command" el + "funcsynopsis" -> descendantContent "function" el + _ -> qName (elName el) ++ "_title" + where + xrefLabel = attrValue "xreflabel" el + descendantContent name = maybe "???" strContent + . findElement (QName name Nothing Nothing) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index b80280553..439e2d3e4 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -85,14 +85,12 @@ import Text.Pandoc.Readers.Docx.Reducible import Text.Pandoc.Shared import Text.Pandoc.MediaBag (insertMedia, MediaBag) import Data.List (delete, (\\), intersect) -import Data.Monoid import Text.TeXMath (writeTeX) import Data.Default (Default) import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import Control.Monad.Reader import Control.Monad.State -import Control.Applicative ((<$>)) import Data.Sequence (ViewL(..), viewl) import qualified Data.Sequence as Seq (null) @@ -206,11 +204,15 @@ runElemToInlines :: RunElem -> Inlines runElemToInlines (TextRun s) = text s runElemToInlines (LnBrk) = linebreak runElemToInlines (Tab) = space +runElemToInlines (SoftHyphen) = text "\xad" +runElemToInlines (NoBreakHyphen) = text "\x2011" runElemToString :: RunElem -> String runElemToString (TextRun s) = s runElemToString (LnBrk) = ['\n'] runElemToString (Tab) = ['\t'] +runElemToString (SoftHyphen) = ['\xad'] +runElemToString (NoBreakHyphen) = ['\x2011'] runToString :: Run -> String runToString (Run _ runElems) = concatMap runElemToString runElems @@ -501,6 +503,10 @@ bodyPartToBlocks (ListItem pPr numId lvl levelInfo parparts) = do ] blks <- bodyPartToBlocks (Paragraph pPr parparts) return $ divWith ("", ["list-item"], kvs) blks +bodyPartToBlocks (DummyListItem pPr _ parparts) = + let pPr' = pPr {pStyle = "ListParagraph": (pStyle pPr)} + in + bodyPartToBlocks $ Paragraph pPr' parparts bodyPartToBlocks (Tbl _ _ _ []) = return $ para mempty bodyPartToBlocks (Tbl cap _ look (r:rs)) = do diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index cce80fb48..5910a476b 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -59,7 +59,7 @@ import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Reader -import Control.Applicative ((<$>), (<|>)) +import Control.Applicative ((<|>)) import qualified Data.Map as M import Text.Pandoc.Compat.Except import Text.TeXMath.Readers.OMML (readOMML) @@ -75,6 +75,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes , envFont :: Maybe Font , envCharStyles :: CharStyleMap , envParStyles :: ParStyleMap + , envLocation :: DocumentLocation } deriving Show @@ -87,7 +88,7 @@ instance Error DocxError where type D = ExceptT DocxError (Reader ReaderEnv) runD :: D a -> ReaderEnv -> Either DocxError a -runD dx re = runReader (runExceptT dx ) re +runD dx re = runReader (runExceptT dx) re maybeToD :: Maybe a -> D a maybeToD (Just a) = return a @@ -140,7 +141,10 @@ data AbstractNumb = AbstractNumb String [Level] -- (ilvl, format, string, start) type Level = (String, String, String, Maybe Integer) -data Relationship = Relationship (RelId, Target) +data DocumentLocation = InDocument | InFootnote | InEndnote + deriving (Eq,Show) + +data Relationship = Relationship DocumentLocation RelId Target deriving Show data Notes = Notes NameSpaces @@ -174,6 +178,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] | ListItem ParagraphStyle String String Level [ParPart] + | DummyListItem ParagraphStyle String [ParPart] | Tbl String TblGrid TblLook [Row] | OMathPara [Exp] deriving Show @@ -208,7 +213,7 @@ data Run = Run RunStyle [RunElem] | InlineDrawing FilePath B.ByteString deriving Show -data RunElem = TextRun String | LnBrk | Tab +data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show data VertAlign = BaseLn | SupScrpt | SubScrpt @@ -238,7 +243,6 @@ defaultRunStyle = RunStyle { isBold = Nothing , rUnderline = Nothing , rStyle = Nothing} - type Target = String type Anchor = String type URL = String @@ -255,7 +259,8 @@ archiveToDocx archive = do rels = archiveToRelationships archive media = archiveToMedia archive (styles, parstyles) = archiveToStyles archive - rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles + rEnv = + ReaderEnv notes numbering rels media Nothing styles parstyles InDocument doc <- runD (archiveToDocument archive) rEnv return $ Docx doc @@ -362,29 +367,30 @@ archiveToNotes zf = in Notes ns fn en -filePathIsRel :: FilePath -> Bool -filePathIsRel fp = - let (dir, name) = splitFileName fp - in - (dir == "word/_rels/") && ((takeExtension name) == ".rels") +filePathToRelType :: FilePath -> Maybe DocumentLocation +filePathToRelType "word/_rels/document.xml.rels" = Just InDocument +filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote +filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote +filePathToRelType _ = Nothing -relElemToRelationship :: Element -> Maybe Relationship -relElemToRelationship element | qName (elName element) == "Relationship" = +relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship +relElemToRelationship relType element | qName (elName element) == "Relationship" = do relId <- findAttr (QName "Id" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship (relId, target) -relElemToRelationship _ = Nothing - - + return $ Relationship relType relId target +relElemToRelationship _ _ = Nothing + +filePathToRelationships :: Archive -> FilePath -> [Relationship] +filePathToRelationships ar fp | Just relType <- filePathToRelType fp + , Just entry <- findEntryByPath fp ar + , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry = + mapMaybe (relElemToRelationship relType) $ elChildren relElems +filePathToRelationships _ _ = [] + archiveToRelationships :: Archive -> [Relationship] archiveToRelationships archive = - let relPaths = filter filePathIsRel (filesInArchive archive) - entries = mapMaybe (\f -> findEntryByPath f archive) relPaths - relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries - rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems - in - rels + concatMap (filePathToRelationships archive) $ filesInArchive archive filePathIsMedia :: FilePath -> Bool filePathIsMedia fp = @@ -409,6 +415,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls return lvl + numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | qName (elName element) == "num" && @@ -560,7 +567,7 @@ elemToBodyPart ns element num <- asks envNumbering case lookupLevel numId lvl num of Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> throwError WrongElem + Nothing -> return $ DummyListItem parstyle lvl parparts elemToBodyPart ns element | isElem ns "w" "p" element = do sty <- asks envParStyles @@ -573,7 +580,7 @@ elemToBodyPart ns element Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts Nothing -> - throwError WrongElem + return $ DummyListItem parstyle lvl parparts Nothing -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do @@ -596,13 +603,16 @@ elemToBodyPart ns element return $ Tbl caption grid tblLook rows elemToBodyPart _ _ = throwError WrongElem -lookupRelationship :: RelId -> [Relationship] -> Maybe Target -lookupRelationship relid rels = - lookup relid (map (\(Relationship pair) -> pair) rels) +lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target +lookupRelationship docLocation relid rels = + lookup (docLocation, relid) pairs + where + pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels expandDrawingId :: String -> D (FilePath, B.ByteString) expandDrawingId s = do - target <- asks (lookupRelationship s . envRelationships) + location <- asks envLocation + target <- asks (lookupRelationship location s . envRelationships) case target of Just filepath -> do bytes <- asks (lookup ("word/" ++ filepath) . envMedia) @@ -657,9 +667,10 @@ elemToParPart ns element elemToParPart ns element | isElem ns "w" "hyperlink" element , Just relId <- findAttr (elemName ns "r" "id") element = do + location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships - case lookupRelationship relId rels of + case lookupRelationship location relId rels of Just target -> do case findAttr (elemName ns "w" "anchor") element of Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs @@ -699,7 +710,7 @@ elemToRun ns element , Just fnId <- findAttr (elemName ns "w" "id") ref = do notes <- asks envNotes case lookupFootnote fnId notes of - Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Footnote bps Nothing -> return $ Footnote [] elemToRun ns element @@ -708,7 +719,7 @@ elemToRun ns element , Just enId <- findAttr (elemName ns "w" "id") ref = do notes <- asks envNotes case lookupEndnote enId notes of - Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Endnote bps Nothing -> return $ Endnote [] elemToRun ns element @@ -877,6 +888,8 @@ elemToRunElem ns element map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str | isElem ns "w" "br" element = return LnBrk | isElem ns "w" "tab" element = return Tab + | isElem ns "w" "softHyphen" element = return SoftHyphen + | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen | isElem ns "w" "sym" element = return (getSymChar ns element) | otherwise = throwError WrongElem where diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs index 8269ca88d..c93b40119 100644 --- a/src/Text/Pandoc/Readers/Docx/Reducible.hs +++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs @@ -8,7 +8,6 @@ module Text.Pandoc.Readers.Docx.Reducible ( concatReduce import Text.Pandoc.Builder -import Data.Monoid import Data.List import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) import qualified Data.Sequence as Seq (null) diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index fb86f1286..79aa540f6 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -16,6 +16,7 @@ import Text.Pandoc.Options ( ReaderOptions(..), readerTrace) import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField) import Text.Pandoc.MediaBag (MediaBag, insertMedia) import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.MIME (MimeType) import qualified Text.Pandoc.Builder as B import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry @@ -25,9 +26,7 @@ import System.FilePath ( takeFileName, (</>), dropFileName, normalise , dropFileName , splitFileName ) import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy) -import Control.Applicative ((<$>)) import Control.Monad (guard, liftM, when) -import Data.Monoid (mempty, (<>)) import Data.List (isPrefixOf, isInfixOf) import Data.Maybe (mapMaybe, fromMaybe) import qualified Data.Map as M (Map, lookup, fromList, elems) @@ -181,7 +180,6 @@ getManifest archive = do fixInternalReferences :: FilePath -> Pandoc -> Pandoc fixInternalReferences pathToFile = (walk $ renameImages root) - . (walk normalisePath) . (walk $ fixBlockIRs filename) . (walk $ fixInlineIRs filename) where @@ -196,12 +194,6 @@ fixInlineIRs s (Link attr t ('#':url, tit)) = Link attr t (addHash s url, tit) fixInlineIRs _ v = v -normalisePath :: Inline -> Inline -normalisePath (Link attr t (url, tit)) = - let (path, uid) = span (/= '#') url in - Link attr t (takeFileName path ++ uid, tit) -normalisePath s = s - prependHash :: [String] -> Inline -> Inline prependHash ps l@(Link attr is (url, tit)) | or [s `isPrefixOf` url | s <- ps] = @@ -223,7 +215,7 @@ fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEP addHash :: String -> String -> String addHash _ "" = "" -addHash s ident = s ++ "#" ++ ident +addHash s ident = takeFileName s ++ "#" ++ ident removeEPUBAttrs :: [(String, String)] -> [(String, String)] removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 5a93e0d5b..85e9a0743 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -50,13 +50,14 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) Ext_native_divs, Ext_native_spans)) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk +import qualified Data.Map as M import Data.Maybe ( fromMaybe, isJust) import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero, void, unless ) import Control.Arrow ((***)) -import Control.Applicative ( (<$>), (<$), (<*), (*>), (<|>)) -import Data.Monoid (mconcat, Monoid, mempty, (<>), First (..)) +import Control.Applicative ( (<|>) ) +import Data.Monoid (First (..)) import Text.Printf (printf) import Debug.Trace (trace) import Text.TeXMath (readMathML, writeTeX) @@ -64,7 +65,8 @@ import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) import Network.URI (isURI) import Text.Pandoc.Error - +import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Parsec.Error @@ -74,8 +76,9 @@ readHtml :: ReaderOptions -- ^ Reader options -> Either PandocError Pandoc readHtml opts inp = mapLeft (ParseFailure . getError) . flip runReader def $ - runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing) - "source" tags + runParserT parseDoc + (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty) + "source" tags where tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -100,7 +103,9 @@ data HTMLState = HTMLState { parserState :: ParserState, noteTable :: [(String, Blocks)], - baseHref :: Maybe String + baseHref :: Maybe String, + identifiers :: [String], + headerMap :: M.Map Inlines String } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -252,6 +257,22 @@ pListItem nonItem = do let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr) (liDiv <>) <$> pInTags "li" block <* skipMany nonItem +parseListStyleType :: String -> ListNumberStyle +parseListStyleType "lower-roman" = LowerRoman +parseListStyleType "upper-roman" = UpperRoman +parseListStyleType "lower-alpha" = LowerAlpha +parseListStyleType "upper-alpha" = UpperAlpha +parseListStyleType "decimal" = Decimal +parseListStyleType _ = DefaultStyle + +parseTypeAttr :: String -> ListNumberStyle +parseTypeAttr "i" = LowerRoman +parseTypeAttr "I" = UpperRoman +parseTypeAttr "a" = LowerAlpha +parseTypeAttr "A" = UpperAlpha +parseTypeAttr "1" = Decimal +parseTypeAttr _ = DefaultStyle + pOrderedList :: TagParser Blocks pOrderedList = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) @@ -261,23 +282,19 @@ pOrderedList = try $ do sta' = if all isDigit sta then read sta else 1 - sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> - case lookup "type" attribs of - Just "1" -> Decimal - Just "I" -> UpperRoman - Just "i" -> LowerRoman - Just "A" -> UpperAlpha - Just "a" -> LowerAlpha - _ -> DefaultStyle + + pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"] + + typeAttr = fromMaybe "" $ lookup "type" attribs + classAttr = fromMaybe "" $ lookup "class" attribs + styleAttr = fromMaybe "" $ lookup "style" attribs + listStyle = fromMaybe "" $ pickListStyle styleAttr + + sty' = foldOrElse DefaultStyle + [ parseTypeAttr typeAttr + , parseListStyleType classAttr + , parseListStyleType listStyle + ] let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && not (t ~== TagClose "ol")) @@ -330,9 +347,16 @@ pRawTag = do pDiv :: TagParser Blocks pDiv = try $ do guardEnabled Ext_native_divs - TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True) - contents <- pInTags "div" block - return $ B.divWith (mkAttr attr) contents + let isDivLike "div" = True + isDivLike "section" = True + isDivLike _ = False + TagOpen tag attr <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True) + contents <- pInTags tag block + let (ident, classes, kvs) = mkAttr attr + let classes' = if tag == "section" + then "section":classes + else classes + return $ B.divWith (ident, classes', kvs) contents pRawHtmlBlock :: TagParser Blocks pRawHtmlBlock = do @@ -385,9 +409,10 @@ pHeader = try $ do let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] + attr' <- registerHeader (ident, classes, keyvals) contents return $ if bodyTitle then mempty -- skip a representation of the title in the body - else B.headerWith (ident, classes, keyvals) level contents + else B.headerWith attr' level contents pHrule :: TagParser Blocks pHrule = do @@ -587,7 +612,7 @@ pLink = try $ do let uid = fromAttrib "id" tag let cls = words $ fromAttrib "class" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") - return $ B.linkWith (escapeURI url) title (uid, cls, []) lab + return $ B.linkWith (uid, cls, []) (escapeURI url) title lab pImage :: TagParser Inlines pImage = do @@ -605,7 +630,7 @@ pImage = do "" -> [] v -> [(k, v)] let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"] - return $ B.imageWith (escapeURI url) title (uid, cls, kvs) (B.text alt) + return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) pCode :: TagParser Inlines pCode = try $ do @@ -618,12 +643,11 @@ pSpan = try $ do guardEnabled Ext_native_spans TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) contents <- pInTags "span" inline - let attr' = mkAttr attr - return $ case attr' of - ("",[],[("style",s)]) - | filter (`notElem` " \t;") s == "font-variant:small-caps" -> - B.smallcaps contents - _ -> B.spanWith (mkAttr attr) contents + let isSmallCaps = fontVariant == "small-caps" + where styleAttr = fromMaybe "" $ lookup "style" attr + fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr + let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr) + return $ tag contents pRawHtmlInline :: TagParser Inlines pRawHtmlInline = do @@ -920,6 +944,7 @@ htmlTag f = try $ do parseOptions{ optTagWarning = True } inp guard $ f next case next of + TagWarning _ -> fail "encountered TagWarning" TagComment s | "<!--" `isPrefixOf` inp -> do count (length s + 4) anyChar @@ -967,6 +992,14 @@ isSpace _ = False -- Instances +instance HasIdentifierList HTMLState where + extractIdentifierList = identifiers + updateIdentifierList f s = s{ identifiers = f (identifiers s) } + +instance HasHeaderMap HTMLState where + extractHeaderMap = headerMap + updateHeaderMap f s = s{ headerMap = f (headerMap s) } + -- This signature should be more general -- MonadReader HTMLLocal m => HasQuoteContext st m instance HasQuoteContext st (Reader HTMLLocal) where @@ -976,9 +1009,6 @@ instance HasQuoteContext st (Reader HTMLLocal) where instance HasReaderOptions HTMLState where extractReaderOptions = extractReaderOptions . parserState -instance Default HTMLState where - def = HTMLState def [] Nothing - instance HasMeta HTMLState where setMeta s b st = st {parserState = setMeta s b $ parserState st} deleteMeta s st = st {parserState = deleteMeta s $ parserState st} diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index aa2534afc..16f3d7ef3 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -16,8 +16,8 @@ module Text.Pandoc.Readers.Haddock import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Shared (trim, splitBy) -import Data.Monoid import Data.List (intersperse, stripPrefix) import Data.Maybe (fromMaybe) import Text.Pandoc.Definition diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index def429232..673deba14 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -46,8 +46,7 @@ import Data.Char ( chr, ord, isLetter, isAlphaNum ) import Control.Monad.Trans (lift) import Control.Monad import Text.Pandoc.Builder -import Control.Applicative -import Data.Monoid +import Control.Applicative ((<|>), many, optional) import Data.Maybe (fromMaybe, maybeToList) import System.Environment (getEnv) import System.FilePath (replaceExtension, (</>), takeExtension, addExtension) @@ -171,17 +170,23 @@ quoted' f starter ender = do try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs doubleQuote :: LP Inlines -doubleQuote = - quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") - <|> quoted' doubleQuoted (string "“") (void $ char '”') - -- the following is used by babel for localized quotes: - <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") - <|> quoted' doubleQuoted (string "\"") (void $ char '"') +doubleQuote = do + smart <- getOption readerSmart + if smart + then quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") + <|> quoted' doubleQuoted (string "“") (void $ char '”') + -- the following is used by babel for localized quotes: + <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") + <|> quoted' doubleQuoted (string "\"") (void $ char '"') + else str <$> many1 (oneOf "`'“”\"") singleQuote :: LP Inlines -singleQuote = - quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) - <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) +singleQuote = do + smart <- getOption readerSmart + if smart + then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) + <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) + else str <$> many1 (oneOf "`\'‘’") inline :: LP Inlines inline = (mempty <$ comment) @@ -235,7 +240,9 @@ blocks = mconcat <$> many block getRawCommand :: String -> LP String getRawCommand name' = do - rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced) + rawargs <- withRaw (many (try (optional sp *> opt)) *> + option "" (try (optional sp *> dimenarg)) *> + many braced) return $ '\\' : name' ++ snd rawargs lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v @@ -528,6 +535,7 @@ inlineCommands = M.fromList $ mkImage options src) , ("enquote", enquote) , ("cite", citation "cite" AuthorInText False) + , ("Cite", citation "cite" AuthorInText False) , ("citep", citation "citep" NormalCitation False) , ("citep*", citation "citep*" NormalCitation False) , ("citeal", citation "citeal" NormalCitation False) @@ -597,8 +605,8 @@ mkImage options src = do case takeExtension src of "" -> do defaultExt <- getOption readerDefaultImageExtension - return $ imageWith (addExtension src defaultExt) "" attr alt - _ -> return $ imageWith src "" attr alt + return $ imageWith attr (addExtension src defaultExt) "" alt + _ -> return $ imageWith attr src "" alt inNote :: Inlines -> Inlines inNote ils = @@ -824,10 +832,10 @@ tok :: LP Inlines tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar opt :: LP Inlines -opt = bracketed inline <* optional sp +opt = bracketed inline skipopts :: LP () -skipopts = skipMany opt +skipopts = skipMany (opt *> optional sp) inlineText :: LP Inlines inlineText = str <$> many1 inlineChar @@ -893,7 +901,7 @@ verbatimEnv' = fmap snd <$> string "\\begin" name <- braced' guard $ name `elem` ["verbatim", "Verbatim", "lstlisting", - "minted", "alltt"] + "minted", "alltt", "comment"] manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}") blob' :: IncludeParser @@ -1030,6 +1038,8 @@ environments = M.fromList , ("figure", env "figure" $ resetCaption *> skipopts *> blocks >>= addImageCaption) , ("center", env "center" blocks) + , ("longtable", env "longtable" $ + resetCaption *> skipopts *> blocks >>= addTableCaption) , ("table", env "table" $ resetCaption *> skipopts *> blocks >>= addTableCaption) , ("tabular*", env "tabular" $ simpTable True) @@ -1044,6 +1054,7 @@ environments = M.fromList , ("code", guardEnabled Ext_literate_haskell *> (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> verbEnv "code")) + , ("comment", mempty <$ verbEnv "comment") , ("verbatim", codeBlock <$> verbEnv "verbatim") , ("Verbatim", do options <- option [] keyvals let kvs = [ (if k == "firstnumber" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ebca7e83d..fd16a5f75 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -39,6 +39,7 @@ import Data.Ord ( comparing ) import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition +import Text.Pandoc.Emoji (emojis) import qualified Data.Text as T import Data.Text (Text) import qualified Data.Yaml as Yaml @@ -47,7 +48,7 @@ import qualified Data.HashMap.Strict as H import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.UTF8 as UTF8 import qualified Data.Vector as V -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.XML (fromEntities) @@ -55,8 +56,6 @@ import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) -import Data.Monoid (mconcat, mempty) -import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>)) import Control.Monad import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup @@ -64,6 +63,7 @@ import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Error type MarkdownParser = Parser [Char] ParserState @@ -328,23 +328,22 @@ stopLine = try $ (string "---" <|> string "...") >> blankline >> return () mmdTitleBlock :: MarkdownParser () mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block - kvPairs <- many1 kvPair + firstPair <- kvPair False + restPairs <- many (kvPair True) + let kvPairs = firstPair : restPairs blanklines updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: MarkdownParser (String, MetaValue) -kvPair = try $ do +kvPair :: Bool -> MarkdownParser (String, MetaValue) +kvPair allowEmpty = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') - skipMany1 spaceNoNewline - val <- manyTill anyChar + val <- trim <$> manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) - guard $ not . null . trim $ val + guard $ allowEmpty || not (null val) let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val + let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val return (key',val') - where - spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r')) parseMarkdown :: MarkdownParser Pandoc parseMarkdown = do @@ -506,9 +505,15 @@ block = do header :: MarkdownParser (F Blocks) header = setextHeader <|> atxHeader <?> "header" +atxChar :: MarkdownParser Char +atxChar = do + exts <- getOption readerExtensions + return $ if Set.member Ext_literate_haskell exts + then '=' else '#' + atxHeader :: MarkdownParser (F Blocks) atxHeader = try $ do - level <- many1 (char '#') >>= return . length + level <- atxChar >>= many1 . char >>= return . length notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list skipSpaces @@ -524,7 +529,7 @@ atxClosing :: MarkdownParser Attr atxClosing = try $ do attr' <- option nullAttr (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier) - skipMany (char '#') + skipMany . char =<< atxChar skipSpaces attr <- option attr' (guardEnabled Ext_header_attributes >> attributes) @@ -636,7 +641,11 @@ keyValAttr = try $ do val <- enclosed (char '"') (char '"') litChar <|> enclosed (char '\'') (char '\'') litChar <|> many (escapedChar' <|> noneOf " \t\n\r}") - return $ \(id',cs,kvs) -> (id',cs,kvs ++ [(key,val)]) + return $ \(id',cs,kvs) -> + case key of + "id" -> (val,cs,kvs) + "class" -> (id',cs ++ words val,kvs) + _ -> (id',cs,kvs ++ [(key,val)]) specialAttr :: MarkdownParser (Attr -> Attr) specialAttr = do @@ -1316,7 +1325,7 @@ removeOneLeadingSpace xs = gridTableFooter :: MarkdownParser [Char] gridTableFooter = blanklines -pipeBreak :: MarkdownParser [Alignment] +pipeBreak :: MarkdownParser ([Alignment], [Int]) pipeBreak = try $ do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1326,14 +1335,22 @@ pipeBreak = try $ do guard $ not (null rest && not openPipe) optional (char '|') blankline - return (first:rest) + return $ unzip (first:rest) pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do - (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak - lines' <- sequence <$> many pipeTableRow - let widths = replicate (length aligns) 0.0 - return $ (aligns, widths, heads, lines') + nonindentSpaces + lookAhead nonspaceChar + (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak + (lines', rawRows) <- unzip <$> many (withRaw pipeTableRow) + let maxlength = maximum $ map length rawRows + numColumns <- getOption readerColumns + let widths = if maxlength > numColumns + then map (\len -> + fromIntegral (len + 1) / fromIntegral numColumns) + seplengths + else replicate (length aligns) 0.0 + return $ (aligns, widths, heads, sequence lines') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1343,7 +1360,7 @@ sepPipe = try $ do -- parse a row, also returning probable alignments for org-table cells pipeTableRow :: MarkdownParser (F [Blocks]) pipeTableRow = do - nonindentSpaces + skipMany spaceChar openPipe <- (True <$ char '|') <|> return False let cell = mconcat <$> many (notFollowedBy (blankline <|> char '|') >> inline) @@ -1362,19 +1379,20 @@ pipeTableRow = do ils' | B.isNull ils' -> mempty | otherwise -> B.plain $ ils') cells' -pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') - many1 (char '-') + pipe <- many1 (char '-') right <- optionMaybe (char ':') skipMany spaceChar + let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right return $ - case (left,right) of - (Nothing,Nothing) -> AlignDefault - (Just _,Nothing) -> AlignLeft - (Nothing,Just _) -> AlignRight - (Just _,Just _) -> AlignCenter + ((case (left,right) of + (Nothing,Nothing) -> AlignDefault + (Just _,Nothing) -> AlignLeft + (Nothing,Just _) -> AlignRight + (Just _,Just _) -> AlignCenter), len) -- Succeed only if current line contains a pipe. scanForPipe :: Parser [Char] st () @@ -1453,6 +1471,7 @@ inline = choice [ whitespace , exampleRef , smart , return . B.singleton <$> charRef + , emoji , symbol , ltSign ] <?> "inline" @@ -1652,7 +1671,7 @@ endline = try $ do notFollowedBy (inList >> listStart) guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart - guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header + guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header guardDisabled Ext_backtick_code_blocks <|> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) notFollowedByHtmlCloser @@ -1705,16 +1724,16 @@ link = try $ do setState $ st{ stateAllowLinks = True } regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw) -regLink :: (String -> String -> Attr -> Inlines -> Inlines) +regLink :: (Attr -> String -> String -> Inlines -> Inlines) -> F Inlines -> MarkdownParser (F Inlines) regLink constructor lab = try $ do (src, tit) <- source attr <- option nullAttr $ guardEnabled Ext_common_link_attributes >> attributes - return $ constructor src tit attr <$> lab + return $ constructor attr src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: (String -> String -> Attr -> Inlines -> Inlines) +referenceLink :: (Attr -> String -> String -> Inlines -> Inlines) -> (F Inlines, String) -> MarkdownParser (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False @@ -1743,10 +1762,10 @@ referenceLink constructor (lab, raw) = do then do headerKeys <- asksF stateHeaderKeys case M.lookup key headerKeys of - Just ((src, tit), _) -> constructor src tit nullAttr <$> lab + Just ((src, tit), _) -> constructor nullAttr src tit <$> lab Nothing -> makeFallback else makeFallback - Just ((src,tit), attr) -> constructor src tit attr <$> lab + Just ((src,tit), attr) -> constructor attr src tit <$> lab dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB @@ -1780,9 +1799,9 @@ image = try $ do char '!' (lab,raw) <- reference defaultExt <- getOption readerDefaultImageExtension - let constructor src = case takeExtension src of - "" -> B.imageWith (addExtension src defaultExt) - _ -> B.imageWith src + let constructor attr' src = case takeExtension src of + "" -> B.imageWith attr' (addExtension src defaultExt) + _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) note :: MarkdownParser (F Inlines) @@ -1886,6 +1905,21 @@ rawHtmlInline = do else not . isTextTag return $ return $ B.rawInline "html" result +-- Emoji + +emojiChars :: [Char] +emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-'] + +emoji :: MarkdownParser (F Inlines) +emoji = try $ do + guardEnabled Ext_emoji + char ':' + emojikey <- many1 (oneOf emojiChars) + char ':' + case M.lookup emojikey emojis of + Just s -> return (return (B.str s)) + Nothing -> mzero + -- Citations cite :: MarkdownParser (F Inlines) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 6f7da2586..24b3f5c7e 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -38,15 +38,14 @@ module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Options import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) import Text.Pandoc.Parsing hiding ( nested ) import Text.Pandoc.Walk ( walk ) import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim ) -import Data.Monoid (mconcat, mempty) -import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad import Data.List (intersperse, intercalate, isPrefixOf ) import Text.HTML.TagSoup @@ -252,8 +251,8 @@ parseAttr = try $ do skipMany spaceChar k <- many1 letter char '=' - char '"' - v <- many1Till (satisfy (/='\n')) (char '"') + v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"')) + <|> many1 nonspaceChar return (k,v) tableStart :: MWParser () @@ -588,7 +587,7 @@ image = try $ do let attr = ("", [], kvs) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.imageWith fname ("fig:" ++ stringify caption) attr caption + return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption imageOption :: MWParser String imageOption = try $ char '|' *> opt diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 94ea9e3a2..4ec164e19 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -34,7 +34,6 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) import Text.Pandoc.Error -import Control.Applicative -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 19ddba36b..b2e5f2e67 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -9,9 +9,7 @@ import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.XML.Light import Text.Pandoc.Compat.TagSoupEntity (lookupEntity) import Data.Generics -import Data.Monoid import Control.Monad.State -import Control.Applicative ((<$>), (<$)) import Data.Default import Text.Pandoc.Compat.Except import Text.Pandoc.Error diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 1c8ec51bc..a925c1d84 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -36,7 +36,6 @@ import Codec.Archive.Zip import qualified Text.XML.Light as XML import qualified Data.ByteString.Lazy as B -import Data.Monoid ( mempty ) import Text.Pandoc.Definition import Text.Pandoc.Error diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs index 310ca028e..30f96c557 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs @@ -44,9 +44,9 @@ import qualified Control.Category as Cat import Control.Arrow import Control.Monad -import Data.Monoid import Data.Foldable +import Text.Pandoc.Compat.Monoid import Text.Pandoc.Readers.Odt.Arrows.Utils import Text.Pandoc.Readers.Odt.Generic.Fallible diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs index 9710973b3..8c9ee0539 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs @@ -42,12 +42,11 @@ module Text.Pandoc.Readers.Odt.Arrows.Utils where import Control.Arrow import Control.Monad ( join, MonadPlus(..) ) -import Data.Monoid import qualified Data.Foldable as F import Text.Pandoc.Readers.Odt.Generic.Fallible import Text.Pandoc.Readers.Odt.Generic.Utils - +import Text.Pandoc.Compat.Monoid and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c') and2 = (&&&) @@ -130,24 +129,23 @@ joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z joinOn = arr.uncurry -- | Applies a function to the uncurried result-pair of an arrow-application. --- (The §-symbol was chosen to evoke an association with pairs through the --- shared first character) -(>>§) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d -a >>§ f = a >>^ uncurry f +-- (The %-symbol was chosen to evoke an association with pairs.) +(>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d +a >>% f = a >>^ uncurry f --- | '(>>§)' with its arguments flipped -(§<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d -(§<<) = flip (>>§) +-- | '(>>%)' with its arguments flipped +(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d +(%<<) = flip (>>%) -- | Precomposition with an uncurried function -(§>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r -f §>> a = uncurry f ^>> a +(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r +f %>> a = uncurry f ^>> a -- | Precomposition with an uncurried function (right to left variant) -(<<§) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r -(<<§) = flip (§>>) +(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r +(<<%) = flip (%>>) -infixr 2 >>§, §<<, §>>, <<§ +infixr 2 >>%, %<<, %>>, <<% -- | Duplicate a value and apply an arrow to the second instance. @@ -272,7 +270,7 @@ newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c } instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where mempty = CoEval $ returnV mempty - (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>§ mappend + (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend -- | Evaluates a collection of arrows in a parallel fashion. -- @@ -434,29 +432,29 @@ a ^>>?^? f = a ^>> Left ^|||^ f a >>?! f = a >>> right f --- -(>>?§) :: (ArrowChoice a, Monoid f) +(>>?%) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f (b,b') -> (b -> b' -> c) -> FallibleArrow a x f c -a >>?§ f = a >>?^ (uncurry f) +a >>?% f = a >>?^ (uncurry f) --- -(^>>?§) :: (ArrowChoice a, Monoid f) +(^>>?%) :: (ArrowChoice a, Monoid f) => (x -> Either f (b,b')) -> (b -> b' -> c) -> FallibleArrow a x f c -a ^>>?§ f = arr a >>?^ (uncurry f) +a ^>>?% f = arr a >>?^ (uncurry f) --- -(>>?§?) :: (ArrowChoice a, Monoid f) +(>>?%?) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f (b,b') -> (b -> b' -> (Either f c)) -> FallibleArrow a x f c -a >>?§? f = a >>?^? (uncurry f) +a >>?%? f = a >>?^? (uncurry f) infixr 1 >>?, >>?^, >>?^? infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?! -infixr 1 >>?§, ^>>?§, >>?§? +infixr 1 >>?%, ^>>?%, >>?%? -- | Keep values that are Right, replace Left values by a constant. ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 9bb585b8e..1f1c57646 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -44,7 +44,6 @@ import Control.Applicative hiding ( liftA, liftA2, liftA3 ) import qualified Data.Map as M import Data.List ( find ) -import Data.Monoid import Data.Maybe import qualified Text.XML.Light as XML @@ -146,7 +145,7 @@ type OdtReaderSafe a b = XMLReaderSafe ReaderState a b fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b fromStyles f = keepingTheValue (getExtraState >>^ styleSet) - >>§ f + >>% f -- getStyleByName :: OdtReader StyleName Style @@ -163,7 +162,7 @@ lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice -- switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle) switchCurrentListStyle = keepingTheValue getExtraState - >>§ swapCurrentListStyle + >>% swapCurrentListStyle >>> first setExtraState >>^ snd @@ -171,7 +170,7 @@ switchCurrentListStyle = keepingTheValue getExtraState pushStyle :: OdtReaderSafe Style Style pushStyle = keepingTheValue ( ( keepingTheValue getExtraState - >>§ pushStyle' + >>% pushStyle' ) >>> setExtraState ) @@ -471,7 +470,7 @@ matchingElement :: (Monoid e) matchingElement ns name reader = (ns, name, asResultAccumulator reader) where asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m) - asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>§ (<>) + asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>% (<>) -- matchChildContent' :: (Monoid result) @@ -498,14 +497,14 @@ matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback -- -- | Open Document allows several consecutive spaces if they are marked up read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines -read_plain_text = fst ^&&& read_plain_text' >>§ recover +read_plain_text = fst ^&&& read_plain_text' >>% recover where -- fallible version read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines read_plain_text' = ( second ( arr extractText ) >>^ spreadChoice >>?! second text ) - >>?§ (<>) + >>?% (<>) -- extractText :: XML.Content -> Fallible String extractText (XML.Text cData) = succeedWith (XML.cdData cData) diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs index 5922164c9..d0fdc228f 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs @@ -41,9 +41,8 @@ module Text.Pandoc.Readers.Odt.Generic.Fallible where import Control.Applicative import Control.Monad - +import Text.Pandoc.Compat.Monoid ((<>)) import qualified Data.Foldable as F -import Data.Monoid -- | Default for now. Will probably become a class at some point. type Failure = () diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index ec7e0ea5e..8c03d1a09 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -123,7 +123,6 @@ import Control.Arrow import qualified Data.Map as M import qualified Data.Foldable as F import Data.Default -import Data.Monoid ( Monoid ) import Data.Maybe import qualified Text.XML.Light as XML @@ -332,7 +331,7 @@ convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA where setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v modifyWithA = keepingTheValue (moreState ^>> a) - >>^ spreadChoice >>?§ flip replaceExtraState + >>^ spreadChoice >>?% flip replaceExtraState -- | First sets the extra state to the new value. Then produces a new -- extra state with a converter that uses the new state. Finally, the @@ -414,14 +413,14 @@ elemName :: (NameSpaceID nsID) -> XMLConverter nsID extraState x XML.QName elemName nsID name = lookupNSiri nsID &&& lookupNSprefix nsID - >>§ XML.QName name + >>% XML.QName name -- | Checks if a given element matches both a specified namespace id -- and a specified element name elemNameIs :: (NameSpaceID nsID) => nsID -> ElementName -> XMLConverter nsID extraState XML.Element Bool -elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>§ hasThatName +elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName where hasThatName e iri = let elName = XML.elName e in XML.qName elName == name && XML.qURI elName == iri @@ -462,8 +461,8 @@ currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>> (XML.qName >>^ (&&).(== name) ) ^&&&^ (XML.qIRI >>^ (==) ) - ) >>§ (.) - ) &&& lookupNSiri nsID >>§ ($) + ) >>% (.) + ) &&& lookupNSiri nsID >>% ($) -} -- @@ -488,7 +487,7 @@ findChildren :: (NameSpaceID nsID) -> XMLConverter nsID extraState x [XML.Element] findChildren nsID name = elemName nsID name &&& getCurrentElement - >>§ XML.findChildren + >>% XML.findChildren -- filterChildren :: (XML.Element -> Bool) @@ -509,7 +508,7 @@ findChild' :: (NameSpaceID nsID) -> XMLConverter nsID extraState x (Maybe XML.Element) findChild' nsID name = elemName nsID name &&& getCurrentElement - >>§ XML.findChild + >>% XML.findChild -- findChild :: (NameSpaceID nsID) @@ -597,7 +596,7 @@ isThatTheAttrValue :: (NameSpaceID nsID) isThatTheAttrValue nsID attrName = keepingTheValue (findAttr nsID attrName) - >>§ right.(==) + >>% right.(==) -- | Lookup value in a dictionary, fail if no attribute found or value -- not in dictionary @@ -670,7 +669,7 @@ findAttr' :: (NameSpaceID nsID) -> XMLConverter nsID extraState x (Maybe AttributeValue) findAttr' nsID attrName = elemName nsID attrName &&& getCurrentElement - >>§ XML.findAttr + >>% XML.findAttr -- | Return value as string or fail findAttr :: (NameSpaceID nsID) @@ -788,7 +787,7 @@ prepareIteration :: (NameSpaceID nsID) -> XMLConverter nsID extraState b [(b, XML.Element)] prepareIteration nsID name = keepingTheValue (findChildren nsID name) - >>§ distributeValue + >>% distributeValue -- | Applies a converter to every child element of a specific type. -- Collects results in a 'Monoid'. @@ -878,9 +877,9 @@ makeMatcherE nsID name c = ( second ( elemNameIs nsID name >>^ bool Nothing (Just tryC) ) - >>§ (<|>) + >>% (<|>) ) &&&^ snd - where tryC = (fst ^&&& executeThere c >>§ recover) &&&^ snd + where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd -- Helper function: The @c@ is actually a converter that is to be selected by -- matching XML content to the first two parameters. @@ -900,14 +899,14 @@ makeMatcherC nsID name c = ( second ( contentToElem >>^ bool Nothing (Just cWithJump) ) ) - >>§ (<|>) + >>% (<|>) ) &&&^ snd where cWithJump = ( fst ^&&& ( second contentToElem >>> spreadChoice ^>>? executeThere c ) - >>§ recover) + >>% recover) &&&^ snd contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element contentToElem = arr $ \e -> case e of diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs index e28056814..deb009998 100644 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs @@ -107,4 +107,4 @@ nsIDs = [ ("http://www.w3.org/1999/xhtml" , NsXHtml ), ("http://www.w3.org/2002/xforms" , NsXForms ), ("http://www.w3.org/1999/xlink" , NsXLink ) - ]
\ No newline at end of file + ] diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 1cf87cc59..96cfed0b3 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -78,7 +78,6 @@ import qualified Data.Map as M import qualified Data.Set as S import Data.List ( unfoldr ) import Data.Default -import Data.Monoid import Data.Maybe import qualified Text.XML.Light as XML @@ -175,7 +174,7 @@ findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch) findPitch = ( lookupAttr NsStyle "font-pitch" `ifFailedDo` findAttr NsStyle "font-name" >>? ( keepingTheValue getExtraState - >>§ M.lookup + >>% M.lookup >>^ maybeToChoice ) ) @@ -362,11 +361,11 @@ instance Read XslUnit where estimateInMillimeter :: Int -> XslUnit -> Int estimateInMillimeter n XslUnitMM = n estimateInMillimeter n XslUnitCM = n * 10 -estimateInMillimeter n XslUnitInch = n * 25 -- * 25.4 -estimateInMillimeter n XslUnitPoints = n `div` 3 -- * 1/72 * 25.4 -estimateInMillimeter n XslUnitPica = n * 4 -- * 12 * 1/72 * 25.4 -estimateInMillimeter n XslUnitPixel = n `div`3 -- * 1/72 * 25.4 -estimateInMillimeter n XslUnitEM = n * 7 -- * 16 * 1/72 * 25.4 +estimateInMillimeter n XslUnitInch = n * 25 -- \* 25.4 +estimateInMillimeter n XslUnitPoints = n `div` 3 -- \* 1/72 * 25.4 +estimateInMillimeter n XslUnitPica = n * 4 -- \* 12 * 1/72 * 25.4 +estimateInMillimeter n XslUnitPixel = n `div`3 -- \* 1/72 * 25.4 +estimateInMillimeter n XslUnitEM = n * 7 -- \* 16 * 1/72 * 25.4 ---- @@ -385,7 +384,7 @@ getListLevelStyle level ListStyle{..} = let (lower , exactHit , _) = M.splitLookup level levelStyles in exactHit <|> fmap fst (M.maxView lower) -- findBy (`M.lookup` levelStyles) [level, (level-1) .. 1] - -- ^ simpler, but in general less efficient + -- \^ simpler, but in general less efficient data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType , listItemPrefix :: Maybe String @@ -448,7 +447,7 @@ readAllStyles :: StyleReader _x Styles readAllStyles = ( readFontPitches >>?! ( readAutomaticStyles &&& readStyles )) - >>?§? chooseMax + >>?%? chooseMax -- all top elements are always on the same hierarchy level -- diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 980f63504..3be47cfd4 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -2,7 +2,7 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-} {- -Copyright (C) 2014-2015 Albert Krewinkel <tarleb@moltkeplatz.de> +Copyright (C) 2014-2015 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,19 +21,20 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Org - Copyright : Copyright (C) 2014 Albert Krewinkel + Copyright : Copyright (C) 2014-2015 Albert Krewinkel License : GNU GPL, version 2 or above - Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de> + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Conversion of org-mode formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Org ( readOrg ) where import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>) - , trimInlines ) +import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), + trimInlines ) import Text.Pandoc.Definition +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF @@ -45,8 +46,6 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Text.TeXMath (readTeX, writePandoc, DisplayType(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Applicative ( Applicative, pure - , (<$>), (<$), (<*>), (<*), (*>) ) import Control.Arrow (first) import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks, local) @@ -55,7 +54,6 @@ import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust) -import Data.Monoid (Monoid, mconcat, mempty, mappend) import Network.HTTP (urlEncode) import Text.Pandoc.Error @@ -70,6 +68,14 @@ data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) +instance HasIdentifierList OrgParserState where + extractIdentifierList = orgStateIdentifiers + updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) } + +instance HasHeaderMap OrgParserState where + extractHeaderMap = orgStateHeaderMap + updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) } + parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks @@ -135,6 +141,9 @@ data OrgParserState = OrgParserState , orgStateMeta :: Meta , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable + , orgStateParserContext :: ParserContext + , orgStateIdentifiers :: [String] + , orgStateHeaderMap :: M.Map Inlines String } instance Default OrgParserLocal where @@ -174,6 +183,9 @@ defaultOrgParserState = OrgParserState , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta , orgStateNotes' = [] + , orgStateParserContext = NullState + , orgStateIdentifiers = [] + , orgStateHeaderMap = M.empty } recordAnchorId :: String -> OrgParser () @@ -282,6 +294,23 @@ blanklines = <* updateLastPreCharPos <* updateLastForbiddenCharPos +-- | Succeeds when we're in list context. +inList :: OrgParser () +inList = do + ctx <- orgStateParserContext <$> getState + guard (ctx == ListItemState) + +-- | Parse in different context +withContext :: ParserContext -- ^ New parser context + -> OrgParser a -- ^ Parser to run in that context + -> OrgParser a +withContext context parser = do + oldContext <- orgStateParserContext <$> getState + updateState $ \s -> s{ orgStateParserContext = context } + result <- parser + updateState $ \s -> s{ orgStateParserContext = oldContext } + return result + -- -- parsing blocks -- @@ -397,7 +426,7 @@ verseBlock blkProp = try $ do ignHeaders content <- rawBlockContent blkProp fmap B.para . mconcat . intersperse (pure B.linebreak) - <$> mapM (parseFromString parseInlines) (lines content) + <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content) exportsCode :: [(String, String)] -> Bool exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs @@ -504,10 +533,16 @@ rundocBlockClass :: String rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgParamValue +blockOption = try $ do + argKey <- orgArgKey + paramValue <- option "yes" orgParamValue + return (argKey, paramValue) inlineBlockOption :: OrgParser (String, String) -inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue +inlineBlockOption = try $ do + argKey <- orgArgKey + paramValue <- option "yes" orgInlineParamValue + return (argKey, paramValue) orgArgKey :: OrgParser String orgArgKey = try $ @@ -516,11 +551,17 @@ orgArgKey = try $ orgParamValue :: OrgParser String orgParamValue = try $ - skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces + skipSpaces + *> notFollowedBy (char ':' ) + *> many1 (noneOf "\t\n\r ") + <* skipSpaces orgInlineParamValue :: OrgParser String orgInlineParamValue = try $ - skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces + skipSpaces + *> notFollowedBy (char ':') + *> many1 (noneOf "\t\n\r ]") + <* skipSpaces orgArgWordChar :: OrgParser Char orgArgWordChar = alphaNum <|> oneOf "-_" @@ -668,7 +709,10 @@ header = try $ do title <- manyTill inline (lookAhead headerEnd) tags <- headerEnd let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags - return $ B.header level <$> inlns + st <- getState + let inlines = runF inlns st + attr <- registerHeader nullAttr inlines + return $ pure (B.headerWith attr level inlines) where tagToInlineF :: String -> F Inlines tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty @@ -687,7 +731,7 @@ headerTags = try $ headerStart :: OrgParser Int headerStart = try $ - (length <$> many1 (char '*')) <* many1 (char ' ') + (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos -- Don't use (or need) the reader wrapper here, we want hline to be @@ -879,9 +923,13 @@ noteBlock = try $ do paraOrPlain :: OrgParser (F Blocks) paraOrPlain = try $ do ils <- parseInlines - nl <- option False (newline >> return True) - try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >> - return (B.para <$> ils)) + nl <- option False (newline *> return True) + -- Read block as paragraph, except if we are in a list context and the block + -- is directly followed by a list item, in which case the block is read as + -- plain text. + try (guard nl + *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart)) + *> return (B.para <$> ils)) <|> (return (B.plain <$> ils)) inlinesTillNewline :: OrgParser (F Inlines) @@ -946,19 +994,22 @@ definitionListItem :: OrgParser Int -> OrgParser (F (Inlines, [Blocks])) definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength - term <- manyTill (noneOf "\n\r") (try $ string "::") + term <- manyTill (noneOf "\n\r") (try definitionMarker) line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString parseInlines term contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' + where + definitionMarker = + spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline) -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int -> OrgParser (F Blocks) -listItem start = try $ do +listItem start = try . withContext ListItemState $ do markerLength <- try start firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) @@ -1537,8 +1588,11 @@ smart :: OrgParser (F Inlines) smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [orgApostrophe, dash, ellipses]) - where orgApostrophe = + choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) + where + orgDash = dash <* updatePositions '-' + orgEllipses = ellipses <* updatePositions '.' + orgApostrophe = (char '\'' <|> char '\8217') <* updateLastPreCharPos <* updateLastForbiddenCharPos *> return (B.str "\x2019") @@ -1546,9 +1600,10 @@ smart = do singleQuoted :: OrgParser (F Inlines) singleQuoted = try $ do singleQuoteStart + updatePositions '\'' withQuoteContext InSingleQuote $ fmap B.singleQuoted . trimInlinesF . mconcat <$> - many1Till inline singleQuoteEnd + many1Till inline (singleQuoteEnd <* updatePositions '\'') -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote @@ -1556,6 +1611,7 @@ singleQuoted = try $ do doubleQuoted :: OrgParser (F Inlines) doubleQuoted = try $ do doubleQuoteStart + updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return (fmap B.doubleQuoted . trimInlinesF $ contents)) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 4138d65ea..0e5bb2a87 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -44,13 +44,11 @@ import Data.List ( findIndex, intersperse, intercalate, import Data.Maybe (fromMaybe) import qualified Data.Map as M import Text.Printf ( printf ) -import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>), pure) -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import qualified Text.Pandoc.Builder as B -import Data.Monoid (mconcat, mempty) import Data.Sequence (viewr, ViewR(..)) import Data.Char (toLower, isHexDigit, isSpace) - +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Error -- | Parse reStructuredText string and return Pandoc document. @@ -614,20 +612,22 @@ directive' = do return mempty -- TODO: --- - Silently ignores illegal fields -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix addNewRole :: String -> [(String, String)] -> RSTParser Blocks addNewRole roleString fields = do (role, parentRole) <- parseFromString inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState - let (baseRole, baseFmt, baseAttr) = - maybe (parentRole, Nothing, nullAttr) id $ - M.lookup parentRole customRoles + let getBaseRole (r, f, a) roles = + case M.lookup r roles of + Just (r', f', a') -> getBaseRole (r', f', a') roles + Nothing -> (r, f, a) + (baseRole, baseFmt, baseAttr) = + getBaseRole (parentRole, Nothing, nullAttr) customRoles fmt = if parentRole == "raw" then lookup "format" fields else baseFmt annotate :: [String] -> [String] annotate = maybe id (:) $ - if parentRole == "code" + if baseRole == "code" then lookup "language" fields else Nothing attr = let (ident, classes, keyValues) = baseAttr @@ -636,12 +636,12 @@ addNewRole roleString fields = do -- warn about syntax we ignore flip mapM_ fields $ \(key, _) -> case key of - "language" -> when (parentRole /= "code") $ addWarning Nothing $ + "language" -> when (baseRole /= "code") $ addWarning Nothing $ "ignoring :language: field because the parent of role :" ++ - role ++ ": is :" ++ parentRole ++ ": not :code:" - "format" -> when (parentRole /= "raw") $ addWarning Nothing $ + role ++ ": is :" ++ baseRole ++ ": not :code:" + "format" -> when (baseRole /= "raw") $ addWarning Nothing $ "ignoring :format: field because the parent of role :" ++ - role ++ ": is :" ++ parentRole ++ ": not :raw:" + role ++ ": is :" ++ baseRole ++ ": not :raw:" _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++ ": in definition of role :" ++ role ++ ": in" when (parentRole == "raw" && countKeys "format" > 1) $ @@ -1138,7 +1138,7 @@ referenceLink = try $ do Just val -> return val -- if anonymous link, remove key so it won't be used again when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } - return $ B.linkWith src tit attr label' + return $ B.linkWith attr src tit label' autoURI :: RSTParser Inlines autoURI = do diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 07b414431..fc2bdc069 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -38,8 +38,6 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, macro, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Data.Monoid (Monoid, mconcat, mempty) -import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad import Text.Printf (printf) import Debug.Trace (trace) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index ec1da896d..3db01faf4 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -64,9 +64,8 @@ import Text.HTML.TagSoup.Match import Data.List ( intercalate ) import Data.Char ( digitToInt, isUpper) import Control.Monad ( guard, liftM, when ) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Printf -import Control.Applicative ((<$>), (*>), (<*), (<$)) -import Data.Monoid import Debug.Trace (trace) import Text.Pandoc.Error @@ -81,11 +80,12 @@ readTextile opts s = -- | Generate a Pandoc ADT from a textile document parseTextile :: Parser [Char] ParserState Pandoc parseTextile = do - -- textile allows raw HTML and does smart punctuation by default + -- textile allows raw HTML and does smart punctuation by default, + -- but we do not enable smart punctuation unless it is explicitly + -- asked for, for better conversion to other light markup formats oldOpts <- stateOptions `fmap` getState updateState $ \state -> state{ stateOptions = - oldOpts{ readerSmart = True - , readerParseRaw = True + oldOpts{ readerParseRaw = True , readerOldDashes = True } } many blankline @@ -535,6 +535,7 @@ link = try $ do image :: Parser [Char] ParserState Inlines image = try $ do char '!' >> notFollowedBy space + _ <- attributes -- ignore for now, until we have image attributes src <- manyTill anyChar' (lookAhead $ oneOf "!(") alt <- option "" (try $ (char '(' >> manyTill anyChar' (char ')'))) char '!' diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 304d6d4c5..58841f2ce 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -33,17 +33,15 @@ module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags where import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks, (<>) - , trimInlines ) +import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL) import Text.Pandoc.Parsing hiding (space, spaces, uri, macro) -import Control.Applicative ((<$>), (<$), (<*>), (<*), (*>)) import Data.Char (toLower) import Data.List (transpose, intersperse, intercalate) import Data.Maybe (fromMaybe) -import Data.Monoid (Monoid, mconcat, mempty, mappend) --import Network.URI (isURI) -- Not sure whether to use this function import Control.Monad (void, guard, when) import Data.Default @@ -53,7 +51,7 @@ import Text.Pandoc.Error import Data.Time.LocalTime (getZonedTime) import Text.Pandoc.Compat.Directory(getModificationTime) import Data.Time.Format (formatTime) -import Text.Pandoc.Compat.Locale (defaultTimeLocale) +import Text.Pandoc.Compat.Time (defaultTimeLocale) import System.IO.Error (catchIOError) type T2T = ParserT String ParserState (Reader T2TMeta) diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index a77127286..390a7a21a 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -46,7 +46,7 @@ import Text.Pandoc.MIME (MimeType) import Text.Pandoc.UTF8 (toString) import Text.Pandoc.Options (WriterOptions(..)) import Data.List (isPrefixOf) -import Control.Applicative +import Control.Applicative ((<|>)) import Text.Parsec (runParserT, ParsecT) import qualified Text.Parsec as P import Control.Monad.Trans (lift) @@ -135,7 +135,7 @@ pCSSUrl media sourceURL d = P.try $ do 'd':'a':'t':'a':':':_ -> return fallback u -> do let url' = if isURI u then u else d </> u enc <- lift $ getDataURI media sourceURL "" url' - return (B.pack enc) + return (B.pack $ "url(" ++ enc ++ ")") getDataURI :: MediaBag -> Maybe String -> MimeType -> String diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index a86e5da95..96dbec6f6 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -92,7 +92,9 @@ module Text.Pandoc.Shared ( -- * Safe read safeRead, -- * Temp directory - withTempDir + withTempDir, + -- * Version + pandocVersion ) where import Text.Pandoc.Definition @@ -106,35 +108,38 @@ import System.Exit (exitWith, ExitCode(..)) import Data.Char ( toLower, isLower, isUpper, isAlpha, isLetter, isDigit, isSpace ) import Data.List ( find, stripPrefix, intercalate ) +import Data.Version ( showVersion ) import qualified Data.Map as M import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI ) import qualified Data.Set as Set import System.Directory -import System.FilePath (joinPath, splitDirectories, pathSeparator, isPathSeparator) +import System.FilePath (splitDirectories, isPathSeparator) +import qualified System.FilePath.Posix as Posix import Text.Pandoc.MIME (MimeType, getMimeType) import System.FilePath ( (</>), takeExtension, dropExtension) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import qualified Control.Exception as E -import Control.Applicative ((<$>)) import Control.Monad (msum, unless, MonadPlus(..)) import Text.Pandoc.Pretty (charWidth) -import Text.Pandoc.Compat.Locale (defaultTimeLocale) -import Data.Time +import Text.Pandoc.Compat.Time import Data.Time.Clock.POSIX import System.IO (stderr) import System.IO.Temp import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), renderOptions) +import Text.Pandoc.Compat.Monoid ((<>)) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 -import Text.Pandoc.Compat.Monoid import Data.ByteString.Base64 (decodeLenient) import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) import qualified Data.Text as T (toUpper, pack, unpack) import Data.ByteString.Lazy (toChunks, fromChunks) import qualified Data.ByteString.Lazy as BL +import Paths_pandoc (version) + +import Codec.Archive.Zip #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -155,7 +160,6 @@ import Network.HTTP.Client.TLS (tlsManagerSettings) import System.Environment (getEnv) import Network.HTTP.Types.Header ( hContentType) import Network (withSocketsDo) -import Codec.Archive.Zip #else import Network.URI (parseURI) import Network.HTTP (findHeader, rspBody, @@ -163,6 +167,10 @@ import Network.HTTP (findHeader, rspBody, import Network.Browser (browse, setAllowRedirects, setOutHandler, request) #endif +-- | Version number of pandoc library. +pandocVersion :: String +pandocVersion = showVersion version + -- -- List processing -- @@ -278,9 +286,12 @@ toRomanNumeral x = _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) _ -> "" --- | Escape whitespace in URI. +-- | Escape whitespace and some punctuation characters in URI. escapeURI :: String -> String -escapeURI = escapeURIString (not . isSpace) +escapeURI = escapeURIString (not . needsEscaping) + where needsEscaping c = isSpace c || c `elem` + ['<','>','|','"','{','}','[',']','^', '`'] + -- | Convert tabs to spaces and filter out DOS line endings. -- Tabs will be preserved if tab stop is set to 0. @@ -310,7 +321,12 @@ tabFilter tabStop = normalizeDate :: String -> Maybe String normalizeDate s = fmap (formatTime defaultTimeLocale "%F") (msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day) - where parsetimeWith = parseTime defaultTimeLocale + where parsetimeWith = +#if MIN_VERSION_time(1,5,0) + parseTimeM True defaultTimeLocale +#else + parseTime defaultTimeLocale +#endif formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", "%d %B %Y", "%b. %d, %Y", "%B %d, %Y", "%Y"] @@ -539,6 +555,7 @@ stringify = query go . walk deNote go (Str x) = x go (Code _ x) = x go (Math _ x) = x + go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105 go LineBreak = " " go _ = "" deNote (Note _) = Str "" @@ -830,7 +847,7 @@ readDefaultDataFile fname = case lookup (makeCanonical fname) dataFiles of Nothing -> err 97 $ "Could not find data file " ++ fname Just contents -> return contents - where makeCanonical = joinPath . transformPathParts . splitDirectories + where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories transformPathParts = reverse . foldl go [] go as "." = as go (_:as) ".." = as @@ -838,7 +855,6 @@ readDefaultDataFile fname = #else getDataFileName fname' >>= checkExistence >>= BS.readFile where fname' = if fname == "README" then fname else "data" </> fname -#endif checkExistence :: FilePath -> IO FilePath checkExistence fn = do @@ -846,6 +862,7 @@ checkExistence fn = do if exists then return fn else err 97 ("Could not find data file " ++ fn) +#endif -- | Read file from specified user data directory or, if not found there, from -- Cabal data directory. @@ -897,9 +914,9 @@ fetchItem' media sourceURL s = do -- | Read from a URL and return raw data and maybe mime type. openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) openURL u - | Just u' <- stripPrefix "data:" u = - let mime = takeWhile (/=',') u' - contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u' + | Just u'' <- stripPrefix "data:" u = + let mime = takeWhile (/=',') u'' + contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u'' in return $ Right (decodeLenient contents, Just mime) #ifdef HTTP_CLIENT | otherwise = withSocketsDo $ E.try $ do @@ -965,14 +982,14 @@ hush (Right x) = Just x -- > collapseFilePath "parent/foo/.." == "parent" -- > collapseFilePath "/parent/foo/../../bar" == "/bar" collapseFilePath :: FilePath -> FilePath -collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories +collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories where go rs "." = rs go r@(p:rs) ".." = case p of ".." -> ("..":r) (checkPathSeperator -> Just True) -> ("..":r) _ -> rs - go _ (checkPathSeperator -> Just True) = [[pathSeparator]] + go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]] go rs x = x:rs isSingleton [] = Nothing isSingleton [x] = Just x diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index b3243d752..a010433fa 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -98,11 +98,10 @@ import Control.Monad (guard, when) import Data.Aeson (ToJSON(..), Value(..)) import qualified Text.Parsec as P import Text.Parsec.Text (Parser) -import Control.Applicative +import Text.Pandoc.Compat.Monoid ((<>)) import qualified Data.Text as T import Data.Text (Text) import Data.Text.Encoding (encodeUtf8) -import Text.Pandoc.Compat.Monoid ((<>), Monoid(..)) import Data.List (intersperse) import System.FilePath ((</>), (<.>)) import qualified Data.Map as M @@ -118,6 +117,7 @@ import Text.Blaze (preEscapedText, Html) import Data.ByteString.Lazy (ByteString, fromChunks) import Text.Pandoc.Shared (readDataFileUTF8, ordNub) import Data.Vector ((!?)) +import Control.Applicative (many, (<|>)) -- | Get default template for the specified writer. getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 4e8c96907..174b00dac 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -51,7 +51,6 @@ import Control.Monad.State import qualified Data.Map as M import Data.Aeson (Value(String), fromJSON, toJSON, Result(..)) import qualified Data.Text as T -import Control.Applicative ((<*), (*>)) data WriterState = WriterState { defListMarker :: String , orderedListLevel :: Int diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 56fcd4b0b..bbc5f7132 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -81,16 +81,21 @@ pandocToConTeXt options (Pandoc meta blocks) = do "subsubsubsection","subsubsubsubsection"]) $ defField "body" main $ defField "number-sections" (writerNumberSections options) - $ defField "mainlang" (maybe "" - (reverse . takeWhile (/=',') . reverse) - (lookup "lang" $ writerVariables options)) $ metadata + let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $ + getField "lang" context) + $ defField "context-dir" (toContextDir $ getField "dir" context) + $ context return $ if writerStandalone options - then renderTemplate' (writerTemplate options) context + then renderTemplate' (writerTemplate options) context' else main --- escape things as needed for ConTeXt +toContextDir :: Maybe String -> String +toContextDir (Just "rtl") = "r2l" +toContextDir (Just "ltr") = "l2r" +toContextDir _ = "" +-- | escape things as needed for ConTeXt escapeCharForConTeXt :: WriterOptions -> Char -> String escapeCharForConTeXt opts ch = let ligatures = writerTeXLigatures opts in @@ -156,13 +161,22 @@ blockToConTeXt (CodeBlock _ str) = -- blankline because \stoptyping can't have anything after it, inc. '}' blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline blockToConTeXt (RawBlock _ _ ) = return empty -blockToConTeXt (Div (ident,_,_) bs) = do - contents <- blockListToConTeXt bs - if null ident - then return contents - else return $ - ("\\reference" <> brackets (text $ toLabel ident) <> braces empty <> - "%") $$ contents +blockToConTeXt (Div (ident,_,kvs) bs) = do + let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment" + let wrapRef txt = if null ident + then txt + else ("\\reference" <> brackets (text $ toLabel ident) <> + braces empty <> "%") $$ txt + wrapDir = case lookup "dir" kvs of + Just "rtl" -> align "righttoleft" + Just "ltr" -> align "lefttoright" + _ -> id + wrapLang txt = case lookup "lang" kvs of + Just lng -> "\\start\\language[" + <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop" + Nothing -> txt + wrapBlank txt = blankline <> txt <> blankline + fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs blockToConTeXt (BulletList lst) = do contents <- mapM listItemToConTeXt lst return $ ("\\startitemize" <> if isTightList lst @@ -358,7 +372,16 @@ inlineToConTeXt (Note contents) = do then text "\\footnote{" <> nest 2 contents' <> char '}' else text "\\startbuffer " <> nest 2 contents' <> text "\\stopbuffer\\footnote{\\getbuffer}" -inlineToConTeXt (Span _ ils) = inlineListToConTeXt ils +inlineToConTeXt (Span (_,_,kvs) ils) = do + let wrapDir txt = case lookup "dir" kvs of + Just "rtl" -> braces $ "\\righttoleft " <> txt + Just "ltr" -> braces $ "\\lefttoright " <> txt + _ -> txt + wrapLang txt = case lookup "lang" kvs of + Just lng -> "\\start\\language[" <> text (fromBcp47' lng) + <> "]" <> txt <> "\\stop " + Nothing -> txt + fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils -- | Craft the section header, inserting the secton reference, if supplied. sectionHeader :: Attr @@ -385,3 +408,38 @@ sectionHeader (ident,classes,_) hdrLevel lst = do then char '\\' <> chapter <> braces contents else contents <> blankline +fromBcp47' :: String -> String +fromBcp47' = fromBcp47 . splitBy (=='-') + +-- Takes a list of the constituents of a BCP 47 language code +-- and irons out ConTeXt's exceptions +-- https://tools.ietf.org/html/bcp47#section-2.1 +-- http://wiki.contextgarden.net/Language_Codes +fromBcp47 :: [String] -> String +fromBcp47 [] = "" +fromBcp47 ("ar":"SY":_) = "ar-sy" +fromBcp47 ("ar":"IQ":_) = "ar-iq" +fromBcp47 ("ar":"JO":_) = "ar-jo" +fromBcp47 ("ar":"LB":_) = "ar-lb" +fromBcp47 ("ar":"DZ":_) = "ar-dz" +fromBcp47 ("ar":"MA":_) = "ar-ma" +fromBcp47 ("de":"1901":_) = "deo" +fromBcp47 ("de":"DE":_) = "de-de" +fromBcp47 ("de":"AT":_) = "de-at" +fromBcp47 ("de":"CH":_) = "de-ch" +fromBcp47 ("el":"poly":_) = "agr" +fromBcp47 ("en":"US":_) = "en-us" +fromBcp47 ("en":"GB":_) = "en-gb" +fromBcp47 ("grc":_) = "agr" +fromBcp47 x = fromIso $ head x + where + fromIso "cz" = "cs" + fromIso "el" = "gr" + fromIso "eu" = "ba" + fromIso "he" = "il" + fromIso "jp" = "ja" + fromIso "uk" = "ua" + fromIso "vi" = "vn" + fromIso "zh" = "cn" + fromIso l = l + diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 8f2810932..e89828911 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -44,7 +44,6 @@ import Scripting.Lua (LuaState, StackValue, callfunc) import Text.Pandoc.Writers.Shared import qualified Scripting.Lua as Lua import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Monoid import Control.Monad (when) import Control.Exception import qualified Data.Map as M diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index e3444d257..d2c39e3b9 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -39,7 +39,6 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Readers.TeXMath import Data.List ( stripPrefix, isPrefixOf, intercalate, isSuffixOf ) import Data.Char ( toLower ) -import Control.Applicative ((<$>)) import Data.Monoid ( Any(..) ) import Text.Pandoc.Highlighting ( languages, languagesByExtension ) import Text.Pandoc.Pretty diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index e9f256210..dd4a4b258 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -35,13 +35,11 @@ import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 import qualified Data.Map as M import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.Compat.Monoid ((<>)) import Codec.Archive.Zip import Data.Time.Clock.POSIX import Data.Time.Clock -import Data.Time.Format import System.Environment -import Text.Pandoc.Compat.Locale (defaultTimeLocale) +import Text.Pandoc.Compat.Time import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.ImageSize @@ -62,9 +60,10 @@ import Data.Unique (hashUnique, newUnique) import System.Random (randomRIO) import Text.Printf (printf) import qualified Control.Exception as E +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) -import Control.Applicative ((<$>), (<|>), (<*>)) +import Control.Applicative ((<|>)) import Data.Maybe (fromMaybe, mapMaybe, maybeToList) import Data.Char (ord) @@ -181,8 +180,8 @@ renumIds f renumMap = map (renumId f renumMap) -- | Certain characters are invalid in XML even if escaped. -- See #1992 -stripInvalidChars :: Pandoc -> Pandoc -stripInvalidChars = bottomUp (filter isValidChar) +stripInvalidChars :: String -> String +stripInvalidChars = filter isValidChar -- | See XML reference isValidChar :: Char -> Bool @@ -208,10 +207,10 @@ writeDocx :: WriterOptions -- ^ Writer options -> IO BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts - let doc' = stripInvalidChars . walk fixDisplayMath $ doc + let doc' = walk fixDisplayMath $ doc username <- lookup "USERNAME" <$> getEnvironment utctime <- getCurrentTime - distArchive <- getDefaultReferenceDocx Nothing + distArchive <- getDefaultReferenceDocx datadir refArchive <- case writerReferenceDocx opts of Just f -> liftM (toArchive . toLazy) $ B.readFile f Nothing -> getDefaultReferenceDocx datadir @@ -973,7 +972,7 @@ formattedString str = do return [ mknode "w:r" [] $ props ++ [ mknode (if inDel then "w:delText" else "w:t") - [("xml:space","preserve")] str ] ] + [("xml:space","preserve")] (stripInvalidChars str) ] ] setFirstPara :: WS () setFirstPara = modify $ \s -> s { stFirstPara = True } @@ -1070,8 +1069,8 @@ inlineToOpenXML opts (Note bs) = do [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker - let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs - insertNoteRef (Para ils : xs) = Para (notemarkerXml : ils) : xs + let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs + insertNoteRef (Para ils : xs) = Para (notemarkerXml : Space : ils) : xs insertNoteRef xs = Para [notemarkerXml] : xs oldListLevel <- gets stListLevel oldParaProperties <- gets stParaProperties diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index ebd5f8d70..730b31fe8 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -55,7 +55,6 @@ import Network.URI ( isURI ) import Control.Monad ( zipWithM ) import Control.Monad.State ( modify, State, get, evalState ) import Control.Monad.Reader ( ReaderT, runReaderT, ask, local ) -import Control.Applicative ( (<$>) ) data WriterState = WriterState { stNotes :: Bool -- True if there are notes diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index c3e295c8f..f4989c8ea 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -37,16 +37,14 @@ import System.Environment ( getEnv ) import Text.Printf (printf) import System.FilePath ( takeExtension, takeFileName ) import System.FilePath.Glob ( namesMatching ) +import Network.HTTP ( urlEncode ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.SelfContained ( makeSelfContained ) import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive) -import Control.Applicative ((<$>)) import Data.Time.Clock.POSIX ( getPOSIXTime ) -import Data.Time (getCurrentTime,UTCTime, formatTime) -import Text.Pandoc.Compat.Locale ( defaultTimeLocale ) -import Text.Pandoc.Shared ( trimr, renderTags', safeRead, uniqueIdent, trim +import Text.Pandoc.Compat.Time +import Text.Pandoc.Shared ( renderTags', safeRead, uniqueIdent, trim , normalizeDate, readDataFile, stringify, warn , hierarchicalize, fetchItem' ) import qualified Text.Pandoc.Shared as S (Element(..)) @@ -65,7 +63,7 @@ import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs , strContent, lookupAttr, Node(..), QName(..), parseXML , onlyElems, node, ppElement) import Text.Pandoc.UUID (getRandomUUID) -import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) +import Text.Pandoc.Writers.HTML ( writeHtml ) import Data.Char ( toLower, isDigit, isAlphaNum ) import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) import qualified Control.Exception as E @@ -818,7 +816,8 @@ transformTag :: WriterOptions -> Tag String -> IO (Tag String) transformTag opts mediaRef tag@(TagOpen name attr) - | name `elem` ["video", "source", "img", "audio"] = do + | name `elem` ["video", "source", "img", "audio"] && + lookup "data-external" attr == Nothing = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag newsrc <- modifyMediaRef opts mediaRef src @@ -874,10 +873,11 @@ transformInline :: WriterOptions transformInline opts mediaRef (Image attr lab (src,tit)) = do newsrc <- modifyMediaRef opts mediaRef src return $ Image attr lab (newsrc, tit) -transformInline opts _ (x@(Math _ _)) - | WebTeX _ <- writerHTMLMathMethod opts = do - raw <- makeSelfContained opts $ writeHtmlInline opts x - return $ RawInline (Format "html") raw +transformInline opts mediaRef (x@(Math t m)) + | WebTeX url <- writerHTMLMathMethod opts = do + newsrc <- modifyMediaRef opts mediaRef (url ++ urlEncode m) + let mathclass = if t == DisplayMath then "display" else "inline" + return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")] transformInline opts mediaRef (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw @@ -885,11 +885,6 @@ transformInline opts mediaRef (RawInline fmt raw) return $ RawInline fmt (renderTags' tags') transformInline _ _ x = return x -writeHtmlInline :: WriterOptions -> Inline -> String -writeHtmlInline opts z = trimr $ - writeHtmlString opts{ writerStandalone = False } - $ Pandoc nullMeta [Plain [z]] - (!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index ab158b38d..67d398a4d 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -31,6 +31,7 @@ Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where import Text.Pandoc.Definition +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options @@ -67,9 +68,7 @@ import Text.XML.Light.Output import Text.XML.Light (unode, elChildren, unqual) import qualified Text.XML.Light as XML import System.FilePath (takeExtension) -import Data.Monoid import Data.Aeson (Value) -import Control.Applicative ((<$>)) data WriterState = WriterState { stNotes :: [Html] -- ^ List of notes @@ -195,9 +194,6 @@ pandocToHtml opts (Pandoc meta blocks) = do defField "revealjs-url" ("reveal.js" :: String) $ defField "s5-url" ("s5/default" :: String) $ defField "html5" (writerHtml5 opts) $ - defField "center" (case lookupMeta "center" meta of - Just (MetaBool False) -> False - _ -> True) $ metadata return (thebody, context) @@ -310,11 +306,9 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen $ if titleSlide -- title slides have no content of their own then filter isSec elements - else if slide - then case splitBy isPause elements of - [] -> [] - (x:xs) -> x ++ concatMap inDiv xs - else elements + else case splitBy isPause elements of + [] -> [] + (x:xs) -> x ++ concatMap inDiv xs let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && @@ -471,12 +465,15 @@ blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = do blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents -blockToHtml opts (Div attr@(_,classes,_) bs) = do +blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do let speakerNotes = "notes" `elem` classes -- we don't want incremental output inside speaker notes, see #1394 let opts' = if speakerNotes then opts{ writerIncremental = False } else opts contents <- blockListToHtml opts' bs let contents' = nl opts >> contents >> nl opts + let (divtag, classes') = if writerHtml5 opts && "section" `elem` classes + then (H5.section, filter (/= "section") classes) + else (H.div, classes) return $ if speakerNotes then case writerSlideVariant opts of @@ -485,7 +482,7 @@ blockToHtml opts (Div attr@(_,classes,_) bs) = do ! (H5.customAttribute "role" "note") NoSlides -> addAttrs opts' attr $ H.div $ contents' _ -> mempty - else addAttrs opts attr $ H.div $ contents' + else addAttrs opts (ident, classes', kvs) $ divtag $ contents' blockToHtml opts (RawBlock f str) | f == Format "html" = return $ preEscapedString str | f == Format "latex" = @@ -565,6 +562,9 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do let attribs = (if startnum /= 1 then [A.start $ toValue startnum] else []) ++ + (if numstyle == Example + then [A.class_ "example"] + else []) ++ (if numstyle /= DefaultStyle then if writerHtml5 opts then [A.type_ $ @@ -615,8 +615,15 @@ blockToHtml opts (Table capt aligns widths headers rows') = do return $ H.thead (nl opts >> contents) >> nl opts body' <- liftM (\x -> H.tbody (nl opts >> mconcat x)) $ zipWithM (tableRowToHtml opts aligns) [1..] rows' - return $ H.table $ nl opts >> captionDoc >> coltags >> head' >> - body' >> nl opts + let tbl = H.table $ + nl opts >> captionDoc >> coltags >> head' >> body' >> nl opts + let totalWidth = sum widths + -- When widths of columns are < 100%, we need to set width for the whole + -- table, or some browsers give us skinny columns with lots of space between: + return $ if totalWidth == 0 || totalWidth == 1 + then tbl + else tbl ! A.style (toValue $ "width:" ++ + show (round (totalWidth * 100) :: Int) ++ "%;") tableRowToHtml :: WriterOptions -> [Alignment] diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index a3188c647..118d42d7d 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -327,8 +327,8 @@ inlineToHaddock _ (RawInline f str) inlineToHaddock _ (LineBreak) = return cr inlineToHaddock _ Space = return space inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst -inlineToHaddock opts (Link _ txt (src, _)) = do - linktext <- inlineListToHaddock opts txt +inlineToHaddock _ (Link _ txt (src, _)) = do + let linktext = text $ escapeString $ stringify txt let useAuto = isURI src && case txt of [Str s] | escapeURI s == src -> True diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 2bbd3b44f..eb6d135ca 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -16,6 +16,7 @@ into InDesign with File -> Place. module Text.Pandoc.Writers.ICML (writeICML) where import Text.Pandoc.Definition import Text.Pandoc.XML +import Text.Pandoc.Readers.TeXMath (texMathToInlines) import Text.Pandoc.Writers.Shared import Text.Pandoc.Shared (splitBy, fetchItem, warn) import Text.Pandoc.Options @@ -24,7 +25,6 @@ import Text.Pandoc.Pretty import Text.Pandoc.ImageSize import Data.List (isPrefixOf, isInfixOf, stripPrefix) import Data.Text as Text (breakOnAll, pack) -import Data.Monoid (mappend) import Control.Monad.State import Network.URI (isURI) import System.FilePath (pathSeparator) @@ -415,7 +415,8 @@ inlineToICML opts style (Cite _ lst) = inlinesToICML opts (citeName:style) lst inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str inlineToICML _ style Space = charStyle style space inlineToICML _ style LineBreak = charStyle style $ text lineSeparator -inlineToICML _ style (Math _ str) = charStyle style $ text $ escapeStringForXML str --InDesign doesn't really do math +inlineToICML opts style (Math mt str) = + cat <$> mapM (inlineToICML opts style) (texMathToInlines mt str) inlineToICML _ _ (RawInline f str) | f == Format "icml" = return $ text str | otherwise = return empty diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 5857723a6..9e15e0be7 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -38,10 +38,11 @@ import Text.Pandoc.Options import Text.Pandoc.Templates import Text.Printf ( printf ) import Network.URI ( isURI, unEscapeString ) -import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse ) +import Data.Aeson (object, (.=)) +import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, nub, nubBy ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) -import Data.Maybe ( fromMaybe ) -import Data.Aeson.Types ( (.:), parseMaybe, withObject ) +import Data.Maybe ( fromMaybe, isJust ) +import qualified Data.Text as T import Control.Applicative ((<|>)) import Control.Monad.State import qualified Text.Parsec as P @@ -121,7 +122,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do Right r -> r Left _ -> "" case lookup "documentclass" (writerVariables options) `mplus` - parseMaybe (withObject "object" (.: "documentclass")) metadata of + fmap stringify (lookupMeta "documentclass" meta) of Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True} | otherwise -> return () Nothing | documentClass `elem` bookClasses @@ -145,11 +146,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do st <- get titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta - let (mainlang, otherlang) = - case (reverse . splitBy (==',') . filter (/=' ')) `fmap` - getField "lang" metadata of - Just (m:os) -> (m, reverse os) - _ -> ("", []) + let docLangs = nub $ query (extract "lang") blocks + let hasStringValue x = isJust (getField x metadata :: Maybe String) let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if stBook st @@ -174,8 +172,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "euro" (stUsesEuro st) $ defField "listings" (writerListings options || stLHS st) $ defField "beamer" (writerBeamer options) $ - defField "mainlang" mainlang $ - defField "otherlang" otherlang $ (if stHighlighting st then defField "highlighting-macros" (styleToLaTeX $ writerHighlightStyle options ) @@ -186,9 +182,56 @@ pandocToLaTeX options (Pandoc meta blocks) = do Biblatex -> defField "biblio-title" biblioTitle . defField "biblatex" True _ -> id) $ + -- set lang to something so polyglossia/babel is included + defField "lang" (if null docLangs then ""::String else "en") $ + defField "otherlangs" docLangs $ + defField "colorlinks" (any hasStringValue + ["citecolor", "urlcolor", "linkcolor", "toccolor"]) $ + defField "dir" (if (null $ query (extract "dir") blocks) + then ""::String + else "ltr") $ metadata + let toPolyObj lang = object [ "name" .= T.pack name + , "options" .= T.pack opts ] + where + (name, opts) = toPolyglossia lang + let lang = maybe [] (splitBy (=='-')) $ getField "lang" context + otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context + let context' = + defField "babel-lang" (toBabel lang) + $ defField "babel-otherlangs" (map toBabel otherlangs) + $ defField "babel-newcommands" (concatMap (\(poly, babel) -> + -- \textspanish and \textgalician are already used by babel + -- save them as \oritext... and let babel use that + if poly `elem` ["spanish", "galician"] + then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++ + "\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++ + "{\\renewcommand{\\text" ++ poly ++ "}{\\oritext" + ++ poly ++ "}}\n" ++ + "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++ + "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" + ++ poly ++ "}{##2}}}\n" + else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" + ++ babel ++ "}{#2}}\n" ++ + "\\newenvironment{" ++ poly ++ "}[1]{\\begin{otherlanguage}{" + ++ babel ++ "}}{\\end{otherlanguage}}\n" + ) + -- eliminate duplicates that have same polyglossia name + $ nubBy (\a b -> fst a == fst b) + -- find polyglossia and babel names of languages used in the document + $ map (\l -> + let lng = splitBy (=='-') l + in (fst $ toPolyglossia lng, toBabel lng) + ) + docLangs ) + $ defField "polyglossia-lang" (toPolyObj lang) + $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs) + $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of + Just "rtl" -> True + _ -> False) + $ context return $ if writerStandalone options - then renderTemplate' template context + then renderTemplate' template context' else main -- | Convert Elements to LaTeX @@ -234,7 +277,7 @@ stringToLaTeX ctx (x:xs) = do '^' -> "\\^{}" ++ rest '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows | otherwise -> "\\textbackslash{}" ++ rest - '|' -> "\\textbar{}" ++ rest + '|' | not isUrl -> "\\textbar{}" ++ rest '<' -> "\\textless{}" ++ rest '>' -> "\\textgreater{}" ++ rest '[' -> "{[}" ++ rest -- to avoid interpretation as @@ -292,9 +335,12 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) if writerListings opts then query hasCode elts else []) - let allowframebreaks = "allowframebreaks" `elem` classes + let frameoptions = ["allowdisplaybreaks", "allowframebreaks", + "b", "c", "t", "environment", + "label", "plain", "shrink"] let optionslist = ["fragile" | fragile] ++ - ["allowframebreaks" | allowframebreaks] + [k | k <- classes, k `elem` frameoptions] ++ + [k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions] let options = if null optionslist then "" else "[" ++ intercalate "," optionslist ++ "]" @@ -322,34 +368,53 @@ isLineBreakOrSpace _ = False blockToLaTeX :: Block -- ^ Block to convert -> State WriterState Doc blockToLaTeX Null = return empty -blockToLaTeX (Div (identifier,classes,_) bs) = do +blockToLaTeX (Div (identifier,classes,kvs) bs) = do beamer <- writerBeamer `fmap` gets stOptions ref <- toLabel identifier let linkAnchor = if null identifier then empty - else "\\hyperdef{}" <> braces (text ref) <> - braces ("\\label" <> braces (text ref)) - contents <- blockListToLaTeX bs - if beamer && "notes" `elem` classes -- speaker notes - then return $ "\\note" <> braces contents - else return (linkAnchor $$ contents) + else "\\hypertarget" <> braces (text ref) <> + braces empty + let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir + let wrapDir = case lookup "dir" kvs of + Just "rtl" -> align "RTL" + Just "ltr" -> align "LTR" + _ -> id + wrapLang txt = case lookup "lang" kvs of + Just lng -> let (l, o) = toPolyglossiaEnv lng + ops = if null o + then "" + else brackets $ text o + in inCmd "begin" (text l) <> ops + $$ blankline <> txt <> blankline + $$ inCmd "end" (text l) + Nothing -> txt + wrapNotes txt = if beamer && "notes" `elem` classes + then "\\note" <> braces txt -- speaker notes + else linkAnchor $$ txt + fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure blockToLaTeX (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do inNote <- gets stInNote + modify $ \st -> st{ stInMinipage = True, stNotes = [] } capt <- inlineListToLaTeX txt + notes <- gets stNotes + modify $ \st -> st{ stInMinipage = False, stNotes = [] } + -- We can't have footnotes in the list of figures, so remove them: + captForLof <- if null notes + then return empty + else brackets <$> inlineListToLaTeX (walk deNote txt) img <- inlineToLaTeX (Image attr txt (src,tit)) - let (ident, _, _) = attr - idn <- toLabel ident - let label = if null ident - then empty - else "\\label" <> braces (text idn) + let footnotes = notesToLaTeX notes return $ if inNote -- can't have figures in notes then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}" else "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ - ("\\caption" <> braces capt) $$ label $$ "\\end{figure}" + ("\\caption" <> captForLof <> braces capt) $$ + "\\end{figure}" $$ + footnotes -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do beamer <- writerBeamer `fmap` gets stOptions @@ -378,7 +443,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do ref <- toLabel identifier let linkAnchor = if null identifier then empty - else "\\hyperdef{}" <> braces (text ref) <> + else "\\hypertarget" <> braces (text ref) <> braces ("\\label" <> braces (text ref)) let lhsCodeBlock = do modify $ \s -> s{ stLHS = True } @@ -591,19 +656,21 @@ tableCellToLaTeX header (width, align, blocks) = do return $ ("\\begin{minipage}" <> valign <> braces (text (printf "%.2f\\columnwidth" width)) <> (halign <> "\\strut" <> cr <> cellContents <> cr) <> - "\\strut\\end{minipage}") - $$ case notes of - [] -> empty - ns -> (case length ns of + "\\strut\\end{minipage}") $$ + notesToLaTeX notes + +notesToLaTeX :: [Doc] -> Doc +notesToLaTeX [] = empty +notesToLaTeX ns = (case length ns of n | n > 1 -> "\\addtocounter" <> braces "footnote" <> braces (text $ show $ 1 - n) | otherwise -> empty) - $$ - vcat (intersperse - ("\\addtocounter" <> braces "footnote" <> braces "1") - $ map (\x -> "\\footnotetext" <> braces x) - $ reverse ns) + $$ + vcat (intersperse + ("\\addtocounter" <> braces "footnote" <> braces "1") + $ map (\x -> "\\footnotetext" <> braces x) + $ reverse ns) listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst @@ -665,8 +732,7 @@ sectionHeader unnumbered ref level lst = do let level' = if book || writerChapters opts then level - 1 else level internalLinks <- gets stInternalLinks let refLabel x = (if ref `elem` internalLinks - then text "\\hyperdef" - <> braces empty + then text "\\hypertarget" <> braces lab <> braces x else x) @@ -731,22 +797,29 @@ isQuoted _ = False -- | Convert inline element to LaTeX inlineToLaTeX :: Inline -- ^ Inline to convert -> State WriterState Doc -inlineToLaTeX (Span (id',classes,_) ils) = do +inlineToLaTeX (Span (id',classes,kvs) ils) = do let noEmph = "csl-no-emph" `elem` classes let noStrong = "csl-no-strong" `elem` classes let noSmallCaps = "csl-no-smallcaps" `elem` classes + let rtl = ("dir","rtl") `elem` kvs + let ltr = ("dir","ltr") `elem` kvs ref <- toLabel id' let linkAnchor = if null id' then empty - else "\\hyperdef{}" <> braces (text ref) <> - braces ("\\label" <> braces (text ref)) + else "\\protect\\hypertarget" <> braces (text ref) <> + braces empty fmap (linkAnchor <>) ((if noEmph then inCmd "textup" else id) . (if noStrong then inCmd "textnormal" else id) . (if noSmallCaps then inCmd "textnormal" else id) . - (if not (noEmph || noStrong || noSmallCaps) - then braces - else id)) `fmap` inlineListToLaTeX ils + (if rtl then inCmd "RL" else id) . + (if ltr then inCmd "LR" else id) . + (case lookup "lang" kvs of + Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng + ops = if null o then "" else brackets (text o) + in \c -> char '\\' <> "text" <> text l <> ops <> braces c + Nothing -> id) + ) `fmap` inlineListToLaTeX ils inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = @@ -831,22 +904,22 @@ inlineToLaTeX Space = return space inlineToLaTeX (Link _ txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt lab <- toLabel ident - return $ text "\\hyperref" <> brackets (text lab) <> braces contents + return $ text "\\protect\\hyperlink" <> braces (text lab) <> braces contents inlineToLaTeX (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> -- autolink do modify $ \s -> s{ stUrl = True } - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) return $ text $ "\\url{" ++ src' ++ "}" [Str x] | Just rest <- stripPrefix "mailto:" src, escapeURI x == rest -> -- email autolink do modify $ \s -> s{ stUrl = True } - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) contents <- inlineListToLaTeX txt return $ "\\href" <> braces (text src') <> braces ("\\nolinkurl" <> braces contents) _ -> do contents <- inlineListToLaTeX txt - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) return $ text ("\\href{" ++ src' ++ "}{") <> contents <> char '}' inlineToLaTeX (Image attr _ (source, _)) = do @@ -869,7 +942,7 @@ inlineToLaTeX (Image attr _ (source, _)) = do source' = if isURI source then source else unEscapeString source - source'' <- stringToLaTeX URLString source' + source'' <- stringToLaTeX URLString (escapeURI source') inHeading <- gets stInHeading return $ (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") <> @@ -1001,3 +1074,173 @@ citationsToBiblatex _ = return empty getListingsLanguage :: [String] -> Maybe String getListingsLanguage [] = Nothing getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs + +-- Extract a key from divs and spans +extract :: String -> Block -> [String] +extract key (Div attr _) = lookKey key attr +extract key (Plain ils) = concatMap (extractInline key) ils +extract key (Para ils) = concatMap (extractInline key) ils +extract key (Header _ _ ils) = concatMap (extractInline key) ils +extract _ _ = [] + +-- Extract a key from spans +extractInline :: String -> Inline -> [String] +extractInline key (Span attr _) = lookKey key attr +extractInline _ _ = [] + +-- Look up a key in an attribute and give a list of its values +lookKey :: String -> Attr -> [String] +lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs + +-- In environments \Arabic instead of \arabic is used +toPolyglossiaEnv :: String -> (String, String) +toPolyglossiaEnv l = + case toPolyglossia $ (splitBy (=='-')) l of + ("arabic", o) -> ("Arabic", o) + x -> x + +-- Takes a list of the constituents of a BCP 47 language code and +-- converts it to a Polyglossia (language, options) tuple +-- http://mirrors.concertpass.com/tex-archive/macros/latex/contrib/polyglossia/polyglossia.pdf +toPolyglossia :: [String] -> (String, String) +toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria") +toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq") +toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq") +toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq") +toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya") +toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco") +toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania") +toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq") +toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq") +toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia") +toPolyglossia ("de":"1901":_) = ("german", "spelling=old") +toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old") +toPolyglossia ("de":"AT":_) = ("german", "variant=austrian") +toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old") +toPolyglossia ("de":"CH":_) = ("german", "variant=swiss") +toPolyglossia ("de":_) = ("german", "") +toPolyglossia ("dsb":_) = ("lsorbian", "") +toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly") +toPolyglossia ("en":"AU":_) = ("english", "variant=australian") +toPolyglossia ("en":"CA":_) = ("english", "variant=canadian") +toPolyglossia ("en":"GB":_) = ("english", "variant=british") +toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand") +toPolyglossia ("en":"UK":_) = ("english", "variant=british") +toPolyglossia ("en":"US":_) = ("english", "variant=american") +toPolyglossia ("grc":_) = ("greek", "variant=ancient") +toPolyglossia ("hsb":_) = ("usorbian", "") +toPolyglossia ("sl":_) = ("slovenian", "") +toPolyglossia x = (commonFromBcp47 x, "") + +-- Takes a list of the constituents of a BCP 47 language code and +-- converts it to a Babel language string. +-- http://mirrors.concertpass.com/tex-archive/macros/latex/required/babel/base/babel.pdf +-- Note that the PDF unfortunately does not contain a complete list of supported languages. +toBabel :: [String] -> String +toBabel ("de":"1901":_) = "german" +toBabel ("de":"AT":"1901":_) = "austrian" +toBabel ("de":"AT":_) = "naustrian" +toBabel ("de":_) = "ngerman" +toBabel ("dsb":_) = "lowersorbian" +toBabel ("el":"polyton":_) = "polutonikogreek" +toBabel ("en":"AU":_) = "australian" +toBabel ("en":"CA":_) = "canadian" +toBabel ("en":"GB":_) = "british" +toBabel ("en":"NZ":_) = "newzealand" +toBabel ("en":"UK":_) = "british" +toBabel ("en":"US":_) = "american" +toBabel ("fr":"CA":_) = "canadien" +toBabel ("fra":"aca":_) = "acadian" +toBabel ("grc":_) = "polutonikogreek" +toBabel ("hsb":_) = "uppersorbian" +toBabel ("sl":_) = "slovene" +toBabel x = commonFromBcp47 x + +-- Takes a list of the constituents of a BCP 47 language code +-- and converts it to a string shared by Babel and Polyglossia. +-- https://tools.ietf.org/html/bcp47#section-2.1 +commonFromBcp47 :: [String] -> String +commonFromBcp47 [] = "" +commonFromBcp47 ("pt":"BR":_) = "brazilian" +commonFromBcp47 x = fromIso $ head x + where + fromIso "af" = "afrikaans" + fromIso "am" = "amharic" + fromIso "ar" = "arabic" + fromIso "ast" = "asturian" + fromIso "bg" = "bulgarian" + fromIso "bn" = "bengali" + fromIso "bo" = "tibetan" + fromIso "br" = "breton" + fromIso "ca" = "catalan" + fromIso "cy" = "welsh" + fromIso "cz" = "czech" + fromIso "cop" = "coptic" + fromIso "da" = "danish" + fromIso "dv" = "divehi" + fromIso "el" = "greek" + fromIso "en" = "english" + fromIso "eo" = "esperanto" + fromIso "es" = "spanish" + fromIso "et" = "estonian" + fromIso "eu" = "basque" + fromIso "fa" = "farsi" + fromIso "fi" = "finnish" + fromIso "fr" = "french" + fromIso "fur" = "friulan" + fromIso "ga" = "irish" + fromIso "gd" = "scottish" + fromIso "gl" = "galician" + fromIso "he" = "hebrew" + fromIso "hi" = "hindi" + fromIso "hr" = "croatian" + fromIso "hy" = "armenian" + fromIso "hu" = "magyar" + fromIso "ia" = "interlingua" + fromIso "id" = "indonesian" + fromIso "ie" = "interlingua" + fromIso "is" = "icelandic" + fromIso "it" = "italian" + fromIso "jp" = "japanese" + fromIso "km" = "khmer" + fromIso "kn" = "kannada" + fromIso "ko" = "korean" + fromIso "la" = "latin" + fromIso "lo" = "lao" + fromIso "lt" = "lithuanian" + fromIso "lv" = "latvian" + fromIso "ml" = "malayalam" + fromIso "mn" = "mongolian" + fromIso "mr" = "marathi" + fromIso "nb" = "norsk" + fromIso "nl" = "dutch" + fromIso "nn" = "nynorsk" + fromIso "no" = "norsk" + fromIso "nqo" = "nko" + fromIso "oc" = "occitan" + fromIso "pl" = "polish" + fromIso "pms" = "piedmontese" + fromIso "pt" = "portuguese" + fromIso "rm" = "romansh" + fromIso "ro" = "romanian" + fromIso "ru" = "russian" + fromIso "sa" = "sanskrit" + fromIso "se" = "samin" + fromIso "sk" = "slovak" + fromIso "sq" = "albanian" + fromIso "sr" = "serbian" + fromIso "sv" = "swedish" + fromIso "syr" = "syriac" + fromIso "ta" = "tamil" + fromIso "te" = "telugu" + fromIso "th" = "thai" + fromIso "tk" = "turkmen" + fromIso "tr" = "turkish" + fromIso "uk" = "ukrainian" + fromIso "ur" = "urdu" + fromIso "vi" = "vietnamese" + fromIso _ = "" + +deNote :: Inline -> Inline +deNote (Note _) = RawInline (Format "latex") "" +deNote x = x diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 71fd145e2..b8b1c1fdd 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -85,6 +85,8 @@ pandocToMan opts (Pandoc meta blocks) = do let context = defField "body" main $ setFieldsFromTitle $ defField "has-tables" hasTables + $ defField "hyphenate" True + $ defField "pandoc-version" pandocVersion $ metadata if writerStandalone opts then return $ renderTemplate' (writerTemplate opts) context diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 019a0e272..898e6c32d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -40,7 +40,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) import Data.Maybe (fromMaybe) import Data.List ( group, stripPrefix, find, intersperse, transpose, sortBy ) -import Data.Char ( isSpace, isPunctuation ) +import Data.Char ( isSpace, isPunctuation, ord, chr ) import Data.Ord ( comparing ) import Text.Pandoc.Pretty import Control.Monad.State @@ -260,10 +260,13 @@ tableOfContents opts headers = -- | Converts an Element to a list item for a table of contents, elementToListItem :: WriterOptions -> Element -> [Block] -elementToListItem opts (Sec lev _ _ headerText subsecs) - = Plain headerText : +elementToListItem opts (Sec lev _nums (ident,_,_) headerText subsecs) + = Plain headerLink : [ BulletList (map (elementToListItem opts) subsecs) | not (null subsecs) && lev < writerTOCDepth opts ] + where headerLink = if null ident + then headerText + else [Link nullAttr headerText ('#':ident, "")] elementToListItem _ (Blk _) = [] attrsToMarkdown :: Attr -> Doc @@ -780,14 +783,25 @@ inlineToMarkdown opts (Superscript lst) = do then "^" <> contents <> "^" else if isEnabled Ext_raw_html opts then "<sup>" <> contents <> "</sup>" - else contents + else case (render Nothing contents) of + ds | all (\d -> d >= '0' && d <= '9') ds + -> text (map toSuperscript ds) + _ -> contents + where toSuperscript '1' = '\x00B9' + toSuperscript '2' = '\x00B2' + toSuperscript '3' = '\x00B3' + toSuperscript c = chr (0x2070 + (ord c - 48)) inlineToMarkdown opts (Subscript lst) = do contents <- inlineListToMarkdown opts $ walk escapeSpaces lst return $ if isEnabled Ext_subscript opts then "~" <> contents <> "~" else if isEnabled Ext_raw_html opts then "<sub>" <> contents <> "</sub>" - else contents + else case (render Nothing contents) of + ds | all (\d -> d >= '0' && d <= '9') ds + -> text (map toSubscript ds) + _ -> contents + where toSubscript c = chr (0x2080 + (ord c - 48)) inlineToMarkdown opts (SmallCaps lst) = do plain <- gets stPlain if not plain && diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index f342dc4f5..2343ff1a8 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -63,6 +63,8 @@ prettyBlock (Table caption aligns widths header rows) = prettyRow header $$ prettyList (map prettyRow rows) where prettyRow cols = prettyList (map (prettyList . map prettyBlock) cols) +prettyBlock (Div attr blocks) = + text ("Div " <> show attr) $$ prettyList (map prettyBlock blocks) prettyBlock block = text $ show block -- | Prettyprint Pandoc document. diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index f7df74246..835e79ce7 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -37,7 +37,6 @@ import Text.TeXMath import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip -import Control.Applicative ((<$>)) import Text.Pandoc.Options ( WriterOptions(..) ) import Text.Pandoc.Shared ( stringify, fetchItem', warn, getDefaultReferenceODT ) diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index c7563d751..519136861 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {- Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> @@ -37,8 +38,7 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.HTML (writeHtmlString) import Text.Pandoc.Writers.Markdown (writeMarkdown) import Text.Pandoc.Pretty -import Data.Time -import Text.Pandoc.Compat.Locale (defaultTimeLocale) +import Text.Pandoc.Compat.Time import qualified Text.Pandoc.Builder as B -- | Convert Pandoc document to string in OPML format. @@ -69,8 +69,13 @@ showDateTimeRFC822 :: UTCTime -> String showDateTimeRFC822 = formatTime defaultTimeLocale "%a, %d %b %Y %X %Z" convertDate :: [Inline] -> String -convertDate ils = maybe "" showDateTimeRFC822 - $ parseTime defaultTimeLocale "%F" =<< (normalizeDate $ stringify ils) +convertDate ils = maybe "" showDateTimeRFC822 $ +#if MIN_VERSION_time(1,5,0) + parseTimeM True +#else + parseTime +#endif + defaultTimeLocale "%F" =<< (normalizeDate $ stringify ils) -- | Convert an Element to OPML. elementToOPML :: WriterOptions -> Element -> Doc diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 7b964e2d2..dad6b431e 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -37,7 +37,6 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Pretty import Text.Printf ( printf ) -import Control.Applicative ( (<$>) ) import Control.Arrow ( (***), (>>>) ) import Control.Monad.State hiding ( when ) import Data.Char (chr, isDigit) @@ -192,8 +191,7 @@ writeOpenDocument opts (Pandoc meta blocks) = listStyle (n,l) = inTags True "text:list-style" [("style:name", "L" ++ show n)] (vcat l) listStyles = map listStyle (stListStyles s) - automaticStyles = inTagsIndented "office:automatic-styles" $ vcat $ - reverse $ styles ++ listStyles + automaticStyles = vcat $ reverse $ styles ++ listStyles context = defField "body" body $ defField "automatic-styles" (render' automaticStyles) $ metadata diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 24da7b9e1..75967fa2a 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -40,7 +40,6 @@ import Text.Pandoc.Pretty import Text.Pandoc.Templates (renderTemplate') import Data.List ( intersect, intersperse, transpose ) import Control.Monad.State -import Control.Applicative ( (<$>) ) data WriterState = WriterState { stNotes :: [[Block]] diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index a65d6f8bb..94c54c250 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -43,7 +43,6 @@ import Data.List ( isPrefixOf, stripPrefix, intersperse, transpose ) import Network.URI (isURI) import Text.Pandoc.Pretty import Control.Monad.State -import Control.Applicative ( (<$>) ) import Data.Char (isSpace, toLower) type Refs = [([Inline], Target)] @@ -82,7 +81,9 @@ pandocToRST (Pandoc meta blocks) = do (fmap (render colwidth) . blockListToRST) (fmap (trimr . render colwidth) . inlineListToRST) $ deleteMeta "title" $ deleteMeta "subtitle" meta - body <- blockListToRST' True $ normalizeHeadings 1 blocks + body <- blockListToRST' True $ if writerStandalone opts + then normalizeHeadings 1 blocks + else blocks notes <- liftM (reverse . stNotes) get >>= notesToRST -- note that the notes may contain refs, so we do them first refs <- liftM (reverse . stLinks) get >>= refsToRST @@ -102,7 +103,8 @@ pandocToRST (Pandoc meta blocks) = do then return $ renderTemplate' (writerTemplate opts) context else return main where - normalizeHeadings lev (Header l a i:bs) = Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs' + normalizeHeadings lev (Header l a i:bs) = + Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs' where (cont,bs') = break (headerLtEq l) bs headerLtEq level (Header l' _ _) = l' <= level headerLtEq _ _ = False @@ -344,7 +346,8 @@ blockListToRST = blockListToRST' False -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: [Inline] -> State WriterState Doc inlineListToRST lst = - mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . hcat + mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= + return . hcat where -- remove spaces after displaymath, as they screw up indentation: removeSpaceAfterDisplayMath (Math DisplayMath x : zs) = Math DisplayMath x : dropWhile (==Space) zs @@ -352,8 +355,8 @@ inlineListToRST lst = removeSpaceAfterDisplayMath [] = [] insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed insertBS (x:y:z:zs) - | isComplex y && surroundComplex x z = - x : y : RawInline "rst" "\\ " : insertBS (z:zs) + | isComplex y && (surroundComplex x z) = + x : y : insertBS (z : zs) insertBS (x:y:zs) | isComplex x && not (okAfterComplex y) = x : RawInline "rst" "\\ " : insertBS (y : zs) @@ -394,6 +397,8 @@ inlineListToRST lst = isComplex (Image _ _ _) = True isComplex (Code _ _) = True isComplex (Math _ _) = True + isComplex (Cite _ (x:_)) = isComplex x + isComplex (Span _ (x:_)) = isComplex x isComplex _ = False -- | Convert Pandoc inline element to RST. diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 456bf19c9..1d5734c96 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -40,12 +40,12 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.XML ( escapeStringForXML ) import Data.List ( intercalate ) import Control.Monad.State -import Control.Applicative ((<$>)) import Data.Char ( isSpace ) data WriterState = WriterState { stNotes :: [String] -- Footnotes , stListLevel :: [Char] -- String at beginning of list items, e.g. "**" + , stStartNum :: Maybe Int -- Start number if first list item , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list } @@ -53,7 +53,8 @@ data WriterState = WriterState { writeTextile :: WriterOptions -> Pandoc -> String writeTextile opts document = evalState (pandocToTextile opts document) - WriterState { stNotes = [], stListLevel = [], stUseTags = False } + WriterState { stNotes = [], stListLevel = [], stStartNum = Nothing, + stUseTags = False } -- | Return Textile representation of document. pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String @@ -220,7 +221,7 @@ blockToTextile opts x@(BulletList items) = do modify $ \s -> s { stListLevel = init (stListLevel s) } return $ vcat contents ++ (if level > 1 then "" else "\n") -blockToTextile opts x@(OrderedList attribs items) = do +blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do oldUseTags <- liftM stUseTags get let useTags = oldUseTags || not (isSimpleList x) if useTags @@ -229,10 +230,14 @@ blockToTextile opts x@(OrderedList attribs items) = do return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ "\n</ol>\n" else do - modify $ \s -> s { stListLevel = stListLevel s ++ "#" } + modify $ \s -> s { stListLevel = stListLevel s ++ "#" + , stStartNum = if start > 1 + then Just start + else Nothing } level <- get >>= return . length . stListLevel contents <- mapM (listItemToTextile opts) items - modify $ \s -> s { stListLevel = init (stListLevel s) } + modify $ \s -> s { stListLevel = init (stListLevel s), + stStartNum = Nothing } return $ vcat contents ++ (if level > 1 then "" else "\n") blockToTextile opts (DefinitionList items) = do @@ -260,8 +265,13 @@ listItemToTextile opts items = do if useTags then return $ "<li>" ++ contents ++ "</li>" else do - marker <- get >>= return . stListLevel - return $ marker ++ " " ++ contents + marker <- gets stListLevel + mbstart <- gets stStartNum + case mbstart of + Just n -> do + modify $ \s -> s{ stStartNum = Nothing } + return $ marker ++ show n ++ " " ++ contents + Nothing -> return $ marker ++ " " ++ contents -- | Convert definition list item (label, list of blocks) to Textile. definitionListItemToTextile :: WriterOptions @@ -278,8 +288,8 @@ isSimpleList :: Block -> Bool isSimpleList x = case x of BulletList items -> all isSimpleListItem items - OrderedList (num, sty, _) items -> all isSimpleListItem items && - num == 1 && sty `elem` [DefaultStyle, Decimal] + OrderedList (_, sty, _) items -> all isSimpleListItem items && + sty `elem` [DefaultStyle, Decimal] _ -> False -- | True if list item can be handled with the simple wiki syntax. False if |