diff options
Diffstat (limited to 'src/Text/Pandoc/Writers/Markdown.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 116 |
1 files changed, 86 insertions, 30 deletions
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 8e3ac3665..3ac677943 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,26 +34,25 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text. Markdown: <http://daringfireball.net/projects/markdown/> -} module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where -import Control.Monad.Except (throwError) import Control.Monad.Reader import Control.Monad.State import Data.Char (chr, isPunctuation, isSpace, ord) import Data.Default import qualified Data.HashMap.Strict as H +import qualified Data.Map as M import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose) import Data.Maybe (fromMaybe) import Data.Monoid (Any (..)) import Data.Ord (comparing) import qualified Data.Set as Set +import Data.Text (Text) import qualified Data.Text as T import qualified Data.Vector as V import Data.Yaml (Value (Array, Bool, Number, Object, String)) import Network.HTTP (urlEncode) -import Network.URI (isURI) import Text.HTML.TagSoup (Tag (..), isTagText, parseTags) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) @@ -91,6 +90,9 @@ instance Default WriterEnv data WriterState = WriterState { stNotes :: Notes , stRefs :: Refs + , stKeys :: M.Map Key + (M.Map (Target, Attr) Int) + , stLastIdx :: Int , stIds :: Set.Set String , stNoteNum :: Int } @@ -98,12 +100,14 @@ data WriterState = WriterState { stNotes :: Notes instance Default WriterState where def = WriterState{ stNotes = [] , stRefs = [] + , stKeys = M.empty + , stLastIdx = 0 , stIds = Set.empty , stNoteNum = 1 } -- | Convert Pandoc to Markdown. -writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMarkdown opts document = evalMD (pandocToMarkdown opts{ writerWrapText = if isEnabled Ext_hard_line_breaks opts @@ -113,7 +117,7 @@ writeMarkdown opts document = -- | Convert Pandoc to plain text (like markdown, but without links, -- pictures, or inline formatting). -writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m String +writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m Text writePlain opts document = evalMD (pandocToMarkdown opts document) def{ envPlain = True } def @@ -177,15 +181,17 @@ jsonToYaml (Number n) = text $ show n jsonToYaml _ = empty -- | Return markdown representation of document. -pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m String +pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m Text pandocToMarkdown opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing isPlain <- asks envPlain + let render' :: Doc -> Text + render' = render colwidth . chomp metadata <- metaToJSON' - (fmap (render colwidth) . blockListToMarkdown opts) - (fmap (render colwidth) . blockToMarkdown opts . Plain) + (fmap render' . blockListToMarkdown opts) + (fmap render' . blockToMarkdown opts . Plain) meta let title' = maybe empty text $ getField "title" metadata let authors' = maybe [] (map text) $ getField "author" metadata @@ -213,8 +219,6 @@ pandocToMarkdown opts (Pandoc meta blocks) = do else blocks body <- blockListToMarkdown opts blocks' notesAndRefs' <- notesAndRefs opts - let render' :: Doc -> String - render' = render colwidth . chomp let main = render' $ body <> notesAndRefs' let context = defField "toc" (render' toc) $ defField "body" main @@ -241,7 +245,7 @@ keyToMarkdown opts (label', (src, tit), attr) = do else space <> "\"" <> text tit <> "\"" return $ nest 2 $ hang 2 ("[" <> label' <> "]:" <> space) (text src <> tit') - <> linkAttributes opts attr + <+> linkAttributes opts attr -- | Return markdown representation of notes. notesToMarkdown :: PandocMonad m => WriterOptions -> [[Block]] -> MD m Doc @@ -471,6 +475,8 @@ blockToMarkdown' opts (Header level attr inlines) = do space <> attrsToMarkdown attr | otherwise -> empty contents <- inlineListToMarkdown opts $ + -- ensure no newlines; see #3736 + walk lineBreakToSpace $ if level == 1 && plain then capitalize inlines else inlines @@ -568,7 +574,7 @@ blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do gridTable opts blockListToMarkdown (all null headers) aligns' widths' headers rows | isEnabled Ext_raw_html opts -> fmap (id,) $ - text <$> + (text . T.unpack) <$> (writeHtml5String def $ Pandoc nullMeta [t]) | otherwise -> return $ (id, text "[TABLE]") return $ nst $ tbl $$ caption'' $$ blankline @@ -788,7 +794,7 @@ blockListToMarkdown opts blocks = do isListBlock _ = False commentSep = if isEnabled Ext_raw_html opts then RawBlock "html" "<!-- -->\n" - else RawBlock "markdown" " " + else RawBlock "markdown" " \n" mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat getKey :: Doc -> Key @@ -798,20 +804,49 @@ getKey = toKey . render Nothing -- Prefer label if possible; otherwise, generate a unique key. getReference :: PandocMonad m => Attr -> Doc -> Target -> MD m Doc getReference attr label target = do - st <- get - let keys = map (\(l,_,_) -> getKey l) (stRefs st) - case find (\(_,t,a) -> t == target && a == attr) (stRefs st) of + refs <- gets stRefs + case find (\(_,t,a) -> t == target && a == attr) refs of Just (ref, _, _) -> return ref Nothing -> do - label' <- case getKey label `elem` keys of - True -> -- label is used; generate numerical label - case find (\n -> Key n `notElem` keys) $ - map show [1..(10000 :: Integer)] of - Just x -> return $ text x - Nothing -> throwError $ PandocSomeError "no unique label" - False -> return label - modify (\s -> s{ stRefs = (label', target, attr) : stRefs st }) - return label' + keys <- gets stKeys + case M.lookup (getKey label) keys of + Nothing -> do -- no other refs with this label + (lab', idx) <- if isEmpty label + then do + i <- (+ 1) <$> gets stLastIdx + modify $ \s -> s{ stLastIdx = i } + return (text (show i), i) + else return (label, 0) + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert (getKey label) + (M.insert (target, attr) idx mempty) + (stKeys s) }) + return lab' + + Just km -> do -- we have refs with this label + case M.lookup (target, attr) km of + Just i -> do + let lab' = label <> if i == 0 + then mempty + else text (show i) + -- make sure it's in stRefs; it may be + -- a duplicate that was printed in a previous + -- block: + when ((lab', target, attr) `notElem` refs) $ + modify (\s -> s{ + stRefs = (lab', target, attr) : refs }) + return lab' + Nothing -> do -- but this one is to a new target + i <- (+ 1) <$> gets stLastIdx + modify $ \s -> s{ stLastIdx = i } + let lab' = text (show i) + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert (getKey label) + (M.insert (target, attr) i km) + (stKeys s) }) + return lab' -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m Doc @@ -821,7 +856,8 @@ inlineListToMarkdown opts lst = do where go [] = return empty go (i:is) = case i of (Link _ _ _) -> case is of - -- If a link is followed by another link or '[' we don't shortcut + -- If a link is followed by another link, or '[', '(' or ':' + -- then we don't shortcut (Link _ _ _):_ -> unshortcutable Space:(Link _ _ _):_ -> unshortcutable Space:(Str('[':_)):_ -> unshortcutable @@ -831,9 +867,17 @@ inlineListToMarkdown opts lst = do SoftBreak:(Str('[':_)):_ -> unshortcutable SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable SoftBreak:(Cite _ _):_ -> unshortcutable + LineBreak:(Link _ _ _):_ -> unshortcutable + LineBreak:(Str('[':_)):_ -> unshortcutable + LineBreak:(RawInline _ ('[':_)):_ -> unshortcutable + LineBreak:(Cite _ _):_ -> unshortcutable (Cite _ _):_ -> unshortcutable Str ('[':_):_ -> unshortcutable + Str ('(':_):_ -> unshortcutable + Str (':':_):_ -> unshortcutable (RawInline _ ('[':_)):_ -> unshortcutable + (RawInline _ ('(':_)):_ -> unshortcutable + (RawInline _ (':':_)):_ -> unshortcutable (RawInline _ (' ':'[':_)):_ -> unshortcutable _ -> shortcutable _ -> shortcutable @@ -890,12 +934,14 @@ inlineToMarkdown opts (Span attrs ils) = do isEnabled Ext_native_spans opts -> tagWithAttrs "span" attrs <> contents <> text "</span>" | otherwise -> contents +inlineToMarkdown _ (Emph []) = return empty inlineToMarkdown opts (Emph lst) = do plain <- asks envPlain contents <- inlineListToMarkdown opts lst return $ if plain then "_" <> contents <> "_" else "*" <> contents <> "*" +inlineToMarkdown _ (Strong []) = return empty inlineToMarkdown opts (Strong lst) = do plain <- asks envPlain if plain @@ -903,6 +949,7 @@ inlineToMarkdown opts (Strong lst) = do else do contents <- inlineListToMarkdown opts lst return $ "**" <> contents <> "**" +inlineToMarkdown _ (Strikeout []) = return empty inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst return $ if isEnabled Ext_strikeout opts @@ -910,6 +957,7 @@ inlineToMarkdown opts (Strikeout lst) = do else if isEnabled Ext_raw_html opts then "<s>" <> contents <> "</s>" else contents +inlineToMarkdown _ (Superscript []) = return empty inlineToMarkdown opts (Superscript lst) = local (\env -> env {envEscapeSpaces = True}) $ do contents <- inlineListToMarkdown opts lst @@ -922,6 +970,7 @@ inlineToMarkdown opts (Superscript lst) = in case mapM toSuperscript rendered of Just r -> text r Nothing -> text $ "^(" ++ rendered ++ ")" +inlineToMarkdown _ (Subscript []) = return empty inlineToMarkdown opts (Subscript lst) = local (\env -> env {envEscapeSpaces = True}) $ do contents <- inlineListToMarkdown opts lst @@ -1064,7 +1113,8 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [lnk]]) + (text . T.unpack . T.strip) <$> + writeHtml5String def (Pandoc nullMeta [Plain [lnk]]) | otherwise = do plain <- asks envPlain linktext <- inlineListToMarkdown opts txt @@ -1103,7 +1153,8 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [img]]) + (text . T.unpack . T.strip) <$> + writeHtml5String def (Pandoc nullMeta [Plain [img]]) | otherwise = do plain <- asks envPlain let txt = if null alternate || alternate == [Str source] @@ -1154,3 +1205,8 @@ toSubscript c Just $ chr (0x2080 + (ord c - 48)) | isSpace c = Just c | otherwise = Nothing + +lineBreakToSpace :: Inline -> Inline +lineBreakToSpace LineBreak = Space +lineBreakToSpace SoftBreak = Space +lineBreakToSpace x = x |