aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/Markdown.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Writers/Markdown.hs')
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs116
1 files changed, 86 insertions, 30 deletions
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 8e3ac3665..3ac677943 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -3,7 +3,7 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Markdown
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2017 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -34,26 +34,25 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text.
Markdown: <http://daringfireball.net/projects/markdown/>
-}
module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
-import Control.Monad.Except (throwError)
import Control.Monad.Reader
import Control.Monad.State
import Data.Char (chr, isPunctuation, isSpace, ord)
import Data.Default
import qualified Data.HashMap.Strict as H
+import qualified Data.Map as M
import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose)
import Data.Maybe (fromMaybe)
import Data.Monoid (Any (..))
import Data.Ord (comparing)
import qualified Data.Set as Set
+import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Vector as V
import Data.Yaml (Value (Array, Bool, Number, Object, String))
import Network.HTTP (urlEncode)
-import Network.URI (isURI)
import Text.HTML.TagSoup (Tag (..), isTagText, parseTags)
import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
-import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
@@ -91,6 +90,9 @@ instance Default WriterEnv
data WriterState = WriterState { stNotes :: Notes
, stRefs :: Refs
+ , stKeys :: M.Map Key
+ (M.Map (Target, Attr) Int)
+ , stLastIdx :: Int
, stIds :: Set.Set String
, stNoteNum :: Int
}
@@ -98,12 +100,14 @@ data WriterState = WriterState { stNotes :: Notes
instance Default WriterState
where def = WriterState{ stNotes = []
, stRefs = []
+ , stKeys = M.empty
+ , stLastIdx = 0
, stIds = Set.empty
, stNoteNum = 1
}
-- | Convert Pandoc to Markdown.
-writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeMarkdown opts document =
evalMD (pandocToMarkdown opts{
writerWrapText = if isEnabled Ext_hard_line_breaks opts
@@ -113,7 +117,7 @@ writeMarkdown opts document =
-- | Convert Pandoc to plain text (like markdown, but without links,
-- pictures, or inline formatting).
-writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writePlain opts document =
evalMD (pandocToMarkdown opts document) def{ envPlain = True } def
@@ -177,15 +181,17 @@ jsonToYaml (Number n) = text $ show n
jsonToYaml _ = empty
-- | Return markdown representation of document.
-pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m String
+pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m Text
pandocToMarkdown opts (Pandoc meta blocks) = do
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
isPlain <- asks envPlain
+ let render' :: Doc -> Text
+ render' = render colwidth . chomp
metadata <- metaToJSON'
- (fmap (render colwidth) . blockListToMarkdown opts)
- (fmap (render colwidth) . blockToMarkdown opts . Plain)
+ (fmap render' . blockListToMarkdown opts)
+ (fmap render' . blockToMarkdown opts . Plain)
meta
let title' = maybe empty text $ getField "title" metadata
let authors' = maybe [] (map text) $ getField "author" metadata
@@ -213,8 +219,6 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
else blocks
body <- blockListToMarkdown opts blocks'
notesAndRefs' <- notesAndRefs opts
- let render' :: Doc -> String
- render' = render colwidth . chomp
let main = render' $ body <> notesAndRefs'
let context = defField "toc" (render' toc)
$ defField "body" main
@@ -241,7 +245,7 @@ keyToMarkdown opts (label', (src, tit), attr) = do
else space <> "\"" <> text tit <> "\""
return $ nest 2 $ hang 2
("[" <> label' <> "]:" <> space) (text src <> tit')
- <> linkAttributes opts attr
+ <+> linkAttributes opts attr
-- | Return markdown representation of notes.
notesToMarkdown :: PandocMonad m => WriterOptions -> [[Block]] -> MD m Doc
@@ -471,6 +475,8 @@ blockToMarkdown' opts (Header level attr inlines) = do
space <> attrsToMarkdown attr
| otherwise -> empty
contents <- inlineListToMarkdown opts $
+ -- ensure no newlines; see #3736
+ walk lineBreakToSpace $
if level == 1 && plain
then capitalize inlines
else inlines
@@ -568,7 +574,7 @@ blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do
gridTable opts blockListToMarkdown
(all null headers) aligns' widths' headers rows
| isEnabled Ext_raw_html opts -> fmap (id,) $
- text <$>
+ (text . T.unpack) <$>
(writeHtml5String def $ Pandoc nullMeta [t])
| otherwise -> return $ (id, text "[TABLE]")
return $ nst $ tbl $$ caption'' $$ blankline
@@ -788,7 +794,7 @@ blockListToMarkdown opts blocks = do
isListBlock _ = False
commentSep = if isEnabled Ext_raw_html opts
then RawBlock "html" "<!-- -->\n"
- else RawBlock "markdown" "&nbsp;"
+ else RawBlock "markdown" "&nbsp;\n"
mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat
getKey :: Doc -> Key
@@ -798,20 +804,49 @@ getKey = toKey . render Nothing
-- Prefer label if possible; otherwise, generate a unique key.
getReference :: PandocMonad m => Attr -> Doc -> Target -> MD m Doc
getReference attr label target = do
- st <- get
- let keys = map (\(l,_,_) -> getKey l) (stRefs st)
- case find (\(_,t,a) -> t == target && a == attr) (stRefs st) of
+ refs <- gets stRefs
+ case find (\(_,t,a) -> t == target && a == attr) refs of
Just (ref, _, _) -> return ref
Nothing -> do
- label' <- case getKey label `elem` keys of
- True -> -- label is used; generate numerical label
- case find (\n -> Key n `notElem` keys) $
- map show [1..(10000 :: Integer)] of
- Just x -> return $ text x
- Nothing -> throwError $ PandocSomeError "no unique label"
- False -> return label
- modify (\s -> s{ stRefs = (label', target, attr) : stRefs st })
- return label'
+ keys <- gets stKeys
+ case M.lookup (getKey label) keys of
+ Nothing -> do -- no other refs with this label
+ (lab', idx) <- if isEmpty label
+ then do
+ i <- (+ 1) <$> gets stLastIdx
+ modify $ \s -> s{ stLastIdx = i }
+ return (text (show i), i)
+ else return (label, 0)
+ modify (\s -> s{
+ stRefs = (lab', target, attr) : refs,
+ stKeys = M.insert (getKey label)
+ (M.insert (target, attr) idx mempty)
+ (stKeys s) })
+ return lab'
+
+ Just km -> do -- we have refs with this label
+ case M.lookup (target, attr) km of
+ Just i -> do
+ let lab' = label <> if i == 0
+ then mempty
+ else text (show i)
+ -- make sure it's in stRefs; it may be
+ -- a duplicate that was printed in a previous
+ -- block:
+ when ((lab', target, attr) `notElem` refs) $
+ modify (\s -> s{
+ stRefs = (lab', target, attr) : refs })
+ return lab'
+ Nothing -> do -- but this one is to a new target
+ i <- (+ 1) <$> gets stLastIdx
+ modify $ \s -> s{ stLastIdx = i }
+ let lab' = text (show i)
+ modify (\s -> s{
+ stRefs = (lab', target, attr) : refs,
+ stKeys = M.insert (getKey label)
+ (M.insert (target, attr) i km)
+ (stKeys s) })
+ return lab'
-- | Convert list of Pandoc inline elements to markdown.
inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m Doc
@@ -821,7 +856,8 @@ inlineListToMarkdown opts lst = do
where go [] = return empty
go (i:is) = case i of
(Link _ _ _) -> case is of
- -- If a link is followed by another link or '[' we don't shortcut
+ -- If a link is followed by another link, or '[', '(' or ':'
+ -- then we don't shortcut
(Link _ _ _):_ -> unshortcutable
Space:(Link _ _ _):_ -> unshortcutable
Space:(Str('[':_)):_ -> unshortcutable
@@ -831,9 +867,17 @@ inlineListToMarkdown opts lst = do
SoftBreak:(Str('[':_)):_ -> unshortcutable
SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable
SoftBreak:(Cite _ _):_ -> unshortcutable
+ LineBreak:(Link _ _ _):_ -> unshortcutable
+ LineBreak:(Str('[':_)):_ -> unshortcutable
+ LineBreak:(RawInline _ ('[':_)):_ -> unshortcutable
+ LineBreak:(Cite _ _):_ -> unshortcutable
(Cite _ _):_ -> unshortcutable
Str ('[':_):_ -> unshortcutable
+ Str ('(':_):_ -> unshortcutable
+ Str (':':_):_ -> unshortcutable
(RawInline _ ('[':_)):_ -> unshortcutable
+ (RawInline _ ('(':_)):_ -> unshortcutable
+ (RawInline _ (':':_)):_ -> unshortcutable
(RawInline _ (' ':'[':_)):_ -> unshortcutable
_ -> shortcutable
_ -> shortcutable
@@ -890,12 +934,14 @@ inlineToMarkdown opts (Span attrs ils) = do
isEnabled Ext_native_spans opts ->
tagWithAttrs "span" attrs <> contents <> text "</span>"
| otherwise -> contents
+inlineToMarkdown _ (Emph []) = return empty
inlineToMarkdown opts (Emph lst) = do
plain <- asks envPlain
contents <- inlineListToMarkdown opts lst
return $ if plain
then "_" <> contents <> "_"
else "*" <> contents <> "*"
+inlineToMarkdown _ (Strong []) = return empty
inlineToMarkdown opts (Strong lst) = do
plain <- asks envPlain
if plain
@@ -903,6 +949,7 @@ inlineToMarkdown opts (Strong lst) = do
else do
contents <- inlineListToMarkdown opts lst
return $ "**" <> contents <> "**"
+inlineToMarkdown _ (Strikeout []) = return empty
inlineToMarkdown opts (Strikeout lst) = do
contents <- inlineListToMarkdown opts lst
return $ if isEnabled Ext_strikeout opts
@@ -910,6 +957,7 @@ inlineToMarkdown opts (Strikeout lst) = do
else if isEnabled Ext_raw_html opts
then "<s>" <> contents <> "</s>"
else contents
+inlineToMarkdown _ (Superscript []) = return empty
inlineToMarkdown opts (Superscript lst) =
local (\env -> env {envEscapeSpaces = True}) $ do
contents <- inlineListToMarkdown opts lst
@@ -922,6 +970,7 @@ inlineToMarkdown opts (Superscript lst) =
in case mapM toSuperscript rendered of
Just r -> text r
Nothing -> text $ "^(" ++ rendered ++ ")"
+inlineToMarkdown _ (Subscript []) = return empty
inlineToMarkdown opts (Subscript lst) =
local (\env -> env {envEscapeSpaces = True}) $ do
contents <- inlineListToMarkdown opts lst
@@ -1064,7 +1113,8 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit))
| isEnabled Ext_raw_html opts &&
not (isEnabled Ext_link_attributes opts) &&
attr /= nullAttr = -- use raw HTML
- (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [lnk]])
+ (text . T.unpack . T.strip) <$>
+ writeHtml5String def (Pandoc nullMeta [Plain [lnk]])
| otherwise = do
plain <- asks envPlain
linktext <- inlineListToMarkdown opts txt
@@ -1103,7 +1153,8 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit))
| isEnabled Ext_raw_html opts &&
not (isEnabled Ext_link_attributes opts) &&
attr /= nullAttr = -- use raw HTML
- (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [img]])
+ (text . T.unpack . T.strip) <$>
+ writeHtml5String def (Pandoc nullMeta [Plain [img]])
| otherwise = do
plain <- asks envPlain
let txt = if null alternate || alternate == [Str source]
@@ -1154,3 +1205,8 @@ toSubscript c
Just $ chr (0x2080 + (ord c - 48))
| isSpace c = Just c
| otherwise = Nothing
+
+lineBreakToSpace :: Inline -> Inline
+lineBreakToSpace LineBreak = Space
+lineBreakToSpace SoftBreak = Space
+lineBreakToSpace x = x