aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Main.hs21
-rw-r--r--src/Text/Pandoc/Definition.hs18
-rw-r--r--src/Text/Pandoc/Entities.hs30
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs19
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs23
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs217
-rw-r--r--src/Text/Pandoc/Readers/RST.hs130
-rw-r--r--src/Text/Pandoc/Shared.hs227
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs95
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs289
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs133
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs348
-rw-r--r--src/Text/Pandoc/Writers/RST.hs425
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs137
-rw-r--r--src/Text/ParserCombinators/Pandoc.hs12
15 files changed, 1016 insertions, 1108 deletions
diff --git a/src/Main.hs b/src/Main.hs
index 0bb246fa5..980afb25b 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -55,7 +55,7 @@ import System.Console.GetOpt
import System.IO
import Data.Maybe ( fromMaybe )
import Data.List ( isPrefixOf )
-import Char ( toLower )
+import Data.Char ( toLower )
import Control.Monad ( (>>=) )
version :: String
@@ -118,7 +118,7 @@ data Opt = Opt
, optDumpArgs :: Bool -- ^ Output command-line arguments
, optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
, optStrict :: Bool -- ^ Use strict markdown syntax
- , optInlineLinks :: Bool -- ^ Use inline links in parsing HTML
+ , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
}
-- | Defaults for command-line options.
@@ -144,7 +144,7 @@ defaultOpts = Opt
, optDumpArgs = False
, optIgnoreArgs = False
, optStrict = False
- , optInlineLinks = False
+ , optReferenceLinks = False
}
-- | A list of functions, each transforming the options data structure
@@ -190,10 +190,10 @@ options =
(\opt -> return opt { optStrict = True } ))
"" -- "Use strict markdown syntax with no extensions"
- , Option "" ["inline-links"]
+ , Option "" ["reference-links"]
(NoArg
- (\opt -> return opt { optInlineLinks = True } ))
- "" -- "Use inline links in parsing HTML"
+ (\opt -> return opt { optReferenceLinks = True } ))
+ "" -- "Use reference links in parsing HTML"
, Option "R" ["parse-raw"]
(NoArg
@@ -405,7 +405,7 @@ main = do
, optDumpArgs = dumpArgs
, optIgnoreArgs = ignoreArgs
, optStrict = strict
- , optInlineLinks = inlineLinks
+ , optReferenceLinks = referenceLinks
} = opts
if dumpArgs
@@ -453,8 +453,7 @@ main = do
stateStandalone = standalone && (not strict),
stateSmart = smart || writerName' == "latex",
stateColumns = columns,
- stateStrict = strict,
- stateInlineLinks = inlineLinks }
+ stateStrict = strict }
let csslink = if (css == "")
then ""
else "<link rel=\"stylesheet\" href=\"" ++ css ++
@@ -469,13 +468,13 @@ main = do
writerHeader = header,
writerTitlePrefix = titlePrefix,
writerTabStop = tabStop,
- writerNotes = [],
writerS5 = (writerName=="s5"),
writerIncremental = incremental,
writerNumberSections = numberSections,
writerIncludeBefore = includeBefore,
writerIncludeAfter = includeAfter,
- writerStrictMarkdown = strict }
+ writerStrictMarkdown = strict,
+ writerReferenceLinks = referenceLinks }
(readSources sources) >>= (hPutStr output . encodeUTF8 .
(writer writerOptions) .
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
index 5eec6bafe..2408cbaac 100644
--- a/src/Text/Pandoc/Definition.hs
+++ b/src/Text/Pandoc/Definition.hs
@@ -50,7 +50,6 @@ data Block
= Plain [Inline] -- ^ Plain text, not a paragraph
| Null -- ^ Nothing
| Para [Inline] -- ^ Paragraph
- | Key [Inline] Target -- ^ Reference key: name (inlines) and 'Target'
| CodeBlock String -- ^ Code block (literal)
| RawHtml String -- ^ Raw HTML block (literal)
| BlockQuote [Block] -- ^ Block quote (list of blocks)
@@ -63,24 +62,18 @@ data Block
-- the term, and a block list)
| Header Int [Inline] -- ^ Header - level (integer) and text (inlines)
| HorizontalRule -- ^ Horizontal rule
- | Note String [Block] -- ^ Footnote or endnote - reference (string),
- -- text (list of blocks)
| Table [Inline] [Alignment] [Float] [[Block]] [[[Block]]] -- ^ Table,
-- with caption, column alignments,
-- relative column widths, column headers
-- (each a list of blocks), and rows
-- (each a list of lists of blocks)
deriving (Eq, Read, Show)
-
--- | Target for a link: either a URL or an indirect (labeled) reference.
-data Target
- = Src String String -- ^ First string is URL, second is title
- | Ref [Inline] -- ^ Label (list of inlines) for an indirect ref
- deriving (Show, Eq, Read)
-- | Type of quotation marks to use in Quoted inline.
data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read)
+type Target = (String, String) -- ^ Link target (URL, title)
+
-- | Inline elements.
data Inline
= Str String -- ^ Text (string)
@@ -96,8 +89,9 @@ data Inline
| LineBreak -- ^ Hard line break
| TeX String -- ^ LaTeX code (literal)
| HtmlInline String -- ^ HTML code (literal)
- | Link [Inline] Target -- ^ Hyperlink: text (list of inlines) and target
- | Image [Inline] Target -- ^ Image: alternative text (list of inlines)
+ | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target
+ | Image [Inline] Target -- ^ Image: alt text (list of inlines), target
-- and target
- | NoteRef String -- ^ Footnote or endnote reference
+ | Note [Block] -- ^ Footnote or endnote - reference (string),
+ -- text (list of blocks)
deriving (Show, Eq, Read)
diff --git a/src/Text/Pandoc/Entities.hs b/src/Text/Pandoc/Entities.hs
index eaa1cd158..e700398b1 100644
--- a/src/Text/Pandoc/Entities.hs
+++ b/src/Text/Pandoc/Entities.hs
@@ -32,8 +32,8 @@ module Text.Pandoc.Entities (
charToEntity,
charToNumericalEntity,
decodeEntities,
- escapeSGMLChar,
- escapeSGMLString,
+ escapeCharForXML,
+ escapeStringForXML,
characterEntity
) where
import Data.Char ( chr, ord )
@@ -49,11 +49,11 @@ charToEntity char = Map.findWithDefault (charToNumericalEntity char) char revers
charToNumericalEntity :: Char -> String
charToNumericalEntity ch = "&#" ++ show (ord ch) ++ ";"
--- | Parse SGML character entity.
+-- | Parse character entity.
characterEntity :: GenParser Char st Char
-characterEntity = namedEntity <|> hexEntity <|> decimalEntity <?> "SGML entity"
+characterEntity = namedEntity <|> hexEntity <|> decimalEntity <?> "character entity"
--- | Parse SGML character entity.
+-- | Parse character entity.
namedEntity :: GenParser Char st Char
namedEntity = try $ do
st <- char '&'
@@ -62,7 +62,7 @@ namedEntity = try $ do
let entity = "&" ++ body ++ ";"
return $ Map.findWithDefault '?' entity entityTable
--- | Parse SGML hexadecimal entity.
+-- | Parse hexadecimal entity.
hexEntity :: GenParser Char st Char
hexEntity = try $ do
st <- string "&#"
@@ -71,7 +71,7 @@ hexEntity = try $ do
end <- char ';'
return $ chr $ read ('0':'x':body)
--- | Parse SGML decimal entity.
+-- | Parse decimal entity.
decimalEntity :: GenParser Char st Char
decimalEntity = try $ do
st <- string "&#"
@@ -79,9 +79,9 @@ decimalEntity = try $ do
end <- char ';'
return $ chr $ read body
--- | Escape one character as needed for SGML.
-escapeSGMLChar :: Char -> String
-escapeSGMLChar x =
+-- | Escape one character as needed for XML.
+escapeCharForXML :: Char -> String
+escapeCharForXML x =
case x of
'&' -> "&amp;"
'<' -> "&lt;"
@@ -94,13 +94,13 @@ escapeSGMLChar x =
needsEscaping :: Char -> Bool
needsEscaping c = c `elem` "&<>\"\160"
--- | Escape string as needed for SGML. Entity references are not preserved.
-escapeSGMLString :: String -> String
-escapeSGMLString "" = ""
-escapeSGMLString str =
+-- | Escape string as needed for XML. Entity references are not preserved.
+escapeStringForXML :: String -> String
+escapeStringForXML "" = ""
+escapeStringForXML str =
case break needsEscaping str of
(okay, "") -> okay
- (okay, (c:cs)) -> okay ++ escapeSGMLChar c ++ escapeSGMLString cs
+ (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
-- | Convert entities in a string to characters.
decodeEntities :: String -> String
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 96244e58f..803fc91c5 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -45,7 +45,7 @@ import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Entities ( characterEntity, decodeEntities )
-import Maybe ( fromMaybe )
+import Data.Maybe ( fromMaybe )
import Data.List ( intersect, takeWhile, dropWhile )
import Data.Char ( toUpper, toLower, isAlphaNum )
@@ -267,9 +267,7 @@ parseHtml = do
option "" (htmlEndTag "html")
many anyChar -- ignore anything after </html>
eof
- state <- getState
- let keyBlocks = stateKeyBlocks state
- return (Pandoc (Meta title authors date) (blocks ++ (reverse keyBlocks)))
+ return (Pandoc (Meta title authors date) blocks)
--
-- parsing blocks
@@ -456,11 +454,7 @@ link = try $ do
Nothing -> fail "no href"
let title = fromMaybe "" (extractAttribute "title" attributes)
label <- inlinesTilEnd "a"
- state <- getState
- ref <- if stateInlineLinks state
- then return (Src url title)
- else generateReference url title
- return $ Link (normalizeSpaces label) ref
+ return $ Link (normalizeSpaces label) (url, title)
image = try $ do
(tag, attributes) <- htmlTag "img"
@@ -469,8 +463,5 @@ image = try $ do
Nothing -> fail "no src"
let title = fromMaybe "" (extractAttribute "title" attributes)
let alt = fromMaybe "" (extractAttribute "alt" attributes)
- state <- getState
- ref <- if stateInlineLinks state
- then return (Src url title)
- else generateReference url title
- return $ Image [Str alt] ref
+ return $ Image [Str alt] (url, title)
+
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 33c4a75ee..b0062ceff 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -37,8 +37,8 @@ import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Maybe ( fromMaybe )
-import Char ( chr )
+import Data.Maybe ( fromMaybe )
+import Data.Char ( chr )
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ParserState -- ^ Parser state, including options for parser
@@ -135,14 +135,11 @@ parseLaTeX = do
spaces
eof
state <- getState
- let keyBlocks = stateKeyBlocks state
- let noteBlocks = stateNoteBlocks state
let blocks' = filter (/= Null) blocks
let title' = stateTitle state
let authors' = stateAuthors state
let date' = stateDate state
- return (Pandoc (Meta title' authors' date')
- (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks)))
+ return (Pandoc (Meta title' authors' date') blocks')
--
-- parsing blocks
@@ -618,15 +615,15 @@ link = try (do
url <- manyTill anyChar (char '}')
char '{'
label <- manyTill inline (char '}')
- return (Link (normalizeSpaces label) (Src url "")))
+ return (Link (normalizeSpaces label) (url, "")))
image = try (do
("includegraphics", _, args) <- command
let args' = filter isArg args -- filter out options
let src = if null args' then
- Src "" ""
+ ("", "")
else
- Src (stripFirstAndLast (head args')) ""
+ (stripFirstAndLast (head args'), "")
return (Image [Str "image"] src))
footnote = try (do
@@ -640,13 +637,7 @@ footnote = try (do
setInput $ contents'
blocks <- parseBlocks
setInput rest
- state <- getState
- let notes = stateNoteBlocks state
- let nextRef = case notes of
- [] -> "1"
- (Note ref body):rest -> (show ((read ref) + 1))
- setState (state { stateNoteBlocks = (Note nextRef blocks):notes })
- return (NoteRef nextRef))
+ return (Note blocks))
-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a2e84e8c2..353dd45dd 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown (
readMarkdown
) where
-import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect )
+import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect, lookup )
import Data.Char ( isAlphaNum )
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
@@ -160,28 +160,72 @@ titleBlock = try (do
option "" blanklines
return (title, author, date))
--- | Returns the number assigned to a Note block
-numberOfNote :: Block -> Int
-numberOfNote (Note ref _) = (read ref)
-numberOfNote _ = 0
-
parseMarkdown = do
- updateState (\state -> state { stateParseRaw = True })
- -- need to parse raw HTML, since markdown allows it
+ updateState (\state -> state { stateParseRaw = True }) -- parse raw HTML: markdown allows it
(title, author, date) <- option ([],[],"") titleBlock
-- go through once just to get list of reference keys
- keysUsed <- lookAhead $ (do {manyTill (referenceKey <|> (do{anyLine; return Null})) eof;
- newState <- getState;
- return $ stateKeysUsed newState})
- updateState (\st -> st { stateKeysUsed = keysUsed })
+ refs <- manyTill (noteBlock <|> referenceKey <|> (do l <- lineClump
+ return (LineClump l))) eof
+ let keys = map (\(KeyBlock label target) -> (label, target)) $
+ filter isKeyBlock refs
+ let notes = map (\(NoteBlock label blocks) -> (label, blocks)) $
+ filter isNoteBlock refs
+ let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
+ setInput $ concat rawlines -- with note blocks and keys stripped out
+ updateState (\state -> state { stateKeys = keys, stateNotes = notes })
blocks <- parseBlocks -- go through again, for real
let blocks' = filter (/= Null) blocks
- state <- getState
- let keys = reverse $ stateKeyBlocks state
- let notes = reverse $ stateNoteBlocks state
- let sortedNotes = sortBy (\x y -> compare (numberOfNote x)
- (numberOfNote y)) notes
- return (Pandoc (Meta title author date) (blocks' ++ sortedNotes ++ keys))
+ return (Pandoc (Meta title author date) blocks')
+
+--
+-- initial pass for references
+--
+
+referenceKey = try $ do
+ nonindentSpaces
+ label <- reference
+ char labelSep
+ skipSpaces
+ option ' ' (char autoLinkStart)
+ src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars))
+ option ' ' (char autoLinkEnd)
+ tit <- option "" title
+ blanklines
+ return $ KeyBlock label (removeTrailingSpace src, tit)
+
+noteMarker = try (do
+ char labelStart
+ char noteStart
+ manyTill (noneOf " \t\n") (char labelEnd))
+
+rawLine = try (do
+ notFollowedBy' blankline
+ notFollowedBy' noteMarker
+ contents <- many1 nonEndline
+ end <- option "" (do
+ newline
+ option "" (try indentSpaces)
+ return "\n")
+ return (contents ++ end))
+
+rawLines = do
+ lines <- many1 rawLine
+ return (concat lines)
+
+noteBlock = try $ do
+ failIfStrict
+ ref <- noteMarker
+ char ':'
+ option ' ' (try blankline)
+ option "" (try indentSpaces)
+ raw <- sepBy rawLines (try (do {blankline; indentSpaces}))
+ option "" blanklines
+ -- parse the extracted text, which may contain various block elements:
+ rest <- getInput
+ setInput $ (joinWithSep "\n" raw) ++ "\n\n"
+ contents <- parseBlocks
+ setInput rest
+ return (NoteBlock ref contents)
--
-- parsing blocks
@@ -189,9 +233,17 @@ parseMarkdown = do
parseBlocks = manyTill block eof
-block = choice [ header, table, codeBlock, note, referenceKey, hrule, list,
- blockQuote, htmlBlock, rawLaTeXEnvironment', para,
- plain, nullBlock ] <?> "block"
+block = choice [ header
+ , table
+ , codeBlock
+ , hrule
+ , list
+ , blockQuote
+ , htmlBlock
+ , rawLaTeXEnvironment'
+ , para
+ , plain
+ , nullBlock ] <?> "block"
--
-- header blocks
@@ -262,45 +314,6 @@ codeBlock = do
return (CodeBlock (stripTrailingNewlines result))
--
--- note block
---
-
-rawLine = try (do
- notFollowedBy' blankline
- notFollowedBy' noteMarker
- contents <- many1 nonEndline
- end <- option "" (do
- newline
- option "" (try indentSpaces)
- return "\n")
- return (contents ++ end))
-
-rawLines = do
- lines <- many1 rawLine
- return (concat lines)
-
-note = try (do
- failIfStrict
- ref <- noteMarker
- char ':'
- skipSpaces
- skipEndline
- raw <- sepBy rawLines (try (do {blankline; indentSpaces}))
- option "" blanklines
- -- parse the extracted text, which may contain various block elements:
- rest <- getInput
- setInput $ (joinWithSep "\n" raw) ++ "\n\n"
- contents <- parseBlocks
- setInput rest
- state <- getState
- let identifiers = stateNoteIdentifiers state
- case (findIndex (== ref) identifiers) of
- Just n -> updateState (\s -> s {stateNoteBlocks =
- (Note (show (n+1)) contents):(stateNoteBlocks s)})
- Nothing -> updateState id
- return Null)
-
---
-- block quotes
--
@@ -535,25 +548,6 @@ rawHtmlBlocks = try (do
else combined
return (RawHtml combined'))
---
--- reference key
---
-
-referenceKey = try (do
- nonindentSpaces
- label <- reference
- char labelSep
- skipSpaces
- option ' ' (char autoLinkStart)
- src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars))
- option ' ' (char autoLinkEnd)
- tit <- option "" title
- blanklines
- state <- getState
- let keysUsed = stateKeysUsed state
- setState state { stateKeysUsed = (label:keysUsed) }
- return $ Key label (Src (removeTrailingSpace src) tit))
-
--
-- LaTeX
--
@@ -713,7 +707,7 @@ table = do
inline = choice [ rawLaTeXInline'
, escapedChar
, entity
- , noteRef
+ , note
, inlineNote
, link
, referenceLink
@@ -933,7 +927,7 @@ reference = try $ do
return (normalizeSpaces label)
-- source for a link, with optional title
-source = try (do
+source = try $ do
char srcStart
option ' ' (char autoLinkStart)
src <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners))
@@ -941,7 +935,7 @@ source = try (do
tit <- option "" title
skipSpaces
char srcEnd
- return (Src (removeTrailingSpace src) tit))
+ return (removeTrailingSpace src, tit)
titleWith startChar endChar = try (do
skipSpaces
@@ -965,30 +959,18 @@ explicitLink = try (do
src <- source
return (Link label src))
-referenceLink = choice [referenceLinkDouble, referenceLinkSingle]
-
--- a link like [this][ref]
-referenceLinkDouble = try (do
+-- a link like [this][ref] or [this][] or [this]
+referenceLink = try $ do
label <- reference
- skipSpaces
- option ' ' newline
- skipSpaces
- ref <- reference
+ ref <- option [] (try (do skipSpaces
+ option ' ' newline
+ skipSpaces
+ reference))
let ref' = if null ref then label else ref
state <- getState
- if ref' `elem` (stateKeysUsed state)
- then return ()
- else fail "no corresponding key"
- return (Link label (Ref ref')))
-
--- a link like [this]
-referenceLinkSingle = try (do
- label <- reference
- state <- getState
- if label `elem` (stateKeysUsed state)
- then return ()
- else fail "no corresponding key"
- return (Link label (Ref label)))
+ case lookupKeySrc (stateKeys state) ref' of
+ Nothing -> fail "no corresponding key"
+ Just target -> return (Link label target)
autoLink = autoLinkEmail <|> autoLinkRegular
@@ -999,7 +981,7 @@ autoLinkEmail = try $ do
domain <- sepBy1 (many1 (noneOf "/:.@<> \t\n")) (char '.')
let src = name ++ "@" ++ (joinWithSep "." domain)
char autoLinkEnd
- return $ Link [Str src] (Src ("mailto:" ++ src) "")
+ return $ Link [Str src] (("mailto:" ++ src), "")
-- a link <http://like.this.com>
autoLinkRegular = try $ do
@@ -1007,39 +989,28 @@ autoLinkRegular = try $ do
prot <- oneOfStrings ["http:", "ftp:", "mailto:"]
rest <- many1Till (noneOf " \t\n<>") (char autoLinkEnd)
let src = prot ++ rest
- return $ Link [Str src] (Src src "")
+ return $ Link [Str src] (src, "")
image = try (do
char imageStart
(Link label src) <- link
return (Image label src))
-noteMarker = try (do
- char labelStart
- char noteStart
- manyTill (noneOf " \t\n") (char labelEnd))
-
-noteRef = try (do
+note = try $ do
failIfStrict
ref <- noteMarker
state <- getState
- let identifiers = (stateNoteIdentifiers state) ++ [ref]
- setState state {stateNoteIdentifiers = identifiers}
- return (NoteRef (show (length identifiers))))
+ let notes = stateNotes state
+ case lookup ref notes of
+ Nothing -> fail "note not found"
+ Just contents -> return (Note contents)
-inlineNote = try (do
+inlineNote = try $ do
failIfStrict
char noteStart
char labelStart
contents <- manyTill inline (char labelEnd)
- state <- getState
- let identifiers = stateNoteIdentifiers state
- let ref = show $ (length identifiers) + 1
- let noteBlocks = stateNoteBlocks state
- setState state {stateNoteIdentifiers = (identifiers ++ [ref]),
- stateNoteBlocks =
- (Note ref [Para contents]):noteBlocks}
- return (NoteRef ref))
+ return (Note [Para contents])
rawLaTeXInline' = do
failIfStrict
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index a3de0a2ea..d2143af38 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -37,8 +37,8 @@ import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag )
import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec
import Data.Maybe ( fromMaybe )
-import List ( findIndex )
-import Char ( toUpper )
+import Data.List ( findIndex, delete )
+import Data.Char ( toUpper )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -> String -> Pandoc
@@ -62,11 +62,7 @@ specialChars = "\\`|*_<>$:[-"
-- parsing documents
--
-isAnonKeyBlock block = case block of
- (Key [Str "_"] str) -> True
- otherwise -> False
-
-isNotAnonKeyBlock block = not (isAnonKeyBlock block)
+isAnonKey (ref, src) = (ref == [Str "_"])
isHeader1 :: Block -> Bool
isHeader1 (Header 1 _) = True
@@ -101,20 +97,22 @@ titleTransform blocks = (blocks, [])
parseRST = do
-- first pass: get anonymous keys
- keyBlocks <- lookAhead $ manyTill (anonymousKey <|> (do{anyLine; return Null})) eof
- let anonymousKeys = filter (/= Null) keyBlocks
- -- run parser again to fill in anonymous links...
- updateState (\st -> st { stateKeyBlocks = anonymousKeys })
- state <- getState
+ refs <- manyTill (referenceKey <|> (do l <- lineClump
+ return (LineClump l))) eof
+ let keys = map (\(KeyBlock label target) -> (label, target)) $
+ filter isKeyBlock refs
+ let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
+ setInput $ concat rawlines -- with keys stripped out
+ updateState (\state -> state { stateKeys = keys })
blocks <- parseBlocks
- let blocks' = filter isNotAnonKeyBlock blocks
+ let blocks' = filter (/= Null) blocks
+ state <- getState
let (blocks'', title) = if stateStandalone state
then titleTransform blocks'
else (blocks', [])
- state' <- getState
- let authors = stateAuthors state'
- let date = stateDate state'
- let title' = if (null title) then (stateTitle state') else title
+ let authors = stateAuthors state
+ let date = stateDate state
+ let title' = if (null title) then (stateTitle state) else title
return (Pandoc (Meta title' authors date) blocks'')
--
@@ -124,7 +122,7 @@ parseRST = do
parseBlocks = manyTill block eof
block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote,
- referenceKey, imageBlock, unknownDirective, header,
+ imageBlock, unknownDirective, header,
hrule, list, fieldList, lineBlock, para, plain,
nullBlock ] <?> "block"
@@ -221,7 +219,7 @@ plain = do
imageBlock = try (do
string ".. image:: "
src <- manyTill anyChar newline
- return (Plain [Image [Str "image"] (Src src "")]))
+ return (Plain [Image [Str "image"] (src, "")]))
--
-- header blocks
@@ -492,43 +490,43 @@ unknownDirective = try (do
-- reference key
--
-referenceKey = choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
+referenceKey = do
+ result <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
+ option "" blanklines
+ return result
-imageKey = try (do
+imageKey = try $ do
string ".. |"
ref <- manyTill inline (char '|')
skipSpaces
string "image::"
src <- manyTill anyChar newline
- return (Key (normalizeSpaces ref)
- (Src (removeLeadingTrailingSpace src) "")))
+ return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "")
-anonymousKey = try (do
+anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
skipSpaces
option ' ' newline
src <- manyTill anyChar newline
state <- getState
- return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) "")))
+ return $ KeyBlock [Str "_"] (removeLeadingTrailingSpace src, "")
-regularKeyQuoted = try (do
+regularKeyQuoted = try $ do
string ".. _`"
ref <- manyTill inline (char '`')
char ':'
skipSpaces
option ' ' newline
src <- manyTill anyChar newline
- return (Key (normalizeSpaces ref)
- (Src (removeLeadingTrailingSpace src) "")))
+ return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "")
-regularKey = try (do
+regularKey = try $ do
string ".. _"
ref <- manyTill inline (char ':')
skipSpaces
option ' ' newline
src <- manyTill anyChar newline
- return (Key (normalizeSpaces ref)
- (Src (removeLeadingTrailingSpace src) "")))
+ return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "")
--
-- inline
@@ -577,7 +575,7 @@ tabchar = do
return (Str "\t")
str = do
- notFollowedBy' oneWordReferenceLink
+ notFollowedBy' oneWordReference
result <- many1 (noneOf (specialChars ++ "\t\n "))
return (Str result)
@@ -596,46 +594,44 @@ endline = try (do
-- links
--
-link = choice [explicitLink, referenceLink, autoLink,
- oneWordReferenceLink] <?> "link"
+link = choice [explicitLink, referenceLink, autoLink] <?> "link"
-explicitLink = try (do
+explicitLink = try $ do
char '`'
notFollowedBy (char '`') -- `` is marks start of inline code
label <- manyTill inline (try (do {spaces; char '<'}))
src <- manyTill (noneOf ">\n ") (char '>')
skipSpaces
string "`_"
- return (Link (normalizeSpaces label)
- (Src (removeLeadingTrailingSpace src) "")))
+ return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "")
-anonymousLinkEnding = try (do
- char '_'
- state <- getState
- let anonKeys = stateKeyBlocks state
- -- if there's a list of anon key refs (from previous pass), pop one off.
- -- otherwise return an anon key ref for the next pass to take care of...
- case anonKeys of
- (Key [Str "_"] src):rest ->
- do
- setState (state { stateKeyBlocks = rest })
- return src
- otherwise -> return (Ref [Str "_"]))
-
-referenceLink = try (do
+reference = try $ do
char '`'
notFollowedBy (char '`')
- label <- manyTill inline (char '`')
+ label <- many1Till inline (char '`')
char '_'
- src <- option (Ref []) anonymousLinkEnding
- return (Link (normalizeSpaces label) src))
+ return label
-oneWordReferenceLink = try (do
- label <- many1 alphaNum
+oneWordReference = do
+ raw <- many1 alphaNum
char '_'
- src <- option (Ref []) anonymousLinkEnding
notFollowedBy alphaNum -- because this_is_not a link
- return (Link [Str label] src))
+ return [Str raw]
+
+referenceLink = try $ do
+ label <- reference <|> oneWordReference
+ key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link
+ state <- getState
+ let keyTable = stateKeys state
+ src <- case lookupKeySrc keyTable key of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+ -- if anonymous link, remove first anon key so it won't be used again
+ let keyTable' = if (key == [Str "_"]) -- anonymous link?
+ then delete ([Str "_"], src) keyTable -- remove first anon key
+ else keyTable
+ setState $ state { stateKeys = keyTable' }
+ return $ Link (normalizeSpaces label) src
uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
"mailto:", "news:", "telnet:" ]
@@ -645,9 +641,9 @@ uri = try (do
identifier <- many1 (noneOf " \t\n")
return (scheme ++ identifier))
-autoURI = try (do
+autoURI = try $ do
src <- uri
- return (Link [Str src] (Src src "")))
+ return $ Link [Str src] (src, "")
emailChar = alphaNum <|> oneOf "-+_."
@@ -666,14 +662,20 @@ domain = try (do
dom <- many1 (try (do{ char '.'; many1 domainChar }))
return (joinWithSep "." (first:dom)))
-autoEmail = try (do
+autoEmail = try $ do
src <- emailAddress
- return (Link [Str src] (Src ("mailto:" ++ src) "")))
+ return $ Link [Str src] ("mailto:" ++ src, "")
autoLink = autoURI <|> autoEmail
-- For now, we assume that all substitution references are for images.
-image = try (do
+image = try $ do
char '|'
ref <- manyTill inline (char '|')
- return (Image (normalizeSpaces ref) (Ref ref)))
+ state <- getState
+ let keyTable = stateKeys state
+ src <- case lookupKeySrc keyTable ref of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+ return (Image (normalizeSpaces ref) src)
+
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 526263c4a..afb75e4c5 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -45,6 +45,10 @@ module Text.Pandoc.Shared (
-- * Parsing
readWith,
testStringWith,
+ Reference (..),
+ isNoteBlock,
+ isKeyBlock,
+ isLineClump,
HeaderType (..),
ParserContext (..),
QuoteContext (..),
@@ -53,27 +57,19 @@ module Text.Pandoc.Shared (
-- * Native format prettyprinting
prettyPandoc,
-- * Pandoc block list processing
- isNoteBlock,
normalizeSpaces,
compactify,
- generateReference,
+ -- * Writer options
WriterOptions (..),
defaultWriterOptions,
+ -- * Reference key lookup functions
KeyTable,
- keyTable,
lookupKeySrc,
refsMatch,
- replaceReferenceLinks,
- replaceRefLinksBlockList,
- -- * SGML
- inTags,
- selfClosingTag,
- inTagsSimple,
- inTagsIndented
) where
import Text.Pandoc.Definition
import Text.ParserCombinators.Parsec as Parsec
-import Text.Pandoc.Entities ( decodeEntities, escapeSGMLString )
+import Text.Pandoc.Entities ( decodeEntities, escapeStringForXML )
import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>),
($$), nest, Doc, isEmpty )
import Data.Char ( toLower, ord )
@@ -113,16 +109,37 @@ data QuoteContext
| NoQuote -- ^ Used when we're not parsing inside quotes
deriving (Eq, Show)
+type KeyTable = [([Inline], Target)]
+
+type NoteTable = [(String, [Block])]
+
+-- | References from preliminary parsing
+data Reference
+ = KeyBlock [Inline] Target -- ^ Key for reference-style link (label URL title)
+ | NoteBlock String [Block] -- ^ Footnote reference and contents
+ | LineClump String -- ^ Raw clump of lines with blanks at end
+ deriving (Eq, Read, Show)
+
+-- | Auxiliary functions used in preliminary parsing
+isNoteBlock :: Reference -> Bool
+isNoteBlock (NoteBlock _ _) = True
+isNoteBlock _ = False
+
+isKeyBlock :: Reference -> Bool
+isKeyBlock (KeyBlock _ _) = True
+isKeyBlock _ = False
+
+isLineClump :: Reference -> Bool
+isLineClump (LineClump _) = True
+isLineClump _ = False
+
data ParserState = ParserState
{ stateParseRaw :: Bool, -- ^ Parse untranslatable HTML
-- and LaTeX?
stateParserContext :: ParserContext, -- ^ What are we parsing?
stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
- stateKeyBlocks :: [Block], -- ^ List of reference key blocks
- stateKeysUsed :: [[Inline]], -- ^ List of references used
- stateNoteBlocks :: [Block], -- ^ List of note blocks
- stateNoteIdentifiers :: [String], -- ^ List of footnote identifiers
- -- in the order encountered
+ stateKeys :: KeyTable, -- ^ List of reference keys
+ stateNotes :: NoteTable, -- ^ List of notes
stateTabStop :: Int, -- ^ Tab stop
stateStandalone :: Bool, -- ^ If @True@, parse
-- bibliographic info
@@ -133,7 +150,6 @@ data ParserState = ParserState
stateSmart :: Bool, -- ^ Use smart typography
stateColumns :: Int, -- ^ Number of columns in
-- terminal (used for tables)
- stateInlineLinks :: Bool, -- ^ Parse html links as inline
stateHeaderTable :: [HeaderType] -- ^ List of header types used,
-- in what order (rst only)
}
@@ -144,10 +160,8 @@ defaultParserState =
ParserState { stateParseRaw = False,
stateParserContext = NullState,
stateQuoteContext = NoQuote,
- stateKeyBlocks = [],
- stateKeysUsed = [],
- stateNoteBlocks = [],
- stateNoteIdentifiers = [],
+ stateKeys = [],
+ stateNotes = [],
stateTabStop = 4,
stateStandalone = False,
stateTitle = [],
@@ -156,7 +170,6 @@ defaultParserState =
stateStrict = False,
stateSmart = False,
stateColumns = 80,
- stateInlineLinks = False,
stateHeaderTable = [] }
-- | Indent string as a block.
@@ -182,8 +195,6 @@ prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
prettyBlock :: Block -> String
prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
(prettyBlockList 2 blocks)
-prettyBlock (Note ref blocks) = "Note " ++ (show ref) ++ "\n " ++
- (prettyBlockList 2 blocks)
prettyBlock (OrderedList blockLists) =
"OrderedList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", "
(map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
@@ -236,11 +247,6 @@ endsWith :: Char -> [Char] -> Bool
endsWith char [] = False
endsWith char str = (char == last str)
--- | Returns @True@ if block is a @Note@ block
-isNoteBlock :: Block -> Bool
-isNoteBlock (Note ref blocks) = True
-isNoteBlock _ = False
-
-- | Joins a list of lists, separated by another list.
joinWithSep :: [a] -- ^ List to use as separator
-> [[a]] -- ^ Lists to join
@@ -351,9 +357,9 @@ data WriterOptions = WriterOptions
, writerIncremental :: Bool -- ^ Incremental S5 lists
, writerNumberSections :: Bool -- ^ Number sections in LaTeX
, writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
+ , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
, writerTabStop :: Int -- ^ Tabstop for conversion between
-- spaces and tabs
- , writerNotes :: [Block] -- ^ List of note blocks
} deriving Show
-- | Default writer options.
@@ -362,79 +368,18 @@ defaultWriterOptions =
writerHeader = "",
writerTitlePrefix = "",
writerTabStop = 4,
- writerNotes = [],
writerS5 = False,
writerIncremental = False,
writerNumberSections = False,
writerIncludeBefore = "",
writerIncludeAfter = "",
- writerStrictMarkdown = False }
-
---
--- Functions for constructing lists of reference keys
---
-
--- | Returns @Just@ numerical key reference if there's already a key
--- for the specified target in the list of blocks, otherwise @Nothing@.
-keyFoundIn :: [Block] -- ^ List of key blocks to search
- -> Target -- ^ Target to search for
- -> Maybe String
-keyFoundIn [] src = Nothing
-keyFoundIn ((Key [Str num] src1):rest) src = if (src1 == src)
- then Just num
- else keyFoundIn rest src
-keyFoundIn (_:rest) src = keyFoundIn rest src
-
--- | Return next unique numerical key, given keyList
-nextUniqueKey :: [[Inline]] -> String
-nextUniqueKey keys =
- let nums = [1..10000]
- notAKey n = not (any (== [Str (show n)]) keys) in
- case (find notAKey nums) of
- Just x -> show x
- Nothing -> error "Could not find unique key for reference link"
-
--- | Generate a reference for a URL (either an existing reference, if
--- there is one, or a new one, if there isn't) and update parser state.
-generateReference :: String -- ^ URL
- -> String -- ^ Title
- -> GenParser tok ParserState Target
-generateReference url title = do
- let src = Src (decodeEntities url) (decodeEntities title)
- state <- getState
- let keyBlocks = stateKeyBlocks state
- let keysUsed = stateKeysUsed state
- case (keyFoundIn keyBlocks src) of
- Just num -> return (Ref [Str num])
- Nothing -> do
- let nextNum = nextUniqueKey keysUsed
- updateState (\st -> st { stateKeyBlocks =
- (Key [Str nextNum] src):keyBlocks,
- stateKeysUsed =
- [Str nextNum]:keysUsed })
- return (Ref [Str nextNum])
+ writerStrictMarkdown = False,
+ writerReferenceLinks = False }
--
--- code to replace reference links with real links and remove unneeded key blocks
+-- code to lookup reference keys in key table
--
-type KeyTable = [([Inline], Target)]
-
--- | Returns @True@ if block is a Key block
-isRefBlock :: Block -> Bool
-isRefBlock (Key _ _) = True
-isRefBlock _ = False
-
--- | Returns a pair of a list of pairs of keys and associated sources, and a new
--- list of blocks with the included key blocks deleted.
-keyTable :: [Block] -> (KeyTable, [Block])
-keyTable [] = ([],[])
-keyTable ((Key ref target):lst) = (((ref, target):table), rest)
- where (table, rest) = keyTable lst
-keyTable (Null:lst) = keyTable lst -- get rid of Nulls
-keyTable (other:lst) = (table, (other:rest))
- where (table, rest) = keyTable lst
-
-- | Look up key in key table and return target object.
lookupKeySrc :: KeyTable -- ^ Key table
-> [Inline] -- ^ Key
@@ -455,8 +400,6 @@ refsMatch ((TeX x):restx) ((TeX y):resty) =
((map toLower x) == (map toLower y)) && refsMatch restx resty
refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((NoteRef x):restx) ((NoteRef y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
refsMatch ((Emph x):restx) ((Emph y):resty) =
refsMatch x y && refsMatch restx resty
refsMatch ((Strong x):restx) ((Strong y):resty) =
@@ -467,95 +410,3 @@ refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
refsMatch [] x = null x
refsMatch x [] = null x
--- | Replace reference links with explicit links in list of blocks,
--- removing key blocks.
-replaceReferenceLinks :: [Block] -> [Block]
-replaceReferenceLinks blocks =
- let (keytable, purged) = keyTable blocks in
- replaceRefLinksBlockList keytable purged
-
--- | Use key table to replace reference links with explicit links in a list
--- of blocks
-replaceRefLinksBlockList :: KeyTable -> [Block] -> [Block]
-replaceRefLinksBlockList keytable lst =
- map (replaceRefLinksBlock keytable) lst
-
--- | Use key table to replace reference links with explicit links in a block
-replaceRefLinksBlock :: KeyTable -> Block -> Block
-replaceRefLinksBlock keytable (Plain lst) =
- Plain (map (replaceRefLinksInline keytable) lst)
-replaceRefLinksBlock keytable (Para lst) =
- Para (map (replaceRefLinksInline keytable) lst)
-replaceRefLinksBlock keytable (Header lvl lst) =
- Header lvl (map (replaceRefLinksInline keytable) lst)
-replaceRefLinksBlock keytable (BlockQuote lst) =
- BlockQuote (map (replaceRefLinksBlock keytable) lst)
-replaceRefLinksBlock keytable (Note ref lst) =
- Note ref (map (replaceRefLinksBlock keytable) lst)
-replaceRefLinksBlock keytable (OrderedList lst) =
- OrderedList (map (replaceRefLinksBlockList keytable) lst)
-replaceRefLinksBlock keytable (BulletList lst) =
- BulletList (map (replaceRefLinksBlockList keytable) lst)
-replaceRefLinksBlock keytable (DefinitionList lst) =
- DefinitionList (map (\(term, def) ->
- (map (replaceRefLinksInline keytable) term,
- replaceRefLinksBlockList keytable def)) lst)
-replaceRefLinksBlock keytable (Table caption alignment widths headers rows) =
- Table (map (replaceRefLinksInline keytable) caption) alignment widths
- (map (replaceRefLinksBlockList keytable) headers)
- (map (map (replaceRefLinksBlockList keytable)) rows)
-replaceRefLinksBlock keytable other = other
-
--- | Use key table to replace reference links with explicit links in an
--- inline element.
-replaceRefLinksInline :: KeyTable -> Inline -> Inline
-replaceRefLinksInline keytable (Link text (Ref ref)) = (Link newText newRef)
- where newRef = case lookupKeySrc keytable
- (if (null ref) then text else ref) of
- Nothing -> (Ref ref)
- Just src -> src
- newText = map (replaceRefLinksInline keytable) text
-replaceRefLinksInline keytable (Image text (Ref ref)) = (Image newText newRef)
- where newRef = case lookupKeySrc keytable
- (if (null ref) then text else ref) of
- Nothing -> (Ref ref)
- Just src -> src
- newText = map (replaceRefLinksInline keytable) text
-replaceRefLinksInline keytable (Emph lst) =
- Emph (map (replaceRefLinksInline keytable) lst)
-replaceRefLinksInline keytable (Strong lst) =
- Strong (map (replaceRefLinksInline keytable) lst)
-replaceRefLinksInline keytable (Quoted t lst) =
- Quoted t (map (replaceRefLinksInline keytable) lst)
-replaceRefLinksInline keytable other = other
-
--- | Return a text object with a string of formatted SGML attributes.
-attributeList :: [(String, String)] -> Doc
-attributeList = text . concatMap
- (\(a, b) -> " " ++ escapeSGMLString a ++ "=\"" ++
- escapeSGMLString b ++ "\"")
-
--- | Put the supplied contents between start and end tags of tagType,
--- with specified attributes and (if specified) indentation.
-inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
-inTags isIndented tagType attribs contents =
- let openTag = PP.char '<' <> text tagType <> attributeList attribs <>
- PP.char '>'
- closeTag = text "</" <> text tagType <> PP.char '>' in
- if isIndented
- then openTag $$ nest 2 contents $$ closeTag
- else openTag <> contents <> closeTag
-
--- | Return a self-closing tag of tagType with specified attributes
-selfClosingTag :: String -> [(String, String)] -> Doc
-selfClosingTag tagType attribs =
- PP.char '<' <> text tagType <> attributeList attribs <> text " />"
-
--- | Put the supplied contents between start and end tags of tagType.
-inTagsSimple :: String -> Doc -> Doc
-inTagsSimple tagType = inTags False tagType []
-
--- | Put the supplied contents in indented block btw start and end tags.
-inTagsIndented :: String -> Doc -> Doc
-inTagsIndented tagType = inTags True tagType []
-
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 87eba9ad0..9fce1c061 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -27,16 +27,53 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to Docbook XML.
-}
-module Text.Pandoc.Writers.Docbook (
- writeDocbook
- ) where
+module Text.Pandoc.Writers.Docbook ( writeDocbook) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Entities ( escapeSGMLString )
+import Text.Pandoc.Entities ( escapeStringForXML )
import Data.Char ( toLower, ord )
import Data.List ( isPrefixOf, partition, drop )
import Text.PrettyPrint.HughesPJ hiding ( Str )
+
+--
+-- code to format XML
+--
+
+-- | Return a text object with a string of formatted XML attributes.
+attributeList :: [(String, String)] -> Doc
+attributeList = text . concatMap
+ (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++
+ escapeStringForXML b ++ "\"")
+
+-- | Put the supplied contents between start and end tags of tagType,
+-- with specified attributes and (if specified) indentation.
+inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
+inTags isIndented tagType attribs contents =
+ let openTag = char '<' <> text tagType <> attributeList attribs <>
+ char '>'
+ closeTag = text "</" <> text tagType <> char '>' in
+ if isIndented
+ then openTag $$ nest 2 contents $$ closeTag
+ else openTag <> contents <> closeTag
+
+-- | Return a self-closing tag of tagType with specified attributes
+selfClosingTag :: String -> [(String, String)] -> Doc
+selfClosingTag tagType attribs =
+ char '<' <> text tagType <> attributeList attribs <> text " />"
+
+-- | Put the supplied contents between start and end tags of tagType.
+inTagsSimple :: String -> Doc -> Doc
+inTagsSimple tagType = inTags False tagType []
+
+-- | Put the supplied contents in indented block btw start and end tags.
+inTagsIndented :: String -> Doc -> Doc
+inTagsIndented tagType = inTags True tagType []
+
+--
+-- Docbook writer
+--
+
-- | Data structure for defining hierarchical Pandoc documents
data Element = Blk Block
| Sec [Inline] [Element] deriving (Eq, Read, Show)
@@ -64,8 +101,8 @@ authorToDocbook name = inTagsIndented "author" $
then -- last name first
let (lastname, rest) = break (==',') name
firstname = removeLeadingSpace rest in
- inTagsSimple "firstname" (text $ escapeSGMLString firstname) <>
- inTagsSimple "surname" (text $ escapeSGMLString lastname)
+ inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
else -- last name last
let namewords = words name
lengthname = length namewords
@@ -73,8 +110,8 @@ authorToDocbook name = inTagsIndented "author" $
0 -> ("","")
1 -> ("", name)
n -> (joinWithSep " " (take (n-1) namewords), last namewords) in
- inTagsSimple "firstname" (text $ escapeSGMLString firstname) $$
- inTagsSimple "surname" (text $ escapeSGMLString lastname)
+ inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: WriterOptions -> Pandoc -> String
@@ -86,18 +123,15 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) =
then inTagsIndented "articleinfo" $
(inTagsSimple "title" (wrap opts title)) $$
(vcat (map authorToDocbook authors)) $$
- (inTagsSimple "date" (text $ escapeSGMLString date))
+ (inTagsSimple "date" (text $ escapeStringForXML date))
else empty
- blocks' = replaceReferenceLinks blocks
- (noteBlocks, blocks'') = partition isNoteBlock blocks'
- opts' = opts {writerNotes = noteBlocks}
- elements = hierarchicalize blocks''
- before = writerIncludeBefore opts'
- after = writerIncludeAfter opts'
+ elements = hierarchicalize blocks
+ before = writerIncludeBefore opts
+ after = writerIncludeAfter opts
body = (if null before then empty else text before) $$
- vcat (map (elementToDocbook opts') elements) $$
+ vcat (map (elementToDocbook opts) elements) $$
(if null after then empty else text after)
- body' = if writerStandalone opts'
+ body' = if writerStandalone opts
then inTagsIndented "article" (meta $$ body)
else body in
render $ head $$ body' $$ text ""
@@ -140,15 +174,13 @@ blockToDocbook opts (Para lst) =
blockToDocbook opts (BlockQuote blocks) =
inTagsIndented "blockquote" (blocksToDocbook opts blocks)
blockToDocbook opts (CodeBlock str) =
- text "<screen>\n" <> text (escapeSGMLString str) <> text "\n</screen>"
+ text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>"
blockToDocbook opts (BulletList lst) =
inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst
blockToDocbook opts (OrderedList lst) =
inTagsIndented "orderedlist" $ listItemsToDocbook opts lst
blockToDocbook opts (RawHtml str) = text str -- raw XML block
blockToDocbook opts HorizontalRule = empty -- not semantic
-blockToDocbook opts (Note _ _) = empty -- shouldn't occur
-blockToDocbook opts (Key _ _) = empty -- shouldn't occur
blockToDocbook opts (Table caption aligns widths headers rows) =
let alignStrings = map alignmentToString aligns
captionDoc = if null caption
@@ -197,7 +229,7 @@ inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst)
-- | Convert an inline element to Docbook.
inlineToDocbook :: WriterOptions -> Inline -> Doc
-inlineToDocbook opts (Str str) = text $ escapeSGMLString str
+inlineToDocbook opts (Str str) = text $ escapeStringForXML str
inlineToDocbook opts (Emph lst) =
inTagsSimple "emphasis" (inlinesToDocbook opts lst)
inlineToDocbook opts (Strong lst) =
@@ -210,31 +242,24 @@ inlineToDocbook opts Ellipses = text "&#8230;"
inlineToDocbook opts EmDash = text "&#8212;"
inlineToDocbook opts EnDash = text "&#8211;"
inlineToDocbook opts (Code str) =
- inTagsSimple "literal" $ text (escapeSGMLString str)
+ inTagsSimple "literal" $ text (escapeStringForXML str)
inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
inlineToDocbook opts (HtmlInline str) = empty
inlineToDocbook opts LineBreak =
text $ "<literallayout></literallayout>"
inlineToDocbook opts Space = char ' '
-inlineToDocbook opts (Link txt (Src src tit)) =
+inlineToDocbook opts (Link txt (src, tit)) =
if isPrefixOf "mailto:" src
- then inTagsSimple "email" $ text (escapeSGMLString $ drop 7 src)
+ then inTagsSimple "email" $ text (escapeStringForXML $ drop 7 src)
else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
-inlineToDocbook opts (Link text (Ref ref)) = empty -- shouldn't occur
-inlineToDocbook opts (Image alt (Src src tit)) =
+inlineToDocbook opts (Image alt (src, tit)) =
let titleDoc = if null tit
then empty
else inTagsIndented "objectinfo" $
inTagsIndented "title"
- (text $ escapeSGMLString tit) in
+ (text $ escapeStringForXML tit) in
inTagsIndented "inlinemediaobject" $
inTagsIndented "imageobject" $
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
-inlineToDocbook opts (Image alternate (Ref ref)) = empty --shouldn't occur
-inlineToDocbook opts (NoteRef ref) =
- let notes = writerNotes opts
- hits = filter (\(Note r _) -> r == ref) notes in
- if null hits
- then empty
- else let (Note _ contents) = head hits in
- inTagsIndented "footnote" $ blocksToDocbook opts contents
+inlineToDocbook opts (Note contents) =
+ inTagsIndented "footnote" $ blocksToDocbook opts contents
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index be5eb8506..f6fc0741e 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -35,8 +35,11 @@ import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
import Data.List ( isPrefixOf, partition )
+import Control.Monad.State
import Text.XHtml.Strict
+type Notes = [Html]
+
-- | Convert Pandoc document to Html string.
writeHtmlString :: WriterOptions -> Pandoc -> String
writeHtmlString opts =
@@ -48,13 +51,10 @@ writeHtmlString opts =
writeHtml :: WriterOptions -> Pandoc -> Html
writeHtml opts (Pandoc (Meta tit authors date) blocks) =
let titlePrefix = writerTitlePrefix opts
- topTitle = inlineListToHtml opts tit
- topTitle' = if not (null titlePrefix)
- then stringToHtml titlePrefix +++
- if not (null tit)
- then '-' +++ topTitle
- else noHtml
- else topTitle
+ topTitle = evalState (inlineListToHtml opts tit) []
+ topTitle' = if null titlePrefix
+ then topTitle
+ else titlePrefix +++ " - " +++ topTitle
head = header $ thetitle topTitle' +++
meta ! [httpequiv "Content-Type",
content "text/html; charset=UTF-8"] +++
@@ -69,31 +69,30 @@ writeHtml opts (Pandoc (Meta tit authors date) blocks) =
(not (writerS5 opts))
then h1 ! [theclass "title"] $ topTitle
else noHtml
- blocks' = replaceReferenceLinks blocks
- (noteBlocks, blocks'') = partition isNoteBlock blocks'
+ (blocks', revnotes) = runState (blockListToHtml opts blocks) []
+ notes = reverse revnotes
before = primHtml $ writerIncludeBefore opts
after = primHtml $ writerIncludeAfter opts
- thebody = before +++ titleHeader +++
- toHtmlFromList (map (blockToHtml opts) blocks'') +++
- footnoteSection opts noteBlocks +++ after
+ thebody = before +++ titleHeader +++ blocks' +++
+ footnoteSection opts notes +++ after
in if writerStandalone opts
then head +++ (body thebody)
else thebody
-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
-footnoteSection :: WriterOptions -> [Block] -> Html
+footnoteSection :: WriterOptions -> Notes -> Html
footnoteSection opts notes =
if null notes
- then noHtml
- else thediv ! [theclass "footnotes"] $
- hr +++ (olist $ toHtmlFromList $ map (blockToHtml opts) notes)
+ then noHtml
+ else thediv ! [theclass "footnotes"] $
+ hr +++ (olist << notes)
-- | Obfuscate a "mailto:" link using Javascript.
-obfuscateLink :: WriterOptions -> [Inline] -> String -> Html
-obfuscateLink opts txt src =
+obfuscateLink :: WriterOptions -> Html -> String -> Html
+obfuscateLink opts text src =
let emailRegex = mkRegex "mailto:*([^@]*)@(.*)"
- text' = show $ inlineListToHtml opts txt
+ text' = show $ text
src' = map toLower src in
case (matchRegex emailRegex src') of
(Just [name, domain]) ->
@@ -117,7 +116,7 @@ obfuscateLink opts txt src =
"document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
noscript (primHtml $ obfuscateString altText)
- _ -> anchor ! [href src] $ inlineListToHtml opts txt -- malformed email
+ _ -> anchor ! [href src] $ text -- malformed email
-- | Obfuscate character as entity.
obfuscateChar :: Char -> String
@@ -131,137 +130,153 @@ obfuscateString :: String -> String
obfuscateString = (concatMap obfuscateChar) . decodeEntities
-- | Convert Pandoc block element to HTML.
-blockToHtml :: WriterOptions -> Block -> Html
-blockToHtml opts Null = noHtml
-blockToHtml opts (Plain lst) = inlineListToHtml opts lst
-blockToHtml opts (Para lst) = paragraph $ inlineListToHtml opts lst
-blockToHtml opts (BlockQuote blocks) =
- if (writerS5 opts)
- then -- in S5, treat list in blockquote specially
- -- if default is incremental, make it nonincremental;
- -- otherwise incremental
- let inc = not (writerIncremental opts) in
- case blocks of
- [BulletList lst] -> blockToHtml (opts {writerIncremental =
- inc}) (BulletList lst)
- [OrderedList lst] -> blockToHtml (opts {writerIncremental =
- inc}) (OrderedList lst)
- otherwise -> blockquote $ toHtmlFromList $
- map (blockToHtml opts) blocks
- else blockquote $ toHtmlFromList $ map (blockToHtml opts) blocks
-blockToHtml opts (Note ref lst) =
- let contents = toHtmlFromList $ map (blockToHtml opts) lst
- backlink = anchor ! [href ("#fnref" ++ ref), theclass "footnoteBacklink",
- title ("Jump back to footnote " ++ ref)] $
- (primHtmlChar "#8617") in
- li ! [identifier ("fn" ++ ref)] $ contents +++ backlink
-blockToHtml opts (Key _ _) = noHtml
-blockToHtml opts (CodeBlock str) =
- pre $ thecode << (str ++ "\n") -- the final \n for consistency with Markdown.pl
-blockToHtml opts (RawHtml str) = primHtml str
-blockToHtml opts (BulletList lst) =
- let attribs = if writerIncremental opts
- then [theclass "incremental"]
- else [] in
- unordList ! attribs $ map (blockListToHtml opts) lst
-blockToHtml opts (OrderedList lst) =
- let attribs = if writerIncremental opts
- then [theclass "incremental"]
- else [] in
- ordList ! attribs $ map (blockListToHtml opts) lst
-blockToHtml opts (DefinitionList lst) =
- let attribs = if writerIncremental opts
- then [theclass "incremental"]
- else [] in
- defList ! attribs $ map (\(term, def) -> (inlineListToHtml opts term,
- blockListToHtml opts def)) lst
-blockToHtml opts HorizontalRule = hr
-blockToHtml opts (Header level lst) =
- let contents = inlineListToHtml opts lst in
- case level of
- 1 -> h1 contents
- 2 -> h2 contents
- 3 -> h3 contents
- 4 -> h4 contents
- 5 -> h5 contents
- 6 -> h6 contents
- _ -> paragraph contents
-blockToHtml opts (Table capt aligns widths headers rows) =
- let alignStrings = map alignmentToString aligns
- captionDoc = if null capt
- then noHtml
- else caption $ inlineListToHtml opts capt in
- table $ captionDoc +++
- (colHeadsToHtml opts alignStrings widths headers) +++
- (toHtmlFromList $ map (tableRowToHtml opts alignStrings) rows)
+blockToHtml :: WriterOptions -> Block -> State Notes Html
+blockToHtml opts block =
+ case block of
+ (Null) -> return $ noHtml
+ (Plain lst) -> inlineListToHtml opts lst
+ (Para lst) -> inlineListToHtml opts lst >>= (return . paragraph)
+ (RawHtml str) -> return $ primHtml str
+ (HorizontalRule) -> return $ hr
+ (CodeBlock str) -> return $ pre $ thecode << (str ++ "\n")
+ -- the final \n for consistency with Markdown.pl
+ (BlockQuote blocks) -> -- in S5, treat list in blockquote specially
+ -- if default is incremental, make it nonincremental;
+ -- otherwise incremental
+ if writerS5 opts
+ then let inc = not (writerIncremental opts) in
+ case blocks of
+ [BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
+ (BulletList lst)
+ [OrderedList lst] -> blockToHtml (opts {writerIncremental = inc})
+ (OrderedList lst)
+ otherwise -> blockListToHtml opts blocks >>=
+ (return . blockquote)
+ else blockListToHtml opts blocks >>= (return . blockquote)
+ (Header level lst) -> do contents <- inlineListToHtml opts lst
+ return $ case level of
+ 1 -> h1 contents
+ 2 -> h2 contents
+ 3 -> h3 contents
+ 4 -> h4 contents
+ 5 -> h5 contents
+ 6 -> h6 contents
+ _ -> paragraph contents
+ (BulletList lst) -> do contents <- mapM (blockListToHtml opts) lst
+ let attribs = if writerIncremental opts
+ then [theclass "incremental"]
+ else []
+ return $ unordList ! attribs $ contents
+ (OrderedList lst) -> do contents <- mapM (blockListToHtml opts) lst
+ let attribs = if writerIncremental opts
+ then [theclass "incremental"]
+ else []
+ return $ ordList ! attribs $ contents
+ (DefinitionList lst) -> do contents <- mapM (\(term, def) ->
+ do term' <- inlineListToHtml opts term
+ def' <- blockListToHtml opts def
+ return $ (term', def'))
+ lst
+ let attribs = if writerIncremental opts
+ then [theclass "incremental"]
+ else []
+ return $ defList ! attribs $ contents
+ (Table capt aligns widths headers rows) ->
+ do let alignStrings = map alignmentToString aligns
+ captionDoc <- if null capt
+ then return noHtml
+ else inlineListToHtml opts capt >>=
+ (return . caption)
+ colHeads <- colHeadsToHtml opts alignStrings
+ widths headers
+ rows' <- mapM (tableRowToHtml opts alignStrings) rows
+ return $ table $ captionDoc +++ colHeads +++ rows'
colHeadsToHtml opts alignStrings widths headers =
- let heads = zipWith3
- (\align width item -> tableItemToHtml opts th align width item)
- alignStrings widths headers in
- tr $ toHtmlFromList heads
+ do heads <- sequence $ zipWith3
+ (\align width item -> tableItemToHtml opts th align width item)
+ alignStrings widths headers
+ return $ tr $ toHtmlFromList heads
alignmentToString alignment = case alignment of
AlignLeft -> "left"
AlignRight -> "right"
AlignCenter -> "center"
AlignDefault -> "left"
+
tableRowToHtml opts aligns cols =
- tr $ toHtmlFromList $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols
+ do contents <- sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols
+ return $ tr $ toHtmlFromList contents
tableItemToHtml opts tag align' width item =
- let attrib = [align align'] ++
- if (width /= 0)
- then [thestyle ("{width: " ++ show (truncate (100*width)) ++ "%;}")]
- else [] in
- tag ! attrib $ toHtmlFromList $ map (blockToHtml opts) item
+ do contents <- blockListToHtml opts item
+ let attrib = [align align'] ++
+ if (width /= 0)
+ then [thestyle ("{width: " ++ show (truncate (100*width)) ++ "%;}")]
+ else []
+ return $ tag ! attrib $ contents
-blockListToHtml :: WriterOptions -> [Block] -> Html
-blockListToHtml opts list =
- toHtmlFromList $ map (blockToHtml opts) list
+blockListToHtml :: WriterOptions -> [Block] -> State Notes Html
+blockListToHtml opts lst = mapM (blockToHtml opts) lst >>= (return . toHtmlFromList)
-- | Convert list of Pandoc inline elements to HTML.
-inlineListToHtml :: WriterOptions -> [Inline] -> Html
-inlineListToHtml opts lst = toHtmlFromList $ map (inlineToHtml opts) lst
+inlineListToHtml :: WriterOptions -> [Inline] -> State Notes Html
+inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= (return . toHtmlFromList)
-- | Convert Pandoc inline element to HTML.
-inlineToHtml :: WriterOptions -> Inline -> Html
-inlineToHtml opts (Emph lst) =
- emphasize $ inlineListToHtml opts lst
-inlineToHtml opts (Strong lst) =
- strong $ inlineListToHtml opts lst
-inlineToHtml opts (Code str) =
- thecode << str
-inlineToHtml opts (Quoted SingleQuote lst) =
- primHtmlChar "lsquo" +++ inlineListToHtml opts lst +++ primHtmlChar "rsquo"
-inlineToHtml opts (Quoted DoubleQuote lst) =
- primHtmlChar "ldquo" +++ inlineListToHtml opts lst +++ primHtmlChar "rdquo"
-inlineToHtml opts EmDash = primHtmlChar "mdash"
-inlineToHtml opts EnDash = primHtmlChar "ndash"
-inlineToHtml opts Ellipses = primHtmlChar "hellip"
-inlineToHtml opts Apostrophe = primHtmlChar "rsquo"
-inlineToHtml opts (Str str) = stringToHtml str
-inlineToHtml opts (TeX str) = stringToHtml str
-inlineToHtml opts (HtmlInline str) = primHtml str
-inlineToHtml opts (LineBreak) = br
-inlineToHtml opts Space = stringToHtml " "
-inlineToHtml opts (Link txt (Src src tit)) =
- if (isPrefixOf "mailto:" src)
- then obfuscateLink opts txt src
- else anchor ! ([href src] ++ if null tit then [] else [title tit]) $
- inlineListToHtml opts txt
-inlineToHtml opts (Link txt (Ref ref)) =
- '[' +++ (inlineListToHtml opts txt) +++
- ']' +++ '[' +++ (inlineListToHtml opts ref) +++
- ']'
- -- this is what markdown does, for better or worse
-inlineToHtml opts (Image alttext (Src source tit)) =
- let alternate = renderHtmlFragment $ inlineListToHtml opts alttext in
- image ! ([src source, title tit] ++ if null alttext then [] else [alt alternate])
- -- note: null title is included, as in Markdown.pl
-inlineToHtml opts (Image alternate (Ref ref)) =
- '!' +++ inlineToHtml opts (Link alternate (Ref ref))
-inlineToHtml opts (NoteRef ref) =
- anchor ! [href ("#fn" ++ ref), theclass "footnoteRef", identifier ("fnref" ++ ref)] <<
- sup << ref
+inlineToHtml :: WriterOptions -> Inline -> State Notes Html
+inlineToHtml opts inline =
+ case inline of
+ (Str str) -> return $ stringToHtml str
+ (Space) -> return $ stringToHtml " "
+ (LineBreak) -> return $ br
+ (EmDash) -> return $ primHtmlChar "mdash"
+ (EnDash) -> return $ primHtmlChar "ndash"
+ (Ellipses) -> return $ primHtmlChar "hellip"
+ (Apostrophe) -> return $ primHtmlChar "rsquo"
+ (Emph lst) -> inlineListToHtml opts lst >>= (return . emphasize)
+ (Strong lst) -> inlineListToHtml opts lst >>= (return . strong)
+ (Code str) -> return $ thecode << str
+ (Quoted quoteType lst) ->
+ let (leftQuote, rightQuote) = case quoteType of
+ SingleQuote -> (primHtmlChar "lsquo",
+ primHtmlChar "rsquo")
+ DoubleQuote -> (primHtmlChar "ldquo",
+ primHtmlChar "rdquo") in
+ do contents <- inlineListToHtml opts lst
+ return $ leftQuote +++ contents +++ rightQuote
+ (TeX str) -> return $ stringToHtml str
+ (HtmlInline str) -> return $ primHtml str
+ (Link txt (src,tit)) ->
+ do linkText <- inlineListToHtml opts txt
+ return $ if (isPrefixOf "mailto:" src)
+ then obfuscateLink opts linkText src
+ else anchor ! ([href src] ++
+ if null tit
+ then []
+ else [title tit]) $
+ linkText
+ (Image txt (source,tit)) ->
+ do alternate <- inlineListToHtml opts txt
+ let alternate' = renderHtmlFragment alternate
+ let attributes = [src source, title tit] ++
+ if null txt then [] else [alt alternate']
+ return $ image ! attributes
+ -- note: null title included, as in Markdown.pl
+ (Note contents) -> do notes <- get
+ let number = (length notes) + 1
+ let ref = show number
+ htmlContents <- blockListToNote opts ref contents
+ modify (htmlContents:) -- push contents onto front of notes
+ return $ anchor ! [href ("#fn" ++ ref),
+ theclass "footnoteRef",
+ identifier ("fnref" ++ ref)] << sup << ref
+
+blockListToNote :: WriterOptions -> String -> [Block] -> State Notes Html
+blockListToNote opts ref blocks =
+ do contents <- blockListToHtml opts blocks
+ let backlink = anchor ! [href ("#fnref" ++ ref), theclass "footnoteBacklink",
+ title ("Jump back to footnote " ++ ref)] $
+ (primHtmlChar "#8617")
+ return $ li ! [identifier ("fn" ++ ref)] $ contents +++ backlink
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index de1b7e207..8a9cacba3 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -33,31 +33,28 @@ module Text.Pandoc.Writers.LaTeX (
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Printf ( printf )
-import List ( (\\) )
+import Data.List ( (\\) )
-- | Convert Pandoc to LaTeX.
writeLaTeX :: WriterOptions -> Pandoc -> String
writeLaTeX options (Pandoc meta blocks) =
- let notes = filter isNoteBlock blocks in -- assumes all notes at outer level
let body = (writerIncludeBefore options) ++
- (concatMap (blockToLaTeX notes)
- (replaceReferenceLinks blocks)) ++
+ (concatMap blockToLaTeX blocks) ++
(writerIncludeAfter options) in
let head = if writerStandalone options
- then latexHeader notes options meta
+ then latexHeader options meta
else "" in
let foot = if writerStandalone options then "\n\\end{document}\n" else "" in
head ++ body ++ foot
-- | Insert bibliographic information into LaTeX header.
-latexHeader :: [Block] -- ^ List of note blocks to use in resolving note refs
- -> WriterOptions -- ^ Options, including LaTeX header
+latexHeader :: WriterOptions -- ^ Options, including LaTeX header
-> Meta -- ^ Meta with bibliographic information
-> String
-latexHeader notes options (Meta title authors date) =
+latexHeader options (Meta title authors date) =
let titletext = if null title
then ""
- else "\\title{" ++ inlineListToLaTeX notes title ++ "}\n"
+ else "\\title{" ++ inlineListToLaTeX title ++ "}\n"
authorstext = if null authors
then ""
else "\\author{" ++ (joinWithSep "\\\\"
@@ -99,31 +96,28 @@ deVerb ((Code str):rest) = (Str str):(deVerb rest)
deVerb (other:rest) = other:(deVerb rest)
-- | Convert Pandoc block element to LaTeX.
-blockToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs
- -> Block -- ^ Block to convert
+blockToLaTeX :: Block -- ^ Block to convert
-> String
-blockToLaTeX notes Null = ""
-blockToLaTeX notes (Plain lst) = inlineListToLaTeX notes lst ++ "\n"
-blockToLaTeX notes (Para lst) = (inlineListToLaTeX notes lst) ++ "\n\n"
-blockToLaTeX notes (BlockQuote lst) = "\\begin{quote}\n" ++
- (concatMap (blockToLaTeX notes) lst) ++ "\\end{quote}\n"
-blockToLaTeX notes (Note ref lst) = ""
-blockToLaTeX notes (Key _ _) = ""
-blockToLaTeX notes (CodeBlock str) = "\\begin{verbatim}\n" ++ str ++
+blockToLaTeX Null = ""
+blockToLaTeX (Plain lst) = inlineListToLaTeX lst ++ "\n"
+blockToLaTeX (Para lst) = (inlineListToLaTeX lst) ++ "\n\n"
+blockToLaTeX (BlockQuote lst) = "\\begin{quote}\n" ++
+ (concatMap blockToLaTeX lst) ++ "\\end{quote}\n"
+blockToLaTeX (CodeBlock str) = "\\begin{verbatim}\n" ++ str ++
"\n\\end{verbatim}\n"
-blockToLaTeX notes (RawHtml str) = ""
-blockToLaTeX notes (BulletList lst) = "\\begin{itemize}\n" ++
- (concatMap (listItemToLaTeX notes) lst) ++ "\\end{itemize}\n"
-blockToLaTeX notes (OrderedList lst) = "\\begin{enumerate}\n" ++
- (concatMap (listItemToLaTeX notes) lst) ++ "\\end{enumerate}\n"
-blockToLaTeX notes HorizontalRule =
+blockToLaTeX (RawHtml str) = ""
+blockToLaTeX (BulletList lst) = "\\begin{itemize}\n" ++
+ (concatMap listItemToLaTeX lst) ++ "\\end{itemize}\n"
+blockToLaTeX (OrderedList lst) = "\\begin{enumerate}\n" ++
+ (concatMap listItemToLaTeX lst) ++ "\\end{enumerate}\n"
+blockToLaTeX HorizontalRule =
"\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n"
-blockToLaTeX notes (Header level lst) =
+blockToLaTeX (Header level lst) =
if (level > 0) && (level <= 3)
then "\\" ++ (concat (replicate (level - 1) "sub")) ++ "section{" ++
- (inlineListToLaTeX notes (deVerb lst)) ++ "}\n\n"
- else (inlineListToLaTeX notes lst) ++ "\n\n"
-blockToLaTeX notes (Table caption aligns widths heads rows) =
+ (inlineListToLaTeX (deVerb lst)) ++ "}\n\n"
+ else (inlineListToLaTeX lst) ++ "\n\n"
+blockToLaTeX (Table caption aligns widths heads rows) =
let colWidths = map printDecimal widths
colDescriptors = concat $ zipWith
(\width align -> ">{\\PBS" ++
@@ -135,11 +129,11 @@ blockToLaTeX notes (Table caption aligns widths heads rows) =
"\\hspace{0pt}}p{" ++ width ++
"\\textwidth}")
colWidths aligns
- headers = tableRowToLaTeX notes heads
- captionText = inlineListToLaTeX notes caption
+ headers = tableRowToLaTeX heads
+ captionText = inlineListToLaTeX caption
tableBody = "\\begin{tabular}{" ++ colDescriptors ++ "}\n" ++
headers ++ "\\hline\n" ++
- (concatMap (tableRowToLaTeX notes) rows) ++
+ (concatMap tableRowToLaTeX rows) ++
"\\end{tabular}\n"
centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n" in
if null captionText
@@ -151,19 +145,18 @@ blockToLaTeX notes (Table caption aligns widths heads rows) =
printDecimal :: Float -> String
printDecimal = printf "%.2f"
-tableColumnWidths notes cols = map (length . (concatMap (blockToLaTeX notes))) cols
+tableColumnWidths cols = map (length . (concatMap blockToLaTeX)) cols
-tableRowToLaTeX notes cols = joinWithSep " & " (map (concatMap (blockToLaTeX notes)) cols) ++ "\\\\\n"
+tableRowToLaTeX cols = joinWithSep " & " (map (concatMap blockToLaTeX) cols) ++ "\\\\\n"
-listItemToLaTeX notes list = "\\item " ++
- (concatMap (blockToLaTeX notes) list)
+listItemToLaTeX list = "\\item " ++
+ (concatMap blockToLaTeX list)
-- | Convert list of inline elements to LaTeX.
-inlineListToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs
- -> [Inline] -- ^ Inlines to convert
+inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
-> String
-inlineListToLaTeX notes lst =
- concatMap (inlineToLaTeX notes) lst
+inlineListToLaTeX lst =
+ concatMap inlineToLaTeX lst
isQuoted :: Inline -> Bool
isQuoted (Quoted _ _) = True
@@ -171,47 +164,35 @@ isQuoted Apostrophe = True
isQuoted _ = False
-- | Convert inline element to LaTeX
-inlineToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs
- -> Inline -- ^ Inline to convert
+inlineToLaTeX :: Inline -- ^ Inline to convert
-> String
-inlineToLaTeX notes (Emph lst) = "\\emph{" ++
- (inlineListToLaTeX notes (deVerb lst)) ++ "}"
-inlineToLaTeX notes (Strong lst) = "\\textbf{" ++
- (inlineListToLaTeX notes (deVerb lst)) ++ "}"
-inlineToLaTeX notes (Code str) = "\\verb" ++ [chr] ++ stuffing ++ [chr]
+inlineToLaTeX (Emph lst) = "\\emph{" ++
+ (inlineListToLaTeX (deVerb lst)) ++ "}"
+inlineToLaTeX (Strong lst) = "\\textbf{" ++
+ (inlineListToLaTeX (deVerb lst)) ++ "}"
+inlineToLaTeX (Code str) = "\\verb" ++ [chr] ++ stuffing ++ [chr]
where stuffing = str
chr = ((enumFromTo '!' '~') \\ stuffing) !! 0
-inlineToLaTeX notes (Quoted SingleQuote lst) =
+inlineToLaTeX (Quoted SingleQuote lst) =
let s1 = if (not (null lst)) && (isQuoted (head lst)) then "\\," else ""
s2 = if (not (null lst)) && (isQuoted (last lst)) then "\\," else "" in
- "`" ++ s1 ++ inlineListToLaTeX notes lst ++ s2 ++ "'"
-inlineToLaTeX notes (Quoted DoubleQuote lst) =
+ "`" ++ s1 ++ inlineListToLaTeX lst ++ s2 ++ "'"
+inlineToLaTeX (Quoted DoubleQuote lst) =
let s1 = if (not (null lst)) && (isQuoted (head lst)) then "\\," else ""
s2 = if (not (null lst)) && (isQuoted (last lst)) then "\\," else "" in
- "``" ++ s1 ++ inlineListToLaTeX notes lst ++ s2 ++ "''"
-inlineToLaTeX notes Apostrophe = "'"
-inlineToLaTeX notes EmDash = "---"
-inlineToLaTeX notes EnDash = "--"
-inlineToLaTeX notes Ellipses = "\\ldots{}"
-inlineToLaTeX notes (Str str) = stringToLaTeX str
-inlineToLaTeX notes (TeX str) = str
-inlineToLaTeX notes (HtmlInline str) = ""
-inlineToLaTeX notes (LineBreak) = "\\\\\n"
-inlineToLaTeX notes Space = " "
-inlineToLaTeX notes (Link text (Src src tit)) =
- "\\href{" ++ src ++ "}{" ++ (inlineListToLaTeX notes (deVerb text)) ++ "}"
-inlineToLaTeX notes (Link text (Ref ref)) = "[" ++
- (inlineListToLaTeX notes text) ++ "][" ++ (inlineListToLaTeX notes ref) ++
- "]" -- this is what markdown does, for better or worse
-inlineToLaTeX notes (Image alternate (Src source tit)) =
+ "``" ++ s1 ++ inlineListToLaTeX lst ++ s2 ++ "''"
+inlineToLaTeX Apostrophe = "'"
+inlineToLaTeX EmDash = "---"
+inlineToLaTeX EnDash = "--"
+inlineToLaTeX Ellipses = "\\ldots{}"
+inlineToLaTeX (Str str) = stringToLaTeX str
+inlineToLaTeX (TeX str) = str
+inlineToLaTeX (HtmlInline str) = ""
+inlineToLaTeX (LineBreak) = "\\\\\n"
+inlineToLaTeX Space = " "
+inlineToLaTeX (Link text (src, tit)) =
+ "\\href{" ++ src ++ "}{" ++ (inlineListToLaTeX (deVerb text)) ++ "}"
+inlineToLaTeX (Image alternate (source, tit)) =
"\\includegraphics{" ++ source ++ "}"
-inlineToLaTeX notes (Image alternate (Ref ref)) =
- "![" ++ (inlineListToLaTeX notes alternate) ++ "][" ++
- (inlineListToLaTeX notes ref) ++ "]"
-inlineToLaTeX [] (NoteRef ref) = ""
-inlineToLaTeX ((Note firstref firstblocks):rest) (NoteRef ref) =
- if (firstref == ref)
- then "\\footnote{" ++ (stripTrailingNewlines
- (concatMap (blockToLaTeX rest) firstblocks)) ++ "}"
- else inlineToLaTeX rest (NoteRef ref)
-
+inlineToLaTeX (Note contents) =
+ "\\footnote{" ++ (stripTrailingNewlines $ concatMap blockToLaTeX contents) ++ "}"
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 687f6e6c4..8f1b3cea9 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -34,19 +34,71 @@ module Text.Pandoc.Writers.Markdown (
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Data.List ( group, isPrefixOf, drop )
+import Data.List ( group, isPrefixOf, drop, find )
import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Control.Monad.State
+
+type Notes = [[Block]]
+type Refs = KeyTable
+type WriterState = (Notes, Refs)
-- | Convert Pandoc to Markdown.
writeMarkdown :: WriterOptions -> Pandoc -> String
-writeMarkdown options (Pandoc meta blocks) =
- let body = text (writerIncludeBefore options) <>
- vcat (map (blockToMarkdown (writerTabStop options))
- (formatKeys blocks)) $$ text (writerIncludeAfter options) in
- let head = if (writerStandalone options)
- then ((metaToMarkdown meta) $$ text (writerHeader options))
- else empty in
- render $ head <> body
+writeMarkdown opts document =
+ render $ evalState (pandocToMarkdown opts document) ([],[])
+
+-- | Return markdown representation of document.
+pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToMarkdown opts (Pandoc meta blocks) = do
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ before' = if null before then empty else text before
+ after' = if null after then empty else text after
+ metaBlock <- metaToMarkdown opts meta
+ let head = if (writerStandalone opts)
+ then metaBlock $$ text (writerHeader opts)
+ else empty
+ body <- blockListToMarkdown opts blocks
+ (notes, _) <- get
+ notes' <- notesToMarkdown opts (reverse notes)
+ (_, refs) <- get -- note that the notes may contain refs
+ refs' <- keyTableToMarkdown opts (reverse refs)
+ return $ head <> (before' $$ body <> text "\n" $$
+ notes' <> text "\n" $$ refs' $$ after')
+
+-- | Return markdown representation of reference key table.
+keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc
+keyTableToMarkdown opts refs =
+ mapM (keyToMarkdown opts) refs >>= (return . vcat)
+
+-- | Return markdown representation of a reference key.
+keyToMarkdown :: WriterOptions
+ -> ([Inline], (String, String))
+ -> State WriterState Doc
+keyToMarkdown opts (label, (src, tit)) = do
+ label' <- inlineListToMarkdown opts label
+ let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\""
+ return $ text " " <> char '[' <> label' <> char ']' <> text ": " <>
+ text src <> tit'
+
+-- | Return markdown representation of notes.
+notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToMarkdown opts notes =
+ mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=
+ (return . vcat)
+
+-- | Return markdown representation of a note.
+noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToMarkdown opts num note = do
+ contents <- blockListToMarkdown opts note
+ let marker = text "[^" <> text (show num) <> text "]:"
+ return $ hang marker (writerTabStop opts) contents
+
+wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+wrappedMarkdown opts sect = do
+ let chunks = splitBy Space sect
+ chunks' <- mapM (inlineListToMarkdown opts) chunks
+ return $ fsep chunks'
-- | Escape nonbreaking space as &nbsp; entity
escapeNbsp "" = ""
@@ -59,155 +111,163 @@ escapeNbsp str =
escapeString :: String -> String
escapeString = backslashEscape "`<\\*_^" . escapeNbsp
--- | Take list of inline elements and return wrapped doc.
-wrappedMarkdown :: [Inline] -> Doc
-wrappedMarkdown lst =
- let wrapSection sec = fsep $ map inlineListToMarkdown $ (splitBy Space sec)
- wrappedSecs = map wrapSection $ splitBy LineBreak lst
- wrappedSecs' = foldr (\s rest -> if not (null rest)
- then (s <> text " "):rest
- else s:rest) [] wrappedSecs in
- vcat wrappedSecs'
-
--- | Insert Blank block between key and non-key
-formatKeys :: [Block] -> [Block]
-formatKeys [] = []
-formatKeys [x] = [x]
-formatKeys ((Key x1 y1):(Key x2 y2):rest) =
- (Key x1 y1):(formatKeys ((Key x2 y2):rest))
-formatKeys ((Key x1 y1):rest) = (Key x1 y1):(Plain [Str ""]):(formatKeys rest)
-formatKeys (x:(Key x1 y1):rest) = x:(Plain [Str ""]):(formatKeys ((Key x1 y1):rest))
-formatKeys (x:rest) = x:(formatKeys rest)
-
-- | Convert bibliographic information into Markdown header.
-metaToMarkdown :: Meta -> Doc
-metaToMarkdown (Meta [] [] "") = empty
-metaToMarkdown (Meta title [] "") = (titleToMarkdown title) <> (text "\n")
-metaToMarkdown (Meta title authors "") = (titleToMarkdown title) <>
- (text "\n") <> (authorsToMarkdown authors) <> (text "\n")
-metaToMarkdown (Meta title authors date) = (titleToMarkdown title) <>
- (text "\n") <> (authorsToMarkdown authors) <> (text "\n") <>
- (dateToMarkdown date) <> (text "\n")
+metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc
+metaToMarkdown opts (Meta title authors date) = do
+ title' <- titleToMarkdown opts title
+ authors' <- authorsToMarkdown authors
+ date' <- dateToMarkdown date
+ return $ title' <> authors' <> date'
-titleToMarkdown :: [Inline] -> Doc
-titleToMarkdown lst = text "% " <> (inlineListToMarkdown lst)
+titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+titleToMarkdown opts [] = return empty
+titleToMarkdown opts lst = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "% " <> contents <> text "\n"
-authorsToMarkdown :: [String] -> Doc
-authorsToMarkdown lst =
- text "% " <> text (joinWithSep ", " (map escapeString lst))
+authorsToMarkdown :: [String] -> State WriterState Doc
+authorsToMarkdown [] = return empty
+authorsToMarkdown lst = return $
+ text "% " <> text (joinWithSep ", " (map escapeString lst)) <> text "\n"
-dateToMarkdown :: String -> Doc
-dateToMarkdown str = text "% " <> text (escapeString str)
+dateToMarkdown :: String -> State WriterState Doc
+dateToMarkdown [] = return empty
+dateToMarkdown str = return $ text "% " <> text (escapeString str) <> text "\n"
-- | Convert Pandoc block element to markdown.
-blockToMarkdown :: Int -- ^ Tab stop
- -> Block -- ^ Block element
- -> Doc
-blockToMarkdown tabStop Null = empty
-blockToMarkdown tabStop (Plain lst) = wrappedMarkdown lst
-blockToMarkdown tabStop (Para lst) = (wrappedMarkdown lst) <> (text "\n")
-blockToMarkdown tabStop (BlockQuote lst) =
- (vcat $ map (\line -> (text "> ") <> (text line)) $ lines $ render $ vcat $
- map (blockToMarkdown tabStop) lst) <> (text "\n")
-blockToMarkdown tabStop (Note ref lst) =
- let lns = lines $ render $ vcat $ map (blockToMarkdown tabStop) lst in
- if null lns
- then empty
- else let first = head lns
- rest = tail lns in
- text ("[^" ++ (escapeString ref) ++ "]: ") <> (text first) $$
- (vcat $ map (\line -> (text " ") <> (text line)) rest) <>
- text "\n"
-blockToMarkdown tabStop (Key txt (Src src tit)) =
- text " " <> char '[' <> inlineListToMarkdown txt <> char ']' <>
- text ": " <> text src <>
- if tit /= "" then text (" \"" ++ tit ++ "\"") else empty
-blockToMarkdown tabStop (CodeBlock str) =
- (nest tabStop $ vcat $ map text (lines str)) <> text "\n"
-blockToMarkdown tabStop (RawHtml str) = text str
-blockToMarkdown tabStop (BulletList lst) =
- vcat (map (bulletListItemToMarkdown tabStop) lst) <> text "\n"
-blockToMarkdown tabStop (OrderedList lst) =
- vcat (zipWith (orderedListItemToMarkdown tabStop)
- (enumFromTo 1 (length lst)) lst) <> text "\n"
-blockToMarkdown tabStop HorizontalRule = text "\n* * * * *\n"
-blockToMarkdown tabStop (Header level lst) = text ((replicate level '#') ++
- " ") <> (inlineListToMarkdown lst) <> (text "\n")
-blockToMarkdown tabStop (Table caption _ _ headers rows) =
- blockToMarkdown tabStop (Para [Str "pandoc: TABLE unsupported in Markdown writer"])
-
-
-bulletListItemToMarkdown tabStop list =
- hang (text "- ") tabStop (vcat (map (blockToMarkdown tabStop) list))
+blockToMarkdown :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToMarkdown opts Null = return empty
+blockToMarkdown opts (Plain inlines) = wrappedMarkdown opts inlines
+blockToMarkdown opts (Para inlines) = do
+ contents <- wrappedMarkdown opts inlines
+ return $ contents <> text "\n"
+blockToMarkdown opts (RawHtml str) = return $ text str
+blockToMarkdown opts HorizontalRule = return $ text "\n* * * * *\n"
+blockToMarkdown opts (Header level inlines) = do
+ contents <- inlineListToMarkdown opts inlines
+ return $ text ((replicate level '#') ++ " ") <> contents <> text "\n"
+blockToMarkdown opts (CodeBlock str) = return $
+ (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n"
+blockToMarkdown opts (BlockQuote blocks) = do
+ contents <- blockListToMarkdown opts blocks
+ let quotedContents = unlines $ map ("> " ++) $ lines $ render contents
+ return $ text quotedContents
+blockToMarkdown opts (Table caption _ _ headers rows) = blockToMarkdown opts
+ (Para [Str "pandoc: TABLE unsupported in Markdown writer"])
+blockToMarkdown opts (BulletList items) = do
+ contents <- mapM (bulletListItemToMarkdown opts) items
+ return $ (vcat contents) <> text "\n"
+blockToMarkdown opts (OrderedList items) = do
+ contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
+ zip [1..] items
+ return $ (vcat contents) <> text "\n"
+
+-- | Convert bullet list item (list of blocks) to markdown.
+bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToMarkdown opts items = do
+ contents <- blockListToMarkdown opts items
+ return $ hang (text "- ") (writerTabStop opts) contents
-- | Convert ordered list item (a list of blocks) to markdown.
-orderedListItemToMarkdown :: Int -- ^ tab stop
- -> Int -- ^ ordinal number of list item
- -> [Block] -- ^ list item (list of blocks)
- -> Doc
-orderedListItemToMarkdown tabStop num list =
- hang (text ((show num) ++ "." ++ spacer)) tabStop
- (vcat (map (blockToMarkdown tabStop) list))
- where spacer = if (num < 10) then " " else ""
+orderedListItemToMarkdown :: WriterOptions -- ^ options
+ -> Int -- ^ ordinal number of list item
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToMarkdown opts num items = do
+ contents <- blockListToMarkdown opts items
+ let spacer = if (num < 10) then " " else ""
+ return $ hang (text ((show num) ++ "." ++ spacer)) (writerTabStop opts)
+ contents
+
+-- | Convert list of Pandoc block elements to markdown.
+blockListToMarkdown :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToMarkdown opts blocks =
+ mapM (blockToMarkdown opts) blocks >>= (return . vcat)
+
+-- | Get reference for target; if none exists, create unique one and return.
+-- Prefer label if possible; otherwise, generate a unique key.
+getReference :: [Inline] -> Target -> State WriterState [Inline]
+getReference label (src, tit) = do
+ (_,refs) <- get
+ case find ((== (src, tit)) . snd) refs of
+ Just (ref, _) -> return ref
+ Nothing -> do
+ let label' = case find ((== label) . fst) refs of
+ Just _ -> -- label is used; generate numerical label
+ case find (\n -> not (any (== [Str (show n)])
+ (map fst refs))) [1..10000] of
+ Just x -> [Str (show x)]
+ Nothing -> error "no unique label"
+ Nothing -> label
+ modify (\(notes, refs) -> (notes, (label', (src,tit)):refs))
+ return label'
-- | Convert list of Pandoc inline elements to markdown.
-inlineListToMarkdown :: [Inline] -> Doc
-inlineListToMarkdown lst = hcat $ map inlineToMarkdown lst
+inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToMarkdown opts lst = mapM (inlineToMarkdown opts) lst >>= (return . hcat)
-- | Convert Pandoc inline element to markdown.
-inlineToMarkdown :: Inline -> Doc
-inlineToMarkdown (Emph lst) = text "*" <>
- (inlineListToMarkdown lst) <> text "*"
-inlineToMarkdown (Strong lst) = text "**" <>
- (inlineListToMarkdown lst) <> text "**"
-inlineToMarkdown (Quoted SingleQuote lst) = char '\'' <>
- (inlineListToMarkdown lst) <> char '\''
-inlineToMarkdown (Quoted DoubleQuote lst) = char '"' <>
- (inlineListToMarkdown lst) <> char '"'
-inlineToMarkdown EmDash = text "--"
-inlineToMarkdown EnDash = char '-'
-inlineToMarkdown Apostrophe = char '\''
-inlineToMarkdown Ellipses = text "..."
-inlineToMarkdown (Code str) =
+inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
+inlineToMarkdown opts (Emph lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "*" <> contents <> text "*"
+inlineToMarkdown opts (Strong lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "**" <> contents <> text "**"
+inlineToMarkdown opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ char '\'' <> contents <> char '\''
+inlineToMarkdown opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ char '"' <> contents <> char '"'
+inlineToMarkdown opts EmDash = return $ text "--"
+inlineToMarkdown opts EnDash = return $ char '-'
+inlineToMarkdown opts Apostrophe = return $ char '\''
+inlineToMarkdown opts Ellipses = return $ text "..."
+inlineToMarkdown opts (Code str) =
let tickGroups = filter (\s -> '`' `elem` s) $ group str
longest = if null tickGroups
then 0
else maximum $ map length tickGroups
marker = replicate (longest + 1) '`'
- spacer = if (longest == 0) then "" else " " in
- text (marker ++ spacer ++ str ++ spacer ++ marker)
-inlineToMarkdown (Str str) = text $ escapeString str
-inlineToMarkdown (TeX str) = text str
-inlineToMarkdown (HtmlInline str) = text str
-inlineToMarkdown (LineBreak) = text " \n"
-inlineToMarkdown Space = char ' '
-inlineToMarkdown (Link txt (Src src tit)) =
- let linktext = if (null txt) || (txt == [Str ""])
- then text "link"
- else inlineListToMarkdown txt
- linktitle = if null tit
- then empty
- else text (" \"" ++ tit ++ "\"")
- srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src in
- if (null tit) && (txt == [Str srcSuffix])
- then char '<' <> text srcSuffix <> char '>'
- else char '[' <> linktext <> char ']' <> char '(' <> text src <>
- linktitle <> char ')'
-inlineToMarkdown (Link txt (Ref ref)) =
- let first = char '[' <> inlineListToMarkdown txt <> char ']'
- second = if (txt == ref)
- then text "[]"
- else char '[' <> inlineListToMarkdown ref <> char ']' in
- first <> second
-inlineToMarkdown (Image alternate (Src source tit)) =
- let alt = if (null alternate) || (alternate == [Str ""])
- then text "image"
- else inlineListToMarkdown alternate in
- char '!' <> char '[' <> alt <> char ']' <> char '(' <> text source <>
- (if tit /= ""
- then text (" \"" ++ tit ++ "\"")
- else empty) <> char ')'
-inlineToMarkdown (Image alternate (Ref ref)) =
- char '!' <> inlineToMarkdown (Link alternate (Ref ref))
-inlineToMarkdown (NoteRef ref) =
- text "[^" <> text (escapeString ref) <> char ']'
+ spacer = if (longest == 0) then "" else " " in
+ return $ text (marker ++ spacer ++ str ++ spacer ++ marker)
+inlineToMarkdown opts (Str str) = return $ text $ escapeString str
+inlineToMarkdown opts (TeX str) = return $ text str
+inlineToMarkdown opts (HtmlInline str) = return $ text str
+inlineToMarkdown opts (LineBreak) = return $ text " \n"
+inlineToMarkdown opts Space = return $ char ' '
+inlineToMarkdown opts (Link txt (src, tit)) = do
+ linktext <- inlineListToMarkdown opts txt
+ let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\""
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ let useRefLinks = writerReferenceLinks opts
+ let useAuto = null tit && txt == [Str srcSuffix]
+ ref <- if useRefLinks then getReference txt (src, tit) else return []
+ reftext <- inlineListToMarkdown opts ref
+ return $ if useAuto
+ then char '<' <> text srcSuffix <> char '>'
+ else if useRefLinks
+ then let first = char '[' <> linktext <> char ']'
+ second = if txt == ref
+ then text "[]"
+ else char '[' <> reftext <> char ']'
+ in first <> second
+ else char '[' <> linktext <> char ']' <>
+ char '(' <> text src <> linktitle <> char ')'
+inlineToMarkdown opts (Image alternate (source, tit)) = do
+ let txt = if (null alternate) || (alternate == [Str ""]) ||
+ (alternate == [Str source]) -- to prevent autolinks
+ then [Str "image"]
+ else alternate
+ linkPart <- inlineToMarkdown opts (Link txt (source, tit))
+ return $ char '!' <> linkPart
+inlineToMarkdown opts (Note contents) = do
+ modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state
+ (notes, _) <- get
+ let ref = show $ (length notes)
+ return $ text "[^" <> text ref <> char ']'
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 27d1a596a..a00ab1cc6 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -30,204 +30,245 @@ Conversion of 'Pandoc' documents to reStructuredText.
reStructuredText: <http://docutils.sourceforge.net/rst.html>
-}
module Text.Pandoc.Writers.RST (
- writeRST
- ) where
+ writeRST
+ ) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import List ( nubBy )
+import Text.Pandoc.Shared
+import Data.List ( group, isPrefixOf, drop, find )
import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Control.Monad.State
--- | Convert Pandoc to reStructuredText.
+type Notes = [[Block]]
+type Refs = KeyTable
+type WriterState = (Notes, Refs, Refs) -- first Refs is links, second pictures
+
+-- | Convert Pandoc to RST.
writeRST :: WriterOptions -> Pandoc -> String
-writeRST options (Pandoc meta blocks) =
- let (main, refs) = unzip $ map (blockToRST (writerTabStop options))
- (reformatBlocks $ replaceReferenceLinks blocks)
- top = if (writerStandalone options)
- then (metaToRST meta) $$ text (writerHeader options)
- else empty in
- -- remove duplicate keys
- let refs' = nubBy (\x y -> (render x) == (render y)) refs in
- let body = text (writerIncludeBefore options) <>
- vcat main $$ text (writerIncludeAfter options) in
- render $ top <> body $$ vcat refs' $$ text "\n"
-
--- | Escape special RST characters.
+writeRST opts document =
+ render $ evalState (pandocToRST opts document) ([],[],[])
+
+-- | Return RST representation of document.
+pandocToRST :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToRST opts (Pandoc meta blocks) = do
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ before' = if null before then empty else text before
+ after' = if null after then empty else text after
+ metaBlock <- metaToRST opts meta
+ let head = if (writerStandalone opts)
+ then metaBlock $$ text (writerHeader opts)
+ else empty
+ body <- blockListToRST opts blocks
+ (notes, _, _) <- get
+ notes' <- notesToRST opts (reverse notes)
+ (_, refs, pics) <- get -- note that the notes may contain refs
+ refs' <- keyTableToRST opts (reverse refs)
+ pics' <- pictTableToRST opts (reverse pics)
+ return $ head <> (before' $$ body $$ notes' <> text "\n" $$ refs' $$
+ pics' $$ after')
+
+-- | Return RST representation of reference key table.
+keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
+keyTableToRST opts refs =
+ mapM (keyToRST opts) refs >>= (return . vcat)
+
+-- | Return RST representation of a reference key.
+keyToRST :: WriterOptions
+ -> ([Inline], (String, String))
+ -> State WriterState Doc
+keyToRST opts (label, (src, tit)) = do
+ label' <- inlineListToRST opts label
+ return $ text ".. _" <> label' <> text ": " <> text src
+
+-- | Return RST representation of notes.
+notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToRST opts notes =
+ mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>=
+ (return . vcat)
+
+-- | Return RST representation of a note.
+noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToRST opts num note = do
+ contents <- blockListToRST opts note
+ let marker = text ".. [" <> text (show num) <> text "] "
+ return $ hang marker 3 contents
+
+-- | Return RST representation of picture reference table.
+pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
+pictTableToRST opts refs =
+ mapM (pictToRST opts) refs >>= (return . vcat)
+
+-- | Return RST representation of a picture substitution reference.
+pictToRST :: WriterOptions
+ -> ([Inline], (String, String))
+ -> State WriterState Doc
+pictToRST opts (label, (src, _)) = do
+ label' <- inlineListToRST opts label
+ return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <>
+ text src
+
+-- | Take list of inline elements and return wrapped doc.
+wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc
+wrappedRST opts inlines =
+ mapM (wrappedRSTSection opts) (splitBy LineBreak inlines) >>=
+ (return . vcat)
+
+wrappedRSTSection :: WriterOptions -> [Inline] -> State WriterState Doc
+wrappedRSTSection opts sect = do
+ let chunks = splitBy Space sect
+ chunks' <- mapM (inlineListToRST opts) chunks
+ return $ fsep chunks'
+
+-- | Escape special characters for RST.
escapeString :: String -> String
escapeString = backslashEscape "`\\|*_"
--- | Convert list of inline elements into one 'Doc' of wrapped text
--- and another containing references.
-wrappedRST :: [Inline] -> (Doc, Doc)
-wrappedRST lst =
- let wrap_section sec = fsep $ map (fst . inlineListToRST) $
- (splitBy Space sec) in
- ((vcat $ map wrap_section $ (splitBy LineBreak lst)),
- vcat $ map (snd . inlineToRST) lst)
-
--- | Remove reference keys, and make sure there are blanks before each list.
-reformatBlocks :: [Block] -> [Block]
-reformatBlocks [] = []
-reformatBlocks ((Plain x):(OrderedList y):rest) =
- (Para x):(reformatBlocks ((OrderedList y):rest))
-reformatBlocks ((Plain x):(BulletList y):rest) =
- (Para x):(reformatBlocks ((BulletList y):rest))
-reformatBlocks ((OrderedList x):rest) =
- (OrderedList (map reformatBlocks x)):(reformatBlocks rest)
-reformatBlocks ((BulletList x):rest) =
- (BulletList (map reformatBlocks x)):(reformatBlocks rest)
-reformatBlocks ((BlockQuote x):rest) =
- (BlockQuote (reformatBlocks x)):(reformatBlocks rest)
-reformatBlocks ((Note ref x):rest) =
- (Note ref (reformatBlocks x)):(reformatBlocks rest)
-reformatBlocks ((Key x1 y1):rest) = reformatBlocks rest
-reformatBlocks (x:rest) = x:(reformatBlocks rest)
-
--- | Convert bibliographic information to 'Doc'.
-metaToRST :: Meta -> Doc
-metaToRST (Meta title authors date) =
- (titleToRST title) <> (authorsToRST authors) <> (dateToRST date)
-
--- | Convert title to 'Doc'.
-titleToRST :: [Inline] -> Doc
-titleToRST [] = empty
-titleToRST lst =
- let title = fst $ inlineListToRST lst in
- let titleLength = length $ render title in
- let border = text (replicate titleLength '=') in
- border <> char '\n' <> title <> char '\n' <> border <> text "\n\n"
-
--- | Convert author list to 'Doc'.
-authorsToRST :: [String] -> Doc
-authorsToRST [] = empty
-authorsToRST (first:rest) = text ":Author: " <> text first <>
- char '\n' <> (authorsToRST rest)
-
--- | Convert date to 'Doc'.
-dateToRST :: String -> Doc
-dateToRST [] = empty
-dateToRST str = text ":Date: " <> text (escapeString str) <> char '\n'
-
--- | Convert Pandoc block element to a 'Doc' containing the main text and
--- another one containing any references.
-blockToRST :: Int -- ^ tab stop
- -> Block -- ^ block element to convert
- -> (Doc, Doc) -- ^ first element is text, second is references for end of file
-blockToRST tabStop Null = (empty, empty)
-blockToRST tabStop (Plain lst) = wrappedRST lst
-blockToRST tabStop (Para [TeX str]) = -- raw latex block
+-- | Convert bibliographic information into RST header.
+metaToRST :: WriterOptions -> Meta -> State WriterState Doc
+metaToRST opts (Meta title authors date) = do
+ title' <- titleToRST opts title
+ authors' <- authorsToRST authors
+ date' <- dateToRST date
+ return $ title' <> authors' <> date'
+
+titleToRST :: WriterOptions -> [Inline] -> State WriterState Doc
+titleToRST opts [] = return empty
+titleToRST opts lst = do
+ contents <- inlineListToRST opts lst
+ let titleLength = length $ render contents
+ let border = text (replicate titleLength '=')
+ return $ border <> char '\n' <> contents <> char '\n' <> border <> text "\n\n"
+
+authorsToRST :: [String] -> State WriterState Doc
+authorsToRST [] = return empty
+authorsToRST (first:rest) = do
+ rest' <- authorsToRST rest
+ return $ text ":Author: " <> text first <> char '\n' <> rest'
+
+dateToRST :: String -> State WriterState Doc
+dateToRST [] = return empty
+dateToRST str = return $ text ":Date: " <> text (escapeString str) <> char '\n'
+
+-- | Convert Pandoc block element to RST.
+blockToRST :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToRST opts Null = return empty
+blockToRST opts (Plain inlines) = wrappedRST opts inlines
+blockToRST opts (Para [TeX str]) =
let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in
- (hang (text "\n.. raw:: latex\n") 3 (vcat $ map text (lines str')), empty)
-blockToRST tabStop (Para lst) = ( (fst $ wrappedRST lst) <> (text "\n"),
- snd $ wrappedRST lst )
-blockToRST tabStop (BlockQuote lst) =
- let (main, refs) = unzip $ map (blockToRST tabStop) lst in
- ((nest tabStop $ vcat $ main) <> text "\n", vcat refs)
-blockToRST tabStop (Note ref blocks) =
- let (main, refs) = unzip $ map (blockToRST tabStop) blocks in
- ((hang (text ".. [" <> text (escapeString ref) <> text "] ") 3 (vcat main)),
- vcat refs)
-blockToRST tabStop (Key txt (Src src tit)) =
- (text "ERROR - KEY FOUND", empty) -- shouldn't have a key here
-blockToRST tabStop (CodeBlock str) = (hang (text "::\n") tabStop
- (vcat $ map text (lines ('\n':(str ++ "\n\n")))), empty)
-blockToRST tabStop (RawHtml str) =
+ return $ hang (text "\n.. raw:: latex\n") 3 (vcat $ map text (lines str'))
+blockToRST opts (Para inlines) = do
+ contents <- wrappedRST opts inlines
+ return $ contents <> text "\n"
+blockToRST opts (RawHtml str) =
let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in
- (hang (text "\n.. raw:: html\n") 3 (vcat $ map text (lines str')), empty)
-blockToRST tabStop (BulletList lst) =
- let (main, refs) = unzip $ map (bulletListItemToRST tabStop) lst in
- (vcat main <> text "\n", vcat refs)
-blockToRST tabStop (OrderedList lst) =
- let (main, refs) = unzip $ zipWith (orderedListItemToRST tabStop)
- (enumFromTo 1 (length lst)) lst in
- (vcat main <> text "\n", vcat refs)
-blockToRST tabStop HorizontalRule = (text "--------------\n", empty)
-blockToRST tabStop (Header level lst) =
- let (headerText, refs) = inlineListToRST lst in
- let headerLength = length $ render headerText in
- let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1) in
- let border = text $ replicate headerLength headerChar in
- (headerText <> char '\n' <> border <> char '\n', refs)
-blockToRST tabStop (Table caption _ _ headers rows) =
- blockToRST tabStop (Para [Str "pandoc: TABLE unsupported in RST writer"])
-
-
--- | Convert bullet list item (list of blocks) to reStructuredText.
--- Returns a pair of 'Doc', the first the main text, the second references
-bulletListItemToRST :: Int -- ^ tab stop
- -> [Block] -- ^ list item (list of blocks)
- -> (Doc, Doc)
-bulletListItemToRST tabStop list =
- let (main, refs) = unzip $ map (blockToRST tabStop) list in
- (hang (text "- ") tabStop (vcat main), (vcat refs))
-
--- | Convert an ordered list item (list of blocks) to reStructuredText.
--- Returns a pair of 'Doc', the first the main text, the second references
-orderedListItemToRST :: Int -- ^ tab stop
- -> Int -- ^ ordinal number of list item
- -> [Block] -- ^ list item (list of blocks)
- -> (Doc, Doc)
-orderedListItemToRST tabStop num list =
- let (main, refs) = unzip $ map (blockToRST tabStop) list
- spacer = if (length (show num) < 2) then " " else "" in
- (hang (text ((show num) ++ "." ++ spacer)) tabStop (vcat main), (vcat refs))
-
--- | Convert a list of inline elements to reStructuredText.
--- Returns a pair of 'Doc', the first the main text, the second references.
-inlineListToRST :: [Inline] -> (Doc, Doc)
-inlineListToRST lst = let (main, refs) = unzip $ map inlineToRST lst in
- (hcat main, hcat refs)
-
--- | Convert an inline element to reStructuredText.
--- Returns a pair of 'Doc', the first the main text, the second references.
-inlineToRST :: Inline -> (Doc, Doc) -- second Doc is list of refs for end of file
-inlineToRST (Emph lst) = let (main, refs) = inlineListToRST lst in
- (text "*" <> main <> text "*", refs)
-inlineToRST (Strong lst) = let (main, refs) = inlineListToRST lst in
- (text "**" <> main <> text "**", refs)
-inlineToRST (Quoted SingleQuote lst) = let (main, refs) = inlineListToRST lst in
- (char '\'' <> main <> char '\'', refs)
-inlineToRST (Quoted DoubleQuote lst) = let (main, refs) = inlineListToRST lst in
- (char '"' <> main <> char '"', refs)
-inlineToRST EmDash = (text "--", empty)
-inlineToRST EnDash = (char '-', empty)
-inlineToRST Apostrophe = (char '\'', empty)
-inlineToRST Ellipses = (text "...", empty)
-inlineToRST (Code str) = (text $ "``" ++ str ++ "``", empty)
-inlineToRST (Str str) = (text $ escapeString str, empty)
-inlineToRST (TeX str) = (text str, empty)
-inlineToRST (HtmlInline str) = (empty, empty)
-inlineToRST (LineBreak) = inlineToRST Space -- RST doesn't have line breaks
-inlineToRST Space = (char ' ', empty)
---
--- Note: can assume reference links have been replaced where possible
--- with explicit links.
---
-inlineToRST (Link txt (Src src tit)) =
- let (linktext, ref') = if (null txt) || (txt == [Str ""])
- then (text "link", empty)
- else inlineListToRST $ normalizeSpaces txt in
- let link = char '`' <> linktext <> text "`_"
- linktext' = render linktext in
- let linktext'' = if (':' `elem` linktext')
- then "`" ++ linktext' ++ "`"
- else linktext' in
- let ref = text ".. _" <> text linktext'' <> text ": " <> text src in
- (link, ref' $$ ref)
-inlineToRST (Link txt (Ref ref)) =
- let (linktext, refs1) = inlineListToRST txt
- (reftext, refs2) = inlineListToRST ref in
- (char '[' <> linktext <> text "][" <> reftext <> char ']', refs1 $$ refs2)
-inlineToRST (Image alternate (Src source tit)) =
- let (alt, ref') = if (null alternate) || (alternate == [Str ""])
- then (text "image", empty)
- else inlineListToRST $ normalizeSpaces alternate in
- let link = char '|' <> alt <> char '|' in
- let ref = text ".. " <> link <> text " image:: " <> text source in
- (link, ref' $$ ref)
--- The following case won't normally occur...
-inlineToRST (Image alternate (Ref ref)) =
- let (alttext, refs1) = inlineListToRST alternate
- (reftext, refs2) = inlineListToRST ref in
- (char '|' <> alttext <> char '|', refs1 $$ refs2)
-inlineToRST (NoteRef ref) =
- (text " [" <> text (escapeString ref) <> char ']' <> char '_', empty)
+ return $ hang (text "\n.. raw:: html\n") 3 (vcat $ map text (lines str'))
+blockToRST opts HorizontalRule = return $ text "--------------\n"
+blockToRST opts (Header level inlines) = do
+ contents <- inlineListToRST opts inlines
+ let headerLength = length $ render contents
+ let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1)
+ let border = text $ replicate headerLength headerChar
+ return $ contents <> char '\n' <> border <> char '\n'
+blockToRST opts (CodeBlock str) = return $ (text "::\n") $$ text "" $$
+ (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n"
+blockToRST opts (BlockQuote blocks) = do
+ contents <- blockListToRST opts blocks
+ return $ (nest (writerTabStop opts) contents) <> text "\n"
+blockToRST opts (Table caption _ _ headers rows) = blockToRST opts
+ (Para [Str "pandoc: TABLE unsupported in RST writer"])
+blockToRST opts (BulletList items) = do
+ contents <- mapM (bulletListItemToRST opts) items
+ return $ (vcat contents) <> text "\n"
+blockToRST opts (OrderedList items) = do
+ contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $
+ zip [1..] items
+ return $ (vcat contents) <> text "\n"
+
+-- | Convert bullet list item (list of blocks) to RST.
+bulletListItemToRST :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToRST opts items = do
+ contents <- blockListToRST opts items
+ return $ hang (text "- ") (writerTabStop opts) contents
+
+-- | Convert ordered list item (a list of blocks) to RST.
+orderedListItemToRST :: WriterOptions -- ^ options
+ -> Int -- ^ ordinal number of list item
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToRST opts num items = do
+ contents <- blockListToRST opts items
+ let spacer = if (num < 10) then " " else ""
+ return $ hang (text ((show num) ++ "." ++ spacer)) (writerTabStop opts)
+ contents
+
+-- | Convert list of Pandoc block elements to RST.
+blockListToRST :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToRST opts blocks =
+ mapM (blockToRST opts) blocks >>= (return . vcat)
+
+-- | Convert list of Pandoc inline elements to RST.
+inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= (return . hcat)
+
+-- | Convert Pandoc inline element to RST.
+inlineToRST :: WriterOptions -> Inline -> State WriterState Doc
+inlineToRST opts (Emph lst) = do
+ contents <- inlineListToRST opts lst
+ return $ text "*" <> contents <> text "*"
+inlineToRST opts (Strong lst) = do
+ contents <- inlineListToRST opts lst
+ return $ text "**" <> contents <> text "**"
+inlineToRST opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToRST opts lst
+ return $ char '\'' <> contents <> char '\''
+inlineToRST opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToRST opts lst
+ return $ char '"' <> contents <> char '"'
+inlineToRST opts EmDash = return $ text "--"
+inlineToRST opts EnDash = return $ char '-'
+inlineToRST opts Apostrophe = return $ char '\''
+inlineToRST opts Ellipses = return $ text "..."
+inlineToRST opts (Code str) = return $ text $ "``" ++ str ++ "``"
+inlineToRST opts (Str str) = return $ text $ escapeString str
+inlineToRST opts (TeX str) = return $ text str
+inlineToRST opts (HtmlInline str) = return empty
+inlineToRST opts (LineBreak) = return $ text " " -- RST doesn't have linebreaks
+inlineToRST opts Space = return $ char ' '
+inlineToRST opts (Link txt (src, tit)) = do
+ let useReferenceLinks = writerReferenceLinks opts
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ let useAuto = null tit && txt == [Str srcSuffix]
+ (notes, refs, pics) <- get
+ linktext <- inlineListToRST opts $ normalizeSpaces txt
+ link <- if useReferenceLinks
+ then do let refs' = if (txt, (src, tit)) `elem` refs
+ then refs
+ else (txt, (src, tit)):refs
+ put (notes, refs', pics)
+ return $ char '`' <> linktext <> text "`_"
+ else return $ char '`' <> linktext <> text " <" <>
+ text src <> text ">`_"
+ return link
+inlineToRST opts (Image alternate (source, tit)) = do
+ (notes, refs, pics) <- get
+ let labelsUsed = map fst pics
+ let txt = if (null alternate) || (alternate == [Str ""]) ||
+ (alternate `elem` labelsUsed)
+ then [Str $ "image" ++ show (length refs)]
+ else alternate
+ let pics' = if (txt, (source, tit)) `elem` pics
+ then pics
+ else (txt, (source, tit)):pics
+ put (notes, refs, pics')
+ label <- inlineListToRST opts txt
+ return $ char '|' <> label <> char '|'
+inlineToRST opts (Note contents) = do
+ -- add to notes in state
+ modify (\(notes, refs, pics) -> (contents:notes, refs, pics))
+ (notes, _, _) <- get
+ let ref = show $ (length notes)
+ return $ text " [" <> text ref <> text "]_"
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 2dddb857b..769ceeaf5 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -17,7 +17,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module :
+ Module : Text.Pandoc.Writers.RTF
Copyright : Copyright (C) 2006 John MacFarlane
License : GNU GPL, version 2 or above
@@ -27,26 +27,21 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to RTF (rich text format).
-}
-module Text.Pandoc.Writers.RTF (
- writeRTF
- ) where
+module Text.Pandoc.Writers.RTF ( writeRTF) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Regex ( matchRegexAll, mkRegex )
-import List ( isSuffixOf )
-import Char ( ord, chr )
+import Data.List ( isSuffixOf )
+import Data.Char ( ord, chr )
-- | Convert Pandoc to a string in rich text format.
writeRTF :: WriterOptions -> Pandoc -> String
writeRTF options (Pandoc meta blocks) =
- -- assumes all notes are at outer level
- let notes = filter isNoteBlock blocks in
let head = if writerStandalone options
- then rtfHeader notes (writerHeader options) meta
+ then rtfHeader (writerHeader options) meta
else ""
foot = if writerStandalone options then "\n}\n" else ""
- body = (writerIncludeBefore options) ++ (concatMap (blockToRTF notes 0)
- (replaceReferenceLinks blocks)) ++
+ body = (writerIncludeBefore options) ++ concatMap (blockToRTF 0) blocks ++
(writerIncludeAfter options) in
head ++ body ++ foot
@@ -120,15 +115,14 @@ orderedMarkers indent =
otherwise -> map (\x -> show x ++ ".") $ cycle ['a'..'z']
-- | Returns RTF header.
-rtfHeader :: [Block] -- ^ list of note blocks
- -> String -- ^ header text
+rtfHeader :: String -- ^ header text
-> Meta -- ^ bibliographic information
-> String
-rtfHeader notes headerText (Meta title authors date) =
+rtfHeader headerText (Meta title authors date) =
let titletext = if null title
then ""
else rtfPar 0 0 ("\\qc \\b \\fs36 " ++
- inlineListToRTF notes title)
+ inlineListToRTF title)
authorstext = if null authors
then ""
else rtfPar 0 0 ("\\qc " ++ (joinWithSep "\\"
@@ -142,35 +136,32 @@ rtfHeader notes headerText (Meta title authors date) =
headerText ++ titletext ++ authorstext ++ datetext ++ spacer
-- | Convert Pandoc block element to RTF.
-blockToRTF :: [Block] -- ^ list of note blocks
- -> Int -- ^ indent level
+blockToRTF :: Int -- ^ indent level
-> Block -- ^ block to convert
-> String
-blockToRTF notes indent Null = ""
-blockToRTF notes indent (Plain lst) =
- rtfCompact indent 0 (inlineListToRTF notes lst)
-blockToRTF notes indent (Para lst) =
- rtfPar indent 0 (inlineListToRTF notes lst)
-blockToRTF notes indent (BlockQuote lst) =
- concatMap (blockToRTF notes (indent + indentIncrement)) lst
-blockToRTF notes indent (Note ref lst) = "" -- shouldn't be any aftr filtering
-blockToRTF notes indent (Key _ _) = ""
-blockToRTF notes indent (CodeBlock str) =
+blockToRTF indent Null = ""
+blockToRTF indent (Plain lst) =
+ rtfCompact indent 0 (inlineListToRTF lst)
+blockToRTF indent (Para lst) =
+ rtfPar indent 0 (inlineListToRTF lst)
+blockToRTF indent (BlockQuote lst) =
+ concatMap (blockToRTF (indent + indentIncrement)) lst
+blockToRTF indent (CodeBlock str) =
rtfPar indent 0 ("\\f1 " ++ (codeStringToRTF str))
-blockToRTF notes indent (RawHtml str) = ""
-blockToRTF notes indent (BulletList lst) =
+blockToRTF indent (RawHtml str) = ""
+blockToRTF indent (BulletList lst) =
spaceAtEnd $
- concatMap (listItemToRTF notes indent (bulletMarker indent)) lst
-blockToRTF notes indent (OrderedList lst) =
+ concatMap (listItemToRTF indent (bulletMarker indent)) lst
+blockToRTF indent (OrderedList lst) =
spaceAtEnd $ concat $
- zipWith (listItemToRTF notes indent) (orderedMarkers indent) lst
-blockToRTF notes indent HorizontalRule =
+ zipWith (listItemToRTF indent) (orderedMarkers indent) lst
+blockToRTF indent HorizontalRule =
rtfPar indent 0 "\\qc \\emdash\\emdash\\emdash\\emdash\\emdash"
-blockToRTF notes indent (Header level lst) =
+blockToRTF indent (Header level lst) =
rtfPar indent 0 ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++
- (inlineListToRTF notes lst))
-blockToRTF notes indent (Table caption _ _ headers rows) =
- blockToRTF notes indent (Para [Str "pandoc: TABLE unsupported in RST writer"])
+ (inlineListToRTF lst))
+blockToRTF indent (Table caption _ _ headers rows) =
+ blockToRTF indent (Para [Str "pandoc: TABLE unsupported in RST writer"])
-- | Ensure that there's the same amount of space after compact
-- lists as after regular lists.
@@ -181,16 +172,15 @@ spaceAtEnd str =
else str
-- | Convert list item (list of blocks) to RTF.
-listItemToRTF :: [Block] -- ^ list of note blocks
- -> Int -- ^ indent level
+listItemToRTF :: Int -- ^ indent level
-> String -- ^ list start marker
-> [Block] -- ^ list item (list of blocks)
-> [Char]
-listItemToRTF notes indent marker [] =
+listItemToRTF indent marker [] =
rtfCompact (indent + listIncrement) (0 - listIncrement)
(marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")
-listItemToRTF notes indent marker list =
- let (first:rest) = map (blockToRTF notes (indent + listIncrement)) list in
+listItemToRTF indent marker list =
+ let (first:rest) = map (blockToRTF (indent + listIncrement)) list in
-- insert the list marker into the (processed) first block
let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of
Just (before, matched, after, _) -> before ++ "\\fi" ++
@@ -200,47 +190,36 @@ listItemToRTF notes indent marker list =
modFirst ++ (concat rest)
-- | Convert list of inline items to RTF.
-inlineListToRTF :: [Block] -- ^ list of note blocks
- -> [Inline] -- ^ list of inlines to convert
+inlineListToRTF :: [Inline] -- ^ list of inlines to convert
-> String
-inlineListToRTF notes lst = concatMap (inlineToRTF notes) lst
+inlineListToRTF lst = concatMap inlineToRTF lst
-- | Convert inline item to RTF.
-inlineToRTF :: [Block] -- ^ list of note blocks
- -> Inline -- ^ inline to convert
+inlineToRTF :: Inline -- ^ inline to convert
-> String
-inlineToRTF notes (Emph lst) = "{\\i " ++ (inlineListToRTF notes lst) ++ "} "
-inlineToRTF notes (Strong lst) =
- "{\\b " ++ (inlineListToRTF notes lst) ++ "} "
-inlineToRTF notes (Quoted SingleQuote lst) =
- "\\u8216'" ++ (inlineListToRTF notes lst) ++ "\\u8217'"
-inlineToRTF notes (Quoted DoubleQuote lst) =
- "\\u8220\"" ++ (inlineListToRTF notes lst) ++ "\\u8221\""
-inlineToRTF notes Apostrophe = "\\u8217'"
-inlineToRTF notes Ellipses = "\\u8230?"
-inlineToRTF notes EmDash = "\\u8212-"
-inlineToRTF notes EnDash = "\\u8211-"
-inlineToRTF notes (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} "
-inlineToRTF notes (Str str) = stringToRTF str
-inlineToRTF notes (TeX str) = latexToRTF str
-inlineToRTF notes (HtmlInline str) = ""
-inlineToRTF notes (LineBreak) = "\\line "
-inlineToRTF notes Space = " "
-inlineToRTF notes (Link text (Src src tit)) =
+inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Strong lst) =
+ "{\\b " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Quoted SingleQuote lst) =
+ "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'"
+inlineToRTF (Quoted DoubleQuote lst) =
+ "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\""
+inlineToRTF Apostrophe = "\\u8217'"
+inlineToRTF Ellipses = "\\u8230?"
+inlineToRTF EmDash = "\\u8212-"
+inlineToRTF EnDash = "\\u8211-"
+inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} "
+inlineToRTF (Str str) = stringToRTF str
+inlineToRTF (TeX str) = latexToRTF str
+inlineToRTF (HtmlInline str) = ""
+inlineToRTF (LineBreak) = "\\line "
+inlineToRTF Space = " "
+inlineToRTF (Link text (src, tit)) =
"{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++
- "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF notes text) ++ "\n}}}\n"
-inlineToRTF notes (Link text (Ref ref)) =
- "[" ++ (inlineListToRTF notes text) ++ "][" ++
- (inlineListToRTF notes ref) ++ "]" -- this is what markdown does
-inlineToRTF notes (Image alternate (Src source tit)) =
+ "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n"
+inlineToRTF (Image alternate (source, tit)) =
"{\\cf1 [image: " ++ source ++ "]\\cf0}"
-inlineToRTF notes (Image alternate (Ref ref)) = "![" ++
- (inlineListToRTF notes alternate) ++ "][" ++
- (inlineListToRTF notes ref) ++ "]"
-inlineToRTF [] (NoteRef ref) = ""
-inlineToRTF ((Note firstref firstblocks):rest) (NoteRef ref) =
- if firstref == ref
- then "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
- (concatMap (blockToRTF rest 0) firstblocks) ++ "}"
- else inlineToRTF rest (NoteRef ref)
+inlineToRTF (Note contents) =
+ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
+ (concatMap (blockToRTF 0) contents) ++ "}"
diff --git a/src/Text/ParserCombinators/Pandoc.hs b/src/Text/ParserCombinators/Pandoc.hs
index 5b1742975..a965159ed 100644
--- a/src/Text/ParserCombinators/Pandoc.hs
+++ b/src/Text/ParserCombinators/Pandoc.hs
@@ -40,12 +40,13 @@ module Text.ParserCombinators.Pandoc (
enclosed,
nullBlock,
stringAnyCase,
- parseFromStr
+ parseFromStr,
+ lineClump
) where
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Char ( toUpper, toLower )
+import Data.Char ( toUpper, toLower )
--- | Parse any line of text
anyLine :: GenParser Char st [Char]
@@ -132,4 +133,11 @@ parseFromStr parser str = try $ do
setInput oldInput
return result
+-- | Parse raw line block up to and including blank lines.
+lineClump :: GenParser Char st String
+lineClump = do
+ lines <- many1 (do{notFollowedBy blankline; anyLine})
+ blanks <- blanklines <|> (do{eof; return "\n"})
+ return ((unlines lines) ++ blanks)
+