diff options
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Definition.hs | 18 | ||||
-rw-r--r-- | src/Text/Pandoc/Entities.hs | 30 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 19 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 23 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 217 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 130 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 227 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 95 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 289 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 133 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 348 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 425 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RTF.hs | 137 | ||||
-rw-r--r-- | src/Text/ParserCombinators/Pandoc.hs | 12 |
14 files changed, 1006 insertions, 1097 deletions
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs index 5eec6bafe..2408cbaac 100644 --- a/src/Text/Pandoc/Definition.hs +++ b/src/Text/Pandoc/Definition.hs @@ -50,7 +50,6 @@ data Block = Plain [Inline] -- ^ Plain text, not a paragraph | Null -- ^ Nothing | Para [Inline] -- ^ Paragraph - | Key [Inline] Target -- ^ Reference key: name (inlines) and 'Target' | CodeBlock String -- ^ Code block (literal) | RawHtml String -- ^ Raw HTML block (literal) | BlockQuote [Block] -- ^ Block quote (list of blocks) @@ -63,24 +62,18 @@ data Block -- the term, and a block list) | Header Int [Inline] -- ^ Header - level (integer) and text (inlines) | HorizontalRule -- ^ Horizontal rule - | Note String [Block] -- ^ Footnote or endnote - reference (string), - -- text (list of blocks) | Table [Inline] [Alignment] [Float] [[Block]] [[[Block]]] -- ^ Table, -- with caption, column alignments, -- relative column widths, column headers -- (each a list of blocks), and rows -- (each a list of lists of blocks) deriving (Eq, Read, Show) - --- | Target for a link: either a URL or an indirect (labeled) reference. -data Target - = Src String String -- ^ First string is URL, second is title - | Ref [Inline] -- ^ Label (list of inlines) for an indirect ref - deriving (Show, Eq, Read) -- | Type of quotation marks to use in Quoted inline. data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read) +type Target = (String, String) -- ^ Link target (URL, title) + -- | Inline elements. data Inline = Str String -- ^ Text (string) @@ -96,8 +89,9 @@ data Inline | LineBreak -- ^ Hard line break | TeX String -- ^ LaTeX code (literal) | HtmlInline String -- ^ HTML code (literal) - | Link [Inline] Target -- ^ Hyperlink: text (list of inlines) and target - | Image [Inline] Target -- ^ Image: alternative text (list of inlines) + | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target + | Image [Inline] Target -- ^ Image: alt text (list of inlines), target -- and target - | NoteRef String -- ^ Footnote or endnote reference + | Note [Block] -- ^ Footnote or endnote - reference (string), + -- text (list of blocks) deriving (Show, Eq, Read) diff --git a/src/Text/Pandoc/Entities.hs b/src/Text/Pandoc/Entities.hs index eaa1cd158..e700398b1 100644 --- a/src/Text/Pandoc/Entities.hs +++ b/src/Text/Pandoc/Entities.hs @@ -32,8 +32,8 @@ module Text.Pandoc.Entities ( charToEntity, charToNumericalEntity, decodeEntities, - escapeSGMLChar, - escapeSGMLString, + escapeCharForXML, + escapeStringForXML, characterEntity ) where import Data.Char ( chr, ord ) @@ -49,11 +49,11 @@ charToEntity char = Map.findWithDefault (charToNumericalEntity char) char revers charToNumericalEntity :: Char -> String charToNumericalEntity ch = "&#" ++ show (ord ch) ++ ";" --- | Parse SGML character entity. +-- | Parse character entity. characterEntity :: GenParser Char st Char -characterEntity = namedEntity <|> hexEntity <|> decimalEntity <?> "SGML entity" +characterEntity = namedEntity <|> hexEntity <|> decimalEntity <?> "character entity" --- | Parse SGML character entity. +-- | Parse character entity. namedEntity :: GenParser Char st Char namedEntity = try $ do st <- char '&' @@ -62,7 +62,7 @@ namedEntity = try $ do let entity = "&" ++ body ++ ";" return $ Map.findWithDefault '?' entity entityTable --- | Parse SGML hexadecimal entity. +-- | Parse hexadecimal entity. hexEntity :: GenParser Char st Char hexEntity = try $ do st <- string "&#" @@ -71,7 +71,7 @@ hexEntity = try $ do end <- char ';' return $ chr $ read ('0':'x':body) --- | Parse SGML decimal entity. +-- | Parse decimal entity. decimalEntity :: GenParser Char st Char decimalEntity = try $ do st <- string "&#" @@ -79,9 +79,9 @@ decimalEntity = try $ do end <- char ';' return $ chr $ read body --- | Escape one character as needed for SGML. -escapeSGMLChar :: Char -> String -escapeSGMLChar x = +-- | Escape one character as needed for XML. +escapeCharForXML :: Char -> String +escapeCharForXML x = case x of '&' -> "&" '<' -> "<" @@ -94,13 +94,13 @@ escapeSGMLChar x = needsEscaping :: Char -> Bool needsEscaping c = c `elem` "&<>\"\160" --- | Escape string as needed for SGML. Entity references are not preserved. -escapeSGMLString :: String -> String -escapeSGMLString "" = "" -escapeSGMLString str = +-- | Escape string as needed for XML. Entity references are not preserved. +escapeStringForXML :: String -> String +escapeStringForXML "" = "" +escapeStringForXML str = case break needsEscaping str of (okay, "") -> okay - (okay, (c:cs)) -> okay ++ escapeSGMLChar c ++ escapeSGMLString cs + (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs -- | Convert entities in a string to characters. decodeEntities :: String -> String diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 96244e58f..803fc91c5 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -45,7 +45,7 @@ import Text.ParserCombinators.Pandoc import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Entities ( characterEntity, decodeEntities ) -import Maybe ( fromMaybe ) +import Data.Maybe ( fromMaybe ) import Data.List ( intersect, takeWhile, dropWhile ) import Data.Char ( toUpper, toLower, isAlphaNum ) @@ -267,9 +267,7 @@ parseHtml = do option "" (htmlEndTag "html") many anyChar -- ignore anything after </html> eof - state <- getState - let keyBlocks = stateKeyBlocks state - return (Pandoc (Meta title authors date) (blocks ++ (reverse keyBlocks))) + return (Pandoc (Meta title authors date) blocks) -- -- parsing blocks @@ -456,11 +454,7 @@ link = try $ do Nothing -> fail "no href" let title = fromMaybe "" (extractAttribute "title" attributes) label <- inlinesTilEnd "a" - state <- getState - ref <- if stateInlineLinks state - then return (Src url title) - else generateReference url title - return $ Link (normalizeSpaces label) ref + return $ Link (normalizeSpaces label) (url, title) image = try $ do (tag, attributes) <- htmlTag "img" @@ -469,8 +463,5 @@ image = try $ do Nothing -> fail "no src" let title = fromMaybe "" (extractAttribute "title" attributes) let alt = fromMaybe "" (extractAttribute "alt" attributes) - state <- getState - ref <- if stateInlineLinks state - then return (Src url title) - else generateReference url title - return $ Image [Str alt] ref + return $ Image [Str alt] (url, title) + diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 33c4a75ee..b0062ceff 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -37,8 +37,8 @@ import Text.ParserCombinators.Parsec import Text.ParserCombinators.Pandoc import Text.Pandoc.Definition import Text.Pandoc.Shared -import Maybe ( fromMaybe ) -import Char ( chr ) +import Data.Maybe ( fromMaybe ) +import Data.Char ( chr ) -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: ParserState -- ^ Parser state, including options for parser @@ -135,14 +135,11 @@ parseLaTeX = do spaces eof state <- getState - let keyBlocks = stateKeyBlocks state - let noteBlocks = stateNoteBlocks state let blocks' = filter (/= Null) blocks let title' = stateTitle state let authors' = stateAuthors state let date' = stateDate state - return (Pandoc (Meta title' authors' date') - (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks))) + return (Pandoc (Meta title' authors' date') blocks') -- -- parsing blocks @@ -618,15 +615,15 @@ link = try (do url <- manyTill anyChar (char '}') char '{' label <- manyTill inline (char '}') - return (Link (normalizeSpaces label) (Src url ""))) + return (Link (normalizeSpaces label) (url, ""))) image = try (do ("includegraphics", _, args) <- command let args' = filter isArg args -- filter out options let src = if null args' then - Src "" "" + ("", "") else - Src (stripFirstAndLast (head args')) "" + (stripFirstAndLast (head args'), "") return (Image [Str "image"] src)) footnote = try (do @@ -640,13 +637,7 @@ footnote = try (do setInput $ contents' blocks <- parseBlocks setInput rest - state <- getState - let notes = stateNoteBlocks state - let nextRef = case notes of - [] -> "1" - (Note ref body):rest -> (show ((read ref) + 1)) - setState (state { stateNoteBlocks = (Note nextRef blocks):notes }) - return (NoteRef nextRef)) + return (Note blocks)) -- | Parse any LaTeX command and return it in a raw TeX inline element. rawLaTeXInline :: GenParser Char ParserState Inline diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a2e84e8c2..353dd45dd 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect ) +import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect, lookup ) import Data.Char ( isAlphaNum ) import Text.ParserCombinators.Pandoc import Text.Pandoc.Definition @@ -160,28 +160,72 @@ titleBlock = try (do option "" blanklines return (title, author, date)) --- | Returns the number assigned to a Note block -numberOfNote :: Block -> Int -numberOfNote (Note ref _) = (read ref) -numberOfNote _ = 0 - parseMarkdown = do - updateState (\state -> state { stateParseRaw = True }) - -- need to parse raw HTML, since markdown allows it + updateState (\state -> state { stateParseRaw = True }) -- parse raw HTML: markdown allows it (title, author, date) <- option ([],[],"") titleBlock -- go through once just to get list of reference keys - keysUsed <- lookAhead $ (do {manyTill (referenceKey <|> (do{anyLine; return Null})) eof; - newState <- getState; - return $ stateKeysUsed newState}) - updateState (\st -> st { stateKeysUsed = keysUsed }) + refs <- manyTill (noteBlock <|> referenceKey <|> (do l <- lineClump + return (LineClump l))) eof + let keys = map (\(KeyBlock label target) -> (label, target)) $ + filter isKeyBlock refs + let notes = map (\(NoteBlock label blocks) -> (label, blocks)) $ + filter isNoteBlock refs + let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs + setInput $ concat rawlines -- with note blocks and keys stripped out + updateState (\state -> state { stateKeys = keys, stateNotes = notes }) blocks <- parseBlocks -- go through again, for real let blocks' = filter (/= Null) blocks - state <- getState - let keys = reverse $ stateKeyBlocks state - let notes = reverse $ stateNoteBlocks state - let sortedNotes = sortBy (\x y -> compare (numberOfNote x) - (numberOfNote y)) notes - return (Pandoc (Meta title author date) (blocks' ++ sortedNotes ++ keys)) + return (Pandoc (Meta title author date) blocks') + +-- +-- initial pass for references +-- + +referenceKey = try $ do + nonindentSpaces + label <- reference + char labelSep + skipSpaces + option ' ' (char autoLinkStart) + src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars)) + option ' ' (char autoLinkEnd) + tit <- option "" title + blanklines + return $ KeyBlock label (removeTrailingSpace src, tit) + +noteMarker = try (do + char labelStart + char noteStart + manyTill (noneOf " \t\n") (char labelEnd)) + +rawLine = try (do + notFollowedBy' blankline + notFollowedBy' noteMarker + contents <- many1 nonEndline + end <- option "" (do + newline + option "" (try indentSpaces) + return "\n") + return (contents ++ end)) + +rawLines = do + lines <- many1 rawLine + return (concat lines) + +noteBlock = try $ do + failIfStrict + ref <- noteMarker + char ':' + option ' ' (try blankline) + option "" (try indentSpaces) + raw <- sepBy rawLines (try (do {blankline; indentSpaces})) + option "" blanklines + -- parse the extracted text, which may contain various block elements: + rest <- getInput + setInput $ (joinWithSep "\n" raw) ++ "\n\n" + contents <- parseBlocks + setInput rest + return (NoteBlock ref contents) -- -- parsing blocks @@ -189,9 +233,17 @@ parseMarkdown = do parseBlocks = manyTill block eof -block = choice [ header, table, codeBlock, note, referenceKey, hrule, list, - blockQuote, htmlBlock, rawLaTeXEnvironment', para, - plain, nullBlock ] <?> "block" +block = choice [ header + , table + , codeBlock + , hrule + , list + , blockQuote + , htmlBlock + , rawLaTeXEnvironment' + , para + , plain + , nullBlock ] <?> "block" -- -- header blocks @@ -262,45 +314,6 @@ codeBlock = do return (CodeBlock (stripTrailingNewlines result)) -- --- note block --- - -rawLine = try (do - notFollowedBy' blankline - notFollowedBy' noteMarker - contents <- many1 nonEndline - end <- option "" (do - newline - option "" (try indentSpaces) - return "\n") - return (contents ++ end)) - -rawLines = do - lines <- many1 rawLine - return (concat lines) - -note = try (do - failIfStrict - ref <- noteMarker - char ':' - skipSpaces - skipEndline - raw <- sepBy rawLines (try (do {blankline; indentSpaces})) - option "" blanklines - -- parse the extracted text, which may contain various block elements: - rest <- getInput - setInput $ (joinWithSep "\n" raw) ++ "\n\n" - contents <- parseBlocks - setInput rest - state <- getState - let identifiers = stateNoteIdentifiers state - case (findIndex (== ref) identifiers) of - Just n -> updateState (\s -> s {stateNoteBlocks = - (Note (show (n+1)) contents):(stateNoteBlocks s)}) - Nothing -> updateState id - return Null) - --- -- block quotes -- @@ -535,25 +548,6 @@ rawHtmlBlocks = try (do else combined return (RawHtml combined')) --- --- reference key --- - -referenceKey = try (do - nonindentSpaces - label <- reference - char labelSep - skipSpaces - option ' ' (char autoLinkStart) - src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars)) - option ' ' (char autoLinkEnd) - tit <- option "" title - blanklines - state <- getState - let keysUsed = stateKeysUsed state - setState state { stateKeysUsed = (label:keysUsed) } - return $ Key label (Src (removeTrailingSpace src) tit)) - -- -- LaTeX -- @@ -713,7 +707,7 @@ table = do inline = choice [ rawLaTeXInline' , escapedChar , entity - , noteRef + , note , inlineNote , link , referenceLink @@ -933,7 +927,7 @@ reference = try $ do return (normalizeSpaces label) -- source for a link, with optional title -source = try (do +source = try $ do char srcStart option ' ' (char autoLinkStart) src <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners)) @@ -941,7 +935,7 @@ source = try (do tit <- option "" title skipSpaces char srcEnd - return (Src (removeTrailingSpace src) tit)) + return (removeTrailingSpace src, tit) titleWith startChar endChar = try (do skipSpaces @@ -965,30 +959,18 @@ explicitLink = try (do src <- source return (Link label src)) -referenceLink = choice [referenceLinkDouble, referenceLinkSingle] - --- a link like [this][ref] -referenceLinkDouble = try (do +-- a link like [this][ref] or [this][] or [this] +referenceLink = try $ do label <- reference - skipSpaces - option ' ' newline - skipSpaces - ref <- reference + ref <- option [] (try (do skipSpaces + option ' ' newline + skipSpaces + reference)) let ref' = if null ref then label else ref state <- getState - if ref' `elem` (stateKeysUsed state) - then return () - else fail "no corresponding key" - return (Link label (Ref ref'))) - --- a link like [this] -referenceLinkSingle = try (do - label <- reference - state <- getState - if label `elem` (stateKeysUsed state) - then return () - else fail "no corresponding key" - return (Link label (Ref label))) + case lookupKeySrc (stateKeys state) ref' of + Nothing -> fail "no corresponding key" + Just target -> return (Link label target) autoLink = autoLinkEmail <|> autoLinkRegular @@ -999,7 +981,7 @@ autoLinkEmail = try $ do domain <- sepBy1 (many1 (noneOf "/:.@<> \t\n")) (char '.') let src = name ++ "@" ++ (joinWithSep "." domain) char autoLinkEnd - return $ Link [Str src] (Src ("mailto:" ++ src) "") + return $ Link [Str src] (("mailto:" ++ src), "") -- a link <http://like.this.com> autoLinkRegular = try $ do @@ -1007,39 +989,28 @@ autoLinkRegular = try $ do prot <- oneOfStrings ["http:", "ftp:", "mailto:"] rest <- many1Till (noneOf " \t\n<>") (char autoLinkEnd) let src = prot ++ rest - return $ Link [Str src] (Src src "") + return $ Link [Str src] (src, "") image = try (do char imageStart (Link label src) <- link return (Image label src)) -noteMarker = try (do - char labelStart - char noteStart - manyTill (noneOf " \t\n") (char labelEnd)) - -noteRef = try (do +note = try $ do failIfStrict ref <- noteMarker state <- getState - let identifiers = (stateNoteIdentifiers state) ++ [ref] - setState state {stateNoteIdentifiers = identifiers} - return (NoteRef (show (length identifiers)))) + let notes = stateNotes state + case lookup ref notes of + Nothing -> fail "note not found" + Just contents -> return (Note contents) -inlineNote = try (do +inlineNote = try $ do failIfStrict char noteStart char labelStart contents <- manyTill inline (char labelEnd) - state <- getState - let identifiers = stateNoteIdentifiers state - let ref = show $ (length identifiers) + 1 - let noteBlocks = stateNoteBlocks state - setState state {stateNoteIdentifiers = (identifiers ++ [ref]), - stateNoteBlocks = - (Note ref [Para contents]):noteBlocks} - return (NoteRef ref)) + return (Note [Para contents]) rawLaTeXInline' = do failIfStrict diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index a3de0a2ea..d2143af38 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -37,8 +37,8 @@ import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag ) import Text.Regex ( matchRegex, mkRegex ) import Text.ParserCombinators.Parsec import Data.Maybe ( fromMaybe ) -import List ( findIndex ) -import Char ( toUpper ) +import Data.List ( findIndex, delete ) +import Data.Char ( toUpper ) -- | Parse reStructuredText string and return Pandoc document. readRST :: ParserState -> String -> Pandoc @@ -62,11 +62,7 @@ specialChars = "\\`|*_<>$:[-" -- parsing documents -- -isAnonKeyBlock block = case block of - (Key [Str "_"] str) -> True - otherwise -> False - -isNotAnonKeyBlock block = not (isAnonKeyBlock block) +isAnonKey (ref, src) = (ref == [Str "_"]) isHeader1 :: Block -> Bool isHeader1 (Header 1 _) = True @@ -101,20 +97,22 @@ titleTransform blocks = (blocks, []) parseRST = do -- first pass: get anonymous keys - keyBlocks <- lookAhead $ manyTill (anonymousKey <|> (do{anyLine; return Null})) eof - let anonymousKeys = filter (/= Null) keyBlocks - -- run parser again to fill in anonymous links... - updateState (\st -> st { stateKeyBlocks = anonymousKeys }) - state <- getState + refs <- manyTill (referenceKey <|> (do l <- lineClump + return (LineClump l))) eof + let keys = map (\(KeyBlock label target) -> (label, target)) $ + filter isKeyBlock refs + let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs + setInput $ concat rawlines -- with keys stripped out + updateState (\state -> state { stateKeys = keys }) blocks <- parseBlocks - let blocks' = filter isNotAnonKeyBlock blocks + let blocks' = filter (/= Null) blocks + state <- getState let (blocks'', title) = if stateStandalone state then titleTransform blocks' else (blocks', []) - state' <- getState - let authors = stateAuthors state' - let date = stateDate state' - let title' = if (null title) then (stateTitle state') else title + let authors = stateAuthors state + let date = stateDate state + let title' = if (null title) then (stateTitle state) else title return (Pandoc (Meta title' authors date) blocks'') -- @@ -124,7 +122,7 @@ parseRST = do parseBlocks = manyTill block eof block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, - referenceKey, imageBlock, unknownDirective, header, + imageBlock, unknownDirective, header, hrule, list, fieldList, lineBlock, para, plain, nullBlock ] <?> "block" @@ -221,7 +219,7 @@ plain = do imageBlock = try (do string ".. image:: " src <- manyTill anyChar newline - return (Plain [Image [Str "image"] (Src src "")])) + return (Plain [Image [Str "image"] (src, "")])) -- -- header blocks @@ -492,43 +490,43 @@ unknownDirective = try (do -- reference key -- -referenceKey = choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] +referenceKey = do + result <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] + option "" blanklines + return result -imageKey = try (do +imageKey = try $ do string ".. |" ref <- manyTill inline (char '|') skipSpaces string "image::" src <- manyTill anyChar newline - return (Key (normalizeSpaces ref) - (Src (removeLeadingTrailingSpace src) ""))) + return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "") -anonymousKey = try (do +anonymousKey = try $ do oneOfStrings [".. __:", "__"] skipSpaces option ' ' newline src <- manyTill anyChar newline state <- getState - return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) ""))) + return $ KeyBlock [Str "_"] (removeLeadingTrailingSpace src, "") -regularKeyQuoted = try (do +regularKeyQuoted = try $ do string ".. _`" ref <- manyTill inline (char '`') char ':' skipSpaces option ' ' newline src <- manyTill anyChar newline - return (Key (normalizeSpaces ref) - (Src (removeLeadingTrailingSpace src) ""))) + return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "") -regularKey = try (do +regularKey = try $ do string ".. _" ref <- manyTill inline (char ':') skipSpaces option ' ' newline src <- manyTill anyChar newline - return (Key (normalizeSpaces ref) - (Src (removeLeadingTrailingSpace src) ""))) + return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "") -- -- inline @@ -577,7 +575,7 @@ tabchar = do return (Str "\t") str = do - notFollowedBy' oneWordReferenceLink + notFollowedBy' oneWordReference result <- many1 (noneOf (specialChars ++ "\t\n ")) return (Str result) @@ -596,46 +594,44 @@ endline = try (do -- links -- -link = choice [explicitLink, referenceLink, autoLink, - oneWordReferenceLink] <?> "link" +link = choice [explicitLink, referenceLink, autoLink] <?> "link" -explicitLink = try (do +explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` is marks start of inline code label <- manyTill inline (try (do {spaces; char '<'})) src <- manyTill (noneOf ">\n ") (char '>') skipSpaces string "`_" - return (Link (normalizeSpaces label) - (Src (removeLeadingTrailingSpace src) ""))) + return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "") -anonymousLinkEnding = try (do - char '_' - state <- getState - let anonKeys = stateKeyBlocks state - -- if there's a list of anon key refs (from previous pass), pop one off. - -- otherwise return an anon key ref for the next pass to take care of... - case anonKeys of - (Key [Str "_"] src):rest -> - do - setState (state { stateKeyBlocks = rest }) - return src - otherwise -> return (Ref [Str "_"])) - -referenceLink = try (do +reference = try $ do char '`' notFollowedBy (char '`') - label <- manyTill inline (char '`') + label <- many1Till inline (char '`') char '_' - src <- option (Ref []) anonymousLinkEnding - return (Link (normalizeSpaces label) src)) + return label -oneWordReferenceLink = try (do - label <- many1 alphaNum +oneWordReference = do + raw <- many1 alphaNum char '_' - src <- option (Ref []) anonymousLinkEnding notFollowedBy alphaNum -- because this_is_not a link - return (Link [Str label] src)) + return [Str raw] + +referenceLink = try $ do + label <- reference <|> oneWordReference + key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link + state <- getState + let keyTable = stateKeys state + src <- case lookupKeySrc keyTable key of + Nothing -> fail "no corresponding key" + Just target -> return target + -- if anonymous link, remove first anon key so it won't be used again + let keyTable' = if (key == [Str "_"]) -- anonymous link? + then delete ([Str "_"], src) keyTable -- remove first anon key + else keyTable + setState $ state { stateKeys = keyTable' } + return $ Link (normalizeSpaces label) src uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", "mailto:", "news:", "telnet:" ] @@ -645,9 +641,9 @@ uri = try (do identifier <- many1 (noneOf " \t\n") return (scheme ++ identifier)) -autoURI = try (do +autoURI = try $ do src <- uri - return (Link [Str src] (Src src ""))) + return $ Link [Str src] (src, "") emailChar = alphaNum <|> oneOf "-+_." @@ -666,14 +662,20 @@ domain = try (do dom <- many1 (try (do{ char '.'; many1 domainChar })) return (joinWithSep "." (first:dom))) -autoEmail = try (do +autoEmail = try $ do src <- emailAddress - return (Link [Str src] (Src ("mailto:" ++ src) ""))) + return $ Link [Str src] ("mailto:" ++ src, "") autoLink = autoURI <|> autoEmail -- For now, we assume that all substitution references are for images. -image = try (do +image = try $ do char '|' ref <- manyTill inline (char '|') - return (Image (normalizeSpaces ref) (Ref ref))) + state <- getState + let keyTable = stateKeys state + src <- case lookupKeySrc keyTable ref of + Nothing -> fail "no corresponding key" + Just target -> return target + return (Image (normalizeSpaces ref) src) + diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 526263c4a..afb75e4c5 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -45,6 +45,10 @@ module Text.Pandoc.Shared ( -- * Parsing readWith, testStringWith, + Reference (..), + isNoteBlock, + isKeyBlock, + isLineClump, HeaderType (..), ParserContext (..), QuoteContext (..), @@ -53,27 +57,19 @@ module Text.Pandoc.Shared ( -- * Native format prettyprinting prettyPandoc, -- * Pandoc block list processing - isNoteBlock, normalizeSpaces, compactify, - generateReference, + -- * Writer options WriterOptions (..), defaultWriterOptions, + -- * Reference key lookup functions KeyTable, - keyTable, lookupKeySrc, refsMatch, - replaceReferenceLinks, - replaceRefLinksBlockList, - -- * SGML - inTags, - selfClosingTag, - inTagsSimple, - inTagsIndented ) where import Text.Pandoc.Definition import Text.ParserCombinators.Parsec as Parsec -import Text.Pandoc.Entities ( decodeEntities, escapeSGMLString ) +import Text.Pandoc.Entities ( decodeEntities, escapeStringForXML ) import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>), ($$), nest, Doc, isEmpty ) import Data.Char ( toLower, ord ) @@ -113,16 +109,37 @@ data QuoteContext | NoQuote -- ^ Used when we're not parsing inside quotes deriving (Eq, Show) +type KeyTable = [([Inline], Target)] + +type NoteTable = [(String, [Block])] + +-- | References from preliminary parsing +data Reference + = KeyBlock [Inline] Target -- ^ Key for reference-style link (label URL title) + | NoteBlock String [Block] -- ^ Footnote reference and contents + | LineClump String -- ^ Raw clump of lines with blanks at end + deriving (Eq, Read, Show) + +-- | Auxiliary functions used in preliminary parsing +isNoteBlock :: Reference -> Bool +isNoteBlock (NoteBlock _ _) = True +isNoteBlock _ = False + +isKeyBlock :: Reference -> Bool +isKeyBlock (KeyBlock _ _) = True +isKeyBlock _ = False + +isLineClump :: Reference -> Bool +isLineClump (LineClump _) = True +isLineClump _ = False + data ParserState = ParserState { stateParseRaw :: Bool, -- ^ Parse untranslatable HTML -- and LaTeX? stateParserContext :: ParserContext, -- ^ What are we parsing? stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? - stateKeyBlocks :: [Block], -- ^ List of reference key blocks - stateKeysUsed :: [[Inline]], -- ^ List of references used - stateNoteBlocks :: [Block], -- ^ List of note blocks - stateNoteIdentifiers :: [String], -- ^ List of footnote identifiers - -- in the order encountered + stateKeys :: KeyTable, -- ^ List of reference keys + stateNotes :: NoteTable, -- ^ List of notes stateTabStop :: Int, -- ^ Tab stop stateStandalone :: Bool, -- ^ If @True@, parse -- bibliographic info @@ -133,7 +150,6 @@ data ParserState = ParserState stateSmart :: Bool, -- ^ Use smart typography stateColumns :: Int, -- ^ Number of columns in -- terminal (used for tables) - stateInlineLinks :: Bool, -- ^ Parse html links as inline stateHeaderTable :: [HeaderType] -- ^ List of header types used, -- in what order (rst only) } @@ -144,10 +160,8 @@ defaultParserState = ParserState { stateParseRaw = False, stateParserContext = NullState, stateQuoteContext = NoQuote, - stateKeyBlocks = [], - stateKeysUsed = [], - stateNoteBlocks = [], - stateNoteIdentifiers = [], + stateKeys = [], + stateNotes = [], stateTabStop = 4, stateStandalone = False, stateTitle = [], @@ -156,7 +170,6 @@ defaultParserState = stateStrict = False, stateSmart = False, stateColumns = 80, - stateInlineLinks = False, stateHeaderTable = [] } -- | Indent string as a block. @@ -182,8 +195,6 @@ prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ prettyBlock :: Block -> String prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ (prettyBlockList 2 blocks) -prettyBlock (Note ref blocks) = "Note " ++ (show ref) ++ "\n " ++ - (prettyBlockList 2 blocks) prettyBlock (OrderedList blockLists) = "OrderedList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" @@ -236,11 +247,6 @@ endsWith :: Char -> [Char] -> Bool endsWith char [] = False endsWith char str = (char == last str) --- | Returns @True@ if block is a @Note@ block -isNoteBlock :: Block -> Bool -isNoteBlock (Note ref blocks) = True -isNoteBlock _ = False - -- | Joins a list of lists, separated by another list. joinWithSep :: [a] -- ^ List to use as separator -> [[a]] -- ^ Lists to join @@ -351,9 +357,9 @@ data WriterOptions = WriterOptions , writerIncremental :: Bool -- ^ Incremental S5 lists , writerNumberSections :: Bool -- ^ Number sections in LaTeX , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax + , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , writerTabStop :: Int -- ^ Tabstop for conversion between -- spaces and tabs - , writerNotes :: [Block] -- ^ List of note blocks } deriving Show -- | Default writer options. @@ -362,79 +368,18 @@ defaultWriterOptions = writerHeader = "", writerTitlePrefix = "", writerTabStop = 4, - writerNotes = [], writerS5 = False, writerIncremental = False, writerNumberSections = False, writerIncludeBefore = "", writerIncludeAfter = "", - writerStrictMarkdown = False } - --- --- Functions for constructing lists of reference keys --- - --- | Returns @Just@ numerical key reference if there's already a key --- for the specified target in the list of blocks, otherwise @Nothing@. -keyFoundIn :: [Block] -- ^ List of key blocks to search - -> Target -- ^ Target to search for - -> Maybe String -keyFoundIn [] src = Nothing -keyFoundIn ((Key [Str num] src1):rest) src = if (src1 == src) - then Just num - else keyFoundIn rest src -keyFoundIn (_:rest) src = keyFoundIn rest src - --- | Return next unique numerical key, given keyList -nextUniqueKey :: [[Inline]] -> String -nextUniqueKey keys = - let nums = [1..10000] - notAKey n = not (any (== [Str (show n)]) keys) in - case (find notAKey nums) of - Just x -> show x - Nothing -> error "Could not find unique key for reference link" - --- | Generate a reference for a URL (either an existing reference, if --- there is one, or a new one, if there isn't) and update parser state. -generateReference :: String -- ^ URL - -> String -- ^ Title - -> GenParser tok ParserState Target -generateReference url title = do - let src = Src (decodeEntities url) (decodeEntities title) - state <- getState - let keyBlocks = stateKeyBlocks state - let keysUsed = stateKeysUsed state - case (keyFoundIn keyBlocks src) of - Just num -> return (Ref [Str num]) - Nothing -> do - let nextNum = nextUniqueKey keysUsed - updateState (\st -> st { stateKeyBlocks = - (Key [Str nextNum] src):keyBlocks, - stateKeysUsed = - [Str nextNum]:keysUsed }) - return (Ref [Str nextNum]) + writerStrictMarkdown = False, + writerReferenceLinks = False } -- --- code to replace reference links with real links and remove unneeded key blocks +-- code to lookup reference keys in key table -- -type KeyTable = [([Inline], Target)] - --- | Returns @True@ if block is a Key block -isRefBlock :: Block -> Bool -isRefBlock (Key _ _) = True -isRefBlock _ = False - --- | Returns a pair of a list of pairs of keys and associated sources, and a new --- list of blocks with the included key blocks deleted. -keyTable :: [Block] -> (KeyTable, [Block]) -keyTable [] = ([],[]) -keyTable ((Key ref target):lst) = (((ref, target):table), rest) - where (table, rest) = keyTable lst -keyTable (Null:lst) = keyTable lst -- get rid of Nulls -keyTable (other:lst) = (table, (other:rest)) - where (table, rest) = keyTable lst - -- | Look up key in key table and return target object. lookupKeySrc :: KeyTable -- ^ Key table -> [Inline] -- ^ Key @@ -455,8 +400,6 @@ refsMatch ((TeX x):restx) ((TeX y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((NoteRef x):restx) ((NoteRef y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty refsMatch ((Emph x):restx) ((Emph y):resty) = refsMatch x y && refsMatch restx resty refsMatch ((Strong x):restx) ((Strong y):resty) = @@ -467,95 +410,3 @@ refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty refsMatch [] x = null x refsMatch x [] = null x --- | Replace reference links with explicit links in list of blocks, --- removing key blocks. -replaceReferenceLinks :: [Block] -> [Block] -replaceReferenceLinks blocks = - let (keytable, purged) = keyTable blocks in - replaceRefLinksBlockList keytable purged - --- | Use key table to replace reference links with explicit links in a list --- of blocks -replaceRefLinksBlockList :: KeyTable -> [Block] -> [Block] -replaceRefLinksBlockList keytable lst = - map (replaceRefLinksBlock keytable) lst - --- | Use key table to replace reference links with explicit links in a block -replaceRefLinksBlock :: KeyTable -> Block -> Block -replaceRefLinksBlock keytable (Plain lst) = - Plain (map (replaceRefLinksInline keytable) lst) -replaceRefLinksBlock keytable (Para lst) = - Para (map (replaceRefLinksInline keytable) lst) -replaceRefLinksBlock keytable (Header lvl lst) = - Header lvl (map (replaceRefLinksInline keytable) lst) -replaceRefLinksBlock keytable (BlockQuote lst) = - BlockQuote (map (replaceRefLinksBlock keytable) lst) -replaceRefLinksBlock keytable (Note ref lst) = - Note ref (map (replaceRefLinksBlock keytable) lst) -replaceRefLinksBlock keytable (OrderedList lst) = - OrderedList (map (replaceRefLinksBlockList keytable) lst) -replaceRefLinksBlock keytable (BulletList lst) = - BulletList (map (replaceRefLinksBlockList keytable) lst) -replaceRefLinksBlock keytable (DefinitionList lst) = - DefinitionList (map (\(term, def) -> - (map (replaceRefLinksInline keytable) term, - replaceRefLinksBlockList keytable def)) lst) -replaceRefLinksBlock keytable (Table caption alignment widths headers rows) = - Table (map (replaceRefLinksInline keytable) caption) alignment widths - (map (replaceRefLinksBlockList keytable) headers) - (map (map (replaceRefLinksBlockList keytable)) rows) -replaceRefLinksBlock keytable other = other - --- | Use key table to replace reference links with explicit links in an --- inline element. -replaceRefLinksInline :: KeyTable -> Inline -> Inline -replaceRefLinksInline keytable (Link text (Ref ref)) = (Link newText newRef) - where newRef = case lookupKeySrc keytable - (if (null ref) then text else ref) of - Nothing -> (Ref ref) - Just src -> src - newText = map (replaceRefLinksInline keytable) text -replaceRefLinksInline keytable (Image text (Ref ref)) = (Image newText newRef) - where newRef = case lookupKeySrc keytable - (if (null ref) then text else ref) of - Nothing -> (Ref ref) - Just src -> src - newText = map (replaceRefLinksInline keytable) text -replaceRefLinksInline keytable (Emph lst) = - Emph (map (replaceRefLinksInline keytable) lst) -replaceRefLinksInline keytable (Strong lst) = - Strong (map (replaceRefLinksInline keytable) lst) -replaceRefLinksInline keytable (Quoted t lst) = - Quoted t (map (replaceRefLinksInline keytable) lst) -replaceRefLinksInline keytable other = other - --- | Return a text object with a string of formatted SGML attributes. -attributeList :: [(String, String)] -> Doc -attributeList = text . concatMap - (\(a, b) -> " " ++ escapeSGMLString a ++ "=\"" ++ - escapeSGMLString b ++ "\"") - --- | Put the supplied contents between start and end tags of tagType, --- with specified attributes and (if specified) indentation. -inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc -inTags isIndented tagType attribs contents = - let openTag = PP.char '<' <> text tagType <> attributeList attribs <> - PP.char '>' - closeTag = text "</" <> text tagType <> PP.char '>' in - if isIndented - then openTag $$ nest 2 contents $$ closeTag - else openTag <> contents <> closeTag - --- | Return a self-closing tag of tagType with specified attributes -selfClosingTag :: String -> [(String, String)] -> Doc -selfClosingTag tagType attribs = - PP.char '<' <> text tagType <> attributeList attribs <> text " />" - --- | Put the supplied contents between start and end tags of tagType. -inTagsSimple :: String -> Doc -> Doc -inTagsSimple tagType = inTags False tagType [] - --- | Put the supplied contents in indented block btw start and end tags. -inTagsIndented :: String -> Doc -> Doc -inTagsIndented tagType = inTags True tagType [] - diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 87eba9ad0..9fce1c061 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -27,16 +27,53 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to Docbook XML. -} -module Text.Pandoc.Writers.Docbook ( - writeDocbook - ) where +module Text.Pandoc.Writers.Docbook ( writeDocbook) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Entities ( escapeSGMLString ) +import Text.Pandoc.Entities ( escapeStringForXML ) import Data.Char ( toLower, ord ) import Data.List ( isPrefixOf, partition, drop ) import Text.PrettyPrint.HughesPJ hiding ( Str ) + +-- +-- code to format XML +-- + +-- | Return a text object with a string of formatted XML attributes. +attributeList :: [(String, String)] -> Doc +attributeList = text . concatMap + (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++ + escapeStringForXML b ++ "\"") + +-- | Put the supplied contents between start and end tags of tagType, +-- with specified attributes and (if specified) indentation. +inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc +inTags isIndented tagType attribs contents = + let openTag = char '<' <> text tagType <> attributeList attribs <> + char '>' + closeTag = text "</" <> text tagType <> char '>' in + if isIndented + then openTag $$ nest 2 contents $$ closeTag + else openTag <> contents <> closeTag + +-- | Return a self-closing tag of tagType with specified attributes +selfClosingTag :: String -> [(String, String)] -> Doc +selfClosingTag tagType attribs = + char '<' <> text tagType <> attributeList attribs <> text " />" + +-- | Put the supplied contents between start and end tags of tagType. +inTagsSimple :: String -> Doc -> Doc +inTagsSimple tagType = inTags False tagType [] + +-- | Put the supplied contents in indented block btw start and end tags. +inTagsIndented :: String -> Doc -> Doc +inTagsIndented tagType = inTags True tagType [] + +-- +-- Docbook writer +-- + -- | Data structure for defining hierarchical Pandoc documents data Element = Blk Block | Sec [Inline] [Element] deriving (Eq, Read, Show) @@ -64,8 +101,8 @@ authorToDocbook name = inTagsIndented "author" $ then -- last name first let (lastname, rest) = break (==',') name firstname = removeLeadingSpace rest in - inTagsSimple "firstname" (text $ escapeSGMLString firstname) <> - inTagsSimple "surname" (text $ escapeSGMLString lastname) + inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> + inTagsSimple "surname" (text $ escapeStringForXML lastname) else -- last name last let namewords = words name lengthname = length namewords @@ -73,8 +110,8 @@ authorToDocbook name = inTagsIndented "author" $ 0 -> ("","") 1 -> ("", name) n -> (joinWithSep " " (take (n-1) namewords), last namewords) in - inTagsSimple "firstname" (text $ escapeSGMLString firstname) $$ - inTagsSimple "surname" (text $ escapeSGMLString lastname) + inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ + inTagsSimple "surname" (text $ escapeStringForXML lastname) -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String @@ -86,18 +123,15 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) = then inTagsIndented "articleinfo" $ (inTagsSimple "title" (wrap opts title)) $$ (vcat (map authorToDocbook authors)) $$ - (inTagsSimple "date" (text $ escapeSGMLString date)) + (inTagsSimple "date" (text $ escapeStringForXML date)) else empty - blocks' = replaceReferenceLinks blocks - (noteBlocks, blocks'') = partition isNoteBlock blocks' - opts' = opts {writerNotes = noteBlocks} - elements = hierarchicalize blocks'' - before = writerIncludeBefore opts' - after = writerIncludeAfter opts' + elements = hierarchicalize blocks + before = writerIncludeBefore opts + after = writerIncludeAfter opts body = (if null before then empty else text before) $$ - vcat (map (elementToDocbook opts') elements) $$ + vcat (map (elementToDocbook opts) elements) $$ (if null after then empty else text after) - body' = if writerStandalone opts' + body' = if writerStandalone opts then inTagsIndented "article" (meta $$ body) else body in render $ head $$ body' $$ text "" @@ -140,15 +174,13 @@ blockToDocbook opts (Para lst) = blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" (blocksToDocbook opts blocks) blockToDocbook opts (CodeBlock str) = - text "<screen>\n" <> text (escapeSGMLString str) <> text "\n</screen>" + text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>" blockToDocbook opts (BulletList lst) = inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst blockToDocbook opts (OrderedList lst) = inTagsIndented "orderedlist" $ listItemsToDocbook opts lst blockToDocbook opts (RawHtml str) = text str -- raw XML block blockToDocbook opts HorizontalRule = empty -- not semantic -blockToDocbook opts (Note _ _) = empty -- shouldn't occur -blockToDocbook opts (Key _ _) = empty -- shouldn't occur blockToDocbook opts (Table caption aligns widths headers rows) = let alignStrings = map alignmentToString aligns captionDoc = if null caption @@ -197,7 +229,7 @@ inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst) -- | Convert an inline element to Docbook. inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook opts (Str str) = text $ escapeSGMLString str +inlineToDocbook opts (Str str) = text $ escapeStringForXML str inlineToDocbook opts (Emph lst) = inTagsSimple "emphasis" (inlinesToDocbook opts lst) inlineToDocbook opts (Strong lst) = @@ -210,31 +242,24 @@ inlineToDocbook opts Ellipses = text "…" inlineToDocbook opts EmDash = text "—" inlineToDocbook opts EnDash = text "–" inlineToDocbook opts (Code str) = - inTagsSimple "literal" $ text (escapeSGMLString str) + inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str) inlineToDocbook opts (HtmlInline str) = empty inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>" inlineToDocbook opts Space = char ' ' -inlineToDocbook opts (Link txt (Src src tit)) = +inlineToDocbook opts (Link txt (src, tit)) = if isPrefixOf "mailto:" src - then inTagsSimple "email" $ text (escapeSGMLString $ drop 7 src) + then inTagsSimple "email" $ text (escapeStringForXML $ drop 7 src) else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt -inlineToDocbook opts (Link text (Ref ref)) = empty -- shouldn't occur -inlineToDocbook opts (Image alt (Src src tit)) = +inlineToDocbook opts (Image alt (src, tit)) = let titleDoc = if null tit then empty else inTagsIndented "objectinfo" $ inTagsIndented "title" - (text $ escapeSGMLString tit) in + (text $ escapeStringForXML tit) in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] -inlineToDocbook opts (Image alternate (Ref ref)) = empty --shouldn't occur -inlineToDocbook opts (NoteRef ref) = - let notes = writerNotes opts - hits = filter (\(Note r _) -> r == ref) notes in - if null hits - then empty - else let (Note _ contents) = head hits in - inTagsIndented "footnote" $ blocksToDocbook opts contents +inlineToDocbook opts (Note contents) = + inTagsIndented "footnote" $ blocksToDocbook opts contents diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index be5eb8506..f6fc0741e 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -35,8 +35,11 @@ import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, partition ) +import Control.Monad.State import Text.XHtml.Strict +type Notes = [Html] + -- | Convert Pandoc document to Html string. writeHtmlString :: WriterOptions -> Pandoc -> String writeHtmlString opts = @@ -48,13 +51,10 @@ writeHtmlString opts = writeHtml :: WriterOptions -> Pandoc -> Html writeHtml opts (Pandoc (Meta tit authors date) blocks) = let titlePrefix = writerTitlePrefix opts - topTitle = inlineListToHtml opts tit - topTitle' = if not (null titlePrefix) - then stringToHtml titlePrefix +++ - if not (null tit) - then '-' +++ topTitle - else noHtml - else topTitle + topTitle = evalState (inlineListToHtml opts tit) [] + topTitle' = if null titlePrefix + then topTitle + else titlePrefix +++ " - " +++ topTitle head = header $ thetitle topTitle' +++ meta ! [httpequiv "Content-Type", content "text/html; charset=UTF-8"] +++ @@ -69,31 +69,30 @@ writeHtml opts (Pandoc (Meta tit authors date) blocks) = (not (writerS5 opts)) then h1 ! [theclass "title"] $ topTitle else noHtml - blocks' = replaceReferenceLinks blocks - (noteBlocks, blocks'') = partition isNoteBlock blocks' + (blocks', revnotes) = runState (blockListToHtml opts blocks) [] + notes = reverse revnotes before = primHtml $ writerIncludeBefore opts after = primHtml $ writerIncludeAfter opts - thebody = before +++ titleHeader +++ - toHtmlFromList (map (blockToHtml opts) blocks'') +++ - footnoteSection opts noteBlocks +++ after + thebody = before +++ titleHeader +++ blocks' +++ + footnoteSection opts notes +++ after in if writerStandalone opts then head +++ (body thebody) else thebody -- | Convert list of Note blocks to a footnote <div>. -- Assumes notes are sorted. -footnoteSection :: WriterOptions -> [Block] -> Html +footnoteSection :: WriterOptions -> Notes -> Html footnoteSection opts notes = if null notes - then noHtml - else thediv ! [theclass "footnotes"] $ - hr +++ (olist $ toHtmlFromList $ map (blockToHtml opts) notes) + then noHtml + else thediv ! [theclass "footnotes"] $ + hr +++ (olist << notes) -- | Obfuscate a "mailto:" link using Javascript. -obfuscateLink :: WriterOptions -> [Inline] -> String -> Html -obfuscateLink opts txt src = +obfuscateLink :: WriterOptions -> Html -> String -> Html +obfuscateLink opts text src = let emailRegex = mkRegex "mailto:*([^@]*)@(.*)" - text' = show $ inlineListToHtml opts txt + text' = show $ text src' = map toLower src in case (matchRegex emailRegex src') of (Just [name, domain]) -> @@ -117,7 +116,7 @@ obfuscateLink opts txt src = "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++ linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++ noscript (primHtml $ obfuscateString altText) - _ -> anchor ! [href src] $ inlineListToHtml opts txt -- malformed email + _ -> anchor ! [href src] $ text -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String @@ -131,137 +130,153 @@ obfuscateString :: String -> String obfuscateString = (concatMap obfuscateChar) . decodeEntities -- | Convert Pandoc block element to HTML. -blockToHtml :: WriterOptions -> Block -> Html -blockToHtml opts Null = noHtml -blockToHtml opts (Plain lst) = inlineListToHtml opts lst -blockToHtml opts (Para lst) = paragraph $ inlineListToHtml opts lst -blockToHtml opts (BlockQuote blocks) = - if (writerS5 opts) - then -- in S5, treat list in blockquote specially - -- if default is incremental, make it nonincremental; - -- otherwise incremental - let inc = not (writerIncremental opts) in - case blocks of - [BulletList lst] -> blockToHtml (opts {writerIncremental = - inc}) (BulletList lst) - [OrderedList lst] -> blockToHtml (opts {writerIncremental = - inc}) (OrderedList lst) - otherwise -> blockquote $ toHtmlFromList $ - map (blockToHtml opts) blocks - else blockquote $ toHtmlFromList $ map (blockToHtml opts) blocks -blockToHtml opts (Note ref lst) = - let contents = toHtmlFromList $ map (blockToHtml opts) lst - backlink = anchor ! [href ("#fnref" ++ ref), theclass "footnoteBacklink", - title ("Jump back to footnote " ++ ref)] $ - (primHtmlChar "#8617") in - li ! [identifier ("fn" ++ ref)] $ contents +++ backlink -blockToHtml opts (Key _ _) = noHtml -blockToHtml opts (CodeBlock str) = - pre $ thecode << (str ++ "\n") -- the final \n for consistency with Markdown.pl -blockToHtml opts (RawHtml str) = primHtml str -blockToHtml opts (BulletList lst) = - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] in - unordList ! attribs $ map (blockListToHtml opts) lst -blockToHtml opts (OrderedList lst) = - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] in - ordList ! attribs $ map (blockListToHtml opts) lst -blockToHtml opts (DefinitionList lst) = - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] in - defList ! attribs $ map (\(term, def) -> (inlineListToHtml opts term, - blockListToHtml opts def)) lst -blockToHtml opts HorizontalRule = hr -blockToHtml opts (Header level lst) = - let contents = inlineListToHtml opts lst in - case level of - 1 -> h1 contents - 2 -> h2 contents - 3 -> h3 contents - 4 -> h4 contents - 5 -> h5 contents - 6 -> h6 contents - _ -> paragraph contents -blockToHtml opts (Table capt aligns widths headers rows) = - let alignStrings = map alignmentToString aligns - captionDoc = if null capt - then noHtml - else caption $ inlineListToHtml opts capt in - table $ captionDoc +++ - (colHeadsToHtml opts alignStrings widths headers) +++ - (toHtmlFromList $ map (tableRowToHtml opts alignStrings) rows) +blockToHtml :: WriterOptions -> Block -> State Notes Html +blockToHtml opts block = + case block of + (Null) -> return $ noHtml + (Plain lst) -> inlineListToHtml opts lst + (Para lst) -> inlineListToHtml opts lst >>= (return . paragraph) + (RawHtml str) -> return $ primHtml str + (HorizontalRule) -> return $ hr + (CodeBlock str) -> return $ pre $ thecode << (str ++ "\n") + -- the final \n for consistency with Markdown.pl + (BlockQuote blocks) -> -- in S5, treat list in blockquote specially + -- if default is incremental, make it nonincremental; + -- otherwise incremental + if writerS5 opts + then let inc = not (writerIncremental opts) in + case blocks of + [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) + (BulletList lst) + [OrderedList lst] -> blockToHtml (opts {writerIncremental = inc}) + (OrderedList lst) + otherwise -> blockListToHtml opts blocks >>= + (return . blockquote) + else blockListToHtml opts blocks >>= (return . blockquote) + (Header level lst) -> do contents <- inlineListToHtml opts lst + return $ case level of + 1 -> h1 contents + 2 -> h2 contents + 3 -> h3 contents + 4 -> h4 contents + 5 -> h5 contents + 6 -> h6 contents + _ -> paragraph contents + (BulletList lst) -> do contents <- mapM (blockListToHtml opts) lst + let attribs = if writerIncremental opts + then [theclass "incremental"] + else [] + return $ unordList ! attribs $ contents + (OrderedList lst) -> do contents <- mapM (blockListToHtml opts) lst + let attribs = if writerIncremental opts + then [theclass "incremental"] + else [] + return $ ordList ! attribs $ contents + (DefinitionList lst) -> do contents <- mapM (\(term, def) -> + do term' <- inlineListToHtml opts term + def' <- blockListToHtml opts def + return $ (term', def')) + lst + let attribs = if writerIncremental opts + then [theclass "incremental"] + else [] + return $ defList ! attribs $ contents + (Table capt aligns widths headers rows) -> + do let alignStrings = map alignmentToString aligns + captionDoc <- if null capt + then return noHtml + else inlineListToHtml opts capt >>= + (return . caption) + colHeads <- colHeadsToHtml opts alignStrings + widths headers + rows' <- mapM (tableRowToHtml opts alignStrings) rows + return $ table $ captionDoc +++ colHeads +++ rows' colHeadsToHtml opts alignStrings widths headers = - let heads = zipWith3 - (\align width item -> tableItemToHtml opts th align width item) - alignStrings widths headers in - tr $ toHtmlFromList heads + do heads <- sequence $ zipWith3 + (\align width item -> tableItemToHtml opts th align width item) + alignStrings widths headers + return $ tr $ toHtmlFromList heads alignmentToString alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" AlignCenter -> "center" AlignDefault -> "left" + tableRowToHtml opts aligns cols = - tr $ toHtmlFromList $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols + do contents <- sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols + return $ tr $ toHtmlFromList contents tableItemToHtml opts tag align' width item = - let attrib = [align align'] ++ - if (width /= 0) - then [thestyle ("{width: " ++ show (truncate (100*width)) ++ "%;}")] - else [] in - tag ! attrib $ toHtmlFromList $ map (blockToHtml opts) item + do contents <- blockListToHtml opts item + let attrib = [align align'] ++ + if (width /= 0) + then [thestyle ("{width: " ++ show (truncate (100*width)) ++ "%;}")] + else [] + return $ tag ! attrib $ contents -blockListToHtml :: WriterOptions -> [Block] -> Html -blockListToHtml opts list = - toHtmlFromList $ map (blockToHtml opts) list +blockListToHtml :: WriterOptions -> [Block] -> State Notes Html +blockListToHtml opts lst = mapM (blockToHtml opts) lst >>= (return . toHtmlFromList) -- | Convert list of Pandoc inline elements to HTML. -inlineListToHtml :: WriterOptions -> [Inline] -> Html -inlineListToHtml opts lst = toHtmlFromList $ map (inlineToHtml opts) lst +inlineListToHtml :: WriterOptions -> [Inline] -> State Notes Html +inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= (return . toHtmlFromList) -- | Convert Pandoc inline element to HTML. -inlineToHtml :: WriterOptions -> Inline -> Html -inlineToHtml opts (Emph lst) = - emphasize $ inlineListToHtml opts lst -inlineToHtml opts (Strong lst) = - strong $ inlineListToHtml opts lst -inlineToHtml opts (Code str) = - thecode << str -inlineToHtml opts (Quoted SingleQuote lst) = - primHtmlChar "lsquo" +++ inlineListToHtml opts lst +++ primHtmlChar "rsquo" -inlineToHtml opts (Quoted DoubleQuote lst) = - primHtmlChar "ldquo" +++ inlineListToHtml opts lst +++ primHtmlChar "rdquo" -inlineToHtml opts EmDash = primHtmlChar "mdash" -inlineToHtml opts EnDash = primHtmlChar "ndash" -inlineToHtml opts Ellipses = primHtmlChar "hellip" -inlineToHtml opts Apostrophe = primHtmlChar "rsquo" -inlineToHtml opts (Str str) = stringToHtml str -inlineToHtml opts (TeX str) = stringToHtml str -inlineToHtml opts (HtmlInline str) = primHtml str -inlineToHtml opts (LineBreak) = br -inlineToHtml opts Space = stringToHtml " " -inlineToHtml opts (Link txt (Src src tit)) = - if (isPrefixOf "mailto:" src) - then obfuscateLink opts txt src - else anchor ! ([href src] ++ if null tit then [] else [title tit]) $ - inlineListToHtml opts txt -inlineToHtml opts (Link txt (Ref ref)) = - '[' +++ (inlineListToHtml opts txt) +++ - ']' +++ '[' +++ (inlineListToHtml opts ref) +++ - ']' - -- this is what markdown does, for better or worse -inlineToHtml opts (Image alttext (Src source tit)) = - let alternate = renderHtmlFragment $ inlineListToHtml opts alttext in - image ! ([src source, title tit] ++ if null alttext then [] else [alt alternate]) - -- note: null title is included, as in Markdown.pl -inlineToHtml opts (Image alternate (Ref ref)) = - '!' +++ inlineToHtml opts (Link alternate (Ref ref)) -inlineToHtml opts (NoteRef ref) = - anchor ! [href ("#fn" ++ ref), theclass "footnoteRef", identifier ("fnref" ++ ref)] << - sup << ref +inlineToHtml :: WriterOptions -> Inline -> State Notes Html +inlineToHtml opts inline = + case inline of + (Str str) -> return $ stringToHtml str + (Space) -> return $ stringToHtml " " + (LineBreak) -> return $ br + (EmDash) -> return $ primHtmlChar "mdash" + (EnDash) -> return $ primHtmlChar "ndash" + (Ellipses) -> return $ primHtmlChar "hellip" + (Apostrophe) -> return $ primHtmlChar "rsquo" + (Emph lst) -> inlineListToHtml opts lst >>= (return . emphasize) + (Strong lst) -> inlineListToHtml opts lst >>= (return . strong) + (Code str) -> return $ thecode << str + (Quoted quoteType lst) -> + let (leftQuote, rightQuote) = case quoteType of + SingleQuote -> (primHtmlChar "lsquo", + primHtmlChar "rsquo") + DoubleQuote -> (primHtmlChar "ldquo", + primHtmlChar "rdquo") in + do contents <- inlineListToHtml opts lst + return $ leftQuote +++ contents +++ rightQuote + (TeX str) -> return $ stringToHtml str + (HtmlInline str) -> return $ primHtml str + (Link txt (src,tit)) -> + do linkText <- inlineListToHtml opts txt + return $ if (isPrefixOf "mailto:" src) + then obfuscateLink opts linkText src + else anchor ! ([href src] ++ + if null tit + then [] + else [title tit]) $ + linkText + (Image txt (source,tit)) -> + do alternate <- inlineListToHtml opts txt + let alternate' = renderHtmlFragment alternate + let attributes = [src source, title tit] ++ + if null txt then [] else [alt alternate'] + return $ image ! attributes + -- note: null title included, as in Markdown.pl + (Note contents) -> do notes <- get + let number = (length notes) + 1 + let ref = show number + htmlContents <- blockListToNote opts ref contents + modify (htmlContents:) -- push contents onto front of notes + return $ anchor ! [href ("#fn" ++ ref), + theclass "footnoteRef", + identifier ("fnref" ++ ref)] << sup << ref + +blockListToNote :: WriterOptions -> String -> [Block] -> State Notes Html +blockListToNote opts ref blocks = + do contents <- blockListToHtml opts blocks + let backlink = anchor ! [href ("#fnref" ++ ref), theclass "footnoteBacklink", + title ("Jump back to footnote " ++ ref)] $ + (primHtmlChar "#8617") + return $ li ! [identifier ("fn" ++ ref)] $ contents +++ backlink diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index de1b7e207..8a9cacba3 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -33,31 +33,28 @@ module Text.Pandoc.Writers.LaTeX ( import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Printf ( printf ) -import List ( (\\) ) +import Data.List ( (\\) ) -- | Convert Pandoc to LaTeX. writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options (Pandoc meta blocks) = - let notes = filter isNoteBlock blocks in -- assumes all notes at outer level let body = (writerIncludeBefore options) ++ - (concatMap (blockToLaTeX notes) - (replaceReferenceLinks blocks)) ++ + (concatMap blockToLaTeX blocks) ++ (writerIncludeAfter options) in let head = if writerStandalone options - then latexHeader notes options meta + then latexHeader options meta else "" in let foot = if writerStandalone options then "\n\\end{document}\n" else "" in head ++ body ++ foot -- | Insert bibliographic information into LaTeX header. -latexHeader :: [Block] -- ^ List of note blocks to use in resolving note refs - -> WriterOptions -- ^ Options, including LaTeX header +latexHeader :: WriterOptions -- ^ Options, including LaTeX header -> Meta -- ^ Meta with bibliographic information -> String -latexHeader notes options (Meta title authors date) = +latexHeader options (Meta title authors date) = let titletext = if null title then "" - else "\\title{" ++ inlineListToLaTeX notes title ++ "}\n" + else "\\title{" ++ inlineListToLaTeX title ++ "}\n" authorstext = if null authors then "" else "\\author{" ++ (joinWithSep "\\\\" @@ -99,31 +96,28 @@ deVerb ((Code str):rest) = (Str str):(deVerb rest) deVerb (other:rest) = other:(deVerb rest) -- | Convert Pandoc block element to LaTeX. -blockToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs - -> Block -- ^ Block to convert +blockToLaTeX :: Block -- ^ Block to convert -> String -blockToLaTeX notes Null = "" -blockToLaTeX notes (Plain lst) = inlineListToLaTeX notes lst ++ "\n" -blockToLaTeX notes (Para lst) = (inlineListToLaTeX notes lst) ++ "\n\n" -blockToLaTeX notes (BlockQuote lst) = "\\begin{quote}\n" ++ - (concatMap (blockToLaTeX notes) lst) ++ "\\end{quote}\n" -blockToLaTeX notes (Note ref lst) = "" -blockToLaTeX notes (Key _ _) = "" -blockToLaTeX notes (CodeBlock str) = "\\begin{verbatim}\n" ++ str ++ +blockToLaTeX Null = "" +blockToLaTeX (Plain lst) = inlineListToLaTeX lst ++ "\n" +blockToLaTeX (Para lst) = (inlineListToLaTeX lst) ++ "\n\n" +blockToLaTeX (BlockQuote lst) = "\\begin{quote}\n" ++ + (concatMap blockToLaTeX lst) ++ "\\end{quote}\n" +blockToLaTeX (CodeBlock str) = "\\begin{verbatim}\n" ++ str ++ "\n\\end{verbatim}\n" -blockToLaTeX notes (RawHtml str) = "" -blockToLaTeX notes (BulletList lst) = "\\begin{itemize}\n" ++ - (concatMap (listItemToLaTeX notes) lst) ++ "\\end{itemize}\n" -blockToLaTeX notes (OrderedList lst) = "\\begin{enumerate}\n" ++ - (concatMap (listItemToLaTeX notes) lst) ++ "\\end{enumerate}\n" -blockToLaTeX notes HorizontalRule = +blockToLaTeX (RawHtml str) = "" +blockToLaTeX (BulletList lst) = "\\begin{itemize}\n" ++ + (concatMap listItemToLaTeX lst) ++ "\\end{itemize}\n" +blockToLaTeX (OrderedList lst) = "\\begin{enumerate}\n" ++ + (concatMap listItemToLaTeX lst) ++ "\\end{enumerate}\n" +blockToLaTeX HorizontalRule = "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n" -blockToLaTeX notes (Header level lst) = +blockToLaTeX (Header level lst) = if (level > 0) && (level <= 3) then "\\" ++ (concat (replicate (level - 1) "sub")) ++ "section{" ++ - (inlineListToLaTeX notes (deVerb lst)) ++ "}\n\n" - else (inlineListToLaTeX notes lst) ++ "\n\n" -blockToLaTeX notes (Table caption aligns widths heads rows) = + (inlineListToLaTeX (deVerb lst)) ++ "}\n\n" + else (inlineListToLaTeX lst) ++ "\n\n" +blockToLaTeX (Table caption aligns widths heads rows) = let colWidths = map printDecimal widths colDescriptors = concat $ zipWith (\width align -> ">{\\PBS" ++ @@ -135,11 +129,11 @@ blockToLaTeX notes (Table caption aligns widths heads rows) = "\\hspace{0pt}}p{" ++ width ++ "\\textwidth}") colWidths aligns - headers = tableRowToLaTeX notes heads - captionText = inlineListToLaTeX notes caption + headers = tableRowToLaTeX heads + captionText = inlineListToLaTeX caption tableBody = "\\begin{tabular}{" ++ colDescriptors ++ "}\n" ++ headers ++ "\\hline\n" ++ - (concatMap (tableRowToLaTeX notes) rows) ++ + (concatMap tableRowToLaTeX rows) ++ "\\end{tabular}\n" centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n" in if null captionText @@ -151,19 +145,18 @@ blockToLaTeX notes (Table caption aligns widths heads rows) = printDecimal :: Float -> String printDecimal = printf "%.2f" -tableColumnWidths notes cols = map (length . (concatMap (blockToLaTeX notes))) cols +tableColumnWidths cols = map (length . (concatMap blockToLaTeX)) cols -tableRowToLaTeX notes cols = joinWithSep " & " (map (concatMap (blockToLaTeX notes)) cols) ++ "\\\\\n" +tableRowToLaTeX cols = joinWithSep " & " (map (concatMap blockToLaTeX) cols) ++ "\\\\\n" -listItemToLaTeX notes list = "\\item " ++ - (concatMap (blockToLaTeX notes) list) +listItemToLaTeX list = "\\item " ++ + (concatMap blockToLaTeX list) -- | Convert list of inline elements to LaTeX. -inlineListToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs - -> [Inline] -- ^ Inlines to convert +inlineListToLaTeX :: [Inline] -- ^ Inlines to convert -> String -inlineListToLaTeX notes lst = - concatMap (inlineToLaTeX notes) lst +inlineListToLaTeX lst = + concatMap inlineToLaTeX lst isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True @@ -171,47 +164,35 @@ isQuoted Apostrophe = True isQuoted _ = False -- | Convert inline element to LaTeX -inlineToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs - -> Inline -- ^ Inline to convert +inlineToLaTeX :: Inline -- ^ Inline to convert -> String -inlineToLaTeX notes (Emph lst) = "\\emph{" ++ - (inlineListToLaTeX notes (deVerb lst)) ++ "}" -inlineToLaTeX notes (Strong lst) = "\\textbf{" ++ - (inlineListToLaTeX notes (deVerb lst)) ++ "}" -inlineToLaTeX notes (Code str) = "\\verb" ++ [chr] ++ stuffing ++ [chr] +inlineToLaTeX (Emph lst) = "\\emph{" ++ + (inlineListToLaTeX (deVerb lst)) ++ "}" +inlineToLaTeX (Strong lst) = "\\textbf{" ++ + (inlineListToLaTeX (deVerb lst)) ++ "}" +inlineToLaTeX (Code str) = "\\verb" ++ [chr] ++ stuffing ++ [chr] where stuffing = str chr = ((enumFromTo '!' '~') \\ stuffing) !! 0 -inlineToLaTeX notes (Quoted SingleQuote lst) = +inlineToLaTeX (Quoted SingleQuote lst) = let s1 = if (not (null lst)) && (isQuoted (head lst)) then "\\," else "" s2 = if (not (null lst)) && (isQuoted (last lst)) then "\\," else "" in - "`" ++ s1 ++ inlineListToLaTeX notes lst ++ s2 ++ "'" -inlineToLaTeX notes (Quoted DoubleQuote lst) = + "`" ++ s1 ++ inlineListToLaTeX lst ++ s2 ++ "'" +inlineToLaTeX (Quoted DoubleQuote lst) = let s1 = if (not (null lst)) && (isQuoted (head lst)) then "\\," else "" s2 = if (not (null lst)) && (isQuoted (last lst)) then "\\," else "" in - "``" ++ s1 ++ inlineListToLaTeX notes lst ++ s2 ++ "''" -inlineToLaTeX notes Apostrophe = "'" -inlineToLaTeX notes EmDash = "---" -inlineToLaTeX notes EnDash = "--" -inlineToLaTeX notes Ellipses = "\\ldots{}" -inlineToLaTeX notes (Str str) = stringToLaTeX str -inlineToLaTeX notes (TeX str) = str -inlineToLaTeX notes (HtmlInline str) = "" -inlineToLaTeX notes (LineBreak) = "\\\\\n" -inlineToLaTeX notes Space = " " -inlineToLaTeX notes (Link text (Src src tit)) = - "\\href{" ++ src ++ "}{" ++ (inlineListToLaTeX notes (deVerb text)) ++ "}" -inlineToLaTeX notes (Link text (Ref ref)) = "[" ++ - (inlineListToLaTeX notes text) ++ "][" ++ (inlineListToLaTeX notes ref) ++ - "]" -- this is what markdown does, for better or worse -inlineToLaTeX notes (Image alternate (Src source tit)) = + "``" ++ s1 ++ inlineListToLaTeX lst ++ s2 ++ "''" +inlineToLaTeX Apostrophe = "'" +inlineToLaTeX EmDash = "---" +inlineToLaTeX EnDash = "--" +inlineToLaTeX Ellipses = "\\ldots{}" +inlineToLaTeX (Str str) = stringToLaTeX str +inlineToLaTeX (TeX str) = str +inlineToLaTeX (HtmlInline str) = "" +inlineToLaTeX (LineBreak) = "\\\\\n" +inlineToLaTeX Space = " " +inlineToLaTeX (Link text (src, tit)) = + "\\href{" ++ src ++ "}{" ++ (inlineListToLaTeX (deVerb text)) ++ "}" +inlineToLaTeX (Image alternate (source, tit)) = "\\includegraphics{" ++ source ++ "}" -inlineToLaTeX notes (Image alternate (Ref ref)) = - "![" ++ (inlineListToLaTeX notes alternate) ++ "][" ++ - (inlineListToLaTeX notes ref) ++ "]" -inlineToLaTeX [] (NoteRef ref) = "" -inlineToLaTeX ((Note firstref firstblocks):rest) (NoteRef ref) = - if (firstref == ref) - then "\\footnote{" ++ (stripTrailingNewlines - (concatMap (blockToLaTeX rest) firstblocks)) ++ "}" - else inlineToLaTeX rest (NoteRef ref) - +inlineToLaTeX (Note contents) = + "\\footnote{" ++ (stripTrailingNewlines $ concatMap blockToLaTeX contents) ++ "}" diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 687f6e6c4..8f1b3cea9 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -34,19 +34,71 @@ module Text.Pandoc.Writers.Markdown ( ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Data.List ( group, isPrefixOf, drop ) +import Data.List ( group, isPrefixOf, drop, find ) import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Control.Monad.State + +type Notes = [[Block]] +type Refs = KeyTable +type WriterState = (Notes, Refs) -- | Convert Pandoc to Markdown. writeMarkdown :: WriterOptions -> Pandoc -> String -writeMarkdown options (Pandoc meta blocks) = - let body = text (writerIncludeBefore options) <> - vcat (map (blockToMarkdown (writerTabStop options)) - (formatKeys blocks)) $$ text (writerIncludeAfter options) in - let head = if (writerStandalone options) - then ((metaToMarkdown meta) $$ text (writerHeader options)) - else empty in - render $ head <> body +writeMarkdown opts document = + render $ evalState (pandocToMarkdown opts document) ([],[]) + +-- | Return markdown representation of document. +pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc +pandocToMarkdown opts (Pandoc meta blocks) = do + let before = writerIncludeBefore opts + let after = writerIncludeAfter opts + before' = if null before then empty else text before + after' = if null after then empty else text after + metaBlock <- metaToMarkdown opts meta + let head = if (writerStandalone opts) + then metaBlock $$ text (writerHeader opts) + else empty + body <- blockListToMarkdown opts blocks + (notes, _) <- get + notes' <- notesToMarkdown opts (reverse notes) + (_, refs) <- get -- note that the notes may contain refs + refs' <- keyTableToMarkdown opts (reverse refs) + return $ head <> (before' $$ body <> text "\n" $$ + notes' <> text "\n" $$ refs' $$ after') + +-- | Return markdown representation of reference key table. +keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc +keyTableToMarkdown opts refs = + mapM (keyToMarkdown opts) refs >>= (return . vcat) + +-- | Return markdown representation of a reference key. +keyToMarkdown :: WriterOptions + -> ([Inline], (String, String)) + -> State WriterState Doc +keyToMarkdown opts (label, (src, tit)) = do + label' <- inlineListToMarkdown opts label + let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\"" + return $ text " " <> char '[' <> label' <> char ']' <> text ": " <> + text src <> tit' + +-- | Return markdown representation of notes. +notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToMarkdown opts notes = + mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>= + (return . vcat) + +-- | Return markdown representation of a note. +noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToMarkdown opts num note = do + contents <- blockListToMarkdown opts note + let marker = text "[^" <> text (show num) <> text "]:" + return $ hang marker (writerTabStop opts) contents + +wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc +wrappedMarkdown opts sect = do + let chunks = splitBy Space sect + chunks' <- mapM (inlineListToMarkdown opts) chunks + return $ fsep chunks' -- | Escape nonbreaking space as entity escapeNbsp "" = "" @@ -59,155 +111,163 @@ escapeNbsp str = escapeString :: String -> String escapeString = backslashEscape "`<\\*_^" . escapeNbsp --- | Take list of inline elements and return wrapped doc. -wrappedMarkdown :: [Inline] -> Doc -wrappedMarkdown lst = - let wrapSection sec = fsep $ map inlineListToMarkdown $ (splitBy Space sec) - wrappedSecs = map wrapSection $ splitBy LineBreak lst - wrappedSecs' = foldr (\s rest -> if not (null rest) - then (s <> text " "):rest - else s:rest) [] wrappedSecs in - vcat wrappedSecs' - --- | Insert Blank block between key and non-key -formatKeys :: [Block] -> [Block] -formatKeys [] = [] -formatKeys [x] = [x] -formatKeys ((Key x1 y1):(Key x2 y2):rest) = - (Key x1 y1):(formatKeys ((Key x2 y2):rest)) -formatKeys ((Key x1 y1):rest) = (Key x1 y1):(Plain [Str ""]):(formatKeys rest) -formatKeys (x:(Key x1 y1):rest) = x:(Plain [Str ""]):(formatKeys ((Key x1 y1):rest)) -formatKeys (x:rest) = x:(formatKeys rest) - -- | Convert bibliographic information into Markdown header. -metaToMarkdown :: Meta -> Doc -metaToMarkdown (Meta [] [] "") = empty -metaToMarkdown (Meta title [] "") = (titleToMarkdown title) <> (text "\n") -metaToMarkdown (Meta title authors "") = (titleToMarkdown title) <> - (text "\n") <> (authorsToMarkdown authors) <> (text "\n") -metaToMarkdown (Meta title authors date) = (titleToMarkdown title) <> - (text "\n") <> (authorsToMarkdown authors) <> (text "\n") <> - (dateToMarkdown date) <> (text "\n") +metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc +metaToMarkdown opts (Meta title authors date) = do + title' <- titleToMarkdown opts title + authors' <- authorsToMarkdown authors + date' <- dateToMarkdown date + return $ title' <> authors' <> date' -titleToMarkdown :: [Inline] -> Doc -titleToMarkdown lst = text "% " <> (inlineListToMarkdown lst) +titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc +titleToMarkdown opts [] = return empty +titleToMarkdown opts lst = do + contents <- inlineListToMarkdown opts lst + return $ text "% " <> contents <> text "\n" -authorsToMarkdown :: [String] -> Doc -authorsToMarkdown lst = - text "% " <> text (joinWithSep ", " (map escapeString lst)) +authorsToMarkdown :: [String] -> State WriterState Doc +authorsToMarkdown [] = return empty +authorsToMarkdown lst = return $ + text "% " <> text (joinWithSep ", " (map escapeString lst)) <> text "\n" -dateToMarkdown :: String -> Doc -dateToMarkdown str = text "% " <> text (escapeString str) +dateToMarkdown :: String -> State WriterState Doc +dateToMarkdown [] = return empty +dateToMarkdown str = return $ text "% " <> text (escapeString str) <> text "\n" -- | Convert Pandoc block element to markdown. -blockToMarkdown :: Int -- ^ Tab stop - -> Block -- ^ Block element - -> Doc -blockToMarkdown tabStop Null = empty -blockToMarkdown tabStop (Plain lst) = wrappedMarkdown lst -blockToMarkdown tabStop (Para lst) = (wrappedMarkdown lst) <> (text "\n") -blockToMarkdown tabStop (BlockQuote lst) = - (vcat $ map (\line -> (text "> ") <> (text line)) $ lines $ render $ vcat $ - map (blockToMarkdown tabStop) lst) <> (text "\n") -blockToMarkdown tabStop (Note ref lst) = - let lns = lines $ render $ vcat $ map (blockToMarkdown tabStop) lst in - if null lns - then empty - else let first = head lns - rest = tail lns in - text ("[^" ++ (escapeString ref) ++ "]: ") <> (text first) $$ - (vcat $ map (\line -> (text " ") <> (text line)) rest) <> - text "\n" -blockToMarkdown tabStop (Key txt (Src src tit)) = - text " " <> char '[' <> inlineListToMarkdown txt <> char ']' <> - text ": " <> text src <> - if tit /= "" then text (" \"" ++ tit ++ "\"") else empty -blockToMarkdown tabStop (CodeBlock str) = - (nest tabStop $ vcat $ map text (lines str)) <> text "\n" -blockToMarkdown tabStop (RawHtml str) = text str -blockToMarkdown tabStop (BulletList lst) = - vcat (map (bulletListItemToMarkdown tabStop) lst) <> text "\n" -blockToMarkdown tabStop (OrderedList lst) = - vcat (zipWith (orderedListItemToMarkdown tabStop) - (enumFromTo 1 (length lst)) lst) <> text "\n" -blockToMarkdown tabStop HorizontalRule = text "\n* * * * *\n" -blockToMarkdown tabStop (Header level lst) = text ((replicate level '#') ++ - " ") <> (inlineListToMarkdown lst) <> (text "\n") -blockToMarkdown tabStop (Table caption _ _ headers rows) = - blockToMarkdown tabStop (Para [Str "pandoc: TABLE unsupported in Markdown writer"]) - - -bulletListItemToMarkdown tabStop list = - hang (text "- ") tabStop (vcat (map (blockToMarkdown tabStop) list)) +blockToMarkdown :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState Doc +blockToMarkdown opts Null = return empty +blockToMarkdown opts (Plain inlines) = wrappedMarkdown opts inlines +blockToMarkdown opts (Para inlines) = do + contents <- wrappedMarkdown opts inlines + return $ contents <> text "\n" +blockToMarkdown opts (RawHtml str) = return $ text str +blockToMarkdown opts HorizontalRule = return $ text "\n* * * * *\n" +blockToMarkdown opts (Header level inlines) = do + contents <- inlineListToMarkdown opts inlines + return $ text ((replicate level '#') ++ " ") <> contents <> text "\n" +blockToMarkdown opts (CodeBlock str) = return $ + (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" +blockToMarkdown opts (BlockQuote blocks) = do + contents <- blockListToMarkdown opts blocks + let quotedContents = unlines $ map ("> " ++) $ lines $ render contents + return $ text quotedContents +blockToMarkdown opts (Table caption _ _ headers rows) = blockToMarkdown opts + (Para [Str "pandoc: TABLE unsupported in Markdown writer"]) +blockToMarkdown opts (BulletList items) = do + contents <- mapM (bulletListItemToMarkdown opts) items + return $ (vcat contents) <> text "\n" +blockToMarkdown opts (OrderedList items) = do + contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ + zip [1..] items + return $ (vcat contents) <> text "\n" + +-- | Convert bullet list item (list of blocks) to markdown. +bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToMarkdown opts items = do + contents <- blockListToMarkdown opts items + return $ hang (text "- ") (writerTabStop opts) contents -- | Convert ordered list item (a list of blocks) to markdown. -orderedListItemToMarkdown :: Int -- ^ tab stop - -> Int -- ^ ordinal number of list item - -> [Block] -- ^ list item (list of blocks) - -> Doc -orderedListItemToMarkdown tabStop num list = - hang (text ((show num) ++ "." ++ spacer)) tabStop - (vcat (map (blockToMarkdown tabStop) list)) - where spacer = if (num < 10) then " " else "" +orderedListItemToMarkdown :: WriterOptions -- ^ options + -> Int -- ^ ordinal number of list item + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToMarkdown opts num items = do + contents <- blockListToMarkdown opts items + let spacer = if (num < 10) then " " else "" + return $ hang (text ((show num) ++ "." ++ spacer)) (writerTabStop opts) + contents + +-- | Convert list of Pandoc block elements to markdown. +blockListToMarkdown :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToMarkdown opts blocks = + mapM (blockToMarkdown opts) blocks >>= (return . vcat) + +-- | Get reference for target; if none exists, create unique one and return. +-- Prefer label if possible; otherwise, generate a unique key. +getReference :: [Inline] -> Target -> State WriterState [Inline] +getReference label (src, tit) = do + (_,refs) <- get + case find ((== (src, tit)) . snd) refs of + Just (ref, _) -> return ref + Nothing -> do + let label' = case find ((== label) . fst) refs of + Just _ -> -- label is used; generate numerical label + case find (\n -> not (any (== [Str (show n)]) + (map fst refs))) [1..10000] of + Just x -> [Str (show x)] + Nothing -> error "no unique label" + Nothing -> label + modify (\(notes, refs) -> (notes, (label', (src,tit)):refs)) + return label' -- | Convert list of Pandoc inline elements to markdown. -inlineListToMarkdown :: [Inline] -> Doc -inlineListToMarkdown lst = hcat $ map inlineToMarkdown lst +inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToMarkdown opts lst = mapM (inlineToMarkdown opts) lst >>= (return . hcat) -- | Convert Pandoc inline element to markdown. -inlineToMarkdown :: Inline -> Doc -inlineToMarkdown (Emph lst) = text "*" <> - (inlineListToMarkdown lst) <> text "*" -inlineToMarkdown (Strong lst) = text "**" <> - (inlineListToMarkdown lst) <> text "**" -inlineToMarkdown (Quoted SingleQuote lst) = char '\'' <> - (inlineListToMarkdown lst) <> char '\'' -inlineToMarkdown (Quoted DoubleQuote lst) = char '"' <> - (inlineListToMarkdown lst) <> char '"' -inlineToMarkdown EmDash = text "--" -inlineToMarkdown EnDash = char '-' -inlineToMarkdown Apostrophe = char '\'' -inlineToMarkdown Ellipses = text "..." -inlineToMarkdown (Code str) = +inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc +inlineToMarkdown opts (Emph lst) = do + contents <- inlineListToMarkdown opts lst + return $ text "*" <> contents <> text "*" +inlineToMarkdown opts (Strong lst) = do + contents <- inlineListToMarkdown opts lst + return $ text "**" <> contents <> text "**" +inlineToMarkdown opts (Quoted SingleQuote lst) = do + contents <- inlineListToMarkdown opts lst + return $ char '\'' <> contents <> char '\'' +inlineToMarkdown opts (Quoted DoubleQuote lst) = do + contents <- inlineListToMarkdown opts lst + return $ char '"' <> contents <> char '"' +inlineToMarkdown opts EmDash = return $ text "--" +inlineToMarkdown opts EnDash = return $ char '-' +inlineToMarkdown opts Apostrophe = return $ char '\'' +inlineToMarkdown opts Ellipses = return $ text "..." +inlineToMarkdown opts (Code str) = let tickGroups = filter (\s -> '`' `elem` s) $ group str longest = if null tickGroups then 0 else maximum $ map length tickGroups marker = replicate (longest + 1) '`' - spacer = if (longest == 0) then "" else " " in - text (marker ++ spacer ++ str ++ spacer ++ marker) -inlineToMarkdown (Str str) = text $ escapeString str -inlineToMarkdown (TeX str) = text str -inlineToMarkdown (HtmlInline str) = text str -inlineToMarkdown (LineBreak) = text " \n" -inlineToMarkdown Space = char ' ' -inlineToMarkdown (Link txt (Src src tit)) = - let linktext = if (null txt) || (txt == [Str ""]) - then text "link" - else inlineListToMarkdown txt - linktitle = if null tit - then empty - else text (" \"" ++ tit ++ "\"") - srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src in - if (null tit) && (txt == [Str srcSuffix]) - then char '<' <> text srcSuffix <> char '>' - else char '[' <> linktext <> char ']' <> char '(' <> text src <> - linktitle <> char ')' -inlineToMarkdown (Link txt (Ref ref)) = - let first = char '[' <> inlineListToMarkdown txt <> char ']' - second = if (txt == ref) - then text "[]" - else char '[' <> inlineListToMarkdown ref <> char ']' in - first <> second -inlineToMarkdown (Image alternate (Src source tit)) = - let alt = if (null alternate) || (alternate == [Str ""]) - then text "image" - else inlineListToMarkdown alternate in - char '!' <> char '[' <> alt <> char ']' <> char '(' <> text source <> - (if tit /= "" - then text (" \"" ++ tit ++ "\"") - else empty) <> char ')' -inlineToMarkdown (Image alternate (Ref ref)) = - char '!' <> inlineToMarkdown (Link alternate (Ref ref)) -inlineToMarkdown (NoteRef ref) = - text "[^" <> text (escapeString ref) <> char ']' + spacer = if (longest == 0) then "" else " " in + return $ text (marker ++ spacer ++ str ++ spacer ++ marker) +inlineToMarkdown opts (Str str) = return $ text $ escapeString str +inlineToMarkdown opts (TeX str) = return $ text str +inlineToMarkdown opts (HtmlInline str) = return $ text str +inlineToMarkdown opts (LineBreak) = return $ text " \n" +inlineToMarkdown opts Space = return $ char ' ' +inlineToMarkdown opts (Link txt (src, tit)) = do + linktext <- inlineListToMarkdown opts txt + let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" + let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + let useRefLinks = writerReferenceLinks opts + let useAuto = null tit && txt == [Str srcSuffix] + ref <- if useRefLinks then getReference txt (src, tit) else return [] + reftext <- inlineListToMarkdown opts ref + return $ if useAuto + then char '<' <> text srcSuffix <> char '>' + else if useRefLinks + then let first = char '[' <> linktext <> char ']' + second = if txt == ref + then text "[]" + else char '[' <> reftext <> char ']' + in first <> second + else char '[' <> linktext <> char ']' <> + char '(' <> text src <> linktitle <> char ')' +inlineToMarkdown opts (Image alternate (source, tit)) = do + let txt = if (null alternate) || (alternate == [Str ""]) || + (alternate == [Str source]) -- to prevent autolinks + then [Str "image"] + else alternate + linkPart <- inlineToMarkdown opts (Link txt (source, tit)) + return $ char '!' <> linkPart +inlineToMarkdown opts (Note contents) = do + modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state + (notes, _) <- get + let ref = show $ (length notes) + return $ text "[^" <> text ref <> char ']' diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 27d1a596a..a00ab1cc6 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -30,204 +30,245 @@ Conversion of 'Pandoc' documents to reStructuredText. reStructuredText: <http://docutils.sourceforge.net/rst.html> -} module Text.Pandoc.Writers.RST ( - writeRST - ) where + writeRST + ) where import Text.Pandoc.Definition -import Text.Pandoc.Shared -import List ( nubBy ) +import Text.Pandoc.Shared +import Data.List ( group, isPrefixOf, drop, find ) import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Control.Monad.State --- | Convert Pandoc to reStructuredText. +type Notes = [[Block]] +type Refs = KeyTable +type WriterState = (Notes, Refs, Refs) -- first Refs is links, second pictures + +-- | Convert Pandoc to RST. writeRST :: WriterOptions -> Pandoc -> String -writeRST options (Pandoc meta blocks) = - let (main, refs) = unzip $ map (blockToRST (writerTabStop options)) - (reformatBlocks $ replaceReferenceLinks blocks) - top = if (writerStandalone options) - then (metaToRST meta) $$ text (writerHeader options) - else empty in - -- remove duplicate keys - let refs' = nubBy (\x y -> (render x) == (render y)) refs in - let body = text (writerIncludeBefore options) <> - vcat main $$ text (writerIncludeAfter options) in - render $ top <> body $$ vcat refs' $$ text "\n" - --- | Escape special RST characters. +writeRST opts document = + render $ evalState (pandocToRST opts document) ([],[],[]) + +-- | Return RST representation of document. +pandocToRST :: WriterOptions -> Pandoc -> State WriterState Doc +pandocToRST opts (Pandoc meta blocks) = do + let before = writerIncludeBefore opts + let after = writerIncludeAfter opts + before' = if null before then empty else text before + after' = if null after then empty else text after + metaBlock <- metaToRST opts meta + let head = if (writerStandalone opts) + then metaBlock $$ text (writerHeader opts) + else empty + body <- blockListToRST opts blocks + (notes, _, _) <- get + notes' <- notesToRST opts (reverse notes) + (_, refs, pics) <- get -- note that the notes may contain refs + refs' <- keyTableToRST opts (reverse refs) + pics' <- pictTableToRST opts (reverse pics) + return $ head <> (before' $$ body $$ notes' <> text "\n" $$ refs' $$ + pics' $$ after') + +-- | Return RST representation of reference key table. +keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc +keyTableToRST opts refs = + mapM (keyToRST opts) refs >>= (return . vcat) + +-- | Return RST representation of a reference key. +keyToRST :: WriterOptions + -> ([Inline], (String, String)) + -> State WriterState Doc +keyToRST opts (label, (src, tit)) = do + label' <- inlineListToRST opts label + return $ text ".. _" <> label' <> text ": " <> text src + +-- | Return RST representation of notes. +notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToRST opts notes = + mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>= + (return . vcat) + +-- | Return RST representation of a note. +noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToRST opts num note = do + contents <- blockListToRST opts note + let marker = text ".. [" <> text (show num) <> text "] " + return $ hang marker 3 contents + +-- | Return RST representation of picture reference table. +pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc +pictTableToRST opts refs = + mapM (pictToRST opts) refs >>= (return . vcat) + +-- | Return RST representation of a picture substitution reference. +pictToRST :: WriterOptions + -> ([Inline], (String, String)) + -> State WriterState Doc +pictToRST opts (label, (src, _)) = do + label' <- inlineListToRST opts label + return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <> + text src + +-- | Take list of inline elements and return wrapped doc. +wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc +wrappedRST opts inlines = + mapM (wrappedRSTSection opts) (splitBy LineBreak inlines) >>= + (return . vcat) + +wrappedRSTSection :: WriterOptions -> [Inline] -> State WriterState Doc +wrappedRSTSection opts sect = do + let chunks = splitBy Space sect + chunks' <- mapM (inlineListToRST opts) chunks + return $ fsep chunks' + +-- | Escape special characters for RST. escapeString :: String -> String escapeString = backslashEscape "`\\|*_" --- | Convert list of inline elements into one 'Doc' of wrapped text --- and another containing references. -wrappedRST :: [Inline] -> (Doc, Doc) -wrappedRST lst = - let wrap_section sec = fsep $ map (fst . inlineListToRST) $ - (splitBy Space sec) in - ((vcat $ map wrap_section $ (splitBy LineBreak lst)), - vcat $ map (snd . inlineToRST) lst) - --- | Remove reference keys, and make sure there are blanks before each list. -reformatBlocks :: [Block] -> [Block] -reformatBlocks [] = [] -reformatBlocks ((Plain x):(OrderedList y):rest) = - (Para x):(reformatBlocks ((OrderedList y):rest)) -reformatBlocks ((Plain x):(BulletList y):rest) = - (Para x):(reformatBlocks ((BulletList y):rest)) -reformatBlocks ((OrderedList x):rest) = - (OrderedList (map reformatBlocks x)):(reformatBlocks rest) -reformatBlocks ((BulletList x):rest) = - (BulletList (map reformatBlocks x)):(reformatBlocks rest) -reformatBlocks ((BlockQuote x):rest) = - (BlockQuote (reformatBlocks x)):(reformatBlocks rest) -reformatBlocks ((Note ref x):rest) = - (Note ref (reformatBlocks x)):(reformatBlocks rest) -reformatBlocks ((Key x1 y1):rest) = reformatBlocks rest -reformatBlocks (x:rest) = x:(reformatBlocks rest) - --- | Convert bibliographic information to 'Doc'. -metaToRST :: Meta -> Doc -metaToRST (Meta title authors date) = - (titleToRST title) <> (authorsToRST authors) <> (dateToRST date) - --- | Convert title to 'Doc'. -titleToRST :: [Inline] -> Doc -titleToRST [] = empty -titleToRST lst = - let title = fst $ inlineListToRST lst in - let titleLength = length $ render title in - let border = text (replicate titleLength '=') in - border <> char '\n' <> title <> char '\n' <> border <> text "\n\n" - --- | Convert author list to 'Doc'. -authorsToRST :: [String] -> Doc -authorsToRST [] = empty -authorsToRST (first:rest) = text ":Author: " <> text first <> - char '\n' <> (authorsToRST rest) - --- | Convert date to 'Doc'. -dateToRST :: String -> Doc -dateToRST [] = empty -dateToRST str = text ":Date: " <> text (escapeString str) <> char '\n' - --- | Convert Pandoc block element to a 'Doc' containing the main text and --- another one containing any references. -blockToRST :: Int -- ^ tab stop - -> Block -- ^ block element to convert - -> (Doc, Doc) -- ^ first element is text, second is references for end of file -blockToRST tabStop Null = (empty, empty) -blockToRST tabStop (Plain lst) = wrappedRST lst -blockToRST tabStop (Para [TeX str]) = -- raw latex block +-- | Convert bibliographic information into RST header. +metaToRST :: WriterOptions -> Meta -> State WriterState Doc +metaToRST opts (Meta title authors date) = do + title' <- titleToRST opts title + authors' <- authorsToRST authors + date' <- dateToRST date + return $ title' <> authors' <> date' + +titleToRST :: WriterOptions -> [Inline] -> State WriterState Doc +titleToRST opts [] = return empty +titleToRST opts lst = do + contents <- inlineListToRST opts lst + let titleLength = length $ render contents + let border = text (replicate titleLength '=') + return $ border <> char '\n' <> contents <> char '\n' <> border <> text "\n\n" + +authorsToRST :: [String] -> State WriterState Doc +authorsToRST [] = return empty +authorsToRST (first:rest) = do + rest' <- authorsToRST rest + return $ text ":Author: " <> text first <> char '\n' <> rest' + +dateToRST :: String -> State WriterState Doc +dateToRST [] = return empty +dateToRST str = return $ text ":Date: " <> text (escapeString str) <> char '\n' + +-- | Convert Pandoc block element to RST. +blockToRST :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState Doc +blockToRST opts Null = return empty +blockToRST opts (Plain inlines) = wrappedRST opts inlines +blockToRST opts (Para [TeX str]) = let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in - (hang (text "\n.. raw:: latex\n") 3 (vcat $ map text (lines str')), empty) -blockToRST tabStop (Para lst) = ( (fst $ wrappedRST lst) <> (text "\n"), - snd $ wrappedRST lst ) -blockToRST tabStop (BlockQuote lst) = - let (main, refs) = unzip $ map (blockToRST tabStop) lst in - ((nest tabStop $ vcat $ main) <> text "\n", vcat refs) -blockToRST tabStop (Note ref blocks) = - let (main, refs) = unzip $ map (blockToRST tabStop) blocks in - ((hang (text ".. [" <> text (escapeString ref) <> text "] ") 3 (vcat main)), - vcat refs) -blockToRST tabStop (Key txt (Src src tit)) = - (text "ERROR - KEY FOUND", empty) -- shouldn't have a key here -blockToRST tabStop (CodeBlock str) = (hang (text "::\n") tabStop - (vcat $ map text (lines ('\n':(str ++ "\n\n")))), empty) -blockToRST tabStop (RawHtml str) = + return $ hang (text "\n.. raw:: latex\n") 3 (vcat $ map text (lines str')) +blockToRST opts (Para inlines) = do + contents <- wrappedRST opts inlines + return $ contents <> text "\n" +blockToRST opts (RawHtml str) = let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in - (hang (text "\n.. raw:: html\n") 3 (vcat $ map text (lines str')), empty) -blockToRST tabStop (BulletList lst) = - let (main, refs) = unzip $ map (bulletListItemToRST tabStop) lst in - (vcat main <> text "\n", vcat refs) -blockToRST tabStop (OrderedList lst) = - let (main, refs) = unzip $ zipWith (orderedListItemToRST tabStop) - (enumFromTo 1 (length lst)) lst in - (vcat main <> text "\n", vcat refs) -blockToRST tabStop HorizontalRule = (text "--------------\n", empty) -blockToRST tabStop (Header level lst) = - let (headerText, refs) = inlineListToRST lst in - let headerLength = length $ render headerText in - let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1) in - let border = text $ replicate headerLength headerChar in - (headerText <> char '\n' <> border <> char '\n', refs) -blockToRST tabStop (Table caption _ _ headers rows) = - blockToRST tabStop (Para [Str "pandoc: TABLE unsupported in RST writer"]) - - --- | Convert bullet list item (list of blocks) to reStructuredText. --- Returns a pair of 'Doc', the first the main text, the second references -bulletListItemToRST :: Int -- ^ tab stop - -> [Block] -- ^ list item (list of blocks) - -> (Doc, Doc) -bulletListItemToRST tabStop list = - let (main, refs) = unzip $ map (blockToRST tabStop) list in - (hang (text "- ") tabStop (vcat main), (vcat refs)) - --- | Convert an ordered list item (list of blocks) to reStructuredText. --- Returns a pair of 'Doc', the first the main text, the second references -orderedListItemToRST :: Int -- ^ tab stop - -> Int -- ^ ordinal number of list item - -> [Block] -- ^ list item (list of blocks) - -> (Doc, Doc) -orderedListItemToRST tabStop num list = - let (main, refs) = unzip $ map (blockToRST tabStop) list - spacer = if (length (show num) < 2) then " " else "" in - (hang (text ((show num) ++ "." ++ spacer)) tabStop (vcat main), (vcat refs)) - --- | Convert a list of inline elements to reStructuredText. --- Returns a pair of 'Doc', the first the main text, the second references. -inlineListToRST :: [Inline] -> (Doc, Doc) -inlineListToRST lst = let (main, refs) = unzip $ map inlineToRST lst in - (hcat main, hcat refs) - --- | Convert an inline element to reStructuredText. --- Returns a pair of 'Doc', the first the main text, the second references. -inlineToRST :: Inline -> (Doc, Doc) -- second Doc is list of refs for end of file -inlineToRST (Emph lst) = let (main, refs) = inlineListToRST lst in - (text "*" <> main <> text "*", refs) -inlineToRST (Strong lst) = let (main, refs) = inlineListToRST lst in - (text "**" <> main <> text "**", refs) -inlineToRST (Quoted SingleQuote lst) = let (main, refs) = inlineListToRST lst in - (char '\'' <> main <> char '\'', refs) -inlineToRST (Quoted DoubleQuote lst) = let (main, refs) = inlineListToRST lst in - (char '"' <> main <> char '"', refs) -inlineToRST EmDash = (text "--", empty) -inlineToRST EnDash = (char '-', empty) -inlineToRST Apostrophe = (char '\'', empty) -inlineToRST Ellipses = (text "...", empty) -inlineToRST (Code str) = (text $ "``" ++ str ++ "``", empty) -inlineToRST (Str str) = (text $ escapeString str, empty) -inlineToRST (TeX str) = (text str, empty) -inlineToRST (HtmlInline str) = (empty, empty) -inlineToRST (LineBreak) = inlineToRST Space -- RST doesn't have line breaks -inlineToRST Space = (char ' ', empty) --- --- Note: can assume reference links have been replaced where possible --- with explicit links. --- -inlineToRST (Link txt (Src src tit)) = - let (linktext, ref') = if (null txt) || (txt == [Str ""]) - then (text "link", empty) - else inlineListToRST $ normalizeSpaces txt in - let link = char '`' <> linktext <> text "`_" - linktext' = render linktext in - let linktext'' = if (':' `elem` linktext') - then "`" ++ linktext' ++ "`" - else linktext' in - let ref = text ".. _" <> text linktext'' <> text ": " <> text src in - (link, ref' $$ ref) -inlineToRST (Link txt (Ref ref)) = - let (linktext, refs1) = inlineListToRST txt - (reftext, refs2) = inlineListToRST ref in - (char '[' <> linktext <> text "][" <> reftext <> char ']', refs1 $$ refs2) -inlineToRST (Image alternate (Src source tit)) = - let (alt, ref') = if (null alternate) || (alternate == [Str ""]) - then (text "image", empty) - else inlineListToRST $ normalizeSpaces alternate in - let link = char '|' <> alt <> char '|' in - let ref = text ".. " <> link <> text " image:: " <> text source in - (link, ref' $$ ref) --- The following case won't normally occur... -inlineToRST (Image alternate (Ref ref)) = - let (alttext, refs1) = inlineListToRST alternate - (reftext, refs2) = inlineListToRST ref in - (char '|' <> alttext <> char '|', refs1 $$ refs2) -inlineToRST (NoteRef ref) = - (text " [" <> text (escapeString ref) <> char ']' <> char '_', empty) + return $ hang (text "\n.. raw:: html\n") 3 (vcat $ map text (lines str')) +blockToRST opts HorizontalRule = return $ text "--------------\n" +blockToRST opts (Header level inlines) = do + contents <- inlineListToRST opts inlines + let headerLength = length $ render contents + let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1) + let border = text $ replicate headerLength headerChar + return $ contents <> char '\n' <> border <> char '\n' +blockToRST opts (CodeBlock str) = return $ (text "::\n") $$ text "" $$ + (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" +blockToRST opts (BlockQuote blocks) = do + contents <- blockListToRST opts blocks + return $ (nest (writerTabStop opts) contents) <> text "\n" +blockToRST opts (Table caption _ _ headers rows) = blockToRST opts + (Para [Str "pandoc: TABLE unsupported in RST writer"]) +blockToRST opts (BulletList items) = do + contents <- mapM (bulletListItemToRST opts) items + return $ (vcat contents) <> text "\n" +blockToRST opts (OrderedList items) = do + contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $ + zip [1..] items + return $ (vcat contents) <> text "\n" + +-- | Convert bullet list item (list of blocks) to RST. +bulletListItemToRST :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToRST opts items = do + contents <- blockListToRST opts items + return $ hang (text "- ") (writerTabStop opts) contents + +-- | Convert ordered list item (a list of blocks) to RST. +orderedListItemToRST :: WriterOptions -- ^ options + -> Int -- ^ ordinal number of list item + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToRST opts num items = do + contents <- blockListToRST opts items + let spacer = if (num < 10) then " " else "" + return $ hang (text ((show num) ++ "." ++ spacer)) (writerTabStop opts) + contents + +-- | Convert list of Pandoc block elements to RST. +blockListToRST :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToRST opts blocks = + mapM (blockToRST opts) blocks >>= (return . vcat) + +-- | Convert list of Pandoc inline elements to RST. +inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= (return . hcat) + +-- | Convert Pandoc inline element to RST. +inlineToRST :: WriterOptions -> Inline -> State WriterState Doc +inlineToRST opts (Emph lst) = do + contents <- inlineListToRST opts lst + return $ text "*" <> contents <> text "*" +inlineToRST opts (Strong lst) = do + contents <- inlineListToRST opts lst + return $ text "**" <> contents <> text "**" +inlineToRST opts (Quoted SingleQuote lst) = do + contents <- inlineListToRST opts lst + return $ char '\'' <> contents <> char '\'' +inlineToRST opts (Quoted DoubleQuote lst) = do + contents <- inlineListToRST opts lst + return $ char '"' <> contents <> char '"' +inlineToRST opts EmDash = return $ text "--" +inlineToRST opts EnDash = return $ char '-' +inlineToRST opts Apostrophe = return $ char '\'' +inlineToRST opts Ellipses = return $ text "..." +inlineToRST opts (Code str) = return $ text $ "``" ++ str ++ "``" +inlineToRST opts (Str str) = return $ text $ escapeString str +inlineToRST opts (TeX str) = return $ text str +inlineToRST opts (HtmlInline str) = return empty +inlineToRST opts (LineBreak) = return $ text " " -- RST doesn't have linebreaks +inlineToRST opts Space = return $ char ' ' +inlineToRST opts (Link txt (src, tit)) = do + let useReferenceLinks = writerReferenceLinks opts + let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + let useAuto = null tit && txt == [Str srcSuffix] + (notes, refs, pics) <- get + linktext <- inlineListToRST opts $ normalizeSpaces txt + link <- if useReferenceLinks + then do let refs' = if (txt, (src, tit)) `elem` refs + then refs + else (txt, (src, tit)):refs + put (notes, refs', pics) + return $ char '`' <> linktext <> text "`_" + else return $ char '`' <> linktext <> text " <" <> + text src <> text ">`_" + return link +inlineToRST opts (Image alternate (source, tit)) = do + (notes, refs, pics) <- get + let labelsUsed = map fst pics + let txt = if (null alternate) || (alternate == [Str ""]) || + (alternate `elem` labelsUsed) + then [Str $ "image" ++ show (length refs)] + else alternate + let pics' = if (txt, (source, tit)) `elem` pics + then pics + else (txt, (source, tit)):pics + put (notes, refs, pics') + label <- inlineListToRST opts txt + return $ char '|' <> label <> char '|' +inlineToRST opts (Note contents) = do + -- add to notes in state + modify (\(notes, refs, pics) -> (contents:notes, refs, pics)) + (notes, _, _) <- get + let ref = show $ (length notes) + return $ text " [" <> text ref <> text "]_" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 2dddb857b..769ceeaf5 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -17,7 +17,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : + Module : Text.Pandoc.Writers.RTF Copyright : Copyright (C) 2006 John MacFarlane License : GNU GPL, version 2 or above @@ -27,26 +27,21 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to RTF (rich text format). -} -module Text.Pandoc.Writers.RTF ( - writeRTF - ) where +module Text.Pandoc.Writers.RTF ( writeRTF) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Regex ( matchRegexAll, mkRegex ) -import List ( isSuffixOf ) -import Char ( ord, chr ) +import Data.List ( isSuffixOf ) +import Data.Char ( ord, chr ) -- | Convert Pandoc to a string in rich text format. writeRTF :: WriterOptions -> Pandoc -> String writeRTF options (Pandoc meta blocks) = - -- assumes all notes are at outer level - let notes = filter isNoteBlock blocks in let head = if writerStandalone options - then rtfHeader notes (writerHeader options) meta + then rtfHeader (writerHeader options) meta else "" foot = if writerStandalone options then "\n}\n" else "" - body = (writerIncludeBefore options) ++ (concatMap (blockToRTF notes 0) - (replaceReferenceLinks blocks)) ++ + body = (writerIncludeBefore options) ++ concatMap (blockToRTF 0) blocks ++ (writerIncludeAfter options) in head ++ body ++ foot @@ -120,15 +115,14 @@ orderedMarkers indent = otherwise -> map (\x -> show x ++ ".") $ cycle ['a'..'z'] -- | Returns RTF header. -rtfHeader :: [Block] -- ^ list of note blocks - -> String -- ^ header text +rtfHeader :: String -- ^ header text -> Meta -- ^ bibliographic information -> String -rtfHeader notes headerText (Meta title authors date) = +rtfHeader headerText (Meta title authors date) = let titletext = if null title then "" else rtfPar 0 0 ("\\qc \\b \\fs36 " ++ - inlineListToRTF notes title) + inlineListToRTF title) authorstext = if null authors then "" else rtfPar 0 0 ("\\qc " ++ (joinWithSep "\\" @@ -142,35 +136,32 @@ rtfHeader notes headerText (Meta title authors date) = headerText ++ titletext ++ authorstext ++ datetext ++ spacer -- | Convert Pandoc block element to RTF. -blockToRTF :: [Block] -- ^ list of note blocks - -> Int -- ^ indent level +blockToRTF :: Int -- ^ indent level -> Block -- ^ block to convert -> String -blockToRTF notes indent Null = "" -blockToRTF notes indent (Plain lst) = - rtfCompact indent 0 (inlineListToRTF notes lst) -blockToRTF notes indent (Para lst) = - rtfPar indent 0 (inlineListToRTF notes lst) -blockToRTF notes indent (BlockQuote lst) = - concatMap (blockToRTF notes (indent + indentIncrement)) lst -blockToRTF notes indent (Note ref lst) = "" -- shouldn't be any aftr filtering -blockToRTF notes indent (Key _ _) = "" -blockToRTF notes indent (CodeBlock str) = +blockToRTF indent Null = "" +blockToRTF indent (Plain lst) = + rtfCompact indent 0 (inlineListToRTF lst) +blockToRTF indent (Para lst) = + rtfPar indent 0 (inlineListToRTF lst) +blockToRTF indent (BlockQuote lst) = + concatMap (blockToRTF (indent + indentIncrement)) lst +blockToRTF indent (CodeBlock str) = rtfPar indent 0 ("\\f1 " ++ (codeStringToRTF str)) -blockToRTF notes indent (RawHtml str) = "" -blockToRTF notes indent (BulletList lst) = +blockToRTF indent (RawHtml str) = "" +blockToRTF indent (BulletList lst) = spaceAtEnd $ - concatMap (listItemToRTF notes indent (bulletMarker indent)) lst -blockToRTF notes indent (OrderedList lst) = + concatMap (listItemToRTF indent (bulletMarker indent)) lst +blockToRTF indent (OrderedList lst) = spaceAtEnd $ concat $ - zipWith (listItemToRTF notes indent) (orderedMarkers indent) lst -blockToRTF notes indent HorizontalRule = + zipWith (listItemToRTF indent) (orderedMarkers indent) lst +blockToRTF indent HorizontalRule = rtfPar indent 0 "\\qc \\emdash\\emdash\\emdash\\emdash\\emdash" -blockToRTF notes indent (Header level lst) = +blockToRTF indent (Header level lst) = rtfPar indent 0 ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ - (inlineListToRTF notes lst)) -blockToRTF notes indent (Table caption _ _ headers rows) = - blockToRTF notes indent (Para [Str "pandoc: TABLE unsupported in RST writer"]) + (inlineListToRTF lst)) +blockToRTF indent (Table caption _ _ headers rows) = + blockToRTF indent (Para [Str "pandoc: TABLE unsupported in RST writer"]) -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. @@ -181,16 +172,15 @@ spaceAtEnd str = else str -- | Convert list item (list of blocks) to RTF. -listItemToRTF :: [Block] -- ^ list of note blocks - -> Int -- ^ indent level +listItemToRTF :: Int -- ^ indent level -> String -- ^ list start marker -> [Block] -- ^ list item (list of blocks) -> [Char] -listItemToRTF notes indent marker [] = +listItemToRTF indent marker [] = rtfCompact (indent + listIncrement) (0 - listIncrement) (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") -listItemToRTF notes indent marker list = - let (first:rest) = map (blockToRTF notes (indent + listIncrement)) list in +listItemToRTF indent marker list = + let (first:rest) = map (blockToRTF (indent + listIncrement)) list in -- insert the list marker into the (processed) first block let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of Just (before, matched, after, _) -> before ++ "\\fi" ++ @@ -200,47 +190,36 @@ listItemToRTF notes indent marker list = modFirst ++ (concat rest) -- | Convert list of inline items to RTF. -inlineListToRTF :: [Block] -- ^ list of note blocks - -> [Inline] -- ^ list of inlines to convert +inlineListToRTF :: [Inline] -- ^ list of inlines to convert -> String -inlineListToRTF notes lst = concatMap (inlineToRTF notes) lst +inlineListToRTF lst = concatMap inlineToRTF lst -- | Convert inline item to RTF. -inlineToRTF :: [Block] -- ^ list of note blocks - -> Inline -- ^ inline to convert +inlineToRTF :: Inline -- ^ inline to convert -> String -inlineToRTF notes (Emph lst) = "{\\i " ++ (inlineListToRTF notes lst) ++ "} " -inlineToRTF notes (Strong lst) = - "{\\b " ++ (inlineListToRTF notes lst) ++ "} " -inlineToRTF notes (Quoted SingleQuote lst) = - "\\u8216'" ++ (inlineListToRTF notes lst) ++ "\\u8217'" -inlineToRTF notes (Quoted DoubleQuote lst) = - "\\u8220\"" ++ (inlineListToRTF notes lst) ++ "\\u8221\"" -inlineToRTF notes Apostrophe = "\\u8217'" -inlineToRTF notes Ellipses = "\\u8230?" -inlineToRTF notes EmDash = "\\u8212-" -inlineToRTF notes EnDash = "\\u8211-" -inlineToRTF notes (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} " -inlineToRTF notes (Str str) = stringToRTF str -inlineToRTF notes (TeX str) = latexToRTF str -inlineToRTF notes (HtmlInline str) = "" -inlineToRTF notes (LineBreak) = "\\line " -inlineToRTF notes Space = " " -inlineToRTF notes (Link text (Src src tit)) = +inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "} " +inlineToRTF (Strong lst) = + "{\\b " ++ (inlineListToRTF lst) ++ "} " +inlineToRTF (Quoted SingleQuote lst) = + "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" +inlineToRTF (Quoted DoubleQuote lst) = + "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" +inlineToRTF Apostrophe = "\\u8217'" +inlineToRTF Ellipses = "\\u8230?" +inlineToRTF EmDash = "\\u8212-" +inlineToRTF EnDash = "\\u8211-" +inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} " +inlineToRTF (Str str) = stringToRTF str +inlineToRTF (TeX str) = latexToRTF str +inlineToRTF (HtmlInline str) = "" +inlineToRTF (LineBreak) = "\\line " +inlineToRTF Space = " " +inlineToRTF (Link text (src, tit)) = "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ - "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF notes text) ++ "\n}}}\n" -inlineToRTF notes (Link text (Ref ref)) = - "[" ++ (inlineListToRTF notes text) ++ "][" ++ - (inlineListToRTF notes ref) ++ "]" -- this is what markdown does -inlineToRTF notes (Image alternate (Src source tit)) = + "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n" +inlineToRTF (Image alternate (source, tit)) = "{\\cf1 [image: " ++ source ++ "]\\cf0}" -inlineToRTF notes (Image alternate (Ref ref)) = "![" ++ - (inlineListToRTF notes alternate) ++ "][" ++ - (inlineListToRTF notes ref) ++ "]" -inlineToRTF [] (NoteRef ref) = "" -inlineToRTF ((Note firstref firstblocks):rest) (NoteRef ref) = - if firstref == ref - then "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ - (concatMap (blockToRTF rest 0) firstblocks) ++ "}" - else inlineToRTF rest (NoteRef ref) +inlineToRTF (Note contents) = + "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ + (concatMap (blockToRTF 0) contents) ++ "}" diff --git a/src/Text/ParserCombinators/Pandoc.hs b/src/Text/ParserCombinators/Pandoc.hs index 5b1742975..a965159ed 100644 --- a/src/Text/ParserCombinators/Pandoc.hs +++ b/src/Text/ParserCombinators/Pandoc.hs @@ -40,12 +40,13 @@ module Text.ParserCombinators.Pandoc ( enclosed, nullBlock, stringAnyCase, - parseFromStr + parseFromStr, + lineClump ) where import Text.ParserCombinators.Parsec import Text.Pandoc.Definition import Text.Pandoc.Shared -import Char ( toUpper, toLower ) +import Data.Char ( toUpper, toLower ) --- | Parse any line of text anyLine :: GenParser Char st [Char] @@ -132,4 +133,11 @@ parseFromStr parser str = try $ do setInput oldInput return result +-- | Parse raw line block up to and including blank lines. +lineClump :: GenParser Char st String +lineClump = do + lines <- many1 (do{notFollowedBy blankline; anyLine}) + blanks <- blanklines <|> (do{eof; return "\n"}) + return ((unlines lines) ++ blanks) + |