From a9e32505debea9077c0cca49b84a8c0d363bf3e8 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Mon, 1 Jan 2007 21:08:12 +0000 Subject: Merged changes from docbook branch since r363. git-svn-id: https://pandoc.googlecode.com/svn/trunk@386 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Main.hs | 8 +- src/Text/Pandoc/Shared.hs | 3 +- src/Text/Pandoc/Writers/Docbook.hs | 236 +++++++++++++++++++++++++++++++++++++ src/Text/Pandoc/Writers/HTML.hs | 4 +- src/headers/DocbookHeader | 3 + src/templates/DefaultHeaders.hs | 4 + 6 files changed, 255 insertions(+), 3 deletions(-) create mode 100644 src/Text/Pandoc/Writers/Docbook.hs create mode 100644 src/headers/DocbookHeader (limited to 'src') diff --git a/src/Main.hs b/src/Main.hs index 35a61c63e..b5320c258 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -37,12 +37,14 @@ import Text.Pandoc.Writers.RST ( writeRST ) import Text.Pandoc.Readers.RST ( readRST ) import Text.Pandoc.ASCIIMathML ( asciiMathMLScript ) import Text.Pandoc.Writers.HTML ( writeHtml ) +import Text.Pandoc.Writers.Docbook ( writeDocbook ) import Text.Pandoc.Writers.LaTeX ( writeLaTeX ) import Text.Pandoc.Readers.LaTeX ( readLaTeX ) import Text.Pandoc.Writers.RTF ( writeRTF ) import Text.Pandoc.Writers.Markdown ( writeMarkdown ) import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader, - defaultRTFHeader, defaultS5Header, defaultLaTeXHeader ) + defaultRTFHeader, defaultS5Header, defaultLaTeXHeader, + defaultDocbookHeader ) import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Regex ( mkRegex, matchRegex ) @@ -79,6 +81,7 @@ writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ] writers = [("native" , (writeDoc, "")) ,("html" , (writeHtml, defaultHtmlHeader)) ,("s5" , (writeS5, defaultS5Header)) + ,("docbook" , (writeDocbook, defaultDocbookHeader)) ,("latex" , (writeLaTeX, defaultLaTeXHeader)) ,("markdown" , (writeMarkdown, "")) ,("rst" , (writeRST, "")) @@ -331,6 +334,8 @@ defaultWriterName x = Just ["text"] -> "markdown" Just ["md"] -> "markdown" Just ["markdown"] -> "markdown" + Just ["db"] -> "docbook" + Just ["xml"] -> "docbook" Just _ -> "html" main = do @@ -423,6 +428,7 @@ main = do writerSmart = smart && (not strict), writerTabStop = tabStop, + writerNotes = [], writerS5 = (writerName=="s5"), writerIncremental = incremental, writerNumberSections = numberSections, diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 7e4f63ffa..eb6c7be78 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -322,7 +322,7 @@ containsPara (x:rest) = containsPara rest -- | Options for writers data WriterOptions = WriterOptions - { writerStandalone :: Bool -- ^ If @True@, writer header and footer + { writerStandalone :: Bool -- ^ Include header and footer , writerTitlePrefix :: String -- ^ Prefix for HTML titles , writerHeader :: String -- ^ Header for the document , writerIncludeBefore :: String -- ^ String to include before the body @@ -334,6 +334,7 @@ data WriterOptions = WriterOptions , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax , writerTabStop :: Int -- ^ Tabstop for conversion between -- spaces and tabs + , writerNotes :: [Block] -- ^ List of note blocks } deriving Show -- diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs new file mode 100644 index 000000000..9924d50fe --- /dev/null +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -0,0 +1,236 @@ +{- +Copyright (C) 2006 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Docbook + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to Docbook XML. +-} +module Text.Pandoc.Writers.Docbook ( + writeDocbook + ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Writers.HTML ( stringToSmartHtml, stringToHtml ) +import Text.Html ( stringToHtmlString ) +import Text.Regex ( mkRegex, matchRegex ) +import Data.Char ( toLower ) +import Data.List ( isPrefixOf, partition ) +import Text.PrettyPrint.HughesPJ hiding ( Str ) + +-- | Data structure for defining hierarchical Pandoc documents +data Element = Blk Block + | Sec [Inline] [Element] deriving (Eq, Read, Show) + +-- | Returns true on Header block with level at least 'level' +headerAtLeast :: Int -> Block -> Bool +headerAtLeast level (Header x _) = x <= level +headerAtLeast level _ = False + +-- | Convert list of Pandoc blocks into list of Elements (hierarchical) +hierarchicalize :: [Block] -> [Element] +hierarchicalize [] = [] +hierarchicalize (block:rest) = + case block of + (Header level title) -> let (thisSection, rest') = break (headerAtLeast level) rest in + (Sec title (hierarchicalize thisSection)):(hierarchicalize rest') + x -> (Blk x):(hierarchicalize rest) + +-- | Convert list of authors to a docbook section +authorToDocbook :: WriterOptions -> [Char] -> Doc +authorToDocbook options name = indentedInTags options "author" $ + if ',' `elem` name + then -- last name first + let (lastname, rest) = break (==',') name + firstname = removeLeadingSpace rest in + inTags "firstname" (text $ stringToXML options firstname) <> + inTags "surname" (text $ stringToXML options lastname) + else -- last name last + let namewords = words name + lengthname = length namewords + (firstname, lastname) = case lengthname of + 0 -> ("","") + 1 -> ("", name) + n -> (joinWithSep " " (take (n-1) namewords), last namewords) in + inTags "firstname" (text $ stringToXML options firstname) $$ + inTags "surname" (text $ stringToXML options lastname) + +-- | Convert Pandoc document to string in Docbook format. +writeDocbook :: WriterOptions -> Pandoc -> String +writeDocbook options (Pandoc (Meta title authors date) blocks) = + let head = if (writerStandalone options) + then text (writerHeader options) + else empty + meta = if (writerStandalone options) + then indentedInTags options "articleinfo" $ + (inTags "title" (inlinesToDocbook options title)) $$ + (vcat (map (authorToDocbook options) authors)) $$ + (inTags "date" (text date)) + else empty + blocks' = replaceReferenceLinks blocks + (noteBlocks, blocks'') = partition isNoteBlock blocks' + options' = options {writerNotes = noteBlocks} + elements = hierarchicalize blocks'' + body = text (writerIncludeBefore options') <> + vcat (map (elementToDocbook options') elements) $$ + text (writerIncludeAfter options') + body' = if writerStandalone options' + then indentedInTags options' "article" (meta $$ body) + else body in + render $ head $$ body' <> text "\n" + +-- | Put the supplied contents between start and end tags of tagType, +-- with specified attributes. +inTagsWithAttrib :: String -> [(String, String)] -> Doc -> Doc +inTagsWithAttrib tagType attribs contents = text ("<" ++ tagType ++ + (concatMap (\(a, b) -> " " ++ attributeStringToXML a ++ + "=\"" ++ attributeStringToXML b ++ "\"") attribs)) <> + if isEmpty contents + then text " />" -- self-closing tag + else text ">" <> contents <> text ("") + +-- | Put the supplied contents between start and end tags of tagType. +inTags :: String -> Doc -> Doc +inTags tagType contents = inTagsWithAttrib tagType [] contents + +-- | Put the supplied contents in indented block btw start and end tags. +indentedInTags :: WriterOptions -> [Char] -> Doc -> Doc +indentedInTags options tagType contents = text ("<" ++ tagType ++ ">") $$ + nest 2 contents $$ text ("") + +-- | Convert an Element to Docbook. +elementToDocbook :: WriterOptions -> Element -> Doc +elementToDocbook options (Blk block) = blockToDocbook options block +elementToDocbook options (Sec title elements) = + -- Docbook doesn't allow sections with no content, so insert some if needed + let elements' = if null elements + then [Blk (Para [])] + else elements in + indentedInTags options "section" $ + inTags "title" (wrap options title) $$ + vcat (map (elementToDocbook options) elements') + +-- | Convert a list of Pandoc blocks to Docbook. +blocksToDocbook :: WriterOptions -> [Block] -> Doc +blocksToDocbook options = vcat . map (blockToDocbook options) + +-- | Convert a list of lists of blocks to a list of Docbook list items. +listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc +listItemsToDocbook options items = + vcat $ map (listItemToDocbook options) items + +-- | Convert a list of blocks into a Docbook list item. +listItemToDocbook :: WriterOptions -> [Block] -> Doc +listItemToDocbook options item = + let plainToPara (Plain x) = Para x + plainToPara y = y in + let item' = map plainToPara item in + indentedInTags options "listitem" (blocksToDocbook options item') + +-- | Convert a Pandoc block element to Docbook. +blockToDocbook :: WriterOptions -> Block -> Doc +blockToDocbook options Blank = text "" +blockToDocbook options Null = empty +blockToDocbook options (Plain lst) = wrap options lst +blockToDocbook options (Para lst) = + indentedInTags options "para" (wrap options lst) +blockToDocbook options (BlockQuote blocks) = + indentedInTags options "blockquote" (blocksToDocbook options blocks) +blockToDocbook options (CodeBlock str) = + text "" <> (cdata str) <> text "" +blockToDocbook options (BulletList lst) = + indentedInTags options "itemizedlist" $ listItemsToDocbook options lst +blockToDocbook options (OrderedList lst) = + indentedInTags options "orderedlist" $ listItemsToDocbook options lst +blockToDocbook options (RawHtml str) = text str -- raw XML block +blockToDocbook options HorizontalRule = empty -- not semantic +blockToDocbook options (Note _ _) = empty -- shouldn't occur +blockToDocbook options (Key _ _) = empty -- shouldn't occur +blockToDocbook options _ = indentedInTags options "para" (text "Unknown block type") + +-- | Put string in CDATA section +cdata :: String -> Doc +cdata str = text $ "" + +-- | Take list of inline elements and return wrapped doc. +wrap :: WriterOptions -> [Inline] -> Doc +wrap options lst = fsep $ map (hcat . (map (inlineToDocbook options))) (splitBySpace lst) + +-- | Escape a string for XML (with "smart" option if specified). +stringToXML :: WriterOptions -> String -> String +stringToXML options = if writerSmart options + then stringToSmartHtml + else stringToHtml + +-- | Escape string to XML appropriate for attributes +attributeStringToXML :: String -> String +attributeStringToXML = gsub "\"" """ . codeStringToXML + +-- | Escape a literal string for XML. +codeStringToXML :: String -> String +codeStringToXML = gsub "<" "<" . gsub "&" "&" + +-- | Convert a list of inline elements to Docbook. +inlinesToDocbook :: WriterOptions -> [Inline] -> Doc +inlinesToDocbook options lst = hcat (map (inlineToDocbook options) lst) + +-- | Convert an inline element to Docbook. +inlineToDocbook :: WriterOptions -> Inline -> Doc +inlineToDocbook options (Str str) = text $ stringToXML options str +inlineToDocbook options (Emph lst) = + inTags "emphasis" (inlinesToDocbook options lst) +inlineToDocbook options (Strong lst) = + inTagsWithAttrib "emphasis" [("role", "strong")] + (inlinesToDocbook options lst) +inlineToDocbook options (Code str) = + inTags "literal" $ text (codeStringToXML str) +inlineToDocbook options (TeX str) = inlineToDocbook options (Code str) +inlineToDocbook options (HtmlInline str) = empty +inlineToDocbook options LineBreak = + text $ "" +inlineToDocbook options Space = char ' ' +inlineToDocbook options (Link txt (Src src tit)) = + case (matchRegex (mkRegex "mailto:(.*)") src) of + Just [addr] -> inTags "email" $ text (codeStringToXML addr) + Nothing -> inTagsWithAttrib "ulink" [("url", src)] $ + inlinesToDocbook options txt +inlineToDocbook options (Link text (Ref ref)) = empty -- shouldn't occur +inlineToDocbook options (Image alt (Src src tit)) = + let titleDoc = if null tit + then empty + else indentedInTags options "objectinfo" $ + indentedInTags options "title" + (text $ stringToXML options tit) in + indentedInTags options "inlinemediaobject" $ + indentedInTags options "imageobject" $ + titleDoc $$ inTagsWithAttrib "imagedata" [("fileref", src)] empty +inlineToDocbook options (Image alternate (Ref ref)) = empty --shouldn't occur +inlineToDocbook options (NoteRef ref) = + let notes = writerNotes options + hits = filter (\(Note r _) -> r == ref) notes in + if null hits + then empty + else let (Note _ contents) = head hits in + indentedInTags options "footnote" $ blocksToDocbook options contents +inlineToDocbook options _ = empty diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 4456a61b5..8de1de43f 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -28,7 +28,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( - writeHtml + writeHtml, + stringToSmartHtml, + stringToHtml ) where import Text.Pandoc.Definition import Text.Pandoc.Shared diff --git a/src/headers/DocbookHeader b/src/headers/DocbookHeader new file mode 100644 index 000000000..7b26b2c73 --- /dev/null +++ b/src/headers/DocbookHeader @@ -0,0 +1,3 @@ + + diff --git a/src/templates/DefaultHeaders.hs b/src/templates/DefaultHeaders.hs index 4c552cea7..0274aa30f 100644 --- a/src/templates/DefaultHeaders.hs +++ b/src/templates/DefaultHeaders.hs @@ -1,6 +1,7 @@ -- | Default headers for Pandoc writers. module Text.Pandoc.Writers.DefaultHeaders ( defaultLaTeXHeader, + defaultDocbookHeader, defaultHtmlHeader, defaultS5Header, defaultRTFHeader @@ -10,6 +11,9 @@ import Text.Pandoc.Writers.S5 defaultLaTeXHeader :: String defaultLaTeXHeader = "@LaTeXHeader@" +defaultDocbookHeader :: String +defaultDocbookHeader = "@DocbookHeader@" + defaultHtmlHeader :: String defaultHtmlHeader = "@HtmlHeader@" -- cgit v1.2.3