aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/Docbook.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Writers/Docbook.hs')
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs440
1 files changed, 440 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
new file mode 100644
index 000000000..597851f65
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -0,0 +1,440 @@
+{-# LANGUAGE OverloadedStrings, PatternGuards #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Docbook
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Docbook XML.
+-}
+module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.XML
+import Text.Pandoc.Shared
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Math
+import Data.List ( stripPrefix, isPrefixOf, intercalate, isSuffixOf )
+import Data.Char ( toLower )
+import Data.Monoid ( Any(..) )
+import Text.Pandoc.Highlighting ( languages, languagesByExtension )
+import Text.Pandoc.Pretty
+import Text.Pandoc.ImageSize
+import qualified Text.Pandoc.Builder as B
+import Text.TeXMath
+import qualified Text.XML.Light as Xml
+import Data.Generics (everywhere, mkT)
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+import Control.Monad.Reader
+
+data DocBookVersion = DocBook4 | DocBook5
+ deriving (Eq, Show)
+
+type DB = ReaderT DocBookVersion
+
+-- | Convert list of authors to a docbook <author> section
+authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines
+authorToDocbook opts name' = do
+ name <- render Nothing <$> inlinesToDocbook opts name'
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ return $ B.rawInline "docbook" $ render colwidth $
+ if ',' `elem` name
+ then -- last name first
+ let (lastname, rest) = break (==',') name
+ firstname = triml rest in
+ inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
+ else -- last name last
+ let namewords = words name
+ lengthname = length namewords
+ (firstname, lastname) = case lengthname of
+ 0 -> ("","")
+ 1 -> ("", name)
+ n -> (intercalate " " (take (n-1) namewords), last namewords)
+ in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
+
+writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeDocbook4 opts d =
+ runReaderT (writeDocbook opts d) DocBook4
+
+writeDocbook5 :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeDocbook5 opts d =
+ runReaderT (writeDocbook opts d) DocBook5
+
+-- | Convert Pandoc document to string in Docbook format.
+writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m String
+writeDocbook opts (Pandoc meta blocks) = do
+ let elements = hierarchicalize blocks
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ let render' = render colwidth
+ let opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr)
+ (writerTemplate opts) &&
+ TopLevelDefault == writerTopLevelDivision opts)
+ then opts{ writerTopLevelDivision = TopLevelChapter }
+ else opts
+ -- The numbering here follows LaTeX's internal numbering
+ let startLvl = case writerTopLevelDivision opts' of
+ TopLevelPart -> -1
+ TopLevelChapter -> 0
+ TopLevelSection -> 1
+ TopLevelDefault -> 1
+ auths' <- mapM (authorToDocbook opts) $ docAuthors meta
+ let meta' = B.setMeta "author" auths' meta
+ metadata <- metaToJSON opts
+ (fmap (render colwidth . vcat) .
+ (mapM (elementToDocbook opts' startLvl) .
+ hierarchicalize))
+ (fmap (render colwidth) . inlinesToDocbook opts')
+ meta'
+ main <- (render' . vcat) <$> (mapM (elementToDocbook opts' startLvl) elements)
+ let context = defField "body" main
+ $ defField "mathml" (case writerHTMLMathMethod opts of
+ MathML -> True
+ _ -> False)
+ $ metadata
+ return $ case writerTemplate opts of
+ Nothing -> main
+ Just tpl -> renderTemplate' tpl context
+
+-- | Convert an Element to Docbook.
+elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc
+elementToDocbook opts _ (Blk block) = blockToDocbook opts block
+elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = do
+ version <- ask
+ -- Docbook doesn't allow sections with no content, so insert some if needed
+ let elements' = if null elements
+ then [Blk (Para [])]
+ else elements
+ tag = case lvl of
+ -1 -> "part"
+ 0 -> "chapter"
+ n | n >= 1 && n <= 5 -> if version == DocBook5
+ then "section"
+ else "sect" ++ show n
+ _ -> "simplesect"
+ idName = if version == DocBook5
+ then "xml:id"
+ else "id"
+ idAttr = [(idName, writerIdentifierPrefix opts ++ id') | not (null id')]
+ nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")]
+ else []
+ attribs = nsAttr ++ idAttr
+ contents <- mapM (elementToDocbook opts (lvl + 1)) elements'
+ title' <- inlinesToDocbook opts title
+ return $ inTags True tag attribs $
+ inTagsSimple "title" title' $$ vcat contents
+
+-- | Convert a list of Pandoc blocks to Docbook.
+blocksToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc
+blocksToDocbook opts = fmap vcat . mapM (blockToDocbook opts)
+
+-- | Auxiliary function to convert Plain block to Para.
+plainToPara :: Block -> Block
+plainToPara (Plain x) = Para x
+plainToPara x = x
+
+-- | Convert a list of pairs of terms and definitions into a list of
+-- Docbook varlistentrys.
+deflistItemsToDocbook :: PandocMonad m
+ => WriterOptions -> [([Inline],[[Block]])] -> DB m Doc
+deflistItemsToDocbook opts items =
+ vcat <$> mapM (\(term, defs) -> deflistItemToDocbook opts term defs) items
+
+-- | Convert a term and a list of blocks into a Docbook varlistentry.
+deflistItemToDocbook :: PandocMonad m
+ => WriterOptions -> [Inline] -> [[Block]] -> DB m Doc
+deflistItemToDocbook opts term defs = do
+ term' <- inlinesToDocbook opts term
+ def' <- blocksToDocbook opts $ concatMap (map plainToPara) defs
+ return $ inTagsIndented "varlistentry" $
+ inTagsIndented "term" term' $$
+ inTagsIndented "listitem" def'
+
+-- | Convert a list of lists of blocks to a list of Docbook list items.
+listItemsToDocbook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m Doc
+listItemsToDocbook opts items = vcat <$> mapM (listItemToDocbook opts) items
+
+-- | Convert a list of blocks into a Docbook list item.
+listItemToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc
+listItemToDocbook opts item =
+ inTagsIndented "listitem" <$> blocksToDocbook opts (map plainToPara item)
+
+imageToDocbook :: WriterOptions -> Attr -> String -> Doc
+imageToDocbook _ attr src = selfClosingTag "imagedata" $
+ ("fileref", src) : idAndRole attr ++ dims
+ where
+ dims = go Width "width" ++ go Height "depth"
+ go dir dstr = case (dimension dir attr) of
+ Just a -> [(dstr, show a)]
+ Nothing -> []
+
+-- | Convert a Pandoc block element to Docbook.
+blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m Doc
+blockToDocbook _ Null = return empty
+-- Add ids to paragraphs in divs with ids - this is needed for
+-- pandoc-citeproc to get link anchors in bibliographies:
+blockToDocbook opts (Div (ident,_,_) [Para lst]) =
+ let attribs = [("id", ident) | not (null ident)] in
+ if hasLineBreaks lst
+ then (flush . nowrap . inTags False "literallayout" attribs)
+ <$> inlinesToDocbook opts lst
+ else inTags True "para" attribs <$> inlinesToDocbook opts lst
+blockToDocbook opts (Div (ident,_,_) bs) = do
+ contents <- blocksToDocbook opts (map plainToPara bs)
+ return $
+ (if null ident
+ then mempty
+ else selfClosingTag "anchor" [("id", ident)]) $$ contents
+blockToDocbook _ (Header _ _ _) =
+ return empty -- should not occur after hierarchicalize
+blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst
+-- title beginning with fig: indicates that the image is a figure
+blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do
+ alt <- inlinesToDocbook opts txt
+ let capt = if null txt
+ then empty
+ else inTagsSimple "title" alt
+ return $ inTagsIndented "figure" $
+ capt $$
+ (inTagsIndented "mediaobject" $
+ (inTagsIndented "imageobject"
+ (imageToDocbook opts attr src)) $$
+ inTagsSimple "textobject" (inTagsSimple "phrase" alt))
+blockToDocbook opts (Para lst)
+ | hasLineBreaks lst = (flush . nowrap . inTagsSimple "literallayout")
+ <$> inlinesToDocbook opts lst
+ | otherwise = inTagsIndented "para" <$> inlinesToDocbook opts lst
+blockToDocbook opts (LineBlock lns) =
+ blockToDocbook opts $ linesToPara lns
+blockToDocbook opts (BlockQuote blocks) =
+ inTagsIndented "blockquote" <$> blocksToDocbook opts blocks
+blockToDocbook _ (CodeBlock (_,classes,_) str) = return $
+ text ("<programlisting" ++ lang ++ ">") <> cr <>
+ flush (text (escapeStringForXML str) <> cr <> text "</programlisting>")
+ where lang = if null langs
+ then ""
+ else " language=\"" ++ escapeStringForXML (head langs) ++
+ "\""
+ isLang l = map toLower l `elem` map (map toLower) languages
+ langsFrom s = if isLang s
+ then [s]
+ else languagesByExtension . map toLower $ s
+ langs = concatMap langsFrom classes
+blockToDocbook opts (BulletList lst) = do
+ let attribs = [("spacing", "compact") | isTightList lst]
+ inTags True "itemizedlist" attribs <$> listItemsToDocbook opts lst
+blockToDocbook _ (OrderedList _ []) = return empty
+blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do
+ let numeration = case numstyle of
+ DefaultStyle -> []
+ Decimal -> [("numeration", "arabic")]
+ Example -> [("numeration", "arabic")]
+ UpperAlpha -> [("numeration", "upperalpha")]
+ LowerAlpha -> [("numeration", "loweralpha")]
+ UpperRoman -> [("numeration", "upperroman")]
+ LowerRoman -> [("numeration", "lowerroman")]
+ spacing = [("spacing", "compact") | isTightList (first:rest)]
+ attribs = numeration ++ spacing
+ items <- if start == 1
+ then listItemsToDocbook opts (first:rest)
+ else do
+ first' <- blocksToDocbook opts (map plainToPara first)
+ rest' <- listItemsToDocbook opts rest
+ return $
+ (inTags True "listitem" [("override",show start)] first') $$
+ rest'
+ return $ inTags True "orderedlist" attribs items
+blockToDocbook opts (DefinitionList lst) = do
+ let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst]
+ inTags True "variablelist" attribs <$> deflistItemsToDocbook opts lst
+blockToDocbook _ b@(RawBlock f str)
+ | f == "docbook" = return $ text str -- raw XML block
+ | f == "html" = do
+ version <- ask
+ if version == DocBook5
+ then return empty -- No html in Docbook5
+ else return $ text str -- allow html for backwards compatibility
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+blockToDocbook _ HorizontalRule = return empty -- not semantic
+blockToDocbook opts (Table caption aligns widths headers rows) = do
+ captionDoc <- if null caption
+ then return empty
+ else inTagsIndented "title" <$>
+ inlinesToDocbook opts caption
+ let tableType = if isEmpty captionDoc then "informaltable" else "table"
+ percent w = show (truncate (100*w) :: Integer) ++ "*"
+ coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec"
+ ([("colwidth", percent w) | w > 0] ++
+ [("align", alignmentToString al)])) widths aligns
+ head' <- if all null headers
+ then return empty
+ else inTagsIndented "thead" <$> tableRowToDocbook opts headers
+ body' <- (inTagsIndented "tbody" . vcat) <$>
+ mapM (tableRowToDocbook opts) rows
+ return $ inTagsIndented tableType $ captionDoc $$
+ (inTags True "tgroup" [("cols", show (length headers))] $
+ coltags $$ head' $$ body')
+
+hasLineBreaks :: [Inline] -> Bool
+hasLineBreaks = getAny . query isLineBreak . walk removeNote
+ where
+ removeNote :: Inline -> Inline
+ removeNote (Note _) = Str ""
+ removeNote x = x
+ isLineBreak :: Inline -> Any
+ isLineBreak LineBreak = Any True
+ isLineBreak _ = Any False
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToDocbook :: PandocMonad m
+ => WriterOptions
+ -> [[Block]]
+ -> DB m Doc
+tableRowToDocbook opts cols =
+ (inTagsIndented "row" . vcat) <$> mapM (tableItemToDocbook opts) cols
+
+tableItemToDocbook :: PandocMonad m
+ => WriterOptions
+ -> [Block]
+ -> DB m Doc
+tableItemToDocbook opts item =
+ (inTags True "entry" [] . vcat) <$> mapM (blockToDocbook opts) item
+
+-- | Convert a list of inline elements to Docbook.
+inlinesToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m Doc
+inlinesToDocbook opts lst = hcat <$> mapM (inlineToDocbook opts) lst
+
+-- | Convert an inline element to Docbook.
+inlineToDocbook :: PandocMonad m => WriterOptions -> Inline -> DB m Doc
+inlineToDocbook _ (Str str) = return $ text $ escapeStringForXML str
+inlineToDocbook opts (Emph lst) =
+ inTagsSimple "emphasis" <$> inlinesToDocbook opts lst
+inlineToDocbook opts (Strong lst) =
+ inTags False "emphasis" [("role", "strong")] <$> inlinesToDocbook opts lst
+inlineToDocbook opts (Strikeout lst) =
+ inTags False "emphasis" [("role", "strikethrough")] <$>
+ inlinesToDocbook opts lst
+inlineToDocbook opts (Superscript lst) =
+ inTagsSimple "superscript" <$> inlinesToDocbook opts lst
+inlineToDocbook opts (Subscript lst) =
+ inTagsSimple "subscript" <$> inlinesToDocbook opts lst
+inlineToDocbook opts (SmallCaps lst) =
+ inTags False "emphasis" [("role", "smallcaps")] <$>
+ inlinesToDocbook opts lst
+inlineToDocbook opts (Quoted _ lst) =
+ inTagsSimple "quote" <$> inlinesToDocbook opts lst
+inlineToDocbook opts (Cite _ lst) =
+ inlinesToDocbook opts lst
+inlineToDocbook opts (Span (ident,_,_) ils) =
+ ((if null ident
+ then mempty
+ else selfClosingTag "anchor" [("id", ident)]) <>) <$>
+ inlinesToDocbook opts ils
+inlineToDocbook _ (Code _ str) =
+ return $ inTagsSimple "literal" $ text (escapeStringForXML str)
+inlineToDocbook opts (Math t str)
+ | isMathML (writerHTMLMathMethod opts) = do
+ res <- convertMath writeMathML t str
+ case res of
+ Right r -> return $ inTagsSimple tagtype
+ $ text $ Xml.ppcElement conf
+ $ fixNS
+ $ removeAttr r
+ Left il -> inlineToDocbook opts il
+ | otherwise =
+ texMathToInlines t str >>= inlinesToDocbook opts
+ where tagtype = case t of
+ InlineMath -> "inlineequation"
+ DisplayMath -> "informalequation"
+ conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP
+ removeAttr e = e{ Xml.elAttribs = [] }
+ fixNS' qname = qname{ Xml.qPrefix = Just "mml" }
+ fixNS = everywhere (mkT fixNS')
+inlineToDocbook _ il@(RawInline f x)
+ | f == "html" || f == "docbook" = return $ text x
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToDocbook _ LineBreak = return $ text "\n"
+-- currently ignore, would require the option to add custom
+-- styles to the document
+inlineToDocbook _ Space = return space
+-- because we use \n for LineBreak, we can't do soft breaks:
+inlineToDocbook _ SoftBreak = return space
+inlineToDocbook opts (Link attr txt (src, _))
+ | Just email <- stripPrefix "mailto:" src =
+ let emailLink = inTagsSimple "email" $ text $
+ escapeStringForXML $ email
+ in case txt of
+ [Str s] | escapeURI s == email -> return emailLink
+ _ -> do contents <- inlinesToDocbook opts txt
+ return $ contents <+>
+ char '(' <> emailLink <> char ')'
+ | otherwise = do
+ version <- ask
+ (if isPrefixOf "#" src
+ then inTags False "link" $ ("linkend", writerIdentifierPrefix opts ++ drop 1 src) : idAndRole attr
+ else if version == DocBook5
+ then inTags False "link" $ ("xlink:href", src) : idAndRole attr
+ else inTags False "ulink" $ ("url", src) : idAndRole attr )
+ <$> inlinesToDocbook opts txt
+inlineToDocbook opts (Image attr _ (src, tit)) = return $
+ let titleDoc = if null tit
+ then empty
+ else inTagsIndented "objectinfo" $
+ inTagsIndented "title" (text $ escapeStringForXML tit)
+ in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
+ titleDoc $$ imageToDocbook opts attr src
+inlineToDocbook opts (Note contents) =
+ inTagsIndented "footnote" <$> blocksToDocbook opts contents
+
+isMathML :: HTMLMathMethod -> Bool
+isMathML MathML = True
+isMathML _ = False
+
+idAndRole :: Attr -> [(String, String)]
+idAndRole (id',cls,_) = ident ++ role
+ where
+ ident = if null id'
+ then []
+ else [("id", id')]
+ role = if null cls
+ then []
+ else [("role", unwords cls)]