aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs41
-rw-r--r--tests/writer.docbook233
2 files changed, 126 insertions, 148 deletions
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index d8b85c1a2..a53c3fb86 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -35,13 +35,13 @@ import Text.Pandoc.Templates (renderTemplate)
import Text.Pandoc.Readers.TeXMath
import Data.List ( isPrefixOf, intercalate )
import Data.Char ( toLower )
-import Text.PrettyPrint.HughesPJ hiding ( Str )
import Text.Pandoc.Highlighting (languages, languagesByExtension)
+import Text.Pandoc.Pretty
-- | Convert list of authors to a docbook <author> section
authorToDocbook :: WriterOptions -> [Inline] -> Doc
authorToDocbook opts name' =
- let name = render $ inlinesToDocbook opts name'
+ let name = render Nothing $ inlinesToDocbook opts name'
in if ',' `elem` name
then -- last name first
let (lastname, rest) = break (==',') name
@@ -61,16 +61,20 @@ authorToDocbook opts name' =
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: WriterOptions -> Pandoc -> String
writeDocbook opts (Pandoc (Meta tit auths dat) blocks) =
- let title = wrap opts tit
+ let title = inlinesToDocbook opts tit
authors = map (authorToDocbook opts) auths
date = inlinesToDocbook opts dat
elements = hierarchicalize blocks
- main = render $ vcat (map (elementToDocbook opts) elements)
+ colwidth = if writerWrapText opts
+ then Just $ writerColumns opts
+ else Nothing
+ render' = render colwidth
+ main = render' $ vcat (map (elementToDocbook opts) elements)
context = writerVariables opts ++
[ ("body", main)
- , ("title", render title)
- , ("date", render date) ] ++
- [ ("author", render a) | a <- authors ]
+ , ("title", render' title)
+ , ("date", render' date) ] ++
+ [ ("author", render' a) | a <- authors ]
in if writerStandalone opts
then renderTemplate context $ writerTemplate opts
else main
@@ -84,7 +88,7 @@ elementToDocbook opts (Sec _ _num id' title elements) =
then [Blk (Para [])]
else elements
in inTags True "section" [("id",id')] $
- inTagsSimple "title" (wrap opts title) $$
+ inTagsSimple "title" (inlinesToDocbook opts title) $$
vcat (map (elementToDocbook opts) elements')
-- | Convert a list of Pandoc blocks to Docbook.
@@ -123,7 +127,7 @@ listItemToDocbook opts item =
blockToDocbook :: WriterOptions -> Block -> Doc
blockToDocbook _ Null = empty
blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize
-blockToDocbook opts (Plain lst) = wrap opts lst
+blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst
blockToDocbook opts (Para [Image txt (src,_)]) =
let capt = inlinesToDocbook opts txt
in inTagsIndented "figure" $
@@ -132,12 +136,13 @@ blockToDocbook opts (Para [Image txt (src,_)]) =
(inTagsIndented "imageobject"
(selfClosingTag "imagedata" [("fileref",src)])) $$
inTagsSimple "textobject" (inTagsSimple "phrase" capt))
-blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst
+blockToDocbook opts (Para lst) =
+ inTagsIndented "para" $ inlinesToDocbook opts lst
blockToDocbook opts (BlockQuote blocks) =
inTagsIndented "blockquote" $ blocksToDocbook opts blocks
blockToDocbook _ (CodeBlock (_,classes,_) str) =
- text ("<screen" ++ lang ++ ">\n") <>
- text (escapeStringForXML str) <> text "\n</screen>"
+ text ("<screen" ++ lang ++ ">") <> cr <>
+ flush (text (escapeStringForXML str) <> cr <> text "</screen>")
where lang = if null langs
then ""
else " language=\"" ++ escapeStringForXML (head langs) ++
@@ -214,12 +219,6 @@ tableItemToDocbook opts tag align item =
let attrib = [("align", align)]
in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
--- | Take list of inline elements and return wrapped doc.
-wrap :: WriterOptions -> [Inline] -> Doc
-wrap opts lst = if writerWrapText opts
- then fsep $ map (inlinesToDocbook opts) (splitBy (== Space) lst)
- else inlinesToDocbook opts lst
-
-- | Convert a list of inline elements to Docbook.
inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
@@ -254,8 +253,8 @@ inlineToDocbook _ (Code str) =
inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str
inlineToDocbook _ (TeX _) = empty
inlineToDocbook _ (HtmlInline _) = empty
-inlineToDocbook _ LineBreak = text $ "<literallayout></literallayout>"
-inlineToDocbook _ Space = char ' '
+inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty
+inlineToDocbook _ Space = space
inlineToDocbook opts (Link txt (src, _)) =
if isPrefixOf "mailto:" src
then let src' = drop 7 src
@@ -275,6 +274,6 @@ inlineToDocbook _ (Image _ (src, tit)) =
else inTagsIndented "objectinfo" $
inTagsIndented "title" (text $ escapeStringForXML tit)
in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
- titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
+ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
inlineToDocbook opts (Note contents) =
inTagsIndented "footnote" $ blocksToDocbook opts contents
diff --git a/tests/writer.docbook b/tests/writer.docbook
index c17513cf9..15704f8bf 100644
--- a/tests/writer.docbook
+++ b/tests/writer.docbook
@@ -15,14 +15,13 @@
<date>July 17, 2006</date>
</articleinfo>
<para>
- This is a set of tests for pandoc. Most of them are adapted from
- John Gruber's markdown test suite.
+ This is a set of tests for pandoc. Most of them are adapted from John
+ Gruber's markdown test suite.
</para>
<section id="headers">
<title>Headers</title>
<section id="level-2-with-an-embedded-link">
- <title>Level 2 with an
- <ulink url="/url">embedded link</ulink></title>
+ <title>Level 2 with an <ulink url="/url">embedded link</ulink></title>
<section id="level-3-with-emphasis">
<title>Level 3 with <emphasis>emphasis</emphasis></title>
<section id="level-4">
@@ -60,16 +59,15 @@
Here's a regular paragraph.
</para>
<para>
- In Markdown 1.0.0 and earlier. Version 8. This line turns into a
- list item. Because a hard-wrapped line in the middle of a paragraph
- looked like a list item.
+ In Markdown 1.0.0 and earlier. Version 8. This line turns into a list
+ item. Because a hard-wrapped line in the middle of a paragraph looked like
+ a list item.
</para>
<para>
Here's one with a bullet. * criminey.
</para>
<para>
- There should be a hard line
- break<literallayout></literallayout>here.
+ There should be a hard line break<literallayout></literallayout>here.
</para>
</section>
<section id="block-quotes">
@@ -866,45 +864,41 @@ These should not be escaped: \$ \\ \&gt; \[ \{
<div>
foo
</div>
-
<para>
And nested without indentation:
</para>
<div>
-<div>
-<div>
+ <div>
+ <div>
foo
</div>
-</div>
-<div>
+ </div>
+ <div>
bar
</div>
-</div>
-
+ </div>
<para>
Interpreted markdown in a table:
</para>
<table>
-<tr>
-<td>
+ <tr>
+ <td>
This is <emphasis>emphasized</emphasis>
</td>
-<td>
+ <td>
And this is <emphasis role="strong">strong</emphasis>
</td>
-</tr>
-</table>
-
-<script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script>
+ </tr>
+ </table>
+ <script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script>
<para>
Here's a simple block:
</para>
<div>
-
+
foo
</div>
-
<para>
This should be a code block, though:
</para>
@@ -923,31 +917,28 @@ These should not be escaped: \$ \\ \&gt; \[ \{
Now, nested:
</para>
<div>
- <div>
- <div>
-
+ <div>
+ <div>
+
foo
</div>
- </div>
-</div>
-
+ </div>
+ </div>
<para>
This should just be an HTML comment:
</para>
<!-- Comment -->
-
<para>
Multiline:
</para>
<!--
-Blah
-Blah
--->
-
-<!--
- This is another comment.
--->
+ Blah
+ Blah
+ -->
+ <!--
+ This is another comment.
+ -->
<para>
Code block:
</para>
@@ -958,7 +949,6 @@ Blah
Just plain comment, with trailing spaces on the line:
</para>
<!-- foo -->
-
<para>
Code:
</para>
@@ -970,28 +960,27 @@ Blah
</para>
<hr>
-<hr />
+ <hr />
-<hr />
+ <hr />
-<hr>
+ <hr>
-<hr />
+ <hr />
-<hr />
+ <hr />
-<hr class="foo" id="bar" />
+ <hr class="foo" id="bar" />
-<hr class="foo" id="bar" />
-
-<hr class="foo" id="bar">
+ <hr class="foo" id="bar" />
+ <hr class="foo" id="bar">
</section>
<section id="inline-markup">
<title>Inline Markup</title>
<para>
- This is <emphasis>emphasized</emphasis>, and so
- <emphasis>is this</emphasis>.
+ This is <emphasis>emphasized</emphasis>, and so <emphasis>is
+ this</emphasis>.
</para>
<para>
This is <emphasis role="strong">strong</emphasis>, and so
@@ -1001,18 +990,18 @@ Blah
An <emphasis><ulink url="/url">emphasized link</ulink></emphasis>.
</para>
<para>
- <emphasis role="strong"><emphasis>This is strong and em.</emphasis></emphasis>
+ <emphasis role="strong"><emphasis>This is strong and
+ em.</emphasis></emphasis>
</para>
<para>
- So is <emphasis role="strong"><emphasis>this</emphasis></emphasis>
- word.
+ So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> word.
</para>
<para>
- <emphasis role="strong"><emphasis>This is strong and em.</emphasis></emphasis>
+ <emphasis role="strong"><emphasis>This is strong and
+ em.</emphasis></emphasis>
</para>
<para>
- So is <emphasis role="strong"><emphasis>this</emphasis></emphasis>
- word.
+ So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> word.
</para>
<para>
This is code: <literal>&gt;</literal>, <literal>$</literal>,
@@ -1020,7 +1009,8 @@ Blah
<literal>&lt;html&gt;</literal>.
</para>
<para>
- <emphasis role="strikethrough">This is <emphasis>strikeout</emphasis>.</emphasis>
+ <emphasis role="strikethrough">This is
+ <emphasis>strikeout</emphasis>.</emphasis>
</para>
<para>
Superscripts: a<superscript>bc</superscript>d
@@ -1028,35 +1018,35 @@ Blah
a<superscript>hello there</superscript>.
</para>
<para>
- Subscripts: H<subscript>2</subscript>O,
- H<subscript>23</subscript>O, H<subscript>many of them</subscript>O.
+ Subscripts: H<subscript>2</subscript>O, H<subscript>23</subscript>O,
+ H<subscript>many of them</subscript>O.
</para>
<para>
- These should not be superscripts or subscripts, because of the
- unescaped spaces: a^b c^d, a~b c~d.
+ These should not be superscripts or subscripts, because of the unescaped
+ spaces: a^b c^d, a~b c~d.
</para>
</section>
<section id="smart-quotes-ellipses-dashes">
<title>Smart quotes, ellipses, dashes</title>
<para>
- <quote>Hello,</quote> said the spider.
- <quote><quote>Shelob</quote> is my name.</quote>
+ <quote>Hello,</quote> said the spider. <quote><quote>Shelob</quote> is my
+ name.</quote>
</para>
<para>
- <quote>A</quote>, <quote>B</quote>, and <quote>C</quote> are
- letters.
+ <quote>A</quote>, <quote>B</quote>, and <quote>C</quote> are letters.
</para>
<para>
- <quote>Oak,</quote> <quote>elm,</quote> and <quote>beech</quote>
- are names of trees. So is <quote>pine.</quote>
+ <quote>Oak,</quote> <quote>elm,</quote> and <quote>beech</quote> are names
+ of trees. So is <quote>pine.</quote>
</para>
<para>
- <quote>He said, <quote>I want to go.</quote></quote> Were you alive
- in the 70's?
+ <quote>He said, <quote>I want to go.</quote></quote> Were you alive in the
+ 70's?
</para>
<para>
Here is some quoted <quote><literal>code</literal></quote> and a
- <quote><ulink url="http://example.com/?foo=1&amp;bar=2">quoted link</ulink></quote>.
+ <quote><ulink url="http://example.com/?foo=1&amp;bar=2">quoted
+ link</ulink></quote>.
</para>
<para>
Some dashes: one—two — three—four — five.
@@ -1135,8 +1125,8 @@ Blah
</listitem>
<listitem>
<para>
- Escaped <literal>$</literal>: $73
- <emphasis>this should be emphasized</emphasis> 23$.
+ Escaped <literal>$</literal>: $73 <emphasis>this should be
+ emphasized</emphasis> 23$.
</para>
</listitem>
</itemizedlist>
@@ -1316,8 +1306,8 @@ Blah
<section id="with-ampersands">
<title>With ampersands</title>
<para>
- Here's a
- <ulink url="http://example.com/?foo=1&amp;bar=2">link with an ampersand in the URL</ulink>.
+ Here's a <ulink url="http://example.com/?foo=1&amp;bar=2">link with an
+ ampersand in the URL</ulink>.
</para>
<para>
Here's a link with an amersand in the link text:
@@ -1327,8 +1317,8 @@ Blah
Here's an <ulink url="/script?foo=1&amp;bar=2">inline link</ulink>.
</para>
<para>
- Here's an
- <ulink url="/script?foo=1&amp;bar=2">inline link in pointy braces</ulink>.
+ Here's an <ulink url="/script?foo=1&amp;bar=2">inline link in pointy
+ braces</ulink>.
</para>
</section>
<section id="autolinks">
@@ -1387,78 +1377,67 @@ or here: &lt;http://example.com/&gt;
</mediaobject>
</figure>
<para>
- Here is a movie
- <inlinemediaobject>
+ Here is a movie <inlinemediaobject>
<imageobject>
<imagedata fileref="movie.jpg" />
</imageobject>
- </inlinemediaobject>
- icon.
+ </inlinemediaobject> icon.
</para>
</section>
<section id="footnotes">
<title>Footnotes</title>
<para>
- Here is a footnote
- reference,<footnote>
- <para>
- Here is the footnote. It can go anywhere after the footnote
- reference. It need not be placed at the end of the document.
- </para>
- </footnote>
- and
- another.<footnote>
- <para>
- Here's the long note. This one contains multiple blocks.
- </para>
- <para>
- Subsequent blocks are indented to show that they belong to the
- footnote (as with list items).
- </para>
- <screen>
+ Here is a footnote reference,<footnote>
+ <para>
+ Here is the footnote. It can go anywhere after the footnote reference.
+ It need not be placed at the end of the document.
+ </para>
+ </footnote> and another.<footnote>
+ <para>
+ Here's the long note. This one contains multiple blocks.
+ </para>
+ <para>
+ Subsequent blocks are indented to show that they belong to the
+ footnote (as with list items).
+ </para>
+ <screen>
{ &lt;code&gt; }
</screen>
- <para>
- If you want, you can indent every line, but you can also be lazy
- and just indent the first line of each block.
- </para>
- </footnote>
- This should <emphasis>not</emphasis> be a footnote reference,
- because it contains a space.[^my note] Here is an inline
- note.<footnote>
- <para>
- This is <emphasis>easier</emphasis> to type. Inline notes may
- contain <ulink url="http://google.com">links</ulink> and
- <literal>]</literal> verbatim characters, as well as [bracketed
- text].
- </para>
- </footnote>
+ <para>
+ If you want, you can indent every line, but you can also be lazy and
+ just indent the first line of each block.
+ </para>
+ </footnote> This should <emphasis>not</emphasis> be a footnote reference,
+ because it contains a space.[^my note] Here is an inline note.<footnote>
+ <para>
+ This is <emphasis>easier</emphasis> to type. Inline notes may contain
+ <ulink url="http://google.com">links</ulink> and <literal>]</literal>
+ verbatim characters, as well as [bracketed text].
+ </para>
+ </footnote>
</para>
<blockquote>
<para>
- Notes can go in
- quotes.<footnote>
- <para>
- In quote.
- </para>
- </footnote>
+ Notes can go in quotes.<footnote>
+ <para>
+ In quote.
+ </para>
+ </footnote>
</para>
</blockquote>
<orderedlist numeration="arabic">
<listitem>
<para>
- And in list
- items.<footnote>
- <para>
- In list.
- </para>
- </footnote>
+ And in list items.<footnote>
+ <para>
+ In list.
+ </para>
+ </footnote>
</para>
</listitem>
</orderedlist>
<para>
- This paragraph should not be part of the note, as it is not
- indented.
+ This paragraph should not be part of the note, as it is not indented.
</para>
</section>
</article>