diff options
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 41 | ||||
-rw-r--r-- | tests/writer.docbook | 233 |
2 files changed, 126 insertions, 148 deletions
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index d8b85c1a2..a53c3fb86 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -35,13 +35,13 @@ import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Readers.TeXMath import Data.List ( isPrefixOf, intercalate ) import Data.Char ( toLower ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) import Text.Pandoc.Highlighting (languages, languagesByExtension) +import Text.Pandoc.Pretty -- | Convert list of authors to a docbook <author> section authorToDocbook :: WriterOptions -> [Inline] -> Doc authorToDocbook opts name' = - let name = render $ inlinesToDocbook opts name' + let name = render Nothing $ inlinesToDocbook opts name' in if ',' `elem` name then -- last name first let (lastname, rest) = break (==',') name @@ -61,16 +61,20 @@ authorToDocbook opts name' = -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String writeDocbook opts (Pandoc (Meta tit auths dat) blocks) = - let title = wrap opts tit + let title = inlinesToDocbook opts tit authors = map (authorToDocbook opts) auths date = inlinesToDocbook opts dat elements = hierarchicalize blocks - main = render $ vcat (map (elementToDocbook opts) elements) + colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + render' = render colwidth + main = render' $ vcat (map (elementToDocbook opts) elements) context = writerVariables opts ++ [ ("body", main) - , ("title", render title) - , ("date", render date) ] ++ - [ ("author", render a) | a <- authors ] + , ("title", render' title) + , ("date", render' date) ] ++ + [ ("author", render' a) | a <- authors ] in if writerStandalone opts then renderTemplate context $ writerTemplate opts else main @@ -84,7 +88,7 @@ elementToDocbook opts (Sec _ _num id' title elements) = then [Blk (Para [])] else elements in inTags True "section" [("id",id')] $ - inTagsSimple "title" (wrap opts title) $$ + inTagsSimple "title" (inlinesToDocbook opts title) $$ vcat (map (elementToDocbook opts) elements') -- | Convert a list of Pandoc blocks to Docbook. @@ -123,7 +127,7 @@ listItemToDocbook opts item = blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook _ Null = empty blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize -blockToDocbook opts (Plain lst) = wrap opts lst +blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst blockToDocbook opts (Para [Image txt (src,_)]) = let capt = inlinesToDocbook opts txt in inTagsIndented "figure" $ @@ -132,12 +136,13 @@ blockToDocbook opts (Para [Image txt (src,_)]) = (inTagsIndented "imageobject" (selfClosingTag "imagedata" [("fileref",src)])) $$ inTagsSimple "textobject" (inTagsSimple "phrase" capt)) -blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst +blockToDocbook opts (Para lst) = + inTagsIndented "para" $ inlinesToDocbook opts lst blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" $ blocksToDocbook opts blocks blockToDocbook _ (CodeBlock (_,classes,_) str) = - text ("<screen" ++ lang ++ ">\n") <> - text (escapeStringForXML str) <> text "\n</screen>" + text ("<screen" ++ lang ++ ">") <> cr <> + flush (text (escapeStringForXML str) <> cr <> text "</screen>") where lang = if null langs then "" else " language=\"" ++ escapeStringForXML (head langs) ++ @@ -214,12 +219,6 @@ tableItemToDocbook opts tag align item = let attrib = [("align", align)] in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item --- | Take list of inline elements and return wrapped doc. -wrap :: WriterOptions -> [Inline] -> Doc -wrap opts lst = if writerWrapText opts - then fsep $ map (inlinesToDocbook opts) (splitBy (== Space) lst) - else inlinesToDocbook opts lst - -- | Convert a list of inline elements to Docbook. inlinesToDocbook :: WriterOptions -> [Inline] -> Doc inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst @@ -254,8 +253,8 @@ inlineToDocbook _ (Code str) = inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str inlineToDocbook _ (TeX _) = empty inlineToDocbook _ (HtmlInline _) = empty -inlineToDocbook _ LineBreak = text $ "<literallayout></literallayout>" -inlineToDocbook _ Space = char ' ' +inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty +inlineToDocbook _ Space = space inlineToDocbook opts (Link txt (src, _)) = if isPrefixOf "mailto:" src then let src' = drop 7 src @@ -275,6 +274,6 @@ inlineToDocbook _ (Image _ (src, tit)) = else inTagsIndented "objectinfo" $ inTagsIndented "title" (text $ escapeStringForXML tit) in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ - titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] + titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] inlineToDocbook opts (Note contents) = inTagsIndented "footnote" $ blocksToDocbook opts contents diff --git a/tests/writer.docbook b/tests/writer.docbook index c17513cf9..15704f8bf 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -15,14 +15,13 @@ <date>July 17, 2006</date> </articleinfo> <para> - This is a set of tests for pandoc. Most of them are adapted from - John Gruber's markdown test suite. + This is a set of tests for pandoc. Most of them are adapted from John + Gruber's markdown test suite. </para> <section id="headers"> <title>Headers</title> <section id="level-2-with-an-embedded-link"> - <title>Level 2 with an - <ulink url="/url">embedded link</ulink></title> + <title>Level 2 with an <ulink url="/url">embedded link</ulink></title> <section id="level-3-with-emphasis"> <title>Level 3 with <emphasis>emphasis</emphasis></title> <section id="level-4"> @@ -60,16 +59,15 @@ Here's a regular paragraph. </para> <para> - In Markdown 1.0.0 and earlier. Version 8. This line turns into a - list item. Because a hard-wrapped line in the middle of a paragraph - looked like a list item. + In Markdown 1.0.0 and earlier. Version 8. This line turns into a list + item. Because a hard-wrapped line in the middle of a paragraph looked like + a list item. </para> <para> Here's one with a bullet. * criminey. </para> <para> - There should be a hard line - break<literallayout></literallayout>here. + There should be a hard line break<literallayout></literallayout>here. </para> </section> <section id="block-quotes"> @@ -866,45 +864,41 @@ These should not be escaped: \$ \\ \> \[ \{ <div> foo </div> - <para> And nested without indentation: </para> <div> -<div> -<div> + <div> + <div> foo </div> -</div> -<div> + </div> + <div> bar </div> -</div> - + </div> <para> Interpreted markdown in a table: </para> <table> -<tr> -<td> + <tr> + <td> This is <emphasis>emphasized</emphasis> </td> -<td> + <td> And this is <emphasis role="strong">strong</emphasis> </td> -</tr> -</table> - -<script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script> + </tr> + </table> + <script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script> <para> Here's a simple block: </para> <div> - + foo </div> - <para> This should be a code block, though: </para> @@ -923,31 +917,28 @@ These should not be escaped: \$ \\ \> \[ \{ Now, nested: </para> <div> - <div> - <div> - + <div> + <div> + foo </div> - </div> -</div> - + </div> + </div> <para> This should just be an HTML comment: </para> <!-- Comment --> - <para> Multiline: </para> <!-- -Blah -Blah ---> - -<!-- - This is another comment. ---> + Blah + Blah + --> + <!-- + This is another comment. + --> <para> Code block: </para> @@ -958,7 +949,6 @@ Blah Just plain comment, with trailing spaces on the line: </para> <!-- foo --> - <para> Code: </para> @@ -970,28 +960,27 @@ Blah </para> <hr> -<hr /> + <hr /> -<hr /> + <hr /> -<hr> + <hr> -<hr /> + <hr /> -<hr /> + <hr /> -<hr class="foo" id="bar" /> + <hr class="foo" id="bar" /> -<hr class="foo" id="bar" /> - -<hr class="foo" id="bar"> + <hr class="foo" id="bar" /> + <hr class="foo" id="bar"> </section> <section id="inline-markup"> <title>Inline Markup</title> <para> - This is <emphasis>emphasized</emphasis>, and so - <emphasis>is this</emphasis>. + This is <emphasis>emphasized</emphasis>, and so <emphasis>is + this</emphasis>. </para> <para> This is <emphasis role="strong">strong</emphasis>, and so @@ -1001,18 +990,18 @@ Blah An <emphasis><ulink url="/url">emphasized link</ulink></emphasis>. </para> <para> - <emphasis role="strong"><emphasis>This is strong and em.</emphasis></emphasis> + <emphasis role="strong"><emphasis>This is strong and + em.</emphasis></emphasis> </para> <para> - So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> - word. + So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> word. </para> <para> - <emphasis role="strong"><emphasis>This is strong and em.</emphasis></emphasis> + <emphasis role="strong"><emphasis>This is strong and + em.</emphasis></emphasis> </para> <para> - So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> - word. + So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> word. </para> <para> This is code: <literal>></literal>, <literal>$</literal>, @@ -1020,7 +1009,8 @@ Blah <literal><html></literal>. </para> <para> - <emphasis role="strikethrough">This is <emphasis>strikeout</emphasis>.</emphasis> + <emphasis role="strikethrough">This is + <emphasis>strikeout</emphasis>.</emphasis> </para> <para> Superscripts: a<superscript>bc</superscript>d @@ -1028,35 +1018,35 @@ Blah a<superscript>hello there</superscript>. </para> <para> - Subscripts: H<subscript>2</subscript>O, - H<subscript>23</subscript>O, H<subscript>many of them</subscript>O. + Subscripts: H<subscript>2</subscript>O, H<subscript>23</subscript>O, + H<subscript>many of them</subscript>O. </para> <para> - These should not be superscripts or subscripts, because of the - unescaped spaces: a^b c^d, a~b c~d. + These should not be superscripts or subscripts, because of the unescaped + spaces: a^b c^d, a~b c~d. </para> </section> <section id="smart-quotes-ellipses-dashes"> <title>Smart quotes, ellipses, dashes</title> <para> - <quote>Hello,</quote> said the spider. - <quote><quote>Shelob</quote> is my name.</quote> + <quote>Hello,</quote> said the spider. <quote><quote>Shelob</quote> is my + name.</quote> </para> <para> - <quote>A</quote>, <quote>B</quote>, and <quote>C</quote> are - letters. + <quote>A</quote>, <quote>B</quote>, and <quote>C</quote> are letters. </para> <para> - <quote>Oak,</quote> <quote>elm,</quote> and <quote>beech</quote> - are names of trees. So is <quote>pine.</quote> + <quote>Oak,</quote> <quote>elm,</quote> and <quote>beech</quote> are names + of trees. So is <quote>pine.</quote> </para> <para> - <quote>He said, <quote>I want to go.</quote></quote> Were you alive - in the 70's? + <quote>He said, <quote>I want to go.</quote></quote> Were you alive in the + 70's? </para> <para> Here is some quoted <quote><literal>code</literal></quote> and a - <quote><ulink url="http://example.com/?foo=1&bar=2">quoted link</ulink></quote>. + <quote><ulink url="http://example.com/?foo=1&bar=2">quoted + link</ulink></quote>. </para> <para> Some dashes: one—two — three—four — five. @@ -1135,8 +1125,8 @@ Blah </listitem> <listitem> <para> - Escaped <literal>$</literal>: $73 - <emphasis>this should be emphasized</emphasis> 23$. + Escaped <literal>$</literal>: $73 <emphasis>this should be + emphasized</emphasis> 23$. </para> </listitem> </itemizedlist> @@ -1316,8 +1306,8 @@ Blah <section id="with-ampersands"> <title>With ampersands</title> <para> - Here's a - <ulink url="http://example.com/?foo=1&bar=2">link with an ampersand in the URL</ulink>. + Here's a <ulink url="http://example.com/?foo=1&bar=2">link with an + ampersand in the URL</ulink>. </para> <para> Here's a link with an amersand in the link text: @@ -1327,8 +1317,8 @@ Blah Here's an <ulink url="/script?foo=1&bar=2">inline link</ulink>. </para> <para> - Here's an - <ulink url="/script?foo=1&bar=2">inline link in pointy braces</ulink>. + Here's an <ulink url="/script?foo=1&bar=2">inline link in pointy + braces</ulink>. </para> </section> <section id="autolinks"> @@ -1387,78 +1377,67 @@ or here: <http://example.com/> </mediaobject> </figure> <para> - Here is a movie - <inlinemediaobject> + Here is a movie <inlinemediaobject> <imageobject> <imagedata fileref="movie.jpg" /> </imageobject> - </inlinemediaobject> - icon. + </inlinemediaobject> icon. </para> </section> <section id="footnotes"> <title>Footnotes</title> <para> - Here is a footnote - reference,<footnote> - <para> - Here is the footnote. It can go anywhere after the footnote - reference. It need not be placed at the end of the document. - </para> - </footnote> - and - another.<footnote> - <para> - Here's the long note. This one contains multiple blocks. - </para> - <para> - Subsequent blocks are indented to show that they belong to the - footnote (as with list items). - </para> - <screen> + Here is a footnote reference,<footnote> + <para> + Here is the footnote. It can go anywhere after the footnote reference. + It need not be placed at the end of the document. + </para> + </footnote> and another.<footnote> + <para> + Here's the long note. This one contains multiple blocks. + </para> + <para> + Subsequent blocks are indented to show that they belong to the + footnote (as with list items). + </para> + <screen> { <code> } </screen> - <para> - If you want, you can indent every line, but you can also be lazy - and just indent the first line of each block. - </para> - </footnote> - This should <emphasis>not</emphasis> be a footnote reference, - because it contains a space.[^my note] Here is an inline - note.<footnote> - <para> - This is <emphasis>easier</emphasis> to type. Inline notes may - contain <ulink url="http://google.com">links</ulink> and - <literal>]</literal> verbatim characters, as well as [bracketed - text]. - </para> - </footnote> + <para> + If you want, you can indent every line, but you can also be lazy and + just indent the first line of each block. + </para> + </footnote> This should <emphasis>not</emphasis> be a footnote reference, + because it contains a space.[^my note] Here is an inline note.<footnote> + <para> + This is <emphasis>easier</emphasis> to type. Inline notes may contain + <ulink url="http://google.com">links</ulink> and <literal>]</literal> + verbatim characters, as well as [bracketed text]. + </para> + </footnote> </para> <blockquote> <para> - Notes can go in - quotes.<footnote> - <para> - In quote. - </para> - </footnote> + Notes can go in quotes.<footnote> + <para> + In quote. + </para> + </footnote> </para> </blockquote> <orderedlist numeration="arabic"> <listitem> <para> - And in list - items.<footnote> - <para> - In list. - </para> - </footnote> + And in list items.<footnote> + <para> + In list. + </para> + </footnote> </para> </listitem> </orderedlist> <para> - This paragraph should not be part of the note, as it is not - indented. + This paragraph should not be part of the note, as it is not indented. </para> </section> </article> |