diff options
| -rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 41 | ||||
| -rw-r--r-- | tests/writer.docbook | 233 | 
2 files changed, 126 insertions, 148 deletions
| diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index d8b85c1a2..a53c3fb86 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -35,13 +35,13 @@ import Text.Pandoc.Templates (renderTemplate)  import Text.Pandoc.Readers.TeXMath  import Data.List ( isPrefixOf, intercalate )  import Data.Char ( toLower ) -import Text.PrettyPrint.HughesPJ hiding ( Str )  import Text.Pandoc.Highlighting (languages, languagesByExtension) +import Text.Pandoc.Pretty  -- | Convert list of authors to a docbook <author> section  authorToDocbook :: WriterOptions -> [Inline] -> Doc  authorToDocbook opts name' = -  let name = render $ inlinesToDocbook opts name' +  let name = render Nothing $ inlinesToDocbook opts name'    in  if ',' `elem` name           then -- last name first                let (lastname, rest) = break (==',') name  @@ -61,16 +61,20 @@ authorToDocbook opts name' =  -- | Convert Pandoc document to string in Docbook format.  writeDocbook :: WriterOptions -> Pandoc -> String  writeDocbook opts (Pandoc (Meta tit auths dat) blocks) =  -  let title = wrap opts tit +  let title = inlinesToDocbook opts tit        authors = map (authorToDocbook opts) auths        date = inlinesToDocbook opts dat        elements = hierarchicalize blocks -      main     = render $ vcat (map (elementToDocbook opts) elements) +      colwidth = if writerWrapText opts +                    then Just $ writerColumns opts +                    else Nothing +      render' = render colwidth +      main     = render' $ vcat (map (elementToDocbook opts) elements)        context = writerVariables opts ++                  [ ("body", main) -                , ("title", render title) -                , ("date", render date) ] ++ -                [ ("author", render a) | a <- authors ] +                , ("title", render' title) +                , ("date", render' date) ] ++ +                [ ("author", render' a) | a <- authors ]    in  if writerStandalone opts           then renderTemplate context $ writerTemplate opts           else main @@ -84,7 +88,7 @@ elementToDocbook opts (Sec _ _num id' title elements) =                      then [Blk (Para [])]                      else elements    in  inTags True "section" [("id",id')] $ -      inTagsSimple "title" (wrap opts title) $$ +      inTagsSimple "title" (inlinesToDocbook opts title) $$        vcat (map (elementToDocbook opts) elements')   -- | Convert a list of Pandoc blocks to Docbook. @@ -123,7 +127,7 @@ listItemToDocbook opts item =  blockToDocbook :: WriterOptions -> Block -> Doc  blockToDocbook _ Null = empty  blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize -blockToDocbook opts (Plain lst) = wrap opts lst +blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst  blockToDocbook opts (Para [Image txt (src,_)]) =    let capt = inlinesToDocbook opts txt    in  inTagsIndented "figure" $ @@ -132,12 +136,13 @@ blockToDocbook opts (Para [Image txt (src,_)]) =             (inTagsIndented "imageobject"               (selfClosingTag "imagedata" [("fileref",src)])) $$             inTagsSimple "textobject" (inTagsSimple "phrase" capt)) -blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst +blockToDocbook opts (Para lst) = +  inTagsIndented "para" $ inlinesToDocbook opts lst  blockToDocbook opts (BlockQuote blocks) =    inTagsIndented "blockquote" $ blocksToDocbook opts blocks  blockToDocbook _ (CodeBlock (_,classes,_) str) =  -  text ("<screen" ++ lang ++ ">\n") <> -     text (escapeStringForXML str) <> text "\n</screen>" +  text ("<screen" ++ lang ++ ">") <> cr <> +     flush (text (escapeStringForXML str) <> cr <> text "</screen>")      where lang  = if null langs                       then ""                       else " language=\"" ++ escapeStringForXML (head langs) ++ @@ -214,12 +219,6 @@ tableItemToDocbook opts tag align item =    let attrib = [("align", align)]    in  inTags True tag attrib $ vcat $ map (blockToDocbook opts) item --- | Take list of inline elements and return wrapped doc. -wrap :: WriterOptions -> [Inline] -> Doc -wrap opts lst = if writerWrapText opts -                   then fsep $ map (inlinesToDocbook opts) (splitBy (== Space) lst) -                   else inlinesToDocbook opts lst -  -- | Convert a list of inline elements to Docbook.  inlinesToDocbook :: WriterOptions -> [Inline] -> Doc  inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst @@ -254,8 +253,8 @@ inlineToDocbook _ (Code str) =  inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str  inlineToDocbook _ (TeX _) = empty  inlineToDocbook _ (HtmlInline _) = empty -inlineToDocbook _ LineBreak = text $ "<literallayout></literallayout>"  -inlineToDocbook _ Space = char ' ' +inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty +inlineToDocbook _ Space = space  inlineToDocbook opts (Link txt (src, _)) =    if isPrefixOf "mailto:" src       then let src' = drop 7 src @@ -275,6 +274,6 @@ inlineToDocbook _ (Image _ (src, tit)) =                     else inTagsIndented "objectinfo" $                          inTagsIndented "title" (text $ escapeStringForXML tit)    in  inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ -      titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]  +      titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]  inlineToDocbook opts (Note contents) =     inTagsIndented "footnote" $ blocksToDocbook opts contents diff --git a/tests/writer.docbook b/tests/writer.docbook index c17513cf9..15704f8bf 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -15,14 +15,13 @@      <date>July 17, 2006</date>    </articleinfo>  <para> -  This is a set of tests for pandoc. Most of them are adapted from -  John Gruber's markdown test suite. +  This is a set of tests for pandoc. Most of them are adapted from John +  Gruber's markdown test suite.  </para>  <section id="headers">    <title>Headers</title>    <section id="level-2-with-an-embedded-link"> -    <title>Level 2 with an -           <ulink url="/url">embedded link</ulink></title> +    <title>Level 2 with an <ulink url="/url">embedded link</ulink></title>      <section id="level-3-with-emphasis">        <title>Level 3 with <emphasis>emphasis</emphasis></title>        <section id="level-4"> @@ -60,16 +59,15 @@      Here's a regular paragraph.    </para>    <para> -    In Markdown 1.0.0 and earlier. Version 8. This line turns into a -    list item. Because a hard-wrapped line in the middle of a paragraph -    looked like a list item. +    In Markdown 1.0.0 and earlier. Version 8. This line turns into a list +    item. Because a hard-wrapped line in the middle of a paragraph looked like +    a list item.    </para>    <para>      Here's one with a bullet. * criminey.    </para>    <para> -    There should be a hard line -    break<literallayout></literallayout>here. +    There should be a hard line break<literallayout></literallayout>here.    </para>  </section>  <section id="block-quotes"> @@ -866,45 +864,41 @@ These should not be escaped:  \$ \\ \> \[ \{    <div>    foo    </div> -    <para>      And nested without indentation:    </para>    <div> -<div> -<div> +  <div> +  <div>    foo    </div> -</div> -<div> +  </div> +  <div>    bar    </div> -</div> - +  </div>    <para>      Interpreted markdown in a table:    </para>    <table> -<tr> -<td> +  <tr> +  <td>    This is <emphasis>emphasized</emphasis>    </td> -<td> +  <td>    And this is <emphasis role="strong">strong</emphasis>    </td> -</tr> -</table> - -<script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script> +  </tr> +  </table> +  <script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script>    <para>      Here's a simple block:    </para>    <div> -     +          foo    </div> -    <para>      This should be a code block, though:    </para> @@ -923,31 +917,28 @@ These should not be escaped:  \$ \\ \> \[ \{      Now, nested:    </para>    <div> -    <div> -        <div> -             +      <div> +          <div> +                  foo    </div> -    </div> -</div> - +      </div> +  </div>    <para>      This should just be an HTML comment:    </para>    <!-- Comment --> -    <para>      Multiline:    </para>    <!-- -Blah -Blah ---> - -<!-- -    This is another comment. ---> +  Blah +  Blah +  --> +  <!-- +      This is another comment. +  -->    <para>      Code block:    </para> @@ -958,7 +949,6 @@ Blah      Just plain comment, with trailing spaces on the line:    </para>    <!-- foo -->    -    <para>      Code:    </para> @@ -970,28 +960,27 @@ Blah    </para>    <hr> -<hr /> +  <hr /> -<hr /> +  <hr /> -<hr>    +  <hr>    -<hr />   +  <hr />   -<hr />  +  <hr />  -<hr class="foo" id="bar" /> +  <hr class="foo" id="bar" /> -<hr class="foo" id="bar" /> - -<hr class="foo" id="bar"> +  <hr class="foo" id="bar" /> +  <hr class="foo" id="bar">  </section>  <section id="inline-markup">    <title>Inline Markup</title>    <para> -    This is <emphasis>emphasized</emphasis>, and so -    <emphasis>is this</emphasis>. +    This is <emphasis>emphasized</emphasis>, and so <emphasis>is +    this</emphasis>.    </para>    <para>      This is <emphasis role="strong">strong</emphasis>, and so @@ -1001,18 +990,18 @@ Blah      An <emphasis><ulink url="/url">emphasized link</ulink></emphasis>.    </para>    <para> -    <emphasis role="strong"><emphasis>This is strong and em.</emphasis></emphasis> +    <emphasis role="strong"><emphasis>This is strong and +    em.</emphasis></emphasis>    </para>    <para> -    So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> -    word. +    So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> word.    </para>    <para> -    <emphasis role="strong"><emphasis>This is strong and em.</emphasis></emphasis> +    <emphasis role="strong"><emphasis>This is strong and +    em.</emphasis></emphasis>    </para>    <para> -    So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> -    word. +    So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> word.    </para>    <para>      This is code: <literal>></literal>, <literal>$</literal>, @@ -1020,7 +1009,8 @@ Blah      <literal><html></literal>.    </para>    <para> -    <emphasis role="strikethrough">This is <emphasis>strikeout</emphasis>.</emphasis> +    <emphasis role="strikethrough">This is +    <emphasis>strikeout</emphasis>.</emphasis>    </para>    <para>      Superscripts: a<superscript>bc</superscript>d @@ -1028,35 +1018,35 @@ Blah      a<superscript>hello there</superscript>.    </para>    <para> -    Subscripts: H<subscript>2</subscript>O, -    H<subscript>23</subscript>O, H<subscript>many of them</subscript>O. +    Subscripts: H<subscript>2</subscript>O, H<subscript>23</subscript>O, +    H<subscript>many of them</subscript>O.    </para>    <para> -    These should not be superscripts or subscripts, because of the -    unescaped spaces: a^b c^d, a~b c~d. +    These should not be superscripts or subscripts, because of the unescaped +    spaces: a^b c^d, a~b c~d.    </para>  </section>  <section id="smart-quotes-ellipses-dashes">    <title>Smart quotes, ellipses, dashes</title>    <para> -    <quote>Hello,</quote> said the spider. -    <quote><quote>Shelob</quote> is my name.</quote> +    <quote>Hello,</quote> said the spider. <quote><quote>Shelob</quote> is my +    name.</quote>    </para>    <para> -    <quote>A</quote>, <quote>B</quote>, and <quote>C</quote> are -    letters. +    <quote>A</quote>, <quote>B</quote>, and <quote>C</quote> are letters.    </para>    <para> -    <quote>Oak,</quote> <quote>elm,</quote> and <quote>beech</quote> -    are names of trees. So is <quote>pine.</quote> +    <quote>Oak,</quote> <quote>elm,</quote> and <quote>beech</quote> are names +    of trees. So is <quote>pine.</quote>    </para>    <para> -    <quote>He said, <quote>I want to go.</quote></quote> Were you alive -    in the 70's? +    <quote>He said, <quote>I want to go.</quote></quote> Were you alive in the +    70's?    </para>    <para>      Here is some quoted <quote><literal>code</literal></quote> and a -    <quote><ulink url="http://example.com/?foo=1&bar=2">quoted link</ulink></quote>. +    <quote><ulink url="http://example.com/?foo=1&bar=2">quoted +    link</ulink></quote>.    </para>    <para>      Some dashes: one—two — three—four — five. @@ -1135,8 +1125,8 @@ Blah      </listitem>      <listitem>        <para> -        Escaped <literal>$</literal>: $73 -        <emphasis>this should be emphasized</emphasis> 23$. +        Escaped <literal>$</literal>: $73 <emphasis>this should be +        emphasized</emphasis> 23$.        </para>      </listitem>    </itemizedlist> @@ -1316,8 +1306,8 @@ Blah    <section id="with-ampersands">      <title>With ampersands</title>      <para> -      Here's a -      <ulink url="http://example.com/?foo=1&bar=2">link with an ampersand in the URL</ulink>. +      Here's a <ulink url="http://example.com/?foo=1&bar=2">link with an +      ampersand in the URL</ulink>.      </para>      <para>        Here's a link with an amersand in the link text: @@ -1327,8 +1317,8 @@ Blah        Here's an <ulink url="/script?foo=1&bar=2">inline link</ulink>.      </para>      <para> -      Here's an -      <ulink url="/script?foo=1&bar=2">inline link in pointy braces</ulink>. +      Here's an <ulink url="/script?foo=1&bar=2">inline link in pointy +      braces</ulink>.      </para>    </section>    <section id="autolinks"> @@ -1387,78 +1377,67 @@ or here: <http://example.com/>      </mediaobject>    </figure>    <para> -    Here is a movie -    <inlinemediaobject> +    Here is a movie <inlinemediaobject>        <imageobject>          <imagedata fileref="movie.jpg" />        </imageobject> -    </inlinemediaobject> -    icon. +    </inlinemediaobject> icon.    </para>  </section>  <section id="footnotes">    <title>Footnotes</title>    <para> -    Here is a footnote -    reference,<footnote> -                <para> -                  Here is the footnote. It can go anywhere after the footnote -                  reference. It need not be placed at the end of the document. -                </para> -              </footnote> -    and -    another.<footnote> -              <para> -                Here's the long note. This one contains multiple blocks. -              </para> -              <para> -                Subsequent blocks are indented to show that they belong to the -                footnote (as with list items). -              </para> -              <screen> +    Here is a footnote reference,<footnote> +      <para> +        Here is the footnote. It can go anywhere after the footnote reference. +        It need not be placed at the end of the document. +      </para> +    </footnote> and another.<footnote> +      <para> +        Here's the long note. This one contains multiple blocks. +      </para> +      <para> +        Subsequent blocks are indented to show that they belong to the +        footnote (as with list items). +      </para> +      <screen>    { <code> }  </screen> -              <para> -                If you want, you can indent every line, but you can also be lazy -                and just indent the first line of each block. -              </para> -            </footnote> -    This should <emphasis>not</emphasis> be a footnote reference, -    because it contains a space.[^my note] Here is an inline -    note.<footnote> -           <para> -             This is <emphasis>easier</emphasis> to type. Inline notes may -             contain <ulink url="http://google.com">links</ulink> and -             <literal>]</literal> verbatim characters, as well as [bracketed -             text]. -           </para> -         </footnote> +      <para> +        If you want, you can indent every line, but you can also be lazy and +        just indent the first line of each block. +      </para> +    </footnote> This should <emphasis>not</emphasis> be a footnote reference, +    because it contains a space.[^my note] Here is an inline note.<footnote> +      <para> +        This is <emphasis>easier</emphasis> to type. Inline notes may contain +        <ulink url="http://google.com">links</ulink> and <literal>]</literal> +        verbatim characters, as well as [bracketed text]. +      </para> +    </footnote>    </para>    <blockquote>      <para> -      Notes can go in -      quotes.<footnote> -               <para> -                 In quote. -               </para> -             </footnote> +      Notes can go in quotes.<footnote> +        <para> +          In quote. +        </para> +      </footnote>      </para>    </blockquote>    <orderedlist numeration="arabic">      <listitem>        <para> -        And in list -        items.<footnote> -                <para> -                  In list. -                </para> -              </footnote> +        And in list items.<footnote> +          <para> +            In list. +          </para> +        </footnote>        </para>      </listitem>    </orderedlist>    <para> -    This paragraph should not be part of the note, as it is not -    indented. +    This paragraph should not be part of the note, as it is not indented.    </para>  </section>  </article> | 
