From 82e363a727456deb3f32d1683defee0fd8bbd32c Mon Sep 17 00:00:00 2001 From: mb21 Date: Sun, 21 Jun 2015 16:24:17 +0200 Subject: DocBook reader mediaobjects and figures, closes #2184 --- src/Text/Pandoc/Readers/DocBook.hs | 31 ++++++++++++++++++++----------- tests/docbook-reader.docbook | 22 ++++++++++++++++++++-- tests/docbook-reader.native | 5 +++-- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 98a142840..3cc2a4479 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -167,7 +167,9 @@ List of all DocBook tags, with [x] indicating implemented, [x] glossseealso - A cross-reference from one GlossEntry to another [x] glossterm - A glossary term [ ] graphic - A displayed graphical object (not an inline) + Note: in DocBook v5 `graphic` is discarded [ ] graphicco - A graphic that contains callout areas + Note: in DocBook v5 `graphicco` is discarded [ ] group - A group of elements in a CmdSynopsis [ ] guibutton - The text on a button in a GUI [ ] guiicon - Graphic and/or text appearing as a icon in a GUI @@ -180,8 +182,9 @@ List of all DocBook tags, with [x] indicating implemented, [ ] holder - The name of the individual or organization that holds a copyright [o] honorific - The title of a person [ ] html:form - An HTML form -[ ] imagedata - Pointer to external image data -[ ] imageobject - A wrapper for image data and its associated meta-information +[x] imagedata - Pointer to external image data (only `fileref` attribute + implemented but not `entityref` which would require parsing of the DTD) +[x] imageobject - A wrapper for image data and its associated meta-information [ ] imageobjectco - A wrapper for an image object with callouts [x] important - An admonition set off from the text [x] index - An index @@ -627,18 +630,24 @@ addToStart toadd bs = -- function that is used by both mediaobject (in parseBlock) -- and inlinemediaobject (in parseInline) -getImage :: Element -> DB Inlines -getImage e = do +-- A DocBook mediaobject is a wrapper around a set of alternative presentations +getMediaobject :: Element -> DB Inlines +getMediaobject e = do imageUrl <- case filterChild (named "imageobject") e of Nothing -> return mempty Just z -> case filterChild (named "imagedata") z of Nothing -> return mempty Just i -> return $ attrValue "fileref" i - caption <- case filterChild - (\x -> named "caption" x || named "textobject" x) e of - Nothing -> gets dbFigureTitle - Just z -> mconcat <$> (mapM parseInline $ elContent z) - return $ image imageUrl "" caption + let getCaption el = case filterChild (\x -> named "caption" x + || named "textobject" x + || named "alt" x) el of + Nothing -> return mempty + Just z -> mconcat <$> (mapM parseInline $ elContent z) + figTitle <- gets dbFigureTitle + let (caption, title) = if isNull figTitle + then (getCaption e, "") + else (return figTitle, "fig:") + liftM (image imageUrl title) caption getBlocks :: Element -> DB Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) @@ -734,7 +743,7 @@ parseBlock (Elem e) = <$> listitems "variablelist" -> definitionList <$> deflistitems "figure" -> getFigure e - "mediaobject" -> para <$> getImage e + "mediaobject" -> para <$> getMediaobject e "caption" -> return mempty "info" -> metaBlock "articleinfo" -> metaBlock @@ -902,7 +911,7 @@ parseInline (Elem e) = "inlineequation" -> equation math "subscript" -> subscript <$> innerInlines "superscript" -> superscript <$> innerInlines - "inlinemediaobject" -> getImage e + "inlinemediaobject" -> getMediaobject e "quote" -> do qt <- gets dbQuoteType let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote diff --git a/tests/docbook-reader.docbook b/tests/docbook-reader.docbook index cf5059646..3a4fc77c6 100644 --- a/tests/docbook-reader.docbook +++ b/tests/docbook-reader.docbook @@ -1033,12 +1033,12 @@ or here: <http://example.com/> From Voyage dans la Lune by Georges Melies (1902):
- lalune + lalune fig caption - lalune + lalune alt text shadowed by fig caption
@@ -1047,7 +1047,25 @@ or here: <http://example.com/> icon. + And here a second movie + alt text + + + + icon. + And here a third movie + alt text + + + + icon. + + + + + lalune no figure alt text + Footnotes diff --git a/tests/docbook-reader.native b/tests/docbook-reader.native index 353a352a2..b87e28d1c 100644 --- a/tests/docbook-reader.native +++ b/tests/docbook-reader.native @@ -270,8 +270,9 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,CodeBlock ("",[],[]) "or here: " ,Header 1 ("images",[],[]) [Str "Images"] ,Para [Str "From",Space,Quoted DoubleQuote [Str "Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune"],Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(1902):"] -,Para [Image [Str "lalune"] ("lalune.jpg","")] -,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",Space,Image [] ("movie.jpg",""),Space,Str "icon."] +,Para [Image [Str "lalune",Space,Str "fig",Space,Str "caption"] ("lalune.jpg","fig:")] +,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",Space,Image [] ("movie.jpg",""),Space,Str "icon.",Space,Str "And",Space,Str "here",Space,Str "a",Space,Str "second",Space,Str "movie",Space,Image [Str "alt",Space,Str "text"] ("movie.jpg",""),Space,Str "icon.",Space,Str "And",Space,Str "here",Space,Str "a",Space,Str "third",Space,Str "movie",Space,Image [Str "alt",Space,Str "text"] ("movie.jpg",""),Space,Str "icon."] +,Para [Image [Str "lalune",Space,Str "no",Space,Str "figure",Space,Str "alt",Space,Str "text"] ("lalune.jpg","")] ,Header 1 ("footnotes",[],[]) [Str "Footnotes"] ,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Note [Para [Str "Here",Space,Str "is",Space,Str "the",Space,Str "footnote.",Space,Str "It",Space,Str "can",Space,Str "go",Space,Str "anywhere",Space,Str "after",Space,Str "the",Space,Str "footnote",Space,Str "reference.",Space,Str "It",Space,Str "need",Space,Str "not",Space,Str "be",Space,Str "placed",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]],Space,Str "and",Space,Str "another.",Note [Para [Str "Here\8217s",Space,Str "the",Space,Str "long",Space,Str "note.",Space,Str "This",Space,Str "one",Space,Str "contains",Space,Str "multiple",Space,Str "blocks."],Para [Str "Subsequent",Space,Str "blocks",Space,Str "are",Space,Str "indented",Space,Str "to",Space,Str "show",Space,Str "that",Space,Str "they",Space,Str "belong",Space,Str "to",Space,Str "the",Space,Str "footnote",Space,Str "(as",Space,Str "with",Space,Str "list",Space,Str "items)."],CodeBlock ("",[],[]) " { }",Para [Str "If",Space,Str "you",Space,Str "want,",Space,Str "you",Space,Str "can",Space,Str "indent",Space,Str "every",Space,Str "line,",Space,Str "but",Space,Str "you",Space,Str "can",Space,Str "also",Space,Str "be",Space,Str "lazy",Space,Str "and",Space,Str "just",Space,Str "indent",Space,Str "the",Space,Str "first",Space,Str "line",Space,Str "of",Space,Str "each",Space,Str "block."]],Space,Str "This",Space,Str "should",Space,Emph [Str "not"],Space,Str "be",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Space,Str "because",Space,Str "it",Space,Str "contains",Space,Str "a",Space,Str "space.[^my",Space,Str "note]",Space,Str "Here",Space,Str "is",Space,Str "an",Space,Str "inline",Space,Str "note.",Note [Para [Str "This",Space,Str "is",Space,Emph [Str "easier"],Space,Str "to",Space,Str "type.",Space,Str "Inline",Space,Str "notes",Space,Str "may",Space,Str "contain",Space,Link [Str "links"] ("http://google.com",""),Space,Str "and",Space,Code ("",[],[]) "]",Space,Str "verbatim",Space,Str "characters,",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str "[bracketed",Space,Str "text]."]]] ,BlockQuote -- cgit v1.2.3