From a02f276ff125eb9bede33524371cf8c7b660bb40 Mon Sep 17 00:00:00 2001 From: Hubert Plociniczak Date: Mon, 17 Oct 2016 16:35:13 +0200 Subject: Infer caption from the text following the img. A frame can contain other frames with text boxes. This had not been considered before, which meant that the whole construction of images was broken in those cases. Also, captions were previously hard-coded to empty (i.e. ignored). --- src/Text/Pandoc/Readers/Odt/ContentReader.hs | 67 +++++++++++++++++++--------- tests/Tests/Readers/Odt.hs | 6 ++- tests/odt/native/image.native | 1 + tests/odt/native/imageIndex.native | 1 + tests/odt/native/imageWithCaption.native | 1 + 5 files changed, 54 insertions(+), 22 deletions(-) create mode 100644 tests/odt/native/image.native create mode 100644 tests/odt/native/imageIndex.native create mode 100644 tests/odt/native/imageWithCaption.native diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index d61707976..166fce681 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -543,6 +543,10 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover extractText (XML.Text cData) = succeedWith (XML.cdData cData) extractText _ = failEmpty +read_text_seq :: InlineMatcher +read_text_seq = matchingElement NsText "sequence" + $ matchChildContent [] read_plain_text + -- specifically. I honor that, although the current implementation of '(<>)' -- for 'Inlines' in "Text.Pandoc.Builder" will collaps them agein. 
@@ -591,7 +595,8 @@ read_paragraph = matchingElement NsText "p" , read_reference_start , read_bookmark_ref , read_reference_ref - , read_frame + , read_maybe_nested_img_frame + , read_text_seq ] read_plain_text @@ -616,7 +621,7 @@ read_header = matchingElement NsText "h" , read_reference_start , read_bookmark_ref , read_reference_ref - , read_frame + , read_maybe_nested_img_frame ] read_plain_text ) -< blocks anchor <- getHeaderAnchor -< children @@ -726,18 +731,26 @@ read_table_cell = matchingElement NsTable "table-cell" ---------------------- -- -read_frame :: InlineMatcher -read_frame = matchingElement NsDraw "frame" - $ proc blocks -> do - w <- ( findAttr' NsSVG "width" ) -< () - h <- ( findAttr' NsSVG "height" ) -< () - attr <- arr (uncurry image_attributes) -< (w, h) - titleNodes <- ( matchChildContent' [ read_frame_title ] ) -< blocks - title <- arr inlineListToIdentifier -< (toList titleNodes) - src <- matchChildContent' [ read_image_src ] -< blocks - resource <- lookupResource -< src - _ <- updateMediaWithResource -< resource - arr (uncurry4 imageWith ) -< (attr, src, title, mempty) +read_maybe_nested_img_frame :: InlineMatcher +read_maybe_nested_img_frame = matchingElement NsDraw "frame" + $ proc blocks -> do + img <- (findChild' NsDraw "image") -< () + case img of + Just _ -> read_frame -< blocks + Nothing -> matchChildContent' [ read_frame_text_box ] -< blocks + +read_frame :: OdtReaderSafe Inlines Inlines +read_frame = + proc blocks -> do + w <- ( findAttr' NsSVG "width" ) -< () + h <- ( findAttr' NsSVG "height" ) -< () + titleNodes <- ( matchChildContent' [ read_frame_title ] ) -< blocks + src <- matchChildContent' [ read_image_src ] -< blocks + resource <- lookupResource -< src + _ <- updateMediaWithResource -< resource + alt <- (matchChildContent [] read_plain_text) -< blocks + arr (uncurry4 imageWith ) -< + (image_attributes w h, src, inlineListToIdentifier (toList titleNodes), alt) image_attributes :: Maybe String -> Maybe String -> Attr 
image_attributes x y = @@ -749,17 +762,31 @@ image_attributes x y = read_image_src :: (Namespace, ElementName, OdtReader Anchor Anchor) read_image_src = matchingElement NsDraw "image" - $ proc _ -> do - imgSrc <- findAttr NsXLink "href" -< () - case imgSrc of - Right src -> returnV src -<< () - Left _ -> returnV "" -< () + $ proc _ -> do + imgSrc <- findAttr NsXLink "href" -< () + case imgSrc of + Right src -> returnV src -<< () + Left _ -> returnV "" -< () read_frame_title :: InlineMatcher read_frame_title = matchingElement NsSVG "title" $ (matchChildContent [] read_plain_text) - +read_frame_text_box :: InlineMatcher +read_frame_text_box = matchingElement NsDraw "text-box" + $ proc blocks -> do + paragraphs <- (matchChildContent' [ read_paragraph ]) -< blocks + case toList paragraphs of + (p : []) -> -- require only a single paragraph + arr read_img_with_caption -< p + _ -> + arr fromList -< [] + +read_img_with_caption :: Block -> Inlines +read_img_with_caption (Para ((Image attr _ target) : txt)) = + singleton (Image attr txt target) -- override caption with the text that follows +read_img_with_caption _ = + fromList [] ---------------------- -- Internal links diff --git a/tests/Tests/Readers/Odt.hs b/tests/Tests/Readers/Odt.hs index cf30b8398..74796d899 100644 --- a/tests/Tests/Readers/Odt.hs +++ b/tests/Tests/Readers/Odt.hs @@ -141,7 +141,6 @@ namesOfTestsComparingToMarkdown = [ "bold" , "footnote" , "headers" -- , "horizontalRule" --- , "image" , "italic" -- , "listBlocks" , "paragraph" @@ -152,6 +151,9 @@ namesOfTestsComparingToMarkdown = [ "bold" namesOfTestsComparingToNative :: [ String ] namesOfTestsComparingToNative = [ "blockquote" + , "image" + , "imageIndex" + , "imageWithCaption" , "orderedListMixed" , "orderedListRoman" , "orderedListSimple" @@ -162,4 +164,4 @@ namesOfTestsComparingToNative = [ "blockquote" -- , "table" , "unicode" , "unorderedList" - ] \ No newline at end of file + ] diff --git a/tests/odt/native/image.native 
b/tests/odt/native/image.native new file mode 100644 index 000000000..667442539 --- /dev/null +++ b/tests/odt/native/image.native @@ -0,0 +1 @@ +[Para [Image ("",[],[("width","5.292cm"),("height","5.292cm")]) [] ("Pictures/10000000000000FA000000FAD6A15225.jpg","")]] diff --git a/tests/odt/native/imageIndex.native b/tests/odt/native/imageIndex.native new file mode 100644 index 000000000..75c6e4135 --- /dev/null +++ b/tests/odt/native/imageIndex.native @@ -0,0 +1 @@ +[Para [Image ("",[],[("width","5.292cm"),("height","5.292cm")]) [Str "Abbildung",Space,Str "1:",Space,Str "Image",Space,Str "caption"] ("Pictures/10000000000000FA000000FAD6A15225.jpg","")]] diff --git a/tests/odt/native/imageWithCaption.native b/tests/odt/native/imageWithCaption.native new file mode 100644 index 000000000..75c6e4135 --- /dev/null +++ b/tests/odt/native/imageWithCaption.native @@ -0,0 +1 @@ +[Para [Image ("",[],[("width","5.292cm"),("height","5.292cm")]) [Str "Abbildung",Space,Str "1:",Space,Str "Image",Space,Str "caption"] ("Pictures/10000000000000FA000000FAD6A15225.jpg","")]] -- cgit v1.2.3