From 7d9b782f73edfc49fbe6f0c3d6ce61328811cbc7 Mon Sep 17 00:00:00 2001
From: Mauro Bieg
Date: Sat, 22 Jul 2017 19:22:56 +0200
Subject: HTML Reader: parse figure and figcaption (#3813)

---
 src/Text/Pandoc/Readers/HTML.hs  | 33 +++++++++++++++++++++++++++++
 test/command/html-read-figure.md | 45 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 test/command/html-read-figure.md

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 734973e33..3a0d6eb14 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -188,6 +188,7 @@ block = do
             , pBody
             , pDiv
             , pPlain
+            , pFigure
             , pRawHtmlBlock
             ]
   trace (take 60 $ show $ B.toList res)
@@ -553,6 +554,38 @@ pPara = do
   contents <- trimInlines <$> pInTags "p" inline
   return $ B.para contents
 
+-- | Parse a @figure@ element made of one image (read with
+-- @pOptInTag "p" pImage@) and an optional @figcaption@, in either
+-- order.  Yields a paragraph holding that image, with the caption as
+-- its description and @fig:@ prepended to its title.
+--
+-- The parser is wrapped in 'try' so that a @figure@ with any other
+-- content fails without consuming input, leaving the element to the
+-- alternatives that follow 'pFigure' in 'block'.
+pFigure :: PandocMonad m => TagParser m Blocks
+pFigure = try $ do
+  TagOpen _ _ <- pSatisfy (matchTagOpen "figure" [])
+  skipMany pBlank
+  let pImg  = pOptInTag "p" pImage <* skipMany pBlank
+      pCapt = option mempty $ pInTags "figcaption" inline <* skipMany pBlank
+      -- Image first, then the optional caption.
+      pImgCapt = do
+        img <- pImg
+        cap <- pCapt
+        return (img, cap)
+      -- Caption first, then the image; the pair is still (image, caption).
+      pCaptImg = do
+        cap <- pCapt
+        img <- pImg
+        return (img, cap)
+  (imgMany, caption) <- pImgCapt <|> pCaptImg
+  TagClose _ <- pSatisfy (matchTagClose "figure")
+  -- Refutable bind rather than a lazy @let@ pattern: a result that does
+  -- not start with an 'Image' fails the parser instead of raising a
+  -- pattern-match error when the fields are forced.
+  (Image attr _ (url, tit) : _) <- return (B.toList imgMany)
+  return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption
+
 pCodeBlock :: PandocMonad m => TagParser m Blocks
 pCodeBlock = try $ do
   TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
diff --git a/test/command/html-read-figure.md b/test/command/html-read-figure.md
new file mode 100644
index 000000000..9c604c706
--- /dev/null
+++ b/test/command/html-read-figure.md
@@ -0,0 +1,45 @@
+```
+% pandoc -f html -t native
+
+ +
bar
+
+^D +[Para [Image ("",[],[]) [Str "bar"] ("foo.png","fig:voyage")]] +``` + +``` +% pandoc -f html -t native +
+
bar
+ +
+^D +[Para [Image ("",[],[]) [Str "bar"] ("foo.png","fig:voyage")]] +``` + +``` +% pandoc -f html -t native +
+ +
+^D +[Para [Image ("",[],[]) [] ("foo.png","fig:voyage")]] +``` + +``` +% pandoc -f html -t native +
+

+
bar
+
+^D +[Para [Image ("",[],[]) [Str "bar"] ("foo.png","fig:voyage")]] +``` + +``` +% pandoc -f html -t native +
this is ignored
bar baz
+^D +[Para [Image ("",[],[]) [Str "bar",Space,Strong [Str "baz"]] ("foo.png","fig:voyage")]] +``` -- cgit v1.2.3