aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 45
-rw-r--r-- tests/html-reader.html 9
-rw-r--r-- tests/html-reader.native 8
3 files changed, 49 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index c7832fbf0..969fabb3a 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -83,6 +83,18 @@ inlinesTilEnd tag = try (do
inlines <- manyTill inline (htmlEndTag tag)
return inlines)
+-- | Parse blocks between open and close tag.
+-- Matches the opening @tag@ with 'htmlTag', skips any following
+-- whitespace, then delegates the content to 'blocksTilEnd' for the
+-- same tag.  The whole sequence is wrapped in 'try'.
+blocksIn tag = try $ do
+ htmlTag tag
+ spaces
+ blocksTilEnd tag
+
+-- | Parse inlines between open and close tag.
+-- Matches the opening @tag@ with 'htmlTag', skips any following
+-- whitespace, then delegates to 'inlinesTilEnd', which collects inline
+-- elements up to the end tag of the same name.  The whole sequence is
+-- wrapped in 'try'.
+inlinesIn tag = try $ do
+ htmlTag tag
+ spaces
+ inlinesTilEnd tag
+
-- | Extract type from a tag: e.g. @br@ from @\<br\>@
extractTagType :: String -> String
extractTagType ('<':rest) =
@@ -339,27 +351,34 @@ blockQuote = try (do
-- list blocks
--
-list = choice [ bulletList, orderedList ] <?> "list"
+-- | Parse any supported list block: bullet, ordered, or definition list.
+-- The alternatives are tried in the order given; failures are reported
+-- under the label @list@.
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-orderedList = try (do
- tag <- htmlTag "ol"
+-- | Parse an ordered list (@\<ol\>@): one or more @\<li\>@ items, each
+-- parsed as a list of blocks via 'blocksIn', with optional whitespace
+-- between and after items, followed by the closing @ol@ tag.
+orderedList = try $ do
+ htmlTag "ol"
spaces
- items <- sepEndBy1 listItem spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
htmlEndTag "ol"
- return (OrderedList items))
+ return (OrderedList items)
-bulletList = try (do
- tag <- htmlTag "ul"
+-- | Parse a bullet list (@\<ul\>@): one or more @\<li\>@ items, each
+-- parsed as a list of blocks via 'blocksIn', with optional whitespace
+-- between and after items, followed by the closing @ul@ tag.
+bulletList = try $ do
+ htmlTag "ul"
spaces
- items <- sepEndBy1 listItem spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
htmlEndTag "ul"
- return (BulletList items)
-listItem = try (do
- tag <- htmlTag "li"
+-- | Parse a definition list (@\<dl\>@): one or more entries parsed by
+-- 'definitionListItem', with optional whitespace between and after
+-- entries, followed by the closing @dl@ tag.
+definitionList = try $ do
+ htmlTag "dl"
spaces
- blocks <- blocksTilEnd "li"
- return blocks)
+ items <- sepEndBy1 definitionListItem spaces
+ htmlEndTag "dl"
+ return (DefinitionList items)
+
+-- | Parse one definition-list entry: one or more @\<dt\>@ terms followed
+-- by one or more @\<dd\>@ definitions.  Returns a pair of the term
+-- inlines and the definition blocks.
+definitionListItem = try $ do
+ terms <- sepEndBy1 (inlinesIn "dt") spaces
+ defs <- sepEndBy1 (blocksIn "dd") spaces
+ -- Several consecutive terms become a single term, separated by LineBreak.
+ let term = joinWithSep [LineBreak] terms
+ -- The blocks of all definitions are merged into one flat block list.
+ return (term, concat defs)
--
-- paragraph block
diff --git a/tests/html-reader.html b/tests/html-reader.html
index b89a93299..2c00f48b4 100644
--- a/tests/html-reader.html
+++ b/tests/html-reader.html
@@ -232,6 +232,15 @@ These should not be escaped: \$ \\ \> \[ \{
</ul>
</li>
</ul>
+<h2>Definition</h2>
+<dl>
+ <dt>Violin</dt>
+ <dd>Stringed musical instrument.</dd>
+ <dd>Torture device.</dd>
+ <dt>Cello</dt>
+ <dt>Violoncello</dt>
+ <dd>Low-voiced stringed instrument.</dd>
+</dl>
<hr />
<h1>HTML Blocks</h1>
<p>Simple block on one line:</p>
diff --git a/tests/html-reader.native b/tests/html-reader.native
index f7de8df4a..a432bda7e 100644
--- a/tests/html-reader.native
+++ b/tests/html-reader.native
@@ -150,6 +150,14 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] [] "")
, BulletList
[ [ Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "tabs"] ]
, [ Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "spaces"] ] ] ] ]
+, Header 2 [Str "Definition"]
+, DefinitionList
+ [ ([Str "Violin"],
+ [ Plain [Str "Stringed",Space,Str "musical",Space,Str "instrument."]
+ , Plain [Str "Torture",Space,Str "device."] ] ),
+ ([Str "Cello",LineBreak,Str "Violoncello"],
+ [ Plain [Str "Low-voiced",Space,Str "stringed",Space,Str "instrument."] ]
+ ) ]
, HorizontalRule
, Header 1 [Str "HTML",Space,Str "Blocks"]
, Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"]