diff options
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 45 | ||||
-rw-r--r-- | tests/html-reader.html | 9 | ||||
-rw-r--r-- | tests/html-reader.native | 8 |
3 files changed, 49 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index c7832fbf0..969fabb3a 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -83,6 +83,18 @@ inlinesTilEnd tag = try (do inlines <- manyTill inline (htmlEndTag tag) return inlines) +-- | Parse blocks between open and close tag. +blocksIn tag = try $ do + htmlTag tag + spaces + blocksTilEnd tag + +-- | Parse inlines between open and close tag. +inlinesIn tag = try $ do + htmlTag tag + spaces + inlinesTilEnd tag + -- | Extract type from a tag: e.g. @br@ from @\<br\>@ extractTagType :: String -> String extractTagType ('<':rest) = @@ -339,27 +351,34 @@ blockQuote = try (do -- list blocks -- -list = choice [ bulletList, orderedList ] <?> "list" +list = choice [ bulletList, orderedList, definitionList ] <?> "list" -orderedList = try (do - tag <- htmlTag "ol" +orderedList = try $ do + htmlTag "ol" spaces - items <- sepEndBy1 listItem spaces + items <- sepEndBy1 (blocksIn "li") spaces htmlEndTag "ol" - return (OrderedList items)) + return (OrderedList items) -bulletList = try (do - tag <- htmlTag "ul" +bulletList = try $ do + htmlTag "ul" spaces - items <- sepEndBy1 listItem spaces + items <- sepEndBy1 (blocksIn "li") spaces htmlEndTag "ul" - return (BulletList items)) + return (BulletList items) -listItem = try (do - tag <- htmlTag "li" +definitionList = try $ do + tag <- htmlTag "dl" spaces - blocks <- blocksTilEnd "li" - return blocks) + items <- sepEndBy1 definitionListItem spaces + htmlEndTag "dl" + return (DefinitionList items) + +definitionListItem = try $ do + terms <- sepEndBy1 (inlinesIn "dt") spaces + defs <- sepEndBy1 (blocksIn "dd") spaces + let term = joinWithSep [LineBreak] terms + return (term, concat defs) -- -- paragraph block diff --git a/tests/html-reader.html b/tests/html-reader.html index b89a93299..2c00f48b4 100644 --- a/tests/html-reader.html +++ b/tests/html-reader.html @@ -232,6 +232,15 @@ These should not be escaped: \$ \\ \> \[ \{ </ul> </li> </ul> +<h2>Definition</h2> +<dl> + <dt>Violin</dt> + <dd>Stringed musical instrument.</dd> + <dd>Torture device.</dd> + <dt>Cello</dt> + <dt>Violoncello</dt> + <dd>Low-voiced stringed instrument.</dd> +</dl> <hr /> <h1>HTML Blocks</h1> <p>Simple block on one line:</p> diff --git a/tests/html-reader.native b/tests/html-reader.native index f7de8df4a..a432bda7e 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -150,6 +150,14 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] [] "") , BulletList [ [ Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "tabs"] ] , [ Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "spaces"] ] ] ] ] +, Header 2 [Str "Definition"] +, DefinitionList + [ ([Str "Violin"], + [ Plain [Str "Stringed",Space,Str "musical",Space,Str "instrument."] + , Plain [Str "Torture",Space,Str "device."] ] ), + ([Str "Cello",LineBreak,Str "Violoncello"], + [ Plain [Str "Low-voiced",Space,Str "stringed",Space,Str "instrument."] ] + ) ] , HorizontalRule , Header 1 [Str "HTML",Space,Str "Blocks"] , Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] |