aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/HTML.hs
diff options
context:
space:
mode:
author John MacFarlane <fiddlosopher@gmail.com> 2011-10-25 12:44:20 -0700
committer John MacFarlane <fiddlosopher@gmail.com> 2011-10-25 12:44:20 -0700
commit 1b81981c5f681d5c3f48c8d54d6f0a64c9e482ca (patch)
tree b90494974135760a7467e158969ec43035b4cfa7 /src/Text/Pandoc/Readers/HTML.hs
parent c8bc0391a8bccb9b301e9d1ff84dec2aa168691e (diff)
download pandoc-1b81981c5f681d5c3f48c8d54d6f0a64c9e482ca.tar.gz
HTML reader now recognizes DocBook block and inline tags.
It was always possible to include raw DocBook tags in a markdown document, but pandoc can now distinguish block tags from inline tags and behave accordingly. Thus, for example, `<sidebar> hello </sidebar>` will not be wrapped in `<para>` tags.
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML.hs')
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 29
1 file changed, 24 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 2ad2c8e0f..70ea24680 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -503,16 +503,35 @@ blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div",
"dt", "frameset", "li", "tbody", "td", "tfoot",
"th", "thead", "tr", "script", "style"]
+-- We want to allow raw docbook in markdown documents, so we
+-- include docbook block tags here too.
+blockDocBookTags :: [String]
+blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist",
+ "orderedlist", "segmentedlist", "simplelist",
+ "variablelist", "caution", "important", "note", "tip",
+ "warning", "address", "literallayout", "programlisting",
+ "programlistingco", "screen", "screenco", "screenshot",
+ "synopsis", "example", "informalexample", "figure",
+ "informalfigure", "table", "informaltable", "para",
+ "simpara", "formalpara", "equation", "informalequation",
+ "figure", "screenshot", "mediaobject", "qandaset",
+ "procedure", "task", "cmdsynopsis", "funcsynopsis",
+ "classsynopsis", "blockquote", "epigraph", "msgset",
+ "sidebar"]
+
+blockTags :: [String]
+blockTags = blockHtmlTags ++ blockDocBookTags
+
isInlineTag :: Tag String -> Bool
-isInlineTag t = tagOpen (`notElem` blockHtmlTags) (const True) t ||
- tagClose (`notElem` blockHtmlTags) t ||
+isInlineTag t = tagOpen (`notElem` blockTags) (const True) t ||
+ tagClose (`notElem` blockTags) t ||
tagComment (const True) t
isBlockTag :: Tag String -> Bool
isBlockTag t = tagOpen (`elem` blocktags) (const True) t ||
tagClose (`elem` blocktags) t ||
tagComment (const True) t
- where blocktags = blockHtmlTags ++ eitherBlockOrInline
+ where blocktags = blockTags ++ eitherBlockOrInline
isTextTag :: Tag String -> Bool
isTextTag = tagText (const True)
@@ -547,8 +566,8 @@ t `closes` t2 |
t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] &&
t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div"
t1 `closes` t2 |
- t1 `elem` blockHtmlTags &&
- t2 `notElem` (blockHtmlTags ++ eitherBlockOrInline) = True
+ t1 `elem` blockTags &&
+ t2 `notElem` (blockTags ++ eitherBlockOrInline) = True
_ `closes` _ = False
--- parsers for use in markdown, textile readers