From 1b81981c5f681d5c3f48c8d54d6f0a64c9e482ca Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 25 Oct 2011 12:44:20 -0700 Subject: HTML reader now recognizes DocBook block and inline tags. It was always possible to include raw DocBook tags in a markdown document, but now pandoc will be able to distinguish block from inline tags and behave accordingly. Thus, for example, `<para>hello</para>` will not be wrapped in `<p>` tags. --- src/Text/Pandoc/Readers/HTML.hs | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'src/Text/Pandoc/Readers/HTML.hs') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 2ad2c8e0f..70ea24680 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -503,16 +503,35 @@ blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div", "dt", "frameset", "li", "tbody", "td", "tfoot", "th", "thead", "tr", "script", "style"] +-- We want to allow raw docbook in markdown documents, so we +-- include docbook block tags here too. 
+blockDocBookTags :: [String] +blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist", + "orderedlist", "segmentedlist", "simplelist", + "variablelist", "caution", "important", "note", "tip", + "warning", "address", "literallayout", "programlisting", + "programlistingco", "screen", "screenco", "screenshot", + "synopsis", "example", "informalexample", "figure", + "informalfigure", "table", "informaltable", "para", + "simpara", "formalpara", "equation", "informalequation", + "figure", "screenshot", "mediaobject", "qandaset", + "procedure", "task", "cmdsynopsis", "funcsynopsis", + "classsynopsis", "blockquote", "epigraph", "msgset", + "sidebar"] + +blockTags :: [String] +blockTags = blockHtmlTags ++ blockDocBookTags + isInlineTag :: Tag String -> Bool -isInlineTag t = tagOpen (`notElem` blockHtmlTags) (const True) t || - tagClose (`notElem` blockHtmlTags) t || +isInlineTag t = tagOpen (`notElem` blockTags) (const True) t || + tagClose (`notElem` blockTags) t || tagComment (const True) t isBlockTag :: Tag String -> Bool isBlockTag t = tagOpen (`elem` blocktags) (const True) t || tagClose (`elem` blocktags) t || tagComment (const True) t - where blocktags = blockHtmlTags ++ eitherBlockOrInline + where blocktags = blockTags ++ eitherBlockOrInline isTextTag :: Tag String -> Bool isTextTag = tagText (const True) @@ -547,8 +566,8 @@ t `closes` t2 | t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] && t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" t1 `closes` t2 | - t1 `elem` blockHtmlTags && - t2 `notElem` (blockHtmlTags ++ eitherBlockOrInline) = True + t1 `elem` blockTags && + t2 `notElem` (blockTags ++ eitherBlockOrInline) = True _ `closes` _ = False --- parsers for use in markdown, textile readers -- cgit v1.2.3