about summary refs log tree commit diff
diff options
context:
space:
mode:
author: John MacFarlane <fiddlosopher@gmail.com> 2011-10-25 12:44:20 -0700
committer: John MacFarlane <fiddlosopher@gmail.com> 2011-10-25 12:44:20 -0700
commit: 1b81981c5f681d5c3f48c8d54d6f0a64c9e482ca (patch)
tree: b90494974135760a7467e158969ec43035b4cfa7
parent: c8bc0391a8bccb9b301e9d1ff84dec2aa168691e (diff)
download: pandoc-1b81981c5f681d5c3f48c8d54d6f0a64c9e482ca.tar.gz
HTML reader now recognizes DocBook block and inline tags.
It was always possible to include raw DocBook tags in a markdown document, but now pandoc will be able to distinguish block from inline tags and behave accordingly. Thus, for example, `<sidebar> hello </sidebar>` will not be wrapped in `<para>` tags.
-rw-r--r-- README 7
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 29
-rw-r--r-- src/Text/Pandoc/Writers/Docbook.hs 3
3 files changed, 29 insertions, 10 deletions
diff --git a/README b/README
index cf054eb10..dafe9d124 100644
--- a/README
+++ b/README
@@ -1514,7 +1514,7 @@ MediaWiki
Textile
~ It will be rendered inside `<span class="math">` tags.
-RTF, Docbook, OpenDocument, ODT
+RTF, DocBook, OpenDocument, ODT
~ It will be rendered, if possible, using unicode characters,
and will otherwise appear verbatim.
@@ -1523,7 +1523,7 @@ HTML, Slidy, DZSlides, S5, EPUB
command-line options selected:
1. The default is to render TeX math as far as possible using unicode
- characters, as with RTF, Docbook, and OpenDocument output. Formulas
+ characters, as with RTF, DocBook, and OpenDocument output. Formulas
are put inside a `span` with `class="math"`, so that they may be
styled differently from the surrounding text if needed.
@@ -1565,7 +1565,7 @@ HTML, Slidy, DZSlides, S5, EPUB
Raw HTML
--------
-Markdown allows you to insert raw HTML anywhere in a document
+Markdown allows you to insert raw HTML (or DocBook) anywhere in a document
(except verbatim contexts, where `<`, `>`, and `&` are interpreted
literally).
@@ -1610,7 +1610,6 @@ markdown with HTML block elements. For example, one can surround
a block of markdown text with `<div>` tags without preventing it
from being interpreted as markdown.
-
Raw TeX
-------
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 2ad2c8e0f..70ea24680 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -503,16 +503,35 @@ blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div",
"dt", "frameset", "li", "tbody", "td", "tfoot",
"th", "thead", "tr", "script", "style"]
+-- We want to allow raw docbook in markdown documents, so we
+-- include docbook block tags here too.
+blockDocBookTags :: [String]
+blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist",
+ "orderedlist", "segmentedlist", "simplelist",
+ "variablelist", "caution", "important", "note", "tip",
+ "warning", "address", "literallayout", "programlisting",
+ "programlistingco", "screen", "screenco", "screenshot",
+ "synopsis", "example", "informalexample", "figure",
+ "informalfigure", "table", "informaltable", "para",
+ "simpara", "formalpara", "equation", "informalequation",
+ "figure", "screenshot", "mediaobject", "qandaset",
+ "procedure", "task", "cmdsynopsis", "funcsynopsis",
+ "classsynopsis", "blockquote", "epigraph", "msgset",
+ "sidebar"]
+
+blockTags :: [String]
+blockTags = blockHtmlTags ++ blockDocBookTags
+
isInlineTag :: Tag String -> Bool
-isInlineTag t = tagOpen (`notElem` blockHtmlTags) (const True) t ||
- tagClose (`notElem` blockHtmlTags) t ||
+isInlineTag t = tagOpen (`notElem` blockTags) (const True) t ||
+ tagClose (`notElem` blockTags) t ||
tagComment (const True) t
isBlockTag :: Tag String -> Bool
isBlockTag t = tagOpen (`elem` blocktags) (const True) t ||
tagClose (`elem` blocktags) t ||
tagComment (const True) t
- where blocktags = blockHtmlTags ++ eitherBlockOrInline
+ where blocktags = blockTags ++ eitherBlockOrInline
isTextTag :: Tag String -> Bool
isTextTag = tagText (const True)
@@ -547,8 +566,8 @@ t `closes` t2 |
t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] &&
t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div"
t1 `closes` t2 |
- t1 `elem` blockHtmlTags &&
- t2 `notElem` (blockHtmlTags ++ eitherBlockOrInline) = True
+ t1 `elem` blockTags &&
+ t2 `notElem` (blockTags ++ eitherBlockOrInline) = True
_ `closes` _ = False
--- parsers for use in markdown, textile readers
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 29c042cf9..9f8b921e7 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -255,7 +255,8 @@ inlineToDocbook _ EnDash = text "–"
inlineToDocbook _ (Code _ str) =
inTagsSimple "literal" $ text (escapeStringForXML str)
inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str
-inlineToDocbook _ (RawInline _ _) = empty
+inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x
+ | otherwise = empty
inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty
inlineToDocbook _ Space = space
inlineToDocbook opts (Link txt (src, _)) =