aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README14
-rw-r--r--src/Text/Pandoc.hs12
-rw-r--r--src/Text/Pandoc/Options.hs4
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs10
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs5
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs4
6 files changed, 38 insertions, 11 deletions
diff --git a/README b/README
index 161092550..d7031d9fa 100644
--- a/README
+++ b/README
@@ -2238,6 +2238,20 @@ markdown with HTML block elements. For example, one can surround
a block of markdown text with `<div>` tags without preventing it
from being interpreted as markdown.
+#### Extension: `native_divs` ####
+
+Use native pandoc `Div` blocks for content inside `<div>` tags.
+For the most part this should give the same output as
+`markdown_in_html_blocks`, but it makes it easier to write pandoc
+filters to manipulate groups of blocks.
+
+#### Extension: `native_spans` ####
+
+Use native pandoc `Span` blocks for content inside `<span>` tags.
+For the most part this should give the same output as `raw_html`,
+but it makes it easier to write pandoc filters to manipulate groups
+of inlines.
+
Raw TeX
-------
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 589a6af98..c7c64f0fc 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -304,7 +304,17 @@ getDefaultExtensions "markdown_github" = githubMarkdownExtensions
getDefaultExtensions "markdown" = pandocExtensions
getDefaultExtensions "plain" = pandocExtensions
getDefaultExtensions "org" = Set.fromList [Ext_citations]
-getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
+getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers,
+ Ext_raw_tex]
+getDefaultExtensions "html" = Set.fromList [Ext_auto_identifiers,
+ Ext_native_divs,
+ Ext_native_spans]
+getDefaultExtensions "html5" = getDefaultExtensions "html"
+getDefaultExtensions "epub" = Set.fromList [Ext_auto_identifiers,
+ Ext_raw_html,
+ Ext_native_divs,
+ Ext_native_spans,
+ Ext_epub_html_exts]
getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers]
-- | Retrieve reader based on formatSpec (format+extensions).
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index bb213bac0..84ccbbdc9 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -77,6 +77,8 @@ data Extension =
| Ext_backtick_code_blocks -- ^ Github style ``` code blocks
| Ext_inline_code_attributes -- ^ Allow attributes on inline code
| Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
+ | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
+ | Ext_native_spans -- ^ Use Span inlines for contents of <span>
| Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown
-- iff container has attribute 'markdown'
| Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak
@@ -131,6 +133,8 @@ pandocExtensions = Set.fromList
, Ext_backtick_code_blocks
, Ext_inline_code_attributes
, Ext_markdown_in_html_blocks
+ , Ext_native_divs
+ , Ext_native_spans
, Ext_escaped_line_breaks
, Ext_fancy_lists
, Ext_startnum
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 968b815c0..7462b3711 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -13,8 +13,7 @@ import Text.Pandoc.Definition hiding (Attr)
import Text.Pandoc.Walk (walk, query)
import Text.Pandoc.Generic(bottomUp)
import Text.Pandoc.Readers.HTML (readHtml)
-import Text.Pandoc.Options ( ReaderOptions(..), readerExtensions, Extension(..)
- , readerTrace)
+import Text.Pandoc.Options ( ReaderOptions(..), readerTrace)
import Text.Pandoc.Shared (escapeURI)
import Text.Pandoc.MediaBag (MediaBag, insertMedia)
import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except)
@@ -32,7 +31,6 @@ import Data.Monoid (mempty, (<>))
import Data.List (isPrefixOf, isInfixOf)
import Data.Maybe (mapMaybe, fromMaybe)
import qualified Data.Map as M (Map, lookup, fromList, elems)
-import qualified Data.Set as S (insert)
import Control.DeepSeq.Generics (deepseq, NFData)
import Debug.Trace (trace)
@@ -65,9 +63,7 @@ archiveToEPUB os archive = do
let mediaBag = fetchImages (M.elems items) root archive ast
return $ (ast, mediaBag)
where
- rs = readerExtensions os
- os' = os {readerExtensions = foldr S.insert rs [Ext_epub_html_exts, Ext_raw_html]}
- os'' = os' {readerParseRaw = True}
+ os' = os {readerParseRaw = True}
parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
when (readerTrace os) (traceM path)
@@ -78,7 +74,7 @@ archiveToEPUB os archive = do
mimeToReader "application/xhtml+xml" r path = do
fname <- findEntryByPathE (r </> path) archive
return $ fixInternalReferences (r </> path) .
- readHtml os'' .
+ readHtml os' .
UTF8.toStringLazy $
fromEntry fname
mimeToReader s _ path
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 3d988cd80..1789b865f 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -45,7 +45,8 @@ import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
import Text.Pandoc.Shared ( extractSpaces, renderTags'
, escapeURI, safeRead )
import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
- , Extension (Ext_epub_html_exts))
+ , Extension (Ext_epub_html_exts,
+ Ext_native_divs, Ext_native_spans))
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
import Data.Maybe ( fromMaybe, isJust)
@@ -296,6 +297,7 @@ pRawTag = do
pDiv :: TagParser Blocks
pDiv = try $ do
+ guardEnabled Ext_native_divs
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True)
contents <- pInTags "div" block
return $ B.divWith (mkAttr attr) contents
@@ -560,6 +562,7 @@ pCode = try $ do
pSpan :: TagParser Inlines
pSpan = try $ do
+ guardEnabled Ext_native_spans
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
contents <- pInTags "span" inline
return $ B.spanWith (mkAttr attr) contents
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 04b3fa684..861f81b23 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1764,7 +1764,7 @@ inBrackets parser = do
spanHtml :: MarkdownParser (F Inlines)
spanHtml = try $ do
- guardEnabled Ext_markdown_in_html_blocks
+ guardEnabled Ext_native_spans
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
let ident = fromMaybe "" $ lookup "id" attrs
@@ -1779,7 +1779,7 @@ spanHtml = try $ do
divHtml :: MarkdownParser (F Blocks)
divHtml = try $ do
- guardEnabled Ext_markdown_in_html_blocks
+ guardEnabled Ext_native_divs
(TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
-- we set stateInHtmlBlock so that closing tags that can be either block or
-- inline will not be parsed as inline tags