diff options
-rw-r--r-- | README | 14 | ||||
-rw-r--r-- | src/Text/Pandoc.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Options.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/EPUB.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 4 |
6 files changed, 38 insertions, 11 deletions
@@ -2238,6 +2238,20 @@ markdown with HTML block elements. For example, one can surround a block of markdown text with `<div>` tags without preventing it from being interpreted as markdown. +#### Extension: `native_divs` #### + +Use native pandoc `Div` blocks for content inside `<div>` tags. +For the most part this should give the same output as +`markdown_in_html_blocks`, but it makes it easier to write pandoc +filters to manipulate groups of blocks. + +#### Extension: `native_spans` #### + +Use native pandoc `Span` blocks for content inside `<span>` tags. +For the most part this should give the same output as `raw_html`, +but it makes it easier to write pandoc filters to manipulate groups +of inlines. + Raw TeX ------- diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 589a6af98..c7c64f0fc 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -304,7 +304,17 @@ getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "plain" = pandocExtensions getDefaultExtensions "org" = Set.fromList [Ext_citations] -getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex] +getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, + Ext_raw_tex] +getDefaultExtensions "html" = Set.fromList [Ext_auto_identifiers, + Ext_native_divs, + Ext_native_spans] +getDefaultExtensions "html5" = getDefaultExtensions "html" +getDefaultExtensions "epub" = Set.fromList [Ext_auto_identifiers, + Ext_raw_html, + Ext_native_divs, + Ext_native_spans, + Ext_epub_html_exts] getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] -- | Retrieve reader based on formatSpec (format+extensions). diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index bb213bac0..84ccbbdc9 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -77,6 +77,8 @@ data Extension = | Ext_backtick_code_blocks -- ^ Github style ``` code blocks | Ext_inline_code_attributes -- ^ Allow attributes on inline code | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks + | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags + | Ext_native_spans -- ^ Use Span inlines for contents of <span> | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown -- iff container has attribute 'markdown' | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak @@ -131,6 +133,8 @@ pandocExtensions = Set.fromList , Ext_backtick_code_blocks , Ext_inline_code_attributes , Ext_markdown_in_html_blocks + , Ext_native_divs + , Ext_native_spans , Ext_escaped_line_breaks , Ext_fancy_lists , Ext_startnum diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 968b815c0..7462b3711 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -13,8 +13,7 @@ import Text.Pandoc.Definition hiding (Attr) import Text.Pandoc.Walk (walk, query) import Text.Pandoc.Generic(bottomUp) import Text.Pandoc.Readers.HTML (readHtml) -import Text.Pandoc.Options ( ReaderOptions(..), readerExtensions, Extension(..) - , readerTrace) +import Text.Pandoc.Options ( ReaderOptions(..), readerTrace) import Text.Pandoc.Shared (escapeURI) import Text.Pandoc.MediaBag (MediaBag, insertMedia) import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except) @@ -32,7 +31,6 @@ import Data.Monoid (mempty, (<>)) import Data.List (isPrefixOf, isInfixOf) import Data.Maybe (mapMaybe, fromMaybe) import qualified Data.Map as M (Map, lookup, fromList, elems) -import qualified Data.Set as S (insert) import Control.DeepSeq.Generics (deepseq, NFData) import Debug.Trace (trace) @@ -65,9 +63,7 @@ archiveToEPUB os archive = do let mediaBag = fetchImages (M.elems items) root archive ast return $ (ast, mediaBag) where - rs = readerExtensions os - os' = os {readerExtensions = foldr S.insert rs [Ext_epub_html_exts, Ext_raw_html]} - os'' = os' {readerParseRaw = True} + os' = os {readerParseRaw = True} parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do when (readerTrace os) (traceM path) @@ -78,7 +74,7 @@ archiveToEPUB os archive = do mimeToReader "application/xhtml+xml" r path = do fname <- findEntryByPathE (r </> path) archive return $ fixInternalReferences (r </> path) . - readHtml os'' . + readHtml os' . UTF8.toStringLazy $ fromEntry fname mimeToReader s _ path diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 3d988cd80..1789b865f 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -45,7 +45,8 @@ import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, renderTags' , escapeURI, safeRead ) import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) - , Extension (Ext_epub_html_exts)) + , Extension (Ext_epub_html_exts, + Ext_native_divs, Ext_native_spans)) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import Data.Maybe ( fromMaybe, isJust) @@ -296,6 +297,7 @@ pRawTag = do pDiv :: TagParser Blocks pDiv = try $ do + guardEnabled Ext_native_divs TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True) contents <- pInTags "div" block return $ B.divWith (mkAttr attr) contents @@ -560,6 +562,7 @@ pCode = try $ do pSpan :: TagParser Inlines pSpan = try $ do + guardEnabled Ext_native_spans TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) contents <- pInTags "span" inline return $ B.spanWith (mkAttr attr) contents diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 04b3fa684..861f81b23 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1764,7 +1764,7 @@ inBrackets parser = do spanHtml :: MarkdownParser (F Inlines) spanHtml = try $ do - guardEnabled Ext_markdown_in_html_blocks + guardEnabled Ext_native_spans (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) let ident = fromMaybe "" $ lookup "id" attrs @@ -1779,7 +1779,7 @@ spanHtml = try $ do divHtml :: MarkdownParser (F Blocks) divHtml = try $ do - guardEnabled Ext_markdown_in_html_blocks + guardEnabled Ext_native_divs (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags |