diff options
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Options.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/EPUB.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 5 |
6 files changed, 29 insertions, 17 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 589a6af98..c7c64f0fc 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -304,7 +304,17 @@ getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "plain" = pandocExtensions getDefaultExtensions "org" = Set.fromList [Ext_citations] -getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex] +getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, + Ext_raw_tex] +getDefaultExtensions "html" = Set.fromList [Ext_auto_identifiers, + Ext_native_divs, + Ext_native_spans] +getDefaultExtensions "html5" = getDefaultExtensions "html" +getDefaultExtensions "epub" = Set.fromList [Ext_auto_identifiers, + Ext_raw_html, + Ext_native_divs, + Ext_native_spans, + Ext_epub_html_exts] getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] -- | Retrieve reader based on formatSpec (format+extensions). diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index bb213bac0..84ccbbdc9 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -77,6 +77,8 @@ data Extension = | Ext_backtick_code_blocks -- ^ Github style ``` code blocks | Ext_inline_code_attributes -- ^ Allow attributes on inline code | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks + | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags + | Ext_native_spans -- ^ Use Span inlines for contents of <span> | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown -- iff container has attribute 'markdown' | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak @@ -131,6 +133,8 @@ pandocExtensions = Set.fromList , Ext_backtick_code_blocks , Ext_inline_code_attributes , Ext_markdown_in_html_blocks + , Ext_native_divs + , Ext_native_spans , Ext_escaped_line_breaks , Ext_fancy_lists , Ext_startnum diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 8073f9ad2..b6b271488 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -13,8 +13,7 @@ import Text.Pandoc.Definition hiding (Attr) import Text.Pandoc.Walk (walk, query) import Text.Pandoc.Generic(bottomUp) import Text.Pandoc.Readers.HTML (readHtml) -import Text.Pandoc.Options ( ReaderOptions(..), readerExtensions, Extension(..) - , readerTrace) +import Text.Pandoc.Options ( ReaderOptions(..), readerTrace) import Text.Pandoc.Shared (escapeURI, collapseFilePath) import Text.Pandoc.MediaBag (MediaBag, insertMedia) import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except) @@ -32,7 +31,6 @@ import Data.Monoid (mempty, (<>)) import Data.List (isPrefixOf, isInfixOf) import Data.Maybe (mapMaybe, fromMaybe) import qualified Data.Map as M (Map, lookup, fromList, elems) -import qualified Data.Set as S (insert) import Control.DeepSeq.Generics (deepseq, NFData) import Debug.Trace (trace) @@ -51,7 +49,7 @@ runEPUB = either error id . runExcept -- are of the form "filename#id" -- archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) -archiveToEPUB (setEPUBOptions -> os) archive = do +archiveToEPUB os archive = do -- root is path to folder with manifest file in (root, content) <- getManifest archive meta <- parseMeta content @@ -67,6 +65,7 @@ archiveToEPUB (setEPUBOptions -> os) archive = do let mediaBag = fetchImages (M.elems items) archive ast return $ (ast, mediaBag) where + os' = os {readerParseRaw = True} parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do when (readerTrace os) (traceM path) @@ -77,20 +76,13 @@ archiveToEPUB (setEPUBOptions -> os) archive = do mimeToReader "application/xhtml+xml" (normalise -> path) = do fname <- findEntryByPathE path archive return $ fixInternalReferences path . - readHtml os . + readHtml os' . UTF8.toStringLazy $ fromEntry fname mimeToReader s path | s `elem` imageMimes = return $ imageToPandoc path | otherwise = return $ mempty -setEPUBOptions :: ReaderOptions -> ReaderOptions -setEPUBOptions os = os'' - where - rs = readerExtensions os - os' = os {readerExtensions = foldr S.insert rs [Ext_epub_html_exts]} - os'' = os' {readerParseRaw = True} - -- paths should be absolute when this function is called -- renameImages should do this fetchImages :: [(FilePath, MIME)] diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 3d988cd80..1789b865f 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -45,7 +45,8 @@ import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, renderTags' , escapeURI, safeRead ) import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) - , Extension (Ext_epub_html_exts)) + , Extension (Ext_epub_html_exts, + Ext_native_divs, Ext_native_spans)) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import Data.Maybe ( fromMaybe, isJust) @@ -296,6 +297,7 @@ pRawTag = do pDiv :: TagParser Blocks pDiv = try $ do + guardEnabled Ext_native_divs TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True) contents <- pInTags "div" block return $ B.divWith (mkAttr attr) contents @@ -560,6 +562,7 @@ pCode = try $ do pSpan :: TagParser Inlines pSpan = try $ do + guardEnabled Ext_native_spans TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) contents <- pInTags "span" inline return $ B.spanWith (mkAttr attr) contents diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 04b3fa684..861f81b23 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1764,7 +1764,7 @@ inBrackets parser = do spanHtml :: MarkdownParser (F Inlines) spanHtml = try $ do - guardEnabled Ext_markdown_in_html_blocks + guardEnabled Ext_native_spans (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) let ident = fromMaybe "" $ lookup "id" attrs @@ -1779,7 +1779,7 @@ spanHtml = try $ do divHtml :: MarkdownParser (F Blocks) divHtml = try $ do - guardEnabled Ext_markdown_in_html_blocks + guardEnabled Ext_native_divs (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 5c0476b7d..95d4db29b 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -357,7 +357,10 @@ blockToMarkdown opts (Header level attr inlines) = do _ | isEnabled Ext_header_attributes opts -> space <> attrsToMarkdown attr | otherwise -> empty - contents <- inlineListToMarkdown opts inlines + contents <- inlineListToMarkdown opts $ + if level == 1 && plain + then capitalize inlines + else inlines let setext = writerSetextHeaders opts return $ nowrap $ case level of |