diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-09-17 12:49:15 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-09-17 13:01:27 -0700 |
commit | b1ee747a249e0c1b1840222ef77607218157f099 (patch) | |
tree | d3dae4803ee2585ba3d44fca9c42a2a31e5cdb6f /src | |
parent | ce05814372b5a9d3d8f90dcd0e7c56df77d966a0 (diff) | |
download | pandoc-b1ee747a249e0c1b1840222ef77607218157f099.tar.gz |
Added `--strip-comments` option, `readerStripComments` in `ReaderOptions`.
* Options: Added `readerStripComments` to `ReaderOptions`.
* Added `--strip-comments` command-line option.
* Made `htmlTag` from the HTML reader sensitive to this feature.
  This affects Markdown and Textile input.
Closes #2552.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/App.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Options.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 4 |
4 files changed, 24 insertions, 8 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index f8e23b10c..e5be7e620 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -483,6 +483,7 @@ convertWithOpts opts = do , readerTrackChanges = optTrackChanges opts , readerAbbreviations = abbrevs , readerExtensions = readerExts + , readerStripComments = optStripComments opts } let transforms = (case optBaseHeaderLevel opts of @@ -666,6 +667,7 @@ data Opt = Opt , optIncludeInHeader :: [FilePath] -- ^ Files to include in header , optResourcePath :: [FilePath] -- ^ Path to search for images etc , optEol :: LineEnding -- ^ Style of line-endings to use + , optStripComments :: Bool -- ^ Skip HTML comments } deriving (Generic, Show) instance ToJSON Opt where @@ -742,6 +744,7 @@ defaultOpts = Opt , optIncludeInHeader = [] , optResourcePath = ["."] , optEol = Native + , optStripComments = False } addMetadata :: (String, String) -> Pandoc -> Pandoc @@ -1114,6 +1117,11 @@ options = "NUMBER") "" -- "Length of line in characters" + , Option "" ["strip-comments"] + (NoArg + (\opt -> return opt { optStripComments = True })) + "" -- "Strip HTML comments" + , Option "" ["toc", "table-of-contents"] (NoArg (\opt -> return opt { optTableOfContents = True })) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index cd353e18e..345245855 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -65,7 +65,8 @@ data ReaderOptions = ReaderOptions{ -- indented code blocks , readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations , readerDefaultImageExtension :: String -- ^ Default extension for images - , readerTrackChanges :: TrackChanges + , readerTrackChanges :: TrackChanges -- ^ Track changes setting for docx + , readerStripComments :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML } deriving (Show, Read, Data, Typeable, Generic) instance ToJSON ReaderOptions where @@ -82,6 +83,7 @@ instance Default ReaderOptions , readerAbbreviations = 
defaultAbbrevs , readerDefaultImageExtension = "" , readerTrackChanges = AcceptChanges + , readerStripComments = False } defaultAbbrevs :: Set.Set String diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 2093be19c..4cbc03089 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -46,9 +46,10 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, addMetaField , escapeURI, safeRead, crFilter ) -import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled, - Extension (Ext_epub_html_exts, - Ext_raw_html, Ext_native_divs, Ext_native_spans)) +import Text.Pandoc.Options ( + ReaderOptions(readerExtensions,readerStripComments), extensionEnabled, + Extension (Ext_epub_html_exts, + Ext_raw_html, Ext_native_divs, Ext_native_spans)) import Text.Pandoc.Logging import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk @@ -1070,7 +1071,7 @@ _ `closes` _ = False --- parsers for use in markdown, textile readers -- | Matches a stretch of HTML in balanced tags. -htmlInBalanced :: (Monad m) +htmlInBalanced :: (HasReaderOptions st, Monad m) => (Tag String -> Bool) -> ParserT String st m String htmlInBalanced f = try $ do @@ -1118,7 +1119,7 @@ hasTagWarning (TagWarning _:_) = True hasTagWarning _ = False -- | Matches a tag meeting a certain condition. -htmlTag :: Monad m +htmlTag :: (HasReaderOptions st, Monad m) => (Tag String -> Bool) -> ParserT [Char] st m (Tag String, String) htmlTag f = try $ do @@ -1153,7 +1154,10 @@ htmlTag f = try $ do count (length s + 4) anyChar skipMany (satisfy (/='>')) char '>' - return (next, "<!--" <> s <> "-->") + stripComments <- getOption readerStripComments + if stripComments + then return (next, "") + else return (next, "<!--" <> s <> "-->") | otherwise -> fail "bogus comment mode, HTML5 parse error" TagOpen tagname attr -> do guard $ all (isName . 
fst) attr diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index c2a73dcc5..1364f25cb 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1079,7 +1079,9 @@ htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines - return $ return $ B.rawBlock "html" first + return $ if null first + then mempty + else return $ B.rawBlock "html" first strictHtmlBlock :: PandocMonad m => MarkdownParser m String strictHtmlBlock = htmlInBalanced (not . isInlineTag) |