aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANUAL.txt8
-rw-r--r--src/Text/Pandoc/App.hs8
-rw-r--r--src/Text/Pandoc/Options.hs4
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs16
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs4
-rw-r--r--test/command/2552.md14
6 files changed, 46 insertions, 8 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index 8491c1998..564f7f317 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -655,6 +655,14 @@ General writer options
of contents. The default is 3 (which means that level 1, 2, and 3
headers will be listed in the contents).
+`--strip-comments`
+
+: Strip out HTML comments in the Markdown or Textile source,
+ rather than passing them on to Markdown, Textile or HTML
+ output as raw HTML. This does not apply to HTML comments
+ inside raw HTML blocks when the `markdown_in_html_blocks`
+ extension is not set.
+
`--no-highlight`
: Disables syntax highlighting for code blocks and inlines, even when
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index f8e23b10c..e5be7e620 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -483,6 +483,7 @@ convertWithOpts opts = do
, readerTrackChanges = optTrackChanges opts
, readerAbbreviations = abbrevs
, readerExtensions = readerExts
+ , readerStripComments = optStripComments opts
}
let transforms = (case optBaseHeaderLevel opts of
@@ -666,6 +667,7 @@ data Opt = Opt
, optIncludeInHeader :: [FilePath] -- ^ Files to include in header
, optResourcePath :: [FilePath] -- ^ Path to search for images etc
, optEol :: LineEnding -- ^ Style of line-endings to use
+ , optStripComments :: Bool -- ^ Skip HTML comments
} deriving (Generic, Show)
instance ToJSON Opt where
@@ -742,6 +744,7 @@ defaultOpts = Opt
, optIncludeInHeader = []
, optResourcePath = ["."]
, optEol = Native
+ , optStripComments = False
}
addMetadata :: (String, String) -> Pandoc -> Pandoc
@@ -1114,6 +1117,11 @@ options =
"NUMBER")
"" -- "Length of line in characters"
+ , Option "" ["strip-comments"]
+ (NoArg
+ (\opt -> return opt { optStripComments = True }))
+ "" -- "Strip HTML comments"
+
, Option "" ["toc", "table-of-contents"]
(NoArg
(\opt -> return opt { optTableOfContents = True }))
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index cd353e18e..345245855 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -65,7 +65,8 @@ data ReaderOptions = ReaderOptions{
-- indented code blocks
, readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations
, readerDefaultImageExtension :: String -- ^ Default extension for images
- , readerTrackChanges :: TrackChanges
+ , readerTrackChanges :: TrackChanges -- ^ Track changes setting for docx
+ , readerStripComments :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML
} deriving (Show, Read, Data, Typeable, Generic)
instance ToJSON ReaderOptions where
@@ -82,6 +83,7 @@ instance Default ReaderOptions
, readerAbbreviations = defaultAbbrevs
, readerDefaultImageExtension = ""
, readerTrackChanges = AcceptChanges
+ , readerStripComments = False
}
defaultAbbrevs :: Set.Set String
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 2093be19c..4cbc03089 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -46,9 +46,10 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
import Text.Pandoc.Shared ( extractSpaces, addMetaField
, escapeURI, safeRead, crFilter )
-import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled,
- Extension (Ext_epub_html_exts,
- Ext_raw_html, Ext_native_divs, Ext_native_spans))
+import Text.Pandoc.Options (
+ ReaderOptions(readerExtensions,readerStripComments), extensionEnabled,
+ Extension (Ext_epub_html_exts,
+ Ext_raw_html, Ext_native_divs, Ext_native_spans))
import Text.Pandoc.Logging
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
@@ -1070,7 +1071,7 @@ _ `closes` _ = False
--- parsers for use in markdown, textile readers
-- | Matches a stretch of HTML in balanced tags.
-htmlInBalanced :: (Monad m)
+htmlInBalanced :: (HasReaderOptions st, Monad m)
=> (Tag String -> Bool)
-> ParserT String st m String
htmlInBalanced f = try $ do
@@ -1118,7 +1119,7 @@ hasTagWarning (TagWarning _:_) = True
hasTagWarning _ = False
-- | Matches a tag meeting a certain condition.
-htmlTag :: Monad m
+htmlTag :: (HasReaderOptions st, Monad m)
=> (Tag String -> Bool)
-> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
@@ -1153,7 +1154,10 @@ htmlTag f = try $ do
count (length s + 4) anyChar
skipMany (satisfy (/='>'))
char '>'
- return (next, "<!--" <> s <> "-->")
+ stripComments <- getOption readerStripComments
+ if stripComments
+ then return (next, "")
+ else return (next, "<!--" <> s <> "-->")
| otherwise -> fail "bogus comment mode, HTML5 parse error"
TagOpen tagname attr -> do
guard $ all (isName . fst) attr
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index c2a73dcc5..1364f25cb 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1079,7 +1079,9 @@ htmlBlock' = try $ do
first <- htmlElement
skipMany spaceChar
optional blanklines
- return $ return $ B.rawBlock "html" first
+ return $ if null first
+ then mempty
+ else return $ B.rawBlock "html" first
strictHtmlBlock :: PandocMonad m => MarkdownParser m String
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
diff --git a/test/command/2552.md b/test/command/2552.md
new file mode 100644
index 000000000..90a3a381c
--- /dev/null
+++ b/test/command/2552.md
@@ -0,0 +1,14 @@
+```
+% pandoc --strip-comments
+Foo
+
+bar
+
+<!-- comment -->
+
+baz<!-- bim -->boop
+^D
+<p>Foo</p>
+<p>bar</p>
+<p>bazboop</p>
+```