From 7b8c2b6691e3816ba52ee07ee7f63573d4ae7253 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 2 Dec 2017 15:21:59 -0800 Subject: Add --strip-empty-paragraphs option. This works for any input format. --- src/Text/Pandoc/App.hs | 20 +++++++++++++++----- src/Text/Pandoc/Shared.hs | 9 +++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index f1c21c69a..3fdbf1949 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -86,8 +86,8 @@ import Text.Pandoc.Lua (LuaException (..), runLuaFilter) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Process (pipeProcess) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) -import Text.Pandoc.Shared (eastAsianLineBreakFilter, headerShift, isURI, ordNub, - safeRead, tabFilter) +import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, + headerShift, isURI, ordNub, safeRead, tabFilter) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Writers.Math (defaultKaTeXURL, defaultMathJaxURL) import Text.Pandoc.XML (toEntities) @@ -461,14 +461,17 @@ convertWithOpts opts = do let transforms = (case optBaseHeaderLevel opts of x | x > 1 -> (headerShift (x - 1) :) - | otherwise -> id) $ + | otherwise -> id) . + (if optStripEmptyParagraphs opts + then (stripEmptyParagraphs :) + else id) . (if extensionEnabled Ext_east_asian_line_breaks readerExts && not (extensionEnabled Ext_east_asian_line_breaks writerExts && writerWrapText writerOptions == WrapPreserve) then (eastAsianLineBreakFilter :) - else id) + else id) $ [] let sourceToDoc :: [FilePath] -> PandocIO Pandoc @@ -622,6 +625,7 @@ data Opt = Opt , optLuaFilters :: [FilePath] -- ^ Lua filters to apply , optEmailObfuscation :: ObfuscationMethod , optIdentifierPrefix :: String + , optStripEmptyParagraphs :: Bool -- ^ Strip empty paragraphs , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks , optDataDir :: Maybe FilePath , optCiteMethod :: CiteMethod -- ^ Method to output cites @@ -694,6 +698,7 @@ defaultOpts = Opt , optLuaFilters = [] , optEmailObfuscation = NoObfuscation , optIdentifierPrefix = "" + , optStripEmptyParagraphs = False , optIndentedCodeClasses = [] , optDataDir = Nothing , optCiteMethod = Citeproc @@ -940,7 +945,12 @@ options = "NUMBER") "" -- "Headers base level" - , Option "" ["indented-code-classes"] + , Option "" ["strip-empty-paragraphs"] + (NoArg + (\opt -> return opt{ optStripEmptyParagraphs = True })) + "" -- "Strip empty paragraphs" + + , Option "" ["indented-code-classes"] (ReqArg (\arg opt -> return opt { optIndentedCodeClasses = words $ map (\c -> if c == ',' then ' ' else c) arg }) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 5c13e0acb..1c3a25cc7 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -72,6 +72,7 @@ module Text.Pandoc.Shared ( inlineListToIdentifier, isHeaderBlock, headerShift, + stripEmptyParagraphs, isTightList, addMetaField, makeMeta, @@ -529,6 +530,14 @@ headerShift n = walk shift shift (Header level attr inner) = Header (level + n) attr inner shift x = x +-- | Remove empty paragraphs. +stripEmptyParagraphs :: Pandoc -> Pandoc +stripEmptyParagraphs = walk go + where go :: [Block] -> [Block] + go = filter (not . isEmptyParagraph) + isEmptyParagraph (Para []) = True + isEmptyParagraph _ = False + -- | Detect if a list is tight. isTightList :: [[Block]] -> Bool isTightList = all firstIsPlain -- cgit v1.2.3