diff options
-rw-r--r-- | MANUAL.txt | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/App.hs | 20 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 9 |
3 files changed, 30 insertions, 5 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index e8fdf9375..5adeebe58 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -427,6 +427,12 @@ Reader options : Specify the base level for headers (defaults to 1). +`--strip-empty-paragraphs` + +: Ignore paragraphs with no content. This option is useful + for converting word processing documents where users have + used empty paragraphs to create inter-paragraph space. + `--indented-code-classes=`*CLASSES* : Specify classes to use for indented code blocks--for example, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index f1c21c69a..3fdbf1949 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -86,8 +86,8 @@ import Text.Pandoc.Lua (LuaException (..), runLuaFilter) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Process (pipeProcess) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) -import Text.Pandoc.Shared (eastAsianLineBreakFilter, headerShift, isURI, ordNub, - safeRead, tabFilter) +import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, + headerShift, isURI, ordNub, safeRead, tabFilter) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Writers.Math (defaultKaTeXURL, defaultMathJaxURL) import Text.Pandoc.XML (toEntities) @@ -461,14 +461,17 @@ convertWithOpts opts = do let transforms = (case optBaseHeaderLevel opts of x | x > 1 -> (headerShift (x - 1) :) - | otherwise -> id) $ + | otherwise -> id) . + (if optStripEmptyParagraphs opts + then (stripEmptyParagraphs :) + else id) . 
(if extensionEnabled Ext_east_asian_line_breaks readerExts && not (extensionEnabled Ext_east_asian_line_breaks writerExts && writerWrapText writerOptions == WrapPreserve) then (eastAsianLineBreakFilter :) - else id) + else id) $ [] let sourceToDoc :: [FilePath] -> PandocIO Pandoc @@ -622,6 +625,7 @@ data Opt = Opt , optLuaFilters :: [FilePath] -- ^ Lua filters to apply , optEmailObfuscation :: ObfuscationMethod , optIdentifierPrefix :: String + , optStripEmptyParagraphs :: Bool -- ^ Strip empty paragraphs , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks , optDataDir :: Maybe FilePath , optCiteMethod :: CiteMethod -- ^ Method to output cites @@ -694,6 +698,7 @@ defaultOpts = Opt , optLuaFilters = [] , optEmailObfuscation = NoObfuscation , optIdentifierPrefix = "" + , optStripEmptyParagraphs = False , optIndentedCodeClasses = [] , optDataDir = Nothing , optCiteMethod = Citeproc @@ -940,7 +945,12 @@ options = "NUMBER") "" -- "Headers base level" - , Option "" ["indented-code-classes"] + , Option "" ["strip-empty-paragraphs"] + (NoArg + (\opt -> return opt{ optStripEmptyParagraphs = True })) + "" -- "Strip empty paragraphs" + + , Option "" ["indented-code-classes"] (ReqArg (\arg opt -> return opt { optIndentedCodeClasses = words $ map (\c -> if c == ',' then ' ' else c) arg }) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 5c13e0acb..1c3a25cc7 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -72,6 +72,7 @@ module Text.Pandoc.Shared ( inlineListToIdentifier, isHeaderBlock, headerShift, + stripEmptyParagraphs, isTightList, addMetaField, makeMeta, @@ -529,6 +530,14 @@ headerShift n = walk shift shift (Header level attr inner) = Header (level + n) attr inner shift x = x +-- | Remove empty paragraphs. +stripEmptyParagraphs :: Pandoc -> Pandoc +stripEmptyParagraphs = walk go + where go :: [Block] -> [Block] + go = filter (not . 
isEmptyParagraph) + isEmptyParagraph (Para []) = True + isEmptyParagraph _ = False + -- | Detect if a list is tight. isTightList :: [[Block]] -> Bool isTightList = all firstIsPlain |