aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-12-02 15:21:59 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2017-12-02 15:21:59 -0800
commit7b8c2b6691e3816ba52ee07ee7f63573d4ae7253 (patch)
treef201d4e69ce4d86911d845ef63094c9ba4f7688b
parente09e6a6ffaf1c61747f1e2d87b025a61bd0ee90e (diff)
downloadpandoc-7b8c2b6691e3816ba52ee07ee7f63573d4ae7253.tar.gz
Add --strip-empty-paragraphs option.
This works for any input format.
-rw-r--r--MANUAL.txt6
-rw-r--r--src/Text/Pandoc/App.hs20
-rw-r--r--src/Text/Pandoc/Shared.hs9
3 files changed, 30 insertions, 5 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index e8fdf9375..5adeebe58 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -427,6 +427,12 @@ Reader options
: Specify the base level for headers (defaults to 1).
+`--strip-empty-paragraphs`
+
+: Ignore paragraphs with no content. This option is useful
+ for converting word processing documents where users have
+ used empty paragraphs to create inter-paragraph space.
+
`--indented-code-classes=`*CLASSES*
: Specify classes to use for indented code blocks--for example,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index f1c21c69a..3fdbf1949 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -86,8 +86,8 @@ import Text.Pandoc.Lua (LuaException (..), runLuaFilter)
import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.Process (pipeProcess)
import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
-import Text.Pandoc.Shared (eastAsianLineBreakFilter, headerShift, isURI, ordNub,
- safeRead, tabFilter)
+import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
+ headerShift, isURI, ordNub, safeRead, tabFilter)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Writers.Math (defaultKaTeXURL, defaultMathJaxURL)
import Text.Pandoc.XML (toEntities)
@@ -461,14 +461,17 @@ convertWithOpts opts = do
let transforms = (case optBaseHeaderLevel opts of
x | x > 1 -> (headerShift (x - 1) :)
- | otherwise -> id) $
+ | otherwise -> id) .
+ (if optStripEmptyParagraphs opts
+ then (stripEmptyParagraphs :)
+ else id) .
(if extensionEnabled Ext_east_asian_line_breaks
readerExts &&
not (extensionEnabled Ext_east_asian_line_breaks
writerExts &&
writerWrapText writerOptions == WrapPreserve)
then (eastAsianLineBreakFilter :)
- else id)
+ else id) $
[]
let sourceToDoc :: [FilePath] -> PandocIO Pandoc
@@ -622,6 +625,7 @@ data Opt = Opt
, optLuaFilters :: [FilePath] -- ^ Lua filters to apply
, optEmailObfuscation :: ObfuscationMethod
, optIdentifierPrefix :: String
+ , optStripEmptyParagraphs :: Bool -- ^ Strip empty paragraphs
, optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks
, optDataDir :: Maybe FilePath
, optCiteMethod :: CiteMethod -- ^ Method to output cites
@@ -694,6 +698,7 @@ defaultOpts = Opt
, optLuaFilters = []
, optEmailObfuscation = NoObfuscation
, optIdentifierPrefix = ""
+ , optStripEmptyParagraphs = False
, optIndentedCodeClasses = []
, optDataDir = Nothing
, optCiteMethod = Citeproc
@@ -940,7 +945,12 @@ options =
"NUMBER")
"" -- "Headers base level"
- , Option "" ["indented-code-classes"]
+ , Option "" ["strip-empty-paragraphs"]
+ (NoArg
+ (\opt -> return opt{ optStripEmptyParagraphs = True }))
+ "" -- "Strip empty paragraphs"
+
+ , Option "" ["indented-code-classes"]
(ReqArg
(\arg opt -> return opt { optIndentedCodeClasses = words $
map (\c -> if c == ',' then ' ' else c) arg })
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 5c13e0acb..1c3a25cc7 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -72,6 +72,7 @@ module Text.Pandoc.Shared (
inlineListToIdentifier,
isHeaderBlock,
headerShift,
+ stripEmptyParagraphs,
isTightList,
addMetaField,
makeMeta,
@@ -529,6 +530,14 @@ headerShift n = walk shift
shift (Header level attr inner) = Header (level + n) attr inner
shift x = x
+-- | Remove empty paragraphs.
+--
+-- Filters each list of blocks handed to @go@ by 'walk', dropping every
+-- block that is exactly @Para []@. Only a paragraph whose inline list is
+-- empty counts as empty: any other block, including a paragraph that
+-- contains inlines of any kind, is kept unchanged.
+--
+-- NOTE(review): how deeply nested block lists are reached depends on the
+-- 'walk' instance, which is defined outside this module -- confirm there.
+stripEmptyParagraphs :: Pandoc -> Pandoc
+stripEmptyParagraphs = walk go
+ where go :: [Block] -> [Block]
+ go = filter (not . isEmptyParagraph)
+ isEmptyParagraph (Para []) = True
+ isEmptyParagraph _ = False
+
-- | Detect if a list is tight.
isTightList :: [[Block]] -> Bool
isTightList = all firstIsPlain