Add `empty_paragraphs` extension.

* Deprecate `--strip-empty-paragraphs` option. Instead we now use an `empty_paragraphs` extension that can be enabled on the reader or writer. By default, disabled. * Add `Ext_empty_paragraphs` constructor to `Extension`. * Revert "Docx reader: don't strip out empty paragraphs." This reverts commit d6c58eb836f033a48955796de4d9ffb3b30e297b. * Implement `empty_paragraphs` extension in docx reader and writer, opendocument writer, html reader and writer. * Add tests for `empty_paragraphs` extension.
author: John MacFarlane <jgm@berkeley.edu> 2017-12-04 10:31:06 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2017-12-04 14:56:57 -0800
commit: ae60e0196c5c12d358002cf3251dfebf07c66da6 (patch)
tree: 7c3bc1c605e6e65dbe4fe0071580e6ac92de5c00 /src/Text
parent: c58ecde93737aa68bd5dda9c4c72193dcaab3cf0 (diff)
download: pandoc-ae60e0196c5c12d358002cf3251dfebf07c66da6.tar.gz
7 files changed, 52 insertions, 33 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 3fdbf1949..7d7d630ea 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -947,7 +947,10 @@ options =
 
     , Option "" ["strip-empty-paragraphs"]
                  (NoArg
-                  (\opt -> return opt{ optStripEmptyParagraphs = True }))
+                  (\opt -> do
+                      deprecatedOption "--stripEmptyParagraphs"
+                        "Use +empty_paragraphs extension."
+                      return opt{ optStripEmptyParagraphs = True }))
                  "" -- "Strip empty paragraphs"
 
     , Option "" ["indented-code-classes"]
@@ -1472,7 +1475,7 @@ options =
     , Option "m" ["latexmathml", "asciimathml"]
                  (OptArg
                   (\arg opt -> do
-                      deprecatedOption "--latexmathml, --asciimathml, -m"
+                      deprecatedOption "--latexmathml, --asciimathml, -m" ""
                       return opt { optHTMLMathMethod = LaTeXMathML arg })
                   "URL")
                  "" -- "Use LaTeXMathML script in html output"
@@ -1480,7 +1483,7 @@ options =
     , Option "" ["mimetex"]
                  (OptArg
                   (\arg opt -> do
-                      deprecatedOption "--mimetex"
+                      deprecatedOption "--mimetex" ""
                       let url' = case arg of
                                       Just u  -> u ++ "?"
                                       Nothing -> "/cgi-bin/mimetex.cgi?"
@@ -1491,7 +1494,7 @@ options =
     , Option "" ["jsmath"]
                  (OptArg
                   (\arg opt -> do
-                      deprecatedOption "--jsmath"
+                      deprecatedOption "--jsmath" ""
                       return opt { optHTMLMathMethod = JsMath arg})
                   "URL")
                  "" -- "Use jsMath for HTML math"
@@ -1499,7 +1502,7 @@ options =
     , Option "" ["gladtex"]
                  (NoArg
                   (\opt -> do
-                      deprecatedOption "--gladtex"
+                      deprecatedOption "--gladtex" ""
                       return opt { optHTMLMathMethod = GladTeX }))
                  "" -- "Use gladtex for HTML math"
 
@@ -1699,9 +1702,9 @@ splitField s =
 baseWriterName :: String -> String
 baseWriterName = takeWhile (\c -> c /= '+' && c /= '-')
 
-deprecatedOption :: String -> IO ()
-deprecatedOption o =
-  runIO (report $ Deprecated o "") >>=
+deprecatedOption :: String -> String -> IO ()
+deprecatedOption o msg =
+  runIO (report $ Deprecated o msg) >>=
     \r -> case r of
        Right () -> return ()
        Left e   -> E.throwIO e
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 67ad2ad04..771898d70 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -152,6 +152,7 @@ data Extension =
     | Ext_old_dashes          -- ^ -- = em, - before number = en
     | Ext_spaced_reference_links -- ^ Allow space between two parts of ref link
     | Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup
+    | Ext_empty_paragraphs -- ^ Allow empty paragraphs
     deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
 
 -- | Extensions to be used with pandoc-flavored markdown.
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 1fac98b14..651d46753 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -534,7 +534,10 @@ bodyPartToBlocks (Paragraph pPr parparts)
       then do modify $ \s -> s { docxDropCap = ils' }
               return mempty
       else do modify $ \s -> s { docxDropCap = mempty }
-              return $ parStyleToTransform pPr $ para ils'
+              opts <- asks docxOptions
+              if isNull ils' && not (isEnabled Ext_empty_paragraphs opts)
+                 then return mempty
+                 else return $ parStyleToTransform pPr $ para ils'
 bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
   let
     kvs = case levelInfo of
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 24935fcd7..b0f5d38f9 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -68,9 +68,11 @@ import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
-import Text.Pandoc.Options (Extension (Ext_epub_html_exts, Ext_native_divs, Ext_native_spans, Ext_raw_html),
-                            ReaderOptions (readerExtensions, readerStripComments),
-                            extensionEnabled)
+import Text.Pandoc.Options (
+    Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
+               Ext_native_spans, Ext_raw_html),
+    ReaderOptions (readerExtensions, readerStripComments),
+    extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
 import Text.Pandoc.Shared (addMetaField, crFilter, escapeURI, extractSpaces,
                            safeRead, underlineSpan)
@@ -575,7 +577,10 @@ pPlain = do
 pPara :: PandocMonad m => TagParser m Blocks
 pPara = do
   contents <- trimInlines <$> pInTags "p" inline
-  return $ B.para contents
+  (do guardDisabled Ext_empty_paragraphs
+      guard (B.isNull contents)
+      return mempty)
+    <|> return (B.para contents)
 
 pFigure :: PandocMonad m => TagParser m Blocks
 pFigure = try $ do
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index f80c2b59a..c9eaaf838 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -922,19 +922,22 @@ blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do
   captionNode <- withParaProp (pCustomStyle "ImageCaption")
                  $ blockToOpenXML opts (Para alt)
   return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
-blockToOpenXML' opts (Para lst) = do
-  isFirstPara <- gets stFirstPara
-  paraProps <- getParaProps $ case lst of
-                               [Math DisplayMath _] -> True
-                               _                    -> False
-  bodyTextStyle <- pStyleM "Body Text"
-  let paraProps' = case paraProps of
-        [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]]
-        []               -> [mknode "w:pPr" [] [bodyTextStyle]]
-        ps               -> ps
-  modify $ \s -> s { stFirstPara = False }
-  contents <- inlinesToOpenXML opts lst
-  return [mknode "w:p" [] (paraProps' ++ contents)]
+blockToOpenXML' opts (Para lst)
+  | null lst && not (isEnabled Ext_empty_paragraphs opts) = return []
+  | otherwise = do
+      isFirstPara <- gets stFirstPara
+      paraProps <- getParaProps $ case lst of
+                                   [Math DisplayMath _] -> True
+                                   _                    -> False
+      bodyTextStyle <- pStyleM "Body Text"
+      let paraProps' = case paraProps of
+            [] | isFirstPara -> [mknode "w:pPr" []
+                                [pCustomStyle "FirstParagraph"]]
+            []               -> [mknode "w:pPr" [] [bodyTextStyle]]
+            ps               -> ps
+      modify $ \s -> s { stFirstPara = False }
+      contents <- inlinesToOpenXML opts lst
+      return [mknode "w:p" [] (paraProps' ++ contents)]
 blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
 blockToOpenXML' _ b@(RawBlock format str)
   | format == Format "openxml" = return [ x | Elem x <- parseXML str ]
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 756bc3fd8..f25bbadfb 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -56,7 +56,7 @@ import qualified Data.Text.Lazy as TL
 import Network.HTTP (urlEncode)
 import Network.URI (URI (..), parseURIReference, unEscapeString)
 import Numeric (showHex)
-import Text.Blaze.Internal (customLeaf)
+import Text.Blaze.Internal (customLeaf, MarkupM(Empty))
 import Text.Blaze.Html hiding (contents)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (formatHtmlBlock, formatHtmlInline, highlight,
@@ -658,6 +658,7 @@ blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) =
   figure opts attr txt (s,tit)
 blockToHtml opts (Para lst)
   | isEmptyRaw lst = return mempty
+  | null lst && not (isEnabled Ext_empty_paragraphs opts) = return mempty
   | otherwise = do
       contents <- inlineListToHtml opts lst
       return $ H.p contents
@@ -902,8 +903,7 @@ tableItemToHtml opts tag' align' item = do
   let tag'' = if null alignStr
                  then tag'
                  else tag' ! attribs
-  return $ (
-              tag'' contents) >> nl opts
+  return $ tag'' contents >> nl opts
 
 toListItems :: WriterOptions -> [Html] -> [Html]
 toListItems opts items = map (toListItem opts) items ++ [nl opts]
@@ -911,9 +911,13 @@ toListItems opts items = map (toListItem opts) items ++ [nl opts]
 toListItem :: WriterOptions -> Html -> Html
 toListItem opts item = nl opts >> H.li item
 
-blockListToHtml :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Html
+blockListToHtml :: PandocMonad m
+                => WriterOptions -> [Block] -> StateT WriterState m Html
 blockListToHtml opts lst =
-  (mconcat . intersperse (nl opts)) <$> mapM (blockToHtml opts) lst
+  (mconcat . intersperse (nl opts) . filter nonempty)
+    <$> mapM (blockToHtml opts) lst
+  where nonempty (Empty _) = False
+        nonempty _         = True
 
 -- | Convert list of Pandoc inline elements to HTML.
 inlineListToHtml :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Html
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 702349636..8aa19dbb5 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -130,7 +130,6 @@ setFirstPara :: PandocMonad m => OD m ()
 setFirstPara =  modify $  \s -> s { stFirstPara = True }
 
 inParagraphTags :: PandocMonad m => Doc -> OD m Doc
-inParagraphTags d | isEmpty d = return empty
 inParagraphTags d = do
   b <- gets stFirstPara
   a <- if b
@@ -323,7 +322,8 @@ blockToOpenDocument o bs
                                   else inParagraphTags =<< inlinesToOpenDocument o b
     | Para [Image attr c (s,'f':'i':'g':':':t)] <- bs
                              = figure attr c s t
-    | Para           b <- bs = if null b
+    | Para           b <- bs = if null b &&
+                                    not (isEnabled Ext_empty_paragraphs o)
                                   then return empty
                                   else inParagraphTags =<< inlinesToOpenDocument o b
     | LineBlock      b <- bs = blockToOpenDocument o $ linesToPara b
author	John MacFarlane <jgm@berkeley.edu>	2017-12-04 10:31:06 -0800
committer	John MacFarlane <jgm@berkeley.edu>	2017-12-04 14:56:57 -0800
commit	ae60e0196c5c12d358002cf3251dfebf07c66da6 (patch)
tree	7c3bc1c605e6e65dbe4fe0071580e6ac92de5c00 /src/Text
parent	c58ecde93737aa68bd5dda9c4c72193dcaab3cf0 (diff)
download	pandoc-ae60e0196c5c12d358002cf3251dfebf07c66da6.tar.gz