aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-12-04 10:31:06 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2017-12-04 14:56:57 -0800
commitae60e0196c5c12d358002cf3251dfebf07c66da6 (patch)
tree7c3bc1c605e6e65dbe4fe0071580e6ac92de5c00 /src/Text
parentc58ecde93737aa68bd5dda9c4c72193dcaab3cf0 (diff)
downloadpandoc-ae60e0196c5c12d358002cf3251dfebf07c66da6.tar.gz
Add `empty_paragraphs` extension.
* Deprecate `--strip-empty-paragraphs` option. Instead we now use an `empty_paragraphs` extension that can be enabled on the reader or writer. By default, disabled. * Add `Ext_empty_paragraphs` constructor to `Extension`. * Revert "Docx reader: don't strip out empty paragraphs." This reverts commit d6c58eb836f033a48955796de4d9ffb3b30e297b. * Implement `empty_paragraphs` extension in docx reader and writer, opendocument writer, html reader and writer. * Add tests for `empty_paragraphs` extension.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/App.hs19
-rw-r--r--src/Text/Pandoc/Extensions.hs1
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs5
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs13
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs29
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs14
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs4
7 files changed, 52 insertions, 33 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 3fdbf1949..7d7d630ea 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -947,7 +947,10 @@ options =
, Option "" ["strip-empty-paragraphs"]
(NoArg
- (\opt -> return opt{ optStripEmptyParagraphs = True }))
+ (\opt -> do
+ deprecatedOption "--stripEmptyParagraphs"
+ "Use +empty_paragraphs extension."
+ return opt{ optStripEmptyParagraphs = True }))
"" -- "Strip empty paragraphs"
, Option "" ["indented-code-classes"]
@@ -1472,7 +1475,7 @@ options =
, Option "m" ["latexmathml", "asciimathml"]
(OptArg
(\arg opt -> do
- deprecatedOption "--latexmathml, --asciimathml, -m"
+ deprecatedOption "--latexmathml, --asciimathml, -m" ""
return opt { optHTMLMathMethod = LaTeXMathML arg })
"URL")
"" -- "Use LaTeXMathML script in html output"
@@ -1480,7 +1483,7 @@ options =
, Option "" ["mimetex"]
(OptArg
(\arg opt -> do
- deprecatedOption "--mimetex"
+ deprecatedOption "--mimetex" ""
let url' = case arg of
Just u -> u ++ "?"
Nothing -> "/cgi-bin/mimetex.cgi?"
@@ -1491,7 +1494,7 @@ options =
, Option "" ["jsmath"]
(OptArg
(\arg opt -> do
- deprecatedOption "--jsmath"
+ deprecatedOption "--jsmath" ""
return opt { optHTMLMathMethod = JsMath arg})
"URL")
"" -- "Use jsMath for HTML math"
@@ -1499,7 +1502,7 @@ options =
, Option "" ["gladtex"]
(NoArg
(\opt -> do
- deprecatedOption "--gladtex"
+ deprecatedOption "--gladtex" ""
return opt { optHTMLMathMethod = GladTeX }))
"" -- "Use gladtex for HTML math"
@@ -1699,9 +1702,9 @@ splitField s =
baseWriterName :: String -> String
baseWriterName = takeWhile (\c -> c /= '+' && c /= '-')
-deprecatedOption :: String -> IO ()
-deprecatedOption o =
- runIO (report $ Deprecated o "") >>=
+deprecatedOption :: String -> String -> IO ()
+deprecatedOption o msg =
+ runIO (report $ Deprecated o msg) >>=
\r -> case r of
Right () -> return ()
Left e -> E.throwIO e
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 67ad2ad04..771898d70 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -152,6 +152,7 @@ data Extension =
| Ext_old_dashes -- ^ -- = em, - before number = en
| Ext_spaced_reference_links -- ^ Allow space between two parts of ref link
| Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup
+ | Ext_empty_paragraphs -- ^ Allow empty paragraphs
deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
-- | Extensions to be used with pandoc-flavored markdown.
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 1fac98b14..651d46753 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -534,7 +534,10 @@ bodyPartToBlocks (Paragraph pPr parparts)
then do modify $ \s -> s { docxDropCap = ils' }
return mempty
else do modify $ \s -> s { docxDropCap = mempty }
- return $ parStyleToTransform pPr $ para ils'
+ opts <- asks docxOptions
+ if isNull ils' && not (isEnabled Ext_empty_paragraphs opts)
+ then return mempty
+ else return $ parStyleToTransform pPr $ para ils'
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
let
kvs = case levelInfo of
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 24935fcd7..b0f5d38f9 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -68,9 +68,11 @@ import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Logging
-import Text.Pandoc.Options (Extension (Ext_epub_html_exts, Ext_native_divs, Ext_native_spans, Ext_raw_html),
- ReaderOptions (readerExtensions, readerStripComments),
- extensionEnabled)
+import Text.Pandoc.Options (
+ Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
+ Ext_native_spans, Ext_raw_html),
+ ReaderOptions (readerExtensions, readerStripComments),
+ extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Shared (addMetaField, crFilter, escapeURI, extractSpaces,
safeRead, underlineSpan)
@@ -575,7 +577,10 @@ pPlain = do
pPara :: PandocMonad m => TagParser m Blocks
pPara = do
contents <- trimInlines <$> pInTags "p" inline
- return $ B.para contents
+ (do guardDisabled Ext_empty_paragraphs
+ guard (B.isNull contents)
+ return mempty)
+ <|> return (B.para contents)
pFigure :: PandocMonad m => TagParser m Blocks
pFigure = try $ do
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index f80c2b59a..c9eaaf838 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -922,19 +922,22 @@ blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do
captionNode <- withParaProp (pCustomStyle "ImageCaption")
$ blockToOpenXML opts (Para alt)
return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
-blockToOpenXML' opts (Para lst) = do
- isFirstPara <- gets stFirstPara
- paraProps <- getParaProps $ case lst of
- [Math DisplayMath _] -> True
- _ -> False
- bodyTextStyle <- pStyleM "Body Text"
- let paraProps' = case paraProps of
- [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]]
- [] -> [mknode "w:pPr" [] [bodyTextStyle]]
- ps -> ps
- modify $ \s -> s { stFirstPara = False }
- contents <- inlinesToOpenXML opts lst
- return [mknode "w:p" [] (paraProps' ++ contents)]
+blockToOpenXML' opts (Para lst)
+ | null lst && not (isEnabled Ext_empty_paragraphs opts) = return []
+ | otherwise = do
+ isFirstPara <- gets stFirstPara
+ paraProps <- getParaProps $ case lst of
+ [Math DisplayMath _] -> True
+ _ -> False
+ bodyTextStyle <- pStyleM "Body Text"
+ let paraProps' = case paraProps of
+ [] | isFirstPara -> [mknode "w:pPr" []
+ [pCustomStyle "FirstParagraph"]]
+ [] -> [mknode "w:pPr" [] [bodyTextStyle]]
+ ps -> ps
+ modify $ \s -> s { stFirstPara = False }
+ contents <- inlinesToOpenXML opts lst
+ return [mknode "w:p" [] (paraProps' ++ contents)]
blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
blockToOpenXML' _ b@(RawBlock format str)
| format == Format "openxml" = return [ x | Elem x <- parseXML str ]
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 756bc3fd8..f25bbadfb 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -56,7 +56,7 @@ import qualified Data.Text.Lazy as TL
import Network.HTTP (urlEncode)
import Network.URI (URI (..), parseURIReference, unEscapeString)
import Numeric (showHex)
-import Text.Blaze.Internal (customLeaf)
+import Text.Blaze.Internal (customLeaf, MarkupM(Empty))
import Text.Blaze.Html hiding (contents)
import Text.Pandoc.Definition
import Text.Pandoc.Highlighting (formatHtmlBlock, formatHtmlInline, highlight,
@@ -658,6 +658,7 @@ blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) =
figure opts attr txt (s,tit)
blockToHtml opts (Para lst)
| isEmptyRaw lst = return mempty
+ | null lst && not (isEnabled Ext_empty_paragraphs opts) = return mempty
| otherwise = do
contents <- inlineListToHtml opts lst
return $ H.p contents
@@ -902,8 +903,7 @@ tableItemToHtml opts tag' align' item = do
let tag'' = if null alignStr
then tag'
else tag' ! attribs
- return $ (
- tag'' contents) >> nl opts
+ return $ tag'' contents >> nl opts
toListItems :: WriterOptions -> [Html] -> [Html]
toListItems opts items = map (toListItem opts) items ++ [nl opts]
@@ -911,9 +911,13 @@ toListItems opts items = map (toListItem opts) items ++ [nl opts]
toListItem :: WriterOptions -> Html -> Html
toListItem opts item = nl opts >> H.li item
-blockListToHtml :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Html
+blockListToHtml :: PandocMonad m
+ => WriterOptions -> [Block] -> StateT WriterState m Html
blockListToHtml opts lst =
- (mconcat . intersperse (nl opts)) <$> mapM (blockToHtml opts) lst
+ (mconcat . intersperse (nl opts) . filter nonempty)
+ <$> mapM (blockToHtml opts) lst
+ where nonempty (Empty _) = False
+ nonempty _ = True
-- | Convert list of Pandoc inline elements to HTML.
inlineListToHtml :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Html
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 702349636..8aa19dbb5 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -130,7 +130,6 @@ setFirstPara :: PandocMonad m => OD m ()
setFirstPara = modify $ \s -> s { stFirstPara = True }
inParagraphTags :: PandocMonad m => Doc -> OD m Doc
-inParagraphTags d | isEmpty d = return empty
inParagraphTags d = do
b <- gets stFirstPara
a <- if b
@@ -323,7 +322,8 @@ blockToOpenDocument o bs
else inParagraphTags =<< inlinesToOpenDocument o b
| Para [Image attr c (s,'f':'i':'g':':':t)] <- bs
= figure attr c s t
- | Para b <- bs = if null b
+ | Para b <- bs = if null b &&
+ not (isEnabled Ext_empty_paragraphs o)
then return empty
else inParagraphTags =<< inlinesToOpenDocument o b
| LineBlock b <- bs = blockToOpenDocument o $ linesToPara b