diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-12-04 10:31:06 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-12-04 14:56:57 -0800 |
commit | ae60e0196c5c12d358002cf3251dfebf07c66da6 (patch) | |
tree | 7c3bc1c605e6e65dbe4fe0071580e6ac92de5c00 /src/Text/Pandoc | |
parent | c58ecde93737aa68bd5dda9c4c72193dcaab3cf0 (diff) | |
download | pandoc-ae60e0196c5c12d358002cf3251dfebf07c66da6.tar.gz |
Add `empty_paragraphs` extension.
* Deprecate `--strip-empty-paragraphs` option. Instead we now
use an `empty_paragraphs` extension that can be enabled on
the reader or writer. By default, disabled.
* Add `Ext_empty_paragraphs` constructor to `Extension`.
* Revert "Docx reader: don't strip out empty paragraphs."
This reverts commit d6c58eb836f033a48955796de4d9ffb3b30e297b.
* Implement `empty_paragraphs` extension in docx reader and writer,
opendocument writer, html reader and writer.
* Add tests for `empty_paragraphs` extension.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/App.hs | 19 | ||||
-rw-r--r-- | src/Text/Pandoc/Extensions.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 13 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 29 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 14 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/OpenDocument.hs | 4 |
7 files changed, 52 insertions, 33 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 3fdbf1949..7d7d630ea 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -947,7 +947,10 @@ options = , Option "" ["strip-empty-paragraphs"] (NoArg - (\opt -> return opt{ optStripEmptyParagraphs = True })) + (\opt -> do + deprecatedOption "--stripEmptyParagraphs" + "Use +empty_paragraphs extension." + return opt{ optStripEmptyParagraphs = True })) "" -- "Strip empty paragraphs" , Option "" ["indented-code-classes"] @@ -1472,7 +1475,7 @@ options = , Option "m" ["latexmathml", "asciimathml"] (OptArg (\arg opt -> do - deprecatedOption "--latexmathml, --asciimathml, -m" + deprecatedOption "--latexmathml, --asciimathml, -m" "" return opt { optHTMLMathMethod = LaTeXMathML arg }) "URL") "" -- "Use LaTeXMathML script in html output" @@ -1480,7 +1483,7 @@ options = , Option "" ["mimetex"] (OptArg (\arg opt -> do - deprecatedOption "--mimetex" + deprecatedOption "--mimetex" "" let url' = case arg of Just u -> u ++ "?" Nothing -> "/cgi-bin/mimetex.cgi?" @@ -1491,7 +1494,7 @@ options = , Option "" ["jsmath"] (OptArg (\arg opt -> do - deprecatedOption "--jsmath" + deprecatedOption "--jsmath" "" return opt { optHTMLMathMethod = JsMath arg}) "URL") "" -- "Use jsMath for HTML math" @@ -1499,7 +1502,7 @@ options = , Option "" ["gladtex"] (NoArg (\opt -> do - deprecatedOption "--gladtex" + deprecatedOption "--gladtex" "" return opt { optHTMLMathMethod = GladTeX })) "" -- "Use gladtex for HTML math" @@ -1699,9 +1702,9 @@ splitField s = baseWriterName :: String -> String baseWriterName = takeWhile (\c -> c /= '+' && c /= '-') -deprecatedOption :: String -> IO () -deprecatedOption o = - runIO (report $ Deprecated o "") >>= +deprecatedOption :: String -> String -> IO () +deprecatedOption o msg = + runIO (report $ Deprecated o msg) >>= \r -> case r of Right () -> return () Left e -> E.throwIO e diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 67ad2ad04..771898d70 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -152,6 +152,7 @@ data Extension = | Ext_old_dashes -- ^ -- = em, - before number = en | Ext_spaced_reference_links -- ^ Allow space between two parts of ref link | Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup + | Ext_empty_paragraphs -- ^ Allow empty paragraphs deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) -- | Extensions to be used with pandoc-flavored markdown. diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 1fac98b14..651d46753 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -534,7 +534,10 @@ bodyPartToBlocks (Paragraph pPr parparts) then do modify $ \s -> s { docxDropCap = ils' } return mempty else do modify $ \s -> s { docxDropCap = mempty } - return $ parStyleToTransform pPr $ para ils' + opts <- asks docxOptions + if isNull ils' && not (isEnabled Ext_empty_paragraphs opts) + then return mempty + else return $ parStyleToTransform pPr $ para ils' bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do let kvs = case levelInfo of diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 24935fcd7..b0f5d38f9 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -68,9 +68,11 @@ import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Logging -import Text.Pandoc.Options (Extension (Ext_epub_html_exts, Ext_native_divs, Ext_native_spans, Ext_raw_html), - ReaderOptions (readerExtensions, readerStripComments), - extensionEnabled) +import Text.Pandoc.Options ( + Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs, + Ext_native_spans, Ext_raw_html), + ReaderOptions (readerExtensions, readerStripComments), + extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Shared (addMetaField, crFilter, escapeURI, extractSpaces, safeRead, underlineSpan) @@ -575,7 +577,10 @@ pPlain = do pPara :: PandocMonad m => TagParser m Blocks pPara = do contents <- trimInlines <$> pInTags "p" inline - return $ B.para contents + (do guardDisabled Ext_empty_paragraphs + guard (B.isNull contents) + return mempty) + <|> return (B.para contents) pFigure :: PandocMonad m => TagParser m Blocks pFigure = try $ do diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index f80c2b59a..c9eaaf838 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -922,19 +922,22 @@ blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do captionNode <- withParaProp (pCustomStyle "ImageCaption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode -blockToOpenXML' opts (Para lst) = do - isFirstPara <- gets stFirstPara - paraProps <- getParaProps $ case lst of - [Math DisplayMath _] -> True - _ -> False - bodyTextStyle <- pStyleM "Body Text" - let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] - [] -> [mknode "w:pPr" [] [bodyTextStyle]] - ps -> ps - modify $ \s -> s { stFirstPara = False } - contents <- inlinesToOpenXML opts lst - return [mknode "w:p" [] (paraProps' ++ contents)] +blockToOpenXML' opts (Para lst) + | null lst && not (isEnabled Ext_empty_paragraphs opts) = return [] + | otherwise = do + isFirstPara <- gets stFirstPara + paraProps <- getParaProps $ case lst of + [Math DisplayMath _] -> True + _ -> False + bodyTextStyle <- pStyleM "Body Text" + let paraProps' = case paraProps of + [] | isFirstPara -> [mknode "w:pPr" [] + [pCustomStyle "FirstParagraph"]] + [] -> [mknode "w:pPr" [] [bodyTextStyle]] + ps -> ps + modify $ \s -> s { stFirstPara = False } + contents <- inlinesToOpenXML opts lst + return [mknode "w:p" [] (paraProps' ++ contents)] blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns blockToOpenXML' _ b@(RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 756bc3fd8..f25bbadfb 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -56,7 +56,7 @@ import qualified Data.Text.Lazy as TL import Network.HTTP (urlEncode) import Network.URI (URI (..), parseURIReference, unEscapeString) import Numeric (showHex) -import Text.Blaze.Internal (customLeaf) +import Text.Blaze.Internal (customLeaf, MarkupM(Empty)) import Text.Blaze.Html hiding (contents) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatHtmlBlock, formatHtmlInline, highlight, @@ -658,6 +658,7 @@ blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = figure opts attr txt (s,tit) blockToHtml opts (Para lst) | isEmptyRaw lst = return mempty + | null lst && not (isEnabled Ext_empty_paragraphs opts) = return mempty | otherwise = do contents <- inlineListToHtml opts lst return $ H.p contents @@ -902,8 +903,7 @@ tableItemToHtml opts tag' align' item = do let tag'' = if null alignStr then tag' else tag' ! attribs - return $ ( - tag'' contents) >> nl opts + return $ tag'' contents >> nl opts toListItems :: WriterOptions -> [Html] -> [Html] toListItems opts items = map (toListItem opts) items ++ [nl opts] @@ -911,9 +911,13 @@ toListItems opts items = map (toListItem opts) items ++ [nl opts] toListItem :: WriterOptions -> Html -> Html toListItem opts item = nl opts >> H.li item -blockListToHtml :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Html +blockListToHtml :: PandocMonad m + => WriterOptions -> [Block] -> StateT WriterState m Html blockListToHtml opts lst = - (mconcat . intersperse (nl opts)) <$> mapM (blockToHtml opts) lst + (mconcat . intersperse (nl opts) . filter nonempty) + <$> mapM (blockToHtml opts) lst + where nonempty (Empty _) = False + nonempty _ = True -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Html diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 702349636..8aa19dbb5 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -130,7 +130,6 @@ setFirstPara :: PandocMonad m => OD m () setFirstPara = modify $ \s -> s { stFirstPara = True } inParagraphTags :: PandocMonad m => Doc -> OD m Doc -inParagraphTags d | isEmpty d = return empty inParagraphTags d = do b <- gets stFirstPara a <- if b @@ -323,7 +322,8 @@ blockToOpenDocument o bs else inParagraphTags =<< inlinesToOpenDocument o b | Para [Image attr c (s,'f':'i':'g':':':t)] <- bs = figure attr c s t - | Para b <- bs = if null b + | Para b <- bs = if null b && + not (isEnabled Ext_empty_paragraphs o) then return empty else inParagraphTags =<< inlinesToOpenDocument o b | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b |