aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2019-02-28 20:28:16 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2019-02-28 21:50:09 -0800
commit77faccb505992c944cd1b92f50e4e00d2927682b (patch)
treee9c80f4f48974860378fa3ea9fd33b767625a200
parent7aeabd8430bce7253d34a9a4baf26bbf71e9869e (diff)
downloadpandoc-77faccb505992c944cd1b92f50e4e00d2927682b.tar.gz
Shared: add filterIpynbOutput. [API change]
Add command line option `--ipynb-output=all|none|best`. Closes #5339.
-rw-r--r--MANUAL.txt9
-rw-r--r--src/Text/Pandoc/App.hs29
-rw-r--r--src/Text/Pandoc/App/CommandLineOptions.hs12
-rw-r--r--src/Text/Pandoc/App/Opt.hs2
-rw-r--r--src/Text/Pandoc/Shared.hs29
5 files changed, 71 insertions, 10 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index 66e2d25f8..d92a7afe5 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -1192,6 +1192,15 @@ Options affecting specific writers {.options}
the EPUB-specific contents. The default is `EPUB`. To put
the EPUB contents in the top level, use an empty string.
+`--ipynb-output=all|none|best`
+
+: Determines how ipynb output cells are treated. `all` means
+ that all of the data formats included in the original are
+ preserved. `none` means that the contents of data cells
+ are omitted. `best` causes pandoc to try to pick the
+ richest data block in each output cell that is compatible
+ with the output format. The default is `best`.
+
`--pdf-engine=`*PROGRAM*
: Use the specified engine when producing PDF output.
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 4e4e3211c..cf70f3971 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -73,7 +73,7 @@ import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.Readers.Markdown (yamlToMeta)
import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
- headerShift, isURI, tabFilter, uriPathToPath)
+ headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput)
import qualified Text.Pandoc.UTF8 as UTF8
#ifndef _WINDOWS
import System.Posix.IO (stdOutput)
@@ -247,8 +247,20 @@ convertWithOpts opts = do
(writerExtensions writerOptions) &&
writerWrapText writerOptions == WrapPreserve)
then (eastAsianLineBreakFilter :)
- else id) $
- []
+ else id) .
+ (case optIpynbOutput opts of
+ "all" -> id
+ "none" -> (filterIpynbOutput Nothing :)
+ "best" -> (filterIpynbOutput (Just $
+ if htmlFormat writerName
+ then Format "html"
+ else
+ case writerName of
+ "latex" -> Format "latex"
+ "beamer" -> Format "latex"
+ _ -> Format writerName) :)
+ _ -> id) -- should not happen
+ $ []
let sourceToDoc :: [FilePath] -> PandocIO Pandoc
sourceToDoc sources' =
@@ -293,15 +305,12 @@ convertWithOpts opts = do
TL.unpack (TE.decodeUtf8With TE.lenientDecode err')
Nothing -> do
- let htmlFormat = format `elem`
- ["html","html4","html5","s5","slidy",
- "slideous","dzslides","revealjs"]
- addNl = if standalone
+ let addNl = if standalone
then id
else (<> T.singleton '\n')
output <- addNl <$> f writerOptions doc
writerFn eol outputFile =<<
- if optSelfContained opts && htmlFormat
+ if optSelfContained opts && htmlFormat writerName
-- TODO not maximally efficient; change type
-- of makeSelfContained so it works w/ Text
then T.pack <$> makeSelfContained (T.unpack output)
@@ -309,6 +318,10 @@ convertWithOpts opts = do
type Transform = Pandoc -> Pandoc
+htmlFormat :: String -> Bool
+htmlFormat = (`elem` ["html","html4","html5","s5","slidy",
+ "slideous","dzslides","revealjs"])
+
isTextFormat :: String -> Bool
isTextFormat s = s `notElem` ["odt","docx","epub2","epub3","epub","pptx"]
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index 6ae167ebf..be93357cb 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -643,7 +643,17 @@ options =
"NUMBER")
"" -- "Header level at which to split chapters in EPUB"
- , Option "" ["pdf-engine"]
+ , Option "" ["ipynb-output"]
+ (ReqArg
+ (\arg opt ->
+ if arg `notElem` ["all","none","best"]
+ then E.throwIO $ PandocOptionError $
+ "ipynb-output must be all, none, or best"
+ else return opt { optIpynbOutput = arg })
+ "all|none|best")
+ "" -- "Starting number for sections, subsections, etc."
+
+ , Option "" ["pdf-engine"]
(ReqArg
(\arg opt -> do
let b = takeBaseName arg
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index 698fdc96b..59405cbeb 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -123,6 +123,7 @@ data Opt = Opt
, optFileScope :: Bool -- ^ Parse input files before combining
, optTitlePrefix :: Maybe String -- ^ Prefix for title
, optCss :: [FilePath] -- ^ CSS files to link to
+ , optIpynbOutput :: String -- ^ Maybe f to use best data; Nothing to omit
, optIncludeBeforeBody :: [FilePath] -- ^ Files to include before
, optIncludeAfterBody :: [FilePath] -- ^ Files to include after body
, optIncludeInHeader :: [FilePath] -- ^ Files to include in header
@@ -196,6 +197,7 @@ defaultOpts = Opt
, optFileScope = False
, optTitlePrefix = Nothing
, optCss = []
+ , optIpynbOutput = "best"
, optIncludeBeforeBody = []
, optIncludeAfterBody = []
, optIncludeInHeader = []
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index db00d5aa4..992e57b6a 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -86,6 +86,7 @@ module Text.Pandoc.Shared (
eastAsianLineBreakFilter,
underlineSpan,
splitSentences,
+ filterIpynbOutput,
-- * TagSoup HTML handling
renderTags',
-- * File handling
@@ -122,12 +123,13 @@ import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
generalCategory, GeneralCategory(NonSpacingMark,
SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
import Data.Data (Data, Typeable)
-import Data.List (find, intercalate, intersperse, stripPrefix)
+import Data.List (find, intercalate, intersperse, stripPrefix, sortBy)
import qualified Data.Map as M
import Data.Maybe (mapMaybe)
import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr)
import qualified Data.Set as Set
import qualified Data.Text as T
+import Data.Ord (comparing)
import Data.Version (showVersion)
import Network.URI (URI (uriScheme), escapeURIString, parseURI)
import Paths_pandoc (version)
@@ -689,6 +691,31 @@ splitSentences xs =
let (sent, rest) = breakSentence xs
in if null rest then [sent] else sent : splitSentences rest
+-- | Process ipynb output cells. If mode is Nothing,
+-- remove all output. If mode is Just format, select
+-- best output for the format.
+filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc
+filterIpynbOutput mode = walk go
+ where go (Div (ident, ("output":os), kvs) bs) =
+ case mode of
+ Nothing -> Div (ident, ("output":os), kvs) []
+ Just fmt -> Div (ident, ("output":os), kvs) $
+ take 1 $ sortBy (comparing rank) bs
+ where
+ rank (RawBlock (Format "html") _)
+ | fmt == Format "html" = (1 :: Int)
+ | fmt == Format "markdown" = 2
+ | otherwise = 3
+ rank (RawBlock (Format "latex") _)
+ | fmt == Format "latex" = 1
+ | fmt == Format "markdown" = 2
+ | otherwise = 3
+ rank (RawBlock f _)
+ | fmt == f = 1
+ | otherwise = 3
+ rank _ = 2
+ go x = x
+
--
-- TagSoup HTML handling
--