From 746c92a41a4f1df5ac97246fe69555cef5419d00 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 28 Sep 2019 14:47:41 -0700 Subject: Raise error on unsupported extensions. Closes #4338. + An error is now raised if you try to specify (enable or disable) an extension that does not affect the given format, e.g. `docx+pipe_tables`. + The `--list-extensions[=FORMAT]` option now lists only extensions that affect the given FORMAT. + Text.Pandoc.Error: Add constructors `PandocUnknownReaderError`, `PandocUnknownWriterError`, `PandocUnsupportedExtensionError`. [API change] + Text.Pandoc.Extensions now exports `getAllExtensions`, which returns the extensions that affect a given format (whether enabled by default or not). [API change] + Text.Pandoc.Extensions: change type of `parseFormatSpec` from `Either ParseError (String, Extensions -> Extensions)` to `Either ParseError (String, [Extension], [Extension])` [API change]. + Text.Pandoc.Readers: change type of `getReader` so it returns a value in the PandocMonad instance rather than an Either [API change]. Exceptions for unknown formats and unsupported extensions are now raised by this function and need not be handled by the calling function. + Text.Pandoc.Writers: change type of `getWriter` so it returns a value in the PandocMonad instance rather than an Either [API change]. Exceptions for unknown formats and unsupported extensions are now raised by this function and need not be handled by the calling function. 
--- src/Text/Pandoc/App.hs | 11 +- src/Text/Pandoc/App/CommandLineOptions.hs | 23 ++++- src/Text/Pandoc/App/OutputSettings.hs | 21 +--- src/Text/Pandoc/Error.hs | 23 +++++ src/Text/Pandoc/Extensions.hs | 166 +++++++++++++++++++++++++++--- src/Text/Pandoc/Lua/Module/Pandoc.hs | 25 +++-- src/Text/Pandoc/Readers.hs | 26 +++-- src/Text/Pandoc/Writers.hs | 35 +++++-- 8 files changed, 259 insertions(+), 71 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 8bdf12b69..b96fd5306 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -155,16 +155,7 @@ convertWithOpts opts = do <> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`." _ -> return () - (reader, readerExts) <- - case getReader readerName of - Right (r, es) -> return (r :: Reader PandocIO, es) - Left e -> throwError $ PandocAppError e' - where e' = case readerName of - "pdf" -> e ++ - "\nPandoc can convert to PDF, but not from PDF." - "doc" -> e ++ - "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." 
- _ -> e + (reader :: Reader PandocIO, readerExts) <- getReader readerName let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" || diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index cffe69eca..abb73ec92 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -768,12 +768,25 @@ options = , Option "" ["list-extensions"] (OptArg (\arg _ -> do - let exts = getDefaultExtensions (fromMaybe "markdown" arg) - let showExt x = (if extensionEnabled x exts - then '+' - else '-') : drop 4 (show x) + let extList :: [Extension] + extList = [minBound..maxBound] + let allExts = + case arg of + Nothing -> extensionsFromList extList + Just fmt -> getAllExtensions fmt + let defExts = + case arg of + Nothing -> getDefaultExtensions + "markdown" + Just fmt -> getDefaultExtensions fmt + let showExt x = + (if extensionEnabled x defExts + then '+' + else if extensionEnabled x allExts + then '-' + else ' ') : drop 4 (show x) mapM_ (UTF8.hPutStrLn stdout . 
showExt) - ([minBound..maxBound] :: [Extension]) + [ex | ex <- extList, extensionEnabled ex allExts] exitSuccess ) "FORMAT") "" diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index 744f4591f..cfb6f7ec2 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -85,18 +85,12 @@ optToOutputSettings opts = do then writerName else map toLower $ baseWriterName writerName - (writer, writerExts) <- + (writer :: Writer PandocIO, writerExts) <- if ".lua" `isSuffixOf` format then return (TextWriter (\o d -> writeCustom writerName o d) :: Writer PandocIO, mempty) - else case getWriter (map toLower writerName) of - Left e -> throwError $ PandocAppError $ - if format == "pdf" - then e ++ "\n" ++ pdfIsNoWriterErrorMsg - else e - Right (w, es) -> return (w :: Writer PandocIO, es) - + else getWriter (map toLower writerName) let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput @@ -249,13 +243,6 @@ optToOutputSettings opts = do baseWriterName :: String -> String baseWriterName = takeWhile (\c -> c /= '+' && c /= '-') -pdfIsNoWriterErrorMsg :: String -pdfIsNoWriterErrorMsg = - "To create a pdf using pandoc, use " ++ - "-t latex|beamer|context|ms|html5" ++ - "\nand specify an output file with " ++ - ".pdf extension (-o filename.pdf)." 
- pdfWriterAndProg :: Maybe String -- ^ user-specified writer name -> Maybe String -- ^ user-specified pdf-engine -> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg) @@ -263,6 +250,8 @@ pdfWriterAndProg mWriter mEngine = do let panErr msg = liftIO $ E.throwIO $ PandocAppError msg case go mWriter mEngine of Right (writ, prog) -> return (writ, Just prog) + Left "pdf writer" -> liftIO $ E.throwIO $ + PandocUnknownWriterError "pdf" Left err -> panErr err where go Nothing Nothing = Right ("latex", "pdflatex") @@ -279,7 +268,7 @@ pdfWriterAndProg mWriter mEngine = do [] -> Left $ "pdf-engine " ++ eng ++ " not known" - engineForWriter "pdf" = Left pdfIsNoWriterErrorMsg + engineForWriter "pdf" = Left "pdf writer" engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of eng : _ -> Right eng [] -> Left $ diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index ae66162b3..113ab9d6e 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -54,6 +54,9 @@ data PandocError = PandocIOError String IOError | PandocMacroLoop String | PandocUTF8DecodingError String Int Word8 | PandocIpynbDecodingError String + | PandocUnknownReaderError String + | PandocUnknownWriterError String + | PandocUnsupportedExtensionError String String deriving (Show, Typeable, Generic) instance Exception PandocError @@ -112,6 +115,26 @@ handleError (Left e) = "The input must be a UTF-8 encoded text." PandocIpynbDecodingError w -> err 93 $ "ipynb decoding error: " ++ w + PandocUnknownReaderError r -> err 21 $ + "Unknown input format " ++ r ++ + case r of + "doc" -> "\nPandoc can convert from DOCX, but not from DOC." ++ + "\nTry using Word to save your DOC file as DOCX," ++ + " and convert that with pandoc." + "pdf" -> "\nPandoc can convert to PDF, but not from PDF." 
+ _ -> "" + PandocUnknownWriterError w -> err 22 $ + "Unknown output format " ++ w ++ + case w of + "pdf" -> "\nTo create a pdf using pandoc, use" ++ + " -t latex|beamer|context|ms|html5" ++ + "\nand specify an output file with " ++ + ".pdf extension (-o filename.pdf)." + "doc" -> "\nPandoc can convert to DOCX, but not to DOC." + _ -> "" + PandocUnsupportedExtensionError ext f -> err 23 $ + "The extension " ++ ext ++ " is not supported " ++ + "for " ++ f err :: Int -> String -> IO a err exitCode msg = do diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 1c787b7d3..d85b26200 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -26,6 +26,7 @@ module Text.Pandoc.Extensions ( Extension(..) , enableExtension , disableExtension , getDefaultExtensions + , getAllExtensions , pandocExtensions , plainExtensions , strictExtensions @@ -312,12 +313,12 @@ strictExtensions = extensionsFromList -- | Default extensions from format-describing string. 
getDefaultExtensions :: String -> Extensions -getDefaultExtensions "markdown_strict" = strictExtensions +getDefaultExtensions "markdown_strict" = strictExtensions getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions -getDefaultExtensions "markdown_mmd" = multimarkdownExtensions -getDefaultExtensions "markdown_github" = githubMarkdownExtensions -getDefaultExtensions "markdown" = pandocExtensions -getDefaultExtensions "ipynb" = +getDefaultExtensions "markdown_mmd" = multimarkdownExtensions +getDefaultExtensions "markdown_github" = githubMarkdownExtensions +getDefaultExtensions "markdown" = pandocExtensions +getDefaultExtensions "ipynb" = extensionsFromList [ Ext_all_symbols_escapable , Ext_pipe_tables @@ -379,16 +380,149 @@ getDefaultExtensions "opml" = pandocExtensions -- affects notes getDefaultExtensions _ = extensionsFromList [Ext_auto_identifiers] --- | Parse a format-specifying string into a markup format and a function that --- takes Extensions and enables and disables extensions as defined in the format --- spec. +allMarkdownExtensions :: Extensions +allMarkdownExtensions = + pandocExtensions <> + extensionsFromList + [ Ext_old_dashes + , Ext_angle_brackets_escapable + , Ext_lists_without_preceding_blankline + , Ext_four_space_rule + , Ext_spaced_reference_links + , Ext_hard_line_breaks + , Ext_ignore_line_breaks + , Ext_east_asian_line_breaks + , Ext_emoji + , Ext_tex_math_single_backslash + , Ext_tex_math_double_backslash + , Ext_markdown_attribute + , Ext_mmd_title_block + , Ext_abbreviations + , Ext_autolink_bare_uris + , Ext_mmd_link_attributes + , Ext_mmd_header_identifiers + , Ext_compact_definition_lists + , Ext_gutenberg + , Ext_smart + , Ext_literate_haskell + ] + + +-- | Get all valid extensions for a format. This is used +-- mainly in checking format specifications for validity. 
+getAllExtensions :: String -> Extensions +getAllExtensions f = universalExtensions <> getAll f + where + autoIdExtensions = extensionsFromList + [ Ext_auto_identifiers + , Ext_gfm_auto_identifiers + , Ext_ascii_identifiers + ] + universalExtensions = extensionsFromList + [ Ext_east_asian_line_breaks ] + getAll "markdown_strict" = allMarkdownExtensions + getAll "markdown_phpextra" = allMarkdownExtensions + getAll "markdown_mmd" = allMarkdownExtensions + getAll "markdown_github" = allMarkdownExtensions + getAll "markdown" = allMarkdownExtensions + getAll "ipynb" = allMarkdownExtensions + getAll "docx" = extensionsFromList + [ Ext_empty_paragraphs + , Ext_styles + ] + getAll "opendocument" = extensionsFromList + [ Ext_empty_paragraphs + , Ext_native_numbering + ] + getAll "odt" = getAll "opendocument" <> autoIdExtensions + getAll "muse" = autoIdExtensions <> + extensionsFromList + [ Ext_amuse ] + getAll "asciidoc" = autoIdExtensions + getAll "plain" = allMarkdownExtensions + getAll "gfm" = githubMarkdownExtensions <> + autoIdExtensions <> + extensionsFromList + [ Ext_raw_html + , Ext_raw_tex -- only supported in writer (for math) + , Ext_hard_line_breaks + , Ext_smart + ] + getAll "commonmark" = getAll "gfm" + getAll "org" = autoIdExtensions <> + extensionsFromList + [ Ext_citations + , Ext_smart + ] + getAll "html" = autoIdExtensions <> + extensionsFromList + [ Ext_native_divs + , Ext_line_blocks + , Ext_native_spans + , Ext_empty_paragraphs + , Ext_raw_html + , Ext_raw_tex + , Ext_task_lists + , Ext_tex_math_dollars + , Ext_tex_math_single_backslash + , Ext_tex_math_double_backslash + , Ext_literate_haskell + , Ext_epub_html_exts + ] + getAll "html4" = getAll "html" + getAll "html5" = getAll "html" + getAll "epub" = getAll "html" + getAll "epub2" = getAll "epub" + getAll "epub3" = getAll "epub" + getAll "latex" = autoIdExtensions <> + extensionsFromList + [ Ext_smart + , Ext_latex_macros + , Ext_raw_tex + , Ext_task_lists + , Ext_literate_haskell + ] + getAll 
"beamer" = getAll "latex" + getAll "context" = autoIdExtensions <> + extensionsFromList + [ Ext_smart + , Ext_raw_tex + , Ext_ntb + ] + getAll "textile" = autoIdExtensions <> + extensionsFromList + [ Ext_old_dashes + , Ext_smart + , Ext_raw_tex + ] + getAll "opml" = allMarkdownExtensions -- affects notes + getAll "twiki" = autoIdExtensions <> + extensionsFromList + [ Ext_smart ] + getAll "vimwiki" = autoIdExtensions + getAll "dokuwiki" = autoIdExtensions + getAll "tikiwiki" = autoIdExtensions + getAll "rst" = autoIdExtensions <> + extensionsFromList + [ Ext_smart + , Ext_literate_haskell + ] + getAll "mediawiki" = autoIdExtensions <> + extensionsFromList + [ Ext_smart ] + getAll _ = mempty + + +-- | Parse a format-specifying string into a markup format, +-- a set of extensions to enable, and a set of extensions to disable. parseFormatSpec :: String - -> Either ParseError (String, Extensions -> Extensions) + -> Either ParseError (String, [Extension], [Extension]) parseFormatSpec = parse formatSpec "" where formatSpec = do name <- formatName - extMods <- many extMod - return (name, \x -> foldl (flip ($)) x extMods) + (extsToEnable, extsToDisable) <- foldl (flip ($)) ([],[]) <$> + many extMod + return (name, reverse extsToEnable, reverse extsToDisable) formatName = many1 $ noneOf "-+" extMod = do polarity <- oneOf "-+" @@ -397,10 +531,12 @@ parseFormatSpec = parse formatSpec "" Just n -> return n Nothing | name == "lhs" -> return Ext_literate_haskell - | otherwise -> Prelude.fail $ "Unknown extension: " ++ name - return $ case polarity of - '-' -> disableExtension ext - _ -> enableExtension ext + | otherwise -> Prelude.fail $ + "Unknown extension: " ++ name + return $ \(extsToEnable, extsToDisable) -> + case polarity of + '+' -> (ext : extsToEnable, extsToDisable) + _ -> (extsToEnable, ext : extsToDisable) #ifdef DERIVE_JSON_VIA_TH $(deriveJSON defaultOptions ''Extension) diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index 
8950c4b7f..182008da7 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -16,6 +16,7 @@ module Text.Pandoc.Lua.Module.Pandoc import Prelude import Control.Monad (when) +import Control.Monad.Except (throwError) import Data.Default (Default (..)) import Data.Maybe (fromMaybe) import Data.Text (pack) @@ -34,6 +35,7 @@ import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BSL import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil +import Text.Pandoc.Error -- | Push the "pandoc" on the lua stack. Requires the `list` module to be -- loaded. @@ -60,17 +62,20 @@ walkBlock = walkElement readDoc :: String -> Optional String -> Lua NumResults readDoc content formatSpecOrNil = do let formatSpec = fromMaybe "markdown" (Lua.fromOptional formatSpecOrNil) - case getReader formatSpec of - Left s -> Lua.raiseError s -- Unknown reader - Right (reader, es) -> - case reader of - TextReader r -> do - res <- Lua.liftIO . runIO $ + res <- Lua.liftIO . runIO $ + getReader formatSpec >>= \(rdr,es) -> + case rdr of + TextReader r -> r def{ readerExtensions = es } (pack content) - case res of - Right pd -> (1 :: NumResults) <$ Lua.push pd -- success, push Pandoc - Left s -> Lua.raiseError (show s) -- error while reading - _ -> Lua.raiseError "Only string formats are supported at the moment." + _ -> throwError $ PandocSomeError $ + "Only textual formats are supported" + case res of + Right pd -> (1 :: NumResults) <$ Lua.push pd -- success, push Pandoc + Left (PandocUnknownReaderError f) -> Lua.raiseError $ + "Unknown reader: " ++ f + Left (PandocUnsupportedExtensionError e f) -> Lua.raiseError $ + "Extension " ++ e ++ " not supported for " ++ f + Left e -> Lua.raiseError $ show e -- | Pipes input through a command. 
pipeFn :: String diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 1e2f16d43..3ad479287 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -55,6 +55,7 @@ module Text.Pandoc.Readers ) where import Prelude +import Control.Monad (unless) import Control.Monad.Except (throwError) import Data.Aeson import qualified Data.ByteString.Lazy as BL @@ -134,15 +135,28 @@ readers = [ ("native" , TextReader readNative) ] -- | Retrieve reader, extensions based on formatSpec (format+extensions). -getReader :: PandocMonad m => String -> Either String (Reader m, Extensions) +getReader :: PandocMonad m => String -> m (Reader m, Extensions) getReader s = case parseFormatSpec s of - Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] - Right (readerName, setExts) -> + Left e -> throwError $ PandocAppError + $ intercalate "\n" [m | Message m <- errorMessages e] + Right (readerName, extsToEnable, extsToDisable) -> case lookup readerName readers of - Nothing -> Left $ "Unknown reader: " ++ readerName - Just r -> Right (r, setExts $ - getDefaultExtensions readerName) + Nothing -> throwError $ PandocUnknownReaderError + readerName + Just r -> do + let allExts = getAllExtensions readerName + let exts = foldr disableExtension + (foldr enableExtension + (getDefaultExtensions readerName) + extsToEnable) extsToDisable + mapM_ (\ext -> + unless (extensionEnabled ext allExts) $ + throwError $ + PandocUnsupportedExtensionError + (drop 4 $ show ext) readerName) + (extsToEnable ++ extsToDisable) + return (r, exts) -- | Read pandoc document from JSON format. 
readJSON :: PandocMonad m diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index ecf45839e..c88f860bb 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -70,6 +70,8 @@ module Text.Pandoc.Writers ) where import Prelude +import Control.Monad.Except (throwError) +import Control.Monad (unless) import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate) @@ -78,6 +80,7 @@ import Text.Pandoc.Class import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Error import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Writers.CommonMark import Text.Pandoc.Writers.ConTeXt @@ -176,15 +179,29 @@ writers = [ ] -- | Retrieve writer, extensions based on formatSpec (format+extensions). -getWriter :: PandocMonad m => String -> Either String (Writer m, Extensions) -getWriter s - = case parseFormatSpec s of - Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] - Right (writerName, setExts) -> - case lookup writerName writers of - Nothing -> Left $ "Unknown writer: " ++ writerName - Just r -> Right (r, setExts $ - getDefaultExtensions writerName) +getWriter :: PandocMonad m => String -> m (Writer m, Extensions) +getWriter s = + case parseFormatSpec s of + Left e -> throwError $ PandocAppError + $ intercalate "\n" [m | Message m <- errorMessages e] + Right (writerName, extsToEnable, extsToDisable) -> + case lookup writerName writers of + Nothing -> throwError $ + PandocUnknownWriterError writerName + Just w -> do + let allExts = getAllExtensions writerName + let exts = foldr disableExtension + (foldr enableExtension + (getDefaultExtensions writerName) + extsToEnable) extsToDisable + mapM_ (\ext -> + unless (extensionEnabled ext allExts) $ + throwError $ + PandocUnsupportedExtensionError + (drop 4 $ show ext) writerName) + (extsToEnable ++ extsToDisable) + return (w, exts) + writeJSON :: PandocMonad m => WriterOptions -> Pandoc -> m 
Text writeJSON _ = return . UTF8.toText . BL.toStrict . encode -- cgit v1.2.3