diff options
-rw-r--r-- | README | 37 | ||||
-rw-r--r-- | src/Text/Pandoc.hs | 19 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Templates.hs | 17 | ||||
-rw-r--r-- | src/pandoc.hs | 73 |
5 files changed, 73 insertions, 78 deletions
@@ -26,8 +26,8 @@ tables, flexible ordered lists, definition lists, delimited code blocks, superscript, subscript, strikeout, title blocks, automatic tables of contents, embedded LaTeX math, citations, and markdown inside HTML block elements. (These enhancements, described below under -[Pandoc's markdown](#pandocs-markdown), can be disabled using the `--strict` -option.) +[Pandoc's markdown](#pandocs-markdown), can be disabled using the `strict` +input or output format.) In contrast to most existing tools for converting markdown to HTML, which use regex substitutions, Pandoc has a modular design: it consists of a @@ -117,10 +117,11 @@ and `xunicode` (if `xelatex` is used). A user who wants a drop-in replacement for `Markdown.pl` may create a symbolic link to the `pandoc` executable called `hsmarkdown`. When -invoked under the name `hsmarkdown`, `pandoc` will behave as if the -`--strict` flag had been selected, and no command-line options will be -recognized. However, this approach does not work under Cygwin, due to -problems with its simulation of symbolic links. +invoked under the name `hsmarkdown`, `pandoc` will behave as if +invoked with `-f strict --email-obfuscation=references`, +and all command-line options will be treated as regular arguments. +However, this approach does not work under Cygwin, due to problems with +its simulation of symbolic links. [Cygwin]: http://www.cygwin.com/ [`iconv`]: http://www.gnu.org/software/libiconv/ @@ -192,12 +193,6 @@ General options Reader options -------------- -`--strict` -: Use strict markdown syntax, with no pandoc extensions or variants. - When the input format is HTML, this means that constructs that have no - equivalents in standard markdown (e.g. definition lists or strikeout - text) will be parsed as raw HTML. - `-R`, `--parse-raw` : Parse untranslatable HTML codes and LaTeX environments as raw HTML or LaTeX, instead of ignoring them. Affects only HTML and LaTeX @@ -403,8 +398,6 @@ Options affecting specific writers *none* leaves `mailto:` links as they are. *javascript* obfuscates them using javascript. *references* obfuscates them by printing their letters as decimal or hexadecimal character references. - If `--strict` is specified, *references* is used regardless of the - presence of this option. `--id-prefix`=*STRING* : Specify a prefix to be added to all automatically generated identifiers @@ -764,8 +757,8 @@ Pandoc's markdown Pandoc understands an extended and slightly revised version of John Gruber's [markdown] syntax. This document explains the syntax, noting differences from standard markdown. Except where noted, these -differences can be suppressed by specifying the `--strict` command-line -option. +differences can be suppressed by using the `strict` format instead +of `markdown`. Philosophy ---------- @@ -934,8 +927,8 @@ Standard markdown syntax does not require a blank line before a block quote. Pandoc does require this (except, of course, at the beginning of the document). The reason for the requirement is that it is all too easy for a `>` to end up at the beginning of a line by accident (perhaps through line -wrapping). So, unless `--strict` is used, the following does not produce -a nested block quote in pandoc: +wrapping). So, unless the `strict` format is used, the following does +not produce a nested block quote in pandoc: > This is a block quote. >> Nested. @@ -1291,7 +1284,7 @@ around "Third". Pandoc follows a simple rule: if the text is followed by a blank line, it is treated as a paragraph. Since "Second" is followed by a list, and not a blank line, it isn't treated as a paragraph. The fact that the list is followed by a blank line is irrelevant. (Note: -Pandoc works this way even when the `--strict` option is specified. This +Pandoc works this way even when the `strict` format is specified. This behavior is consistent with the official markdown syntax description, even though it is different from that of `Markdown.pl`.) @@ -1612,8 +1605,8 @@ which allows only the following characters to be backslash-escaped: \`*_{}[]()>#+-.! -(However, if the `--strict` option is supplied, the standard -markdown rule will be used.) +(However, if the `strict` format is used, the standard markdown rule +will be used.) A backslash-escaped space is parsed as a nonbreaking space. It will appear in TeX output as `~` and in HTML and XML as `\ ` or @@ -1847,7 +1840,7 @@ with blank lines, and start and end at the left margin. Within these blocks, everything is interpreted as HTML, not markdown; so (for example), `*` does not signify emphasis. -Pandoc behaves this way when `--strict` is specified; but by default, +Pandoc behaves this way when the `strict` format is used; but by default, pandoc interprets material between HTML block tags as markdown. Thus, for example, Pandoc will turn diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 06bfd128f..61e461b35 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -139,11 +139,13 @@ import Text.Pandoc.Templates import Text.Pandoc.Options import Text.Pandoc.Shared (safeRead) import Data.ByteString.Lazy (ByteString) +import Data.List (intercalate) import Data.Version (showVersion) import Text.JSON.Generic import Data.Set (Set) import qualified Data.Set as Set import Text.Parsec +import Text.Parsec.Error import Paths_pandoc (version) -- | Version number of pandoc library. @@ -160,10 +162,12 @@ parseFormatSpec = parse formatSpec "" formatName = many1 $ noneOf "-+" extMod = do polarity <- oneOf "-+" - name <- many1 $ noneOf "-+" - ext <- case safeRead name of + name <- many $ noneOf "-+" + ext <- case safeRead ("Ext_" ++ name) of Just n -> return n - Nothing -> unexpected $ "Unknown extension: " ++ name + Nothing + | name == "lhs" -> return Ext_literate_haskell + | otherwise -> fail $ "Unknown extension: " ++ name return $ case polarity of '-' -> Set.delete ext _ -> Set.insert ext @@ -172,6 +176,8 @@ parseFormatSpec = parse formatSpec "" readers :: [(String, ReaderOptions -> String -> Pandoc)] readers = [("native" , \_ -> readNative) ,("json" , \_ -> decodeJSON) + ,("strict" , \o -> readMarkdown + o{ readerExtensions = strictExtensions } ) ,("markdown" , readMarkdown) ,("rst" , readRST) ,("docbook" , readDocBook) @@ -215,6 +221,8 @@ writers = [ ,("texinfo" , PureStringWriter writeTexinfo) ,("man" , PureStringWriter writeMan) ,("markdown" , PureStringWriter writeMarkdown) + ,("strict" , PureStringWriter $ \o -> + writeMarkdown o{ writerExtensions = strictExtensions } ) ,("plain" , PureStringWriter writePlain) ,("rst" , PureStringWriter writeRST) ,("mediawiki" , PureStringWriter writeMediaWiki) @@ -224,10 +232,11 @@ writers = [ ,("asciidoc" , PureStringWriter writeAsciiDoc) ] +-- | Retrieve reader based on formatSpec (format+extensions). getReader :: String -> Either String (ReaderOptions -> String -> Pandoc) getReader s = case parseFormatSpec s of - Left e -> Left $ show e + Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] Right (readerName, setExts) -> case lookup readerName readers of Nothing -> Left $ "Unknown reader: " ++ readerName @@ -239,7 +248,7 @@ getReader s = getWriter :: String -> Either String Writer getWriter s = case parseFormatSpec s of - Left e -> Left $ show e + Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] Right (writerName, setExts) -> case lookup writerName writers of Nothing -> Left $ "Unknown writer: " ++ writerName diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 9f615867c..ad28b7c23 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -498,5 +498,6 @@ warn msg = do safeRead :: (Monad m, Read a) => String -> m a safeRead s = case reads s of - (d,[]):_ -> return d - _ -> fail $ "Could not read `" ++ s ++ "'" + (d,x):_ + | all isSpace x -> return d + _ -> fail $ "Could not read `" ++ s ++ "'" diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 061be29aa..899f6510a 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -86,15 +86,16 @@ import qualified Control.Exception.Extensible as E (try, IOException) getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first -> String -- ^ Name of writer -> IO (Either E.IOException String) -getDefaultTemplate _ "native" = return $ Right "" -getDefaultTemplate _ "json" = return $ Right "" -getDefaultTemplate _ "docx" = return $ Right "" -getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument" -getDefaultTemplate user "epub" = getDefaultTemplate user "html" getDefaultTemplate user writer = do - let format = takeWhile (/='+') writer -- strip off "+lhs" if present - let fname = "templates" </> "default" <.> format - E.try $ readDataFile user fname + let format = takeWhile (`notElem` "+-") writer -- strip off extensions + case format of + "native" -> return $ Right "" + "json" -> return $ Right "" + "docx" -> return $ Right "" + "odt" -> getDefaultTemplate user "opendocument" + "epub" -> return $ Right "" + _ -> let fname = "templates" </> "default" <.> format + in E.try $ readDataFile user fname data TemplateState = TemplateState Int [(String,String)] diff --git a/src/pandoc.hs b/src/pandoc.hs index 13e3e2021..12d7d74a2 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -44,7 +44,7 @@ import System.Exit ( exitWith, ExitCode (..) ) import System.FilePath import System.Console.GetOpt import Data.Char ( toLower ) -import Data.List ( intercalate, isSuffixOf, isPrefixOf ) +import Data.List ( intercalate, isPrefixOf ) import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) import System.IO ( stdout ) import System.IO.Error ( isDoesNotExistError ) @@ -58,7 +58,6 @@ import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B import Data.ByteString.Lazy.UTF8 (toString) -import qualified Data.Set as Set import Text.CSL.Reference (Reference(..)) #if MIN_VERSION_base(4,4,0) #else @@ -99,8 +98,8 @@ wrapWords indent c = wrap' (c - indent) (c - indent) then ",\n" ++ replicate indent ' ' ++ x ++ wrap' cols (cols - length x) xs else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs -nonTextFormats :: [String] -nonTextFormats = ["odt","docx","epub"] +isTextFormat :: String -> Bool +isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub"] -- | Data structure for command line options. data Opt = Opt @@ -133,7 +132,6 @@ data Opt = Opt , optEPUBFonts :: [FilePath] -- ^ EPUB fonts to embed , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments - , optStrict :: Bool -- ^ Use strict markdown syntax , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , optWrapText :: Bool -- ^ Wrap text , optColumns :: Int -- ^ Line length in characters @@ -186,7 +184,6 @@ defaultOpts = Opt , optEPUBFonts = [] , optDumpArgs = False , optIgnoreArgs = False - , optStrict = False , optReferenceLinks = False , optWrapText = True , optColumns = 72 @@ -237,7 +234,10 @@ options = , Option "" ["strict"] (NoArg - (\opt -> return opt { optStrict = True } )) + (\opt -> do + err 59 $ "The --strict option has been removed.\n" ++ + "Use `strict' input or output format instead." + return opt )) "" -- "Disable markdown syntax extensions" , Option "R" ["parse-raw"] @@ -702,7 +702,7 @@ usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ (wrapWords 16 78 $ readers'names) ++ "\nOutput formats: " ++ - (wrapWords 16 78 $ writers'names ++ nonTextFormats) ++ "\nOptions:") + (wrapWords 16 78 $ writers'names) ++ "\nOptions:") where writers'names = map fst writers readers'names = map fst readers @@ -782,9 +782,10 @@ main = do ["Try " ++ prg ++ " --help for more information."] let defaultOpts' = if compatMode - then defaultOpts { optReader = "markdown" + then defaultOpts { optReader = "strict" , optWriter = "html" - , optStrict = True } + , optEmailObfuscation = + ReferenceObfuscation } else defaultOpts -- thread option data structure through all supplied option actions @@ -819,7 +820,6 @@ main = do , optEPUBFonts = epubFonts , optDumpArgs = dumpArgs , optIgnoreArgs = ignoreArgs - , optStrict = strict , optReferenceLinks = referenceLinks , optWrapText = wrap , optColumns = columns @@ -867,8 +867,8 @@ main = do let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" - let laTeXOutput = writerName' == "latex" || writerName' == "beamer" || - writerName' == "latex+lhs" || writerName' == "beamer+lhs" + let laTeXOutput = "latex" `isPrefixOf` writerName' || + "beamer" `isPrefixOf` writerName' when pdfOutput $ do -- make sure writer is latex or beamer @@ -882,11 +882,11 @@ main = do latexEngine ++ " is needed for pdf output." Just _ -> return () - reader <- case (lookup readerName' readers) of - Just r -> return r - Nothing -> err 7 ("Unknown reader: " ++ readerName') + reader <- case getReader readerName' of + Right r -> return r + Left e -> err 7 e - let standalone' = standalone || writerName' `elem` nonTextFormats || pdfOutput + let standalone' = standalone || not (isTextFormat writerName') || pdfOutput templ <- case templatePath of _ | not standalone' -> return "" @@ -896,8 +896,8 @@ main = do Left e -> throwIO e Right t -> return t Just tp -> do - -- strip off "+lhs" if present - let format = takeWhile (/='+') writerName' + -- strip off extensions + let format = takeWhile (`notElem` "+-") writerName' let tp' = case takeExtension tp of "" -> tp <.> format _ -> tp @@ -919,13 +919,13 @@ main = do return $ ("mathml-script", s) : variables _ -> return variables - variables'' <- case writerName' of - "dzslides" -> do + variables'' <- if "dzslides" `isPrefixOf` writerName' + then do dztempl <- readDataFile datadir $ "dzslides" </> "template.html" let dzcore = unlines $ dropWhile (not . isPrefixOf "<!-- {{{{ dzslides core") $ lines dztempl return $ ("dzslides-core", dzcore) : variables' - _ -> return variables' + else return variables' -- unescape reference ids, which may contain XML entities, so -- that we can do lookups with regular string equality @@ -942,13 +942,8 @@ main = do then "." else takeDirectory (head sources) - let defaultExts = if strict - then strictExtensions - else pandocExtensions - - let readerOpts = def{ readerExtensions = defaultExts - , readerSmart = smart || (texLigatures && - (laTeXOutput || writerName' == "context")) + let readerOpts = def{ readerSmart = smart || (texLigatures && + (laTeXOutput || "context" `isPrefixOf` writerName')) , readerStandalone = standalone' , readerParseRaw = parseRaw , readerColumns = columns @@ -972,13 +967,10 @@ main = do writerIgnoreNotes = False, writerNumberSections = numberSections, writerSectionDivs = sectionDivs, - writerExtensions = defaultExts, writerReferenceLinks = referenceLinks, writerWrapText = wrap, writerColumns = columns, - writerEmailObfuscation = if strict - then ReferenceObfuscation - else obfuscationMethod, + writerEmailObfuscation = obfuscationMethod, writerIdentifierPrefix = idPrefix, writerSourceDirectory = sourceDir, writerUserDataDir = datadir, @@ -997,7 +989,7 @@ main = do writerReferenceDocx = referenceDocx } - when (writerName' `elem` nonTextFormats&& outputFile == "-") $ + when (not (isTextFormat writerName') && outputFile == "-") $ err 5 $ "Cannot write " ++ writerName' ++ " output to stdout.\n" ++ "Specify an output file using the -o option." @@ -1022,7 +1014,7 @@ main = do let doc0 = foldr ($) doc transforms - doc1 <- if writerName' == "rtf" + doc1 <- if "rtf" `isPrefixOf` writerName' then bottomUpM rtfEmbedImage doc0 else return doc0 @@ -1050,12 +1042,11 @@ main = do writerFn "-" = UTF8.putStr writerFn f = UTF8.writeFile f - let mbwriter = lookup writerName' writers - case mbwriter of - Nothing -> err 9 ("Unknown writer: " ++ writerName') - Just (IOStringWriter f) -> f writerOptions doc2 >>= writerFn outputFile - Just (IOByteStringWriter f) -> f writerOptions doc2 >>= writeBinary - Just (PureStringWriter f) + case getWriter writerName' of + Left e -> err 9 e + Right (IOStringWriter f) -> f writerOptions doc2 >>= writerFn outputFile + Right (IOByteStringWriter f) -> f writerOptions doc2 >>= writeBinary + Right (PureStringWriter f) | pdfOutput -> do res <- tex2pdf latexEngine $ f writerOptions doc2 case res of |