diff options
-rw-r--r-- | Benchmark.hs | 36 | ||||
-rw-r--r-- | README | 6 | ||||
-rw-r--r-- | pandoc.cabal | 2 | ||||
-rw-r--r-- | src/Text/Pandoc.hs | 45 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 95 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 17 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 75 | ||||
-rw-r--r-- | src/pandoc.hs | 74 | ||||
-rw-r--r-- | tests/markdown-reader-more.native | 2 | ||||
-rw-r--r-- | tests/markdown-reader-more.txt | 3 |
11 files changed, 142 insertions, 215 deletions
diff --git a/Benchmark.hs b/Benchmark.hs new file mode 100644 index 000000000..333b908be --- /dev/null +++ b/Benchmark.hs @@ -0,0 +1,36 @@ +import Text.Pandoc +import Text.Pandoc.Shared (readDataFile) +import Criterion.Main +import Data.List (isSuffixOf) + +readerBench :: Pandoc + -> (String, ParserState -> String -> Pandoc) + -> Benchmark +readerBench doc (name, reader) = + let writer = case lookup name writers of + Just w -> w + Nothing -> error $ "Could not find writer for " ++ name + inp = writer defaultWriterOptions{ writerWrapText = True + , writerLiterateHaskell = + "+lhs" `isSuffixOf` name } doc + in bench (name ++ " reader") $ whnf + (reader defaultParserState{stateSmart = True + , stateStandalone = True + , stateLiterateHaskell = + "+lhs" `isSuffixOf` name }) inp + +writerBench :: Pandoc + -> (String, WriterOptions -> Pandoc -> a) + -> Benchmark +writerBench doc (name, writer) = bench (name ++ " writer") $ whnf + (writer defaultWriterOptions{ + writerWrapText = True + , writerLiterateHaskell = "+lhs" `isSuffixOf` name }) doc + +main = do + inp <- readDataFile (Just ".") "README" + let ps = defaultParserState{ stateSmart = True } + let doc = readMarkdown ps inp + let readerBs = map (readerBench doc) readers + defaultMain $ map (writerBench doc) writers ++ readerBs + @@ -270,12 +270,6 @@ Options : Disable text wrapping in output. By default, text is wrapped appropriately for the output format. -`--sanitize-html` -: Sanitizes HTML (in markdown or HTML input) using a whitelist. - Unsafe tags are replaced by HTML comments; unsafe attributes - are omitted. URIs in links and images are also checked against a - whitelist of URI schemes. - `--email-obfuscation=`*none|javascript|references* : Specify a method for obfuscating `mailto:` links in HTML documents. *none* leaves `mailto:` links as they are. *javascript* obfuscates diff --git a/pandoc.cabal b/pandoc.cabal index c2cc7268a..727bae405 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -74,6 +74,8 @@ Extra-Source-Files: -- code to create pandoc.1 man page MakeManPage.hs, manpage.template, + -- benchmarking + Benchmark.hs, -- tests tests/bodybg.gif, tests/html-reader.html, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index ab1e3cd03..7d3468461 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -57,6 +57,9 @@ module Text.Pandoc ( -- * Definitions module Text.Pandoc.Definition + -- * Lists of readers and writers + , readers + , writers -- * Readers: converting /to/ Pandoc format , readMarkdown , readRST @@ -127,8 +130,50 @@ import Text.Pandoc.Templates import Text.Pandoc.Parsing import Text.Pandoc.Shared import Data.Version (showVersion) +import Text.JSON.Generic import Paths_pandoc (version) -- | Version number of pandoc library. pandocVersion :: String pandocVersion = showVersion version + +-- | Association list of formats and readers. +readers :: [(String, ParserState -> String -> Pandoc)] +readers = [("native" , \_ -> read) + ,("json" , \_ -> decodeJSON) + ,("markdown" , readMarkdown) + ,("markdown+lhs" , readMarkdown) + ,("rst" , readRST) + ,("textile" , readTextile) -- TODO : textile+lhs + ,("rst+lhs" , readRST) + ,("html" , readHtml) + ,("latex" , readLaTeX) + ,("latex+lhs" , readLaTeX) + ] + +-- | Association list of formats and writers (omitting the +-- binary writers, odt and epub). +writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] +writers = [("native" , writeNative) + ,("json" , \_ -> encodeJSON) + ,("html" , writeHtmlString) + ,("html+lhs" , writeHtmlString) + ,("s5" , writeHtmlString) + ,("slidy" , writeHtmlString) + ,("docbook" , writeDocbook) + ,("opendocument" , writeOpenDocument) + ,("latex" , writeLaTeX) + ,("latex+lhs" , writeLaTeX) + ,("context" , writeConTeXt) + ,("texinfo" , writeTexinfo) + ,("man" , writeMan) + ,("markdown" , writeMarkdown) + ,("markdown+lhs" , writeMarkdown) + ,("plain" , writePlain) + ,("rst" , writeRST) + ,("rst+lhs" , writeRST) + ,("mediawiki" , writeMediaWiki) + ,("textile" , writeTextile) + ,("rtf" , writeRTF) + ,("org" , writeOrg) + ] diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 1b206e4c7..48c6aa70d 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -586,7 +586,6 @@ data ParserState = ParserState { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? stateParserContext :: ParserContext, -- ^ Inside list? stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? - stateSanitizeHTML :: Bool, -- ^ Sanitize HTML? stateKeys :: KeyTable, -- ^ List of reference keys stateCitations :: [String], -- ^ List of available citations stateNotes :: NoteTable, -- ^ List of notes @@ -614,7 +613,6 @@ defaultParserState = ParserState { stateParseRaw = False, stateParserContext = NullState, stateQuoteContext = NoQuote, - stateSanitizeHTML = False, stateKeys = M.empty, stateCitations = [], stateNotes = [], diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f05fdd57b..ed026eb49 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -40,7 +40,6 @@ module Text.Pandoc.Readers.HTML ( extractTagType, htmlBlockElement, htmlComment, - unsanitaryURI ) where import Text.ParserCombinators.Parsec @@ -51,7 +50,6 @@ import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Data.Maybe ( fromMaybe ) import Data.List ( isPrefixOf, isSuffixOf, intercalate ) import Data.Char ( toLower, isAlphaNum ) -import Network.URI ( parseURIReference, URI (..) ) import Control.Monad ( liftM, when ) -- | Convert HTML-formatted string to 'Pandoc' document. @@ -85,36 +83,6 @@ blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div", "dt", "frameset", "li", "tbody", "td", "tfoot", "th", "thead", "tr", "script", "style"] -sanitaryTags :: [[Char]] -sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big", - "blockquote", "br", "button", "caption", "center", - "cite", "code", "col", "colgroup", "dd", "del", "dfn", - "dir", "div", "dl", "dt", "em", "fieldset", "font", - "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", - "i", "img", "input", "ins", "kbd", "label", "legend", - "li", "map", "menu", "ol", "optgroup", "option", "p", - "pre", "q", "s", "samp", "select", "small", "span", - "strike", "strong", "sub", "sup", "table", "tbody", - "td", "textarea", "tfoot", "th", "thead", "tr", "tt", - "u", "ul", "var"] - -sanitaryAttributes :: [[Char]] -sanitaryAttributes = ["abbr", "accept", "accept-charset", - "accesskey", "action", "align", "alt", "axis", - "border", "cellpadding", "cellspacing", "char", - "charoff", "charset", "checked", "cite", "class", - "clear", "cols", "colspan", "color", "compact", - "coords", "datetime", "dir", "disabled", - "enctype", "for", "frame", "headers", "height", - "href", "hreflang", "hspace", "id", "ismap", - "label", "lang", "longdesc", "maxlength", "media", - "method", "multiple", "name", "nohref", "noshade", - "nowrap", "prompt", "readonly", "rel", "rev", - "rows", "rowspan", "rules", "scope", "selected", - "shape", "size", "span", "src", "start", - "summary", "tabindex", "target", "title", "type", - "usemap", "valign", "value", "vspace", "width"] - -- taken from HXT and extended closes :: String -> String -> Bool @@ -153,41 +121,6 @@ _ `closes` _ = False -- HTML utility functions -- --- | Returns @True@ if sanitization is specified and the specified tag is --- not on the sanitized tag list. -unsanitaryTag :: [Char] - -> GenParser tok ParserState Bool -unsanitaryTag tag = do - st <- getState - return $ stateSanitizeHTML st && tag `notElem` sanitaryTags - --- | returns @True@ if sanitization is specified and the specified attribute --- is not on the sanitized attribute list. -unsanitaryAttribute :: ([Char], String, t) - -> GenParser tok ParserState Bool -unsanitaryAttribute (attr, val, _) = do - st <- getState - return $ stateSanitizeHTML st && - (attr `notElem` sanitaryAttributes || - (attr `elem` ["href","src"] && unsanitaryURI val)) - --- | Returns @True@ if the specified URI is potentially a security risk. -unsanitaryURI :: String -> Bool -unsanitaryURI u = - let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:", - "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:", - "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:", - "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:", - "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:", - "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:", - "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:", - "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:", - "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:", - "snews:", "webcal:", "ymsgr:"] - in case parseURIReference (escapeURI u) of - Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes - Nothing -> True - -- | Read blocks until end tag. blocksTilEnd :: String -> GenParser Char ParserState [Block] blocksTilEnd tag = do @@ -240,10 +173,7 @@ anyHtmlTag = try $ do char '>' let result = "<" ++ tag ++ concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">" - unsanitary <- unsanitaryTag tag - if unsanitary - then return $ "<!-- unsafe HTML removed -->" - else return result + return result anyHtmlEndTag :: GenParser Char ParserState [Char] anyHtmlEndTag = try $ do @@ -255,10 +185,7 @@ anyHtmlEndTag = try $ do spaces char '>' let result = "</" ++ tag ++ ">" - unsanitary <- unsanitaryTag tag - if unsanitary - then return $ "<!-- unsafe HTML removed -->" - else return result + return result htmlTag :: Bool -> String @@ -294,16 +221,10 @@ quoted quoteChar = do (many (noneOf [quoteChar])) return (result, [quoteChar]) -nullAttribute :: ([Char], [Char], [Char]) -nullAttribute = ("", "", "") - htmlAttribute :: GenParser Char ParserState ([Char], [Char], [Char]) htmlAttribute = do attr <- htmlRegularAttribute <|> htmlMinimizedAttribute - unsanitary <- unsanitaryAttribute attr - if unsanitary - then return nullAttribute - else return attr + return attr -- minimized boolean attribute htmlMinimizedAttribute :: GenParser Char st ([Char], [Char], [Char]) @@ -364,10 +285,7 @@ htmlScript = try $ do lookAhead $ htmlOpenTag "script" open <- anyHtmlTag rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script") - st <- getState - if stateSanitizeHTML st && not ("script" `elem` sanitaryTags) - then return "<!-- unsafe HTML removed -->" - else return $ open ++ rest ++ "</script>" + return $ open ++ rest ++ "</script>" scriptChunk :: GenParser Char ParserState [Char] scriptChunk = jsComment <|> jsString <|> jsChars @@ -399,10 +317,7 @@ htmlStyle = try $ do lookAhead $ htmlOpenTag "style" open <- anyHtmlTag rest <- manyTill anyChar (htmlEndTag "style") - st <- getState - if stateSanitizeHTML st && not ("style" `elem` sanitaryTags) - then return "<!-- unsafe HTML removed -->" - else return $ open ++ rest ++ "</style>" + return $ open ++ rest ++ "</style>" htmlBlockElement :: GenParser Char ParserState [Char] htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definition ] diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 406809dfc..0bc13d2dd 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -119,20 +119,15 @@ anyEnvironment = try $ do -- | Process LaTeX preamble, extracting metadata. processLaTeXPreamble :: GenParser Char ParserState () -processLaTeXPreamble = try $ manyTill - (choice [bibliographic, comment, unknownCommand, nullBlock]) - (try (string "\\begin{document}")) >> - spaces +processLaTeXPreamble = + skipMany $ notFollowedBy' anyEnvironment >> block -- | Parse LaTeX and return 'Pandoc'. parseLaTeX :: GenParser Char ParserState Pandoc parseLaTeX = do - optional processLaTeXPreamble -- preamble might not be present (fragment) - spaces - blocks <- parseBlocks spaces - optional $ try (string "\\end{document}" >> many anyChar) - -- might not be present (fragment) + blocks <- try (processLaTeXPreamble >> spaces >> environment "document") + <|> many block spaces eof state <- getState @@ -420,8 +415,8 @@ ignore = try $ do unknownCommand :: GenParser Char ParserState Block unknownCommand = try $ do - notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", - "document"] + spaces + notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"] state <- getState when (stateParserContext state == ListItemState) $ notFollowedBy' (string "\\item") diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 59f825808..a68741bda 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -41,7 +41,7 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' ) import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag, anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType, - htmlBlockElement, htmlComment, unsanitaryURI ) + htmlBlockElement, htmlComment ) import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.ParserCombinators.Parsec import Control.Monad (when, liftM, guard) @@ -202,9 +202,17 @@ referenceKey = try $ do lab <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') - let sourceURL excludes = many $ - optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' referenceTitle >> char ' ')) - src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" + let nl = char '\n' >> notFollowedBy blankline >> return ' ' + let sourceURL = liftM unwords $ many $ try $ do + notFollowedBy' referenceTitle + skipMany (oneOf " \t") + optional nl + notFollowedBy' reference + skipMany (oneOf " \t") + many1 (noneOf " \t\n") + let betweenAngles = try $ char '<' >> + manyTill (noneOf ">\n" <|> nl) (char '>') + src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle blanklines endPos <- getPosition @@ -650,16 +658,14 @@ isHtmlOrBlank _ = False para :: GenParser Char ParserState Block para = try $ do - result <- many1 inline - if all isHtmlOrBlank result - then fail "treat as raw HTML" - else return () - newline - blanklines <|> do st <- getState - if stateStrict st - then lookAhead (blockQuote <|> header) >> return "" - else pzero - return $ Para $ normalizeSpaces result + result <- liftM normalizeSpaces $ many1 inline + guard $ not . all isHtmlOrBlank $ result + option (Plain result) $ try $ do + newline + blanklines <|> + (getState >>= guard . stateStrict >> + lookAhead (blockQuote <|> header) >> return "") + return $ Para result plain :: GenParser Char ParserState Block plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces @@ -1085,15 +1091,13 @@ endline = try $ do newline notFollowedBy blankline st <- getState - if stateStrict st - then do notFollowedBy emailBlockQuoteStart - notFollowedBy (char '#') -- atx header - else return () + when (stateStrict st) $ do + notFollowedBy emailBlockQuoteStart + notFollowedBy (char '#') -- atx header -- parse potential list-starts differently if in a list: - if stateParserContext st == ListItemState - then notFollowedBy' (bulletListStart <|> - (anyOrderedListStart >> return ())) - else return () + when (stateParserContext st == ListItemState) $ do + notFollowedBy' bulletListStart + notFollowedBy' anyOrderedListStart return Space -- @@ -1118,9 +1122,16 @@ source = source' :: GenParser Char st (String, [Char]) source' = do skipSpaces - let sourceURL excludes = many $ - optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' ')) - src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" + let nl = char '\n' >>~ notFollowedBy blankline + let sourceURL = liftM unwords $ many $ try $ do + notFollowedBy' linkTitle + skipMany (oneOf " \t") + optional nl + skipMany (oneOf " \t") + many1 (noneOf " \t\n") + let betweenAngles = try $ char '<' >> + manyTill (noneOf ">\n" <|> nl) (char '>') + src <- try betweenAngles <|> sourceURL tit <- option "" linkTitle skipSpaces eof @@ -1139,10 +1150,7 @@ link :: GenParser Char ParserState Inline link = try $ do lab <- reference (src, tit) <- source <|> referenceLink lab - sanitize <- getState >>= return . stateSanitizeHTML - if sanitize && unsanitaryURI src - then fail "Unsanitary URI" - else return $ Link lab (src, tit) + return $ Link lab (src, tit) -- a link like [this][ref] or [this][] or [this] referenceLink :: [Inline] @@ -1162,12 +1170,9 @@ autoLink = try $ do (orig, src) <- uri <|> emailAddress char '>' st <- getState - let sanitize = stateSanitizeHTML st - if sanitize && unsanitaryURI src - then fail "Unsanitary URI" - else return $ if stateStrict st - then Link [Str orig] (src, "") - else Link [Code orig] (src, "") + return $ if stateStrict st + then Link [Str orig] (src, "") + else Link [Code orig] (src, "") image :: GenParser Char ParserState Inline image = try $ do diff --git a/src/pandoc.hs b/src/pandoc.hs index e8220de34..52dfb731a 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -29,7 +29,6 @@ Parses command-line options and calls the appropriate readers and writers. -} module Main where -import Text.JSON.Generic (encodeJSON, decodeJSON) import Text.Pandoc import Text.Pandoc.S5 (s5HeaderIncludes) import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile, @@ -81,60 +80,6 @@ wrapWords c = wrap' c c where then ",\n" ++ x ++ wrap' cols (cols - length x) xs else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs --- | Association list of formats and readers. -readers :: [(String, ParserState -> String -> Pandoc)] -readers = [("native" , readPandoc) - ,("json" , readJSON) - ,("markdown" , readMarkdown) - ,("markdown+lhs" , readMarkdown) - ,("rst" , readRST) - ,("textile" , readTextile) -- TODO : textile+lhs - ,("rst+lhs" , readRST) - ,("html" , readHtml) - ,("latex" , readLaTeX) - ,("latex+lhs" , readLaTeX) - ] - --- | Reader for native Pandoc format. -readPandoc :: ParserState -> String -> Pandoc -readPandoc _ = read - --- | Reader for JSON version of Pandoc AST. -readJSON :: ParserState -> String -> Pandoc -readJSON _ = decodeJSON - --- | Writer for JSON version of Pandoc AST. -writeJSON :: WriterOptions -> Pandoc -> String -writeJSON _ = encodeJSON - --- | Association list of formats and writers. -writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] -writers = [("native" , writeNative) - ,("json" , writeJSON) - ,("html" , writeHtmlString) - ,("html+lhs" , writeHtmlString) - ,("s5" , writeHtmlString) - ,("slidy" , writeHtmlString) - ,("docbook" , writeDocbook) - ,("opendocument" , writeOpenDocument) - ,("odt" , \_ _ -> "") - ,("epub" , \_ _ -> "") - ,("latex" , writeLaTeX) - ,("latex+lhs" , writeLaTeX) - ,("context" , writeConTeXt) - ,("texinfo" , writeTexinfo) - ,("man" , writeMan) - ,("markdown" , writeMarkdown) - ,("markdown+lhs" , writeMarkdown) - ,("plain" , writePlain) - ,("rst" , writeRST) - ,("rst+lhs" , writeRST) - ,("mediawiki" , writeMediaWiki) - ,("textile" , writeTextile) - ,("rtf" , writeRTF) - ,("org" , writeOrg) - ] - isNonTextOutput :: String -> Bool isNonTextOutput = (`elem` ["odt","epub"]) @@ -166,7 +111,6 @@ data Opt = Opt , optStrict :: Bool -- ^ Use strict markdown syntax , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , optWrapText :: Bool -- ^ Wrap text - , optSanitizeHTML :: Bool -- ^ Sanitize HTML , optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply , optEmailObfuscation :: ObfuscationMethod , optIdentifierPrefix :: String @@ -205,7 +149,6 @@ defaultOpts = Opt , optStrict = False , optReferenceLinks = False , optWrapText = True - , optSanitizeHTML = False , optPlugins = [] , optEmailObfuscation = JavascriptObfuscation , optIdentifierPrefix = "" @@ -223,13 +166,13 @@ options = (ReqArg (\arg opt -> return opt { optReader = map toLower arg }) "FORMAT") - "" -- ("(" ++ (intercalate ", " $ map fst readers) ++ ")") + "" , Option "tw" ["to","write"] (ReqArg (\arg opt -> return opt { optWriter = map toLower arg }) "FORMAT") - "" -- ("(" ++ (intercalate ", " $ map fst writers) ++ ")") + "" , Option "s" ["standalone"] (NoArg @@ -355,11 +298,6 @@ options = (\opt -> return opt { optWrapText = False })) "" -- "Do not wrap text in output" - , Option "" ["sanitize-html"] - (NoArg - (\opt -> return opt { optSanitizeHTML = True })) - "" -- "Sanitize HTML" - , Option "" ["email-obfuscation"] (ReqArg (\arg opt -> do @@ -579,7 +517,7 @@ usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ (intercalate ", " $ map fst readers) ++ "\nOutput formats: " ++ - (intercalate ", " $ map fst writers) ++ "\nOptions:") + (intercalate ", " $ map fst writers ++ ["odt","epub"]) ++ "\nOptions:") -- Determine default reader based on source file extensions defaultReaderName :: String -> [FilePath] -> String @@ -684,7 +622,6 @@ main = do , optStrict = strict , optReferenceLinks = referenceLinks , optWrapText = wrap - , optSanitizeHTML = sanitize , optEmailObfuscation = obfuscationMethod , optIdentifierPrefix = idPrefix , optIndentedCodeClasses = codeBlockClasses @@ -730,8 +667,8 @@ main = do Nothing -> error ("Unknown reader: " ++ readerName') let writer = case lookup writerName' writers of - Just _ | writerName' == "epub" -> writeEPUB epubStylesheet - Just _ | writerName' == "odt" -> writeODT referenceODT + Nothing | writerName' == "epub" -> writeEPUB epubStylesheet + Nothing | writerName' == "odt" -> writeODT referenceODT Just r -> \o -> return . fromString . r o Nothing -> error $ "Unknown writer: " ++ @@ -783,7 +720,6 @@ main = do let startParserState = defaultParserState { stateParseRaw = parseRaw, stateTabStop = tabStop, - stateSanitizeHTML = sanitize, stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' || lhsExtension sources, stateStandalone = standalone', diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index 784b14ccc..6fce927c1 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -6,7 +6,7 @@ Pandoc (Meta {docTitle = [Str "Title",Space,Str "spanning",Space,Str "multiple", , Para [TeX "\\placeformula",Space,TeX "\\startformula\n L_{1} = L_{2}\n \\stopformula"] , Para [TeX "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]"] , Header 2 [Str "URLs",Space,Str "with",Space,Str "spaces"] -, Para [Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20%20and%20%20baz",""),Space,Link [Str "foo"] ("bar%20baz","title")] +, Para [Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("bar%20baz","title")] , Para [Link [Str "baz"] ("/foo%20foo",""),Space,Link [Str "bam"] ("/foo%20fee",""),Space,Link [Str "bork"] ("/foo/zee%20zob","title")] , Header 2 [Str "Horizontal",Space,Str "rules",Space,Str "with",Space,Str "spaces",Space,Str "at",Space,Str "end"] , HorizontalRule diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt index f4540d84d..258002b8a 100644 --- a/tests/markdown-reader-more.txt +++ b/tests/markdown-reader-more.txt @@ -31,7 +31,8 @@ ## URLs with spaces [foo](/bar and baz) -[foo](/bar and baz ) +[foo](/bar + and baz ) [foo]( /bar and baz ) [foo](bar baz "title" ) |