diff options
| -rw-r--r-- | Benchmark.hs | 36 | ||||
| -rw-r--r-- | README | 6 | ||||
| -rw-r--r-- | pandoc.cabal | 2 | ||||
| -rw-r--r-- | src/Text/Pandoc.hs | 45 | ||||
| -rw-r--r-- | src/Text/Pandoc/Parsing.hs | 2 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 95 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 17 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 75 | ||||
| -rw-r--r-- | src/pandoc.hs | 74 | ||||
| -rw-r--r-- | tests/markdown-reader-more.native | 2 | ||||
| -rw-r--r-- | tests/markdown-reader-more.txt | 3 | 
11 files changed, 142 insertions, 215 deletions
| diff --git a/Benchmark.hs b/Benchmark.hs new file mode 100644 index 000000000..333b908be --- /dev/null +++ b/Benchmark.hs @@ -0,0 +1,36 @@ +import Text.Pandoc +import Text.Pandoc.Shared (readDataFile) +import Criterion.Main +import Data.List (isSuffixOf) + +readerBench :: Pandoc +            -> (String, ParserState -> String -> Pandoc) +            -> Benchmark +readerBench doc (name, reader) = +  let writer = case lookup name writers of +                     Just w  -> w +                     Nothing -> error $ "Could not find writer for " ++ name +      inp = writer defaultWriterOptions{ writerWrapText = True +                                       , writerLiterateHaskell = +                                          "+lhs" `isSuffixOf` name } doc +  in  bench (name ++ " reader") $ whnf +        (reader defaultParserState{stateSmart = True +                                  , stateStandalone = True +                                  , stateLiterateHaskell = +                                      "+lhs" `isSuffixOf` name }) inp + +writerBench :: Pandoc +            -> (String, WriterOptions -> Pandoc -> a) +            -> Benchmark +writerBench doc (name, writer) = bench (name ++ " writer") $ whnf +    (writer defaultWriterOptions{ +                   writerWrapText = True +                  , writerLiterateHaskell = "+lhs" `isSuffixOf` name }) doc + +main = do +  inp <- readDataFile (Just ".") "README" +  let ps = defaultParserState{ stateSmart = True } +  let doc = readMarkdown ps inp +  let readerBs = map (readerBench doc) readers +  defaultMain $ map (writerBench doc) writers ++ readerBs + @@ -270,12 +270,6 @@ Options  :   Disable text wrapping in output. By default, text is wrapped      appropriately for the output format. -`--sanitize-html` -:   Sanitizes HTML (in markdown or HTML input) using a whitelist. -    Unsafe tags are replaced by HTML comments; unsafe attributes -    are omitted.  URIs in links and images are also checked against a -    whitelist of URI schemes. -  `--email-obfuscation=`*none|javascript|references*  :   Specify a method for obfuscating `mailto:` links in HTML documents.      *none* leaves `mailto:` links as they are.  *javascript* obfuscates diff --git a/pandoc.cabal b/pandoc.cabal index c2cc7268a..727bae405 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -74,6 +74,8 @@ Extra-Source-Files:                   -- code to create pandoc.1 man page                   MakeManPage.hs,                   manpage.template, +                 -- benchmarking +                 Benchmark.hs,                   -- tests                   tests/bodybg.gif,                   tests/html-reader.html, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index ab1e3cd03..7d3468461 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -57,6 +57,9 @@ module Text.Pandoc                 (                  -- * Definitions                 module Text.Pandoc.Definition +               -- * Lists of readers and writers +               , readers +               , writers                 -- * Readers: converting /to/ Pandoc format                 , readMarkdown                 , readRST @@ -127,8 +130,50 @@ import Text.Pandoc.Templates  import Text.Pandoc.Parsing  import Text.Pandoc.Shared  import Data.Version (showVersion) +import Text.JSON.Generic  import Paths_pandoc (version)  -- | Version number of pandoc library.  pandocVersion :: String  pandocVersion = showVersion version + +-- | Association list of formats and readers. +readers :: [(String, ParserState -> String -> Pandoc)] +readers = [("native"       , \_ -> read) +          ,("json"         , \_ -> decodeJSON) +          ,("markdown"     , readMarkdown) +          ,("markdown+lhs" , readMarkdown) +          ,("rst"          , readRST) +          ,("textile"      , readTextile) -- TODO : textile+lhs  +          ,("rst+lhs"      , readRST) +          ,("html"         , readHtml) +          ,("latex"        , readLaTeX) +          ,("latex+lhs"    , readLaTeX) +          ] + +-- | Association list of formats and writers (omitting the +-- binary writers, odt and epub). +writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] +writers = [("native"       , writeNative) +          ,("json"         , \_ -> encodeJSON) +          ,("html"         , writeHtmlString) +          ,("html+lhs"     , writeHtmlString) +          ,("s5"           , writeHtmlString) +          ,("slidy"        , writeHtmlString) +          ,("docbook"      , writeDocbook) +          ,("opendocument" , writeOpenDocument) +          ,("latex"        , writeLaTeX) +          ,("latex+lhs"    , writeLaTeX) +          ,("context"      , writeConTeXt) +          ,("texinfo"      , writeTexinfo) +          ,("man"          , writeMan) +          ,("markdown"     , writeMarkdown) +          ,("markdown+lhs" , writeMarkdown) +          ,("plain"        , writePlain) +          ,("rst"          , writeRST) +          ,("rst+lhs"      , writeRST) +          ,("mediawiki"    , writeMediaWiki) +          ,("textile"      , writeTextile) +          ,("rtf"          , writeRTF) +          ,("org"          , writeOrg) +          ] diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 1b206e4c7..48c6aa70d 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -586,7 +586,6 @@ data ParserState = ParserState      { stateParseRaw        :: Bool,          -- ^ Parse raw HTML and LaTeX?        stateParserContext   :: ParserContext, -- ^ Inside list?        stateQuoteContext    :: QuoteContext,  -- ^ Inside quoted environment? -      stateSanitizeHTML    :: Bool,          -- ^ Sanitize HTML?        stateKeys            :: KeyTable,      -- ^ List of reference keys        stateCitations       :: [String],      -- ^ List of available citations        stateNotes           :: NoteTable,     -- ^ List of notes @@ -614,7 +613,6 @@ defaultParserState =      ParserState { stateParseRaw        = False,                    stateParserContext   = NullState,                    stateQuoteContext    = NoQuote, -                  stateSanitizeHTML    = False,                    stateKeys            = M.empty,                    stateCitations       = [],                    stateNotes           = [], diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f05fdd57b..ed026eb49 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -40,7 +40,6 @@ module Text.Pandoc.Readers.HTML (                                   extractTagType,                                   htmlBlockElement,                                   htmlComment, -                                 unsanitaryURI                                  ) where  import Text.ParserCombinators.Parsec @@ -51,7 +50,6 @@ import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )  import Data.Maybe ( fromMaybe )  import Data.List ( isPrefixOf, isSuffixOf, intercalate )  import Data.Char ( toLower, isAlphaNum ) -import Network.URI ( parseURIReference, URI (..) )  import Control.Monad ( liftM, when )  -- | Convert HTML-formatted string to 'Pandoc' document. @@ -85,36 +83,6 @@ blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div",                   "dt", "frameset", "li", "tbody", "td", "tfoot",                   "th", "thead", "tr", "script", "style"] -sanitaryTags :: [[Char]] -sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big", -                "blockquote", "br", "button", "caption", "center", -                "cite", "code", "col", "colgroup", "dd", "del", "dfn", -                "dir", "div", "dl", "dt", "em", "fieldset", "font", -                "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", -                "i", "img", "input", "ins", "kbd", "label", "legend", -                "li", "map", "menu", "ol", "optgroup", "option", "p", -                "pre", "q", "s", "samp", "select", "small", "span", -                "strike", "strong", "sub", "sup", "table", "tbody", -                "td", "textarea", "tfoot", "th", "thead", "tr", "tt", -                "u", "ul", "var"] - -sanitaryAttributes :: [[Char]] -sanitaryAttributes = ["abbr", "accept", "accept-charset", -                      "accesskey", "action", "align", "alt", "axis", -                      "border", "cellpadding", "cellspacing", "char", -                      "charoff", "charset", "checked", "cite", "class", -                      "clear", "cols", "colspan", "color", "compact", -                      "coords", "datetime", "dir", "disabled", -                      "enctype", "for", "frame", "headers", "height", -                      "href", "hreflang", "hspace", "id", "ismap", -                      "label", "lang", "longdesc", "maxlength", "media", -                      "method", "multiple", "name", "nohref", "noshade", -                      "nowrap", "prompt", "readonly", "rel", "rev", -                      "rows", "rowspan", "rules", "scope", "selected", -                      "shape", "size", "span", "src", "start", -                      "summary", "tabindex", "target", "title", "type", -                      "usemap", "valign", "value", "vspace", "width"] -  -- taken from HXT and extended  closes :: String -> String -> Bool @@ -153,41 +121,6 @@ _ `closes` _ = False  -- HTML utility functions  -- --- | Returns @True@ if sanitization is specified and the specified tag is  ---  not on the sanitized tag list. -unsanitaryTag :: [Char] -              -> GenParser tok ParserState Bool -unsanitaryTag tag = do -  st <- getState -  return $ stateSanitizeHTML st && tag `notElem` sanitaryTags - --- | returns @True@ if sanitization is specified and the specified attribute ---  is not on the sanitized attribute list. -unsanitaryAttribute :: ([Char], String, t) -                    -> GenParser tok ParserState Bool -unsanitaryAttribute (attr, val, _) = do -  st <- getState -  return $ stateSanitizeHTML st && -           (attr `notElem` sanitaryAttributes || -             (attr `elem` ["href","src"] && unsanitaryURI val)) - --- | Returns @True@ if the specified URI is potentially a security risk. -unsanitaryURI :: String -> Bool -unsanitaryURI u = -  let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:", -             "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:", -             "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:", -             "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:", -             "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:", -             "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:", -             "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:", -             "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:", -             "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:", -             "snews:", "webcal:", "ymsgr:"] -  in  case parseURIReference (escapeURI u) of -           Just p  -> (map toLower $ uriScheme p) `notElem` safeURISchemes -           Nothing -> True -  -- | Read blocks until end tag.  blocksTilEnd :: String -> GenParser Char ParserState [Block]  blocksTilEnd tag = do @@ -240,10 +173,7 @@ anyHtmlTag = try $ do    char '>'    let result = "<" ++ tag ++                  concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">" -  unsanitary <- unsanitaryTag tag -  if unsanitary -     then return $ "<!-- unsafe HTML removed -->" -     else return result +  return result  anyHtmlEndTag :: GenParser Char ParserState [Char]  anyHtmlEndTag = try $ do @@ -255,10 +185,7 @@ anyHtmlEndTag = try $ do    spaces    char '>'    let result = "</" ++ tag ++ ">" -  unsanitary <- unsanitaryTag tag -  if unsanitary -     then return $ "<!-- unsafe HTML removed -->" -     else return result  +  return result   htmlTag :: Bool          -> String @@ -294,16 +221,10 @@ quoted quoteChar = do                      (many (noneOf [quoteChar]))    return (result, [quoteChar]) -nullAttribute :: ([Char], [Char], [Char]) -nullAttribute = ("", "", "")  -  htmlAttribute :: GenParser Char ParserState ([Char], [Char], [Char])  htmlAttribute = do    attr <- htmlRegularAttribute <|> htmlMinimizedAttribute -  unsanitary <- unsanitaryAttribute attr -  if unsanitary -     then return nullAttribute -     else return attr +  return attr  -- minimized boolean attribute  htmlMinimizedAttribute :: GenParser Char st ([Char], [Char], [Char]) @@ -364,10 +285,7 @@ htmlScript = try $ do    lookAhead $ htmlOpenTag "script"    open <- anyHtmlTag    rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script") -  st <- getState -  if stateSanitizeHTML st && not ("script" `elem` sanitaryTags) -     then return "<!-- unsafe HTML removed -->" -     else return $ open ++ rest ++ "</script>" +  return $ open ++ rest ++ "</script>"  scriptChunk :: GenParser Char ParserState [Char]  scriptChunk = jsComment <|> jsString <|> jsChars @@ -399,10 +317,7 @@ htmlStyle = try $ do    lookAhead $ htmlOpenTag "style"    open <- anyHtmlTag    rest <- manyTill anyChar (htmlEndTag "style") -  st <- getState -  if stateSanitizeHTML st && not ("style" `elem` sanitaryTags) -     then return "<!-- unsafe HTML removed -->" -     else return $ open ++ rest ++ "</style>" +  return $ open ++ rest ++ "</style>"  htmlBlockElement :: GenParser Char ParserState [Char]  htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definition ] diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 406809dfc..0bc13d2dd 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -119,20 +119,15 @@ anyEnvironment =  try $ do  -- | Process LaTeX preamble, extracting metadata.  processLaTeXPreamble :: GenParser Char ParserState () -processLaTeXPreamble = try $ manyTill  -  (choice [bibliographic, comment, unknownCommand, nullBlock])  -  (try (string "\\begin{document}")) >>  -  spaces +processLaTeXPreamble = +  skipMany $ notFollowedBy' anyEnvironment >> block  -- | Parse LaTeX and return 'Pandoc'.  parseLaTeX :: GenParser Char ParserState Pandoc  parseLaTeX = do -  optional processLaTeXPreamble -- preamble might not be present (fragment) -  spaces -  blocks <- parseBlocks    spaces -  optional $ try (string "\\end{document}" >> many anyChar)  -  -- might not be present (fragment) +  blocks <- try (processLaTeXPreamble >> spaces >> environment "document") +        <|> many block    spaces    eof    state <- getState @@ -420,8 +415,8 @@ ignore = try $ do  unknownCommand :: GenParser Char ParserState Block  unknownCommand = try $ do -  notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", -                                     "document"] +  spaces +  notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"]    state <- getState    when (stateParserContext state == ListItemState) $       notFollowedBy' (string "\\item") diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 59f825808..a68741bda 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -41,7 +41,7 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )  import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,                                     anyHtmlInlineTag, anyHtmlTag,                                    anyHtmlEndTag, htmlEndTag, extractTagType, -                                  htmlBlockElement, htmlComment, unsanitaryURI ) +                                  htmlBlockElement, htmlComment )  import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )  import Text.ParserCombinators.Parsec  import Control.Monad (when, liftM, guard) @@ -202,9 +202,17 @@ referenceKey = try $ do    lab <- reference    char ':'    skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') -  let sourceURL excludes = many $ -        optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' referenceTitle >> char ' ')) -  src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" +  let nl = char '\n' >> notFollowedBy blankline >> return ' ' +  let sourceURL = liftM unwords $ many $ try $ do +                    notFollowedBy' referenceTitle +                    skipMany (oneOf " \t") +                    optional nl +                    notFollowedBy' reference +                    skipMany (oneOf " \t") +                    many1 (noneOf " \t\n") +  let betweenAngles = try $ char '<' >> +                            manyTill (noneOf ">\n" <|> nl) (char '>') +  src <- try betweenAngles <|> sourceURL    tit <- option "" referenceTitle    blanklines    endPos <- getPosition @@ -650,16 +658,14 @@ isHtmlOrBlank _              = False  para :: GenParser Char ParserState Block  para = try $ do  -  result <- many1 inline -  if all isHtmlOrBlank result -     then fail "treat as raw HTML" -     else return () -  newline -  blanklines <|> do st <- getState -                    if stateStrict st -                       then lookAhead (blockQuote <|> header) >> return "" -                       else pzero -  return $ Para $ normalizeSpaces result +  result <- liftM normalizeSpaces $ many1 inline +  guard $ not . all isHtmlOrBlank $ result +  option (Plain result) $ try $ do +              newline +              blanklines <|> +                (getState >>= guard . stateStrict >> +                 lookAhead (blockQuote <|> header) >> return "") +              return $ Para result  plain :: GenParser Char ParserState Block  plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces @@ -1085,15 +1091,13 @@ endline = try $ do    newline    notFollowedBy blankline    st <- getState -  if stateStrict st  -    then do notFollowedBy emailBlockQuoteStart -            notFollowedBy (char '#')  -- atx header -    else return ()  +  when (stateStrict st) $ do +    notFollowedBy emailBlockQuoteStart +    notFollowedBy (char '#')  -- atx header    -- parse potential list-starts differently if in a list: -  if stateParserContext st == ListItemState -     then notFollowedBy' (bulletListStart <|>  -                          (anyOrderedListStart >> return ())) -     else return () +  when (stateParserContext st == ListItemState) $ do +     notFollowedBy' bulletListStart +     notFollowedBy' anyOrderedListStart    return Space  -- @@ -1118,9 +1122,16 @@ source =  source' :: GenParser Char st (String, [Char])  source' = do    skipSpaces -  let sourceURL excludes = many $ -        optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' ')) -  src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" +  let nl = char '\n' >>~ notFollowedBy blankline +  let sourceURL = liftM unwords $ many $ try $ do +                    notFollowedBy' linkTitle +                    skipMany (oneOf " \t") +                    optional nl +                    skipMany (oneOf " \t") +                    many1 (noneOf " \t\n") +  let betweenAngles = try $ char '<' >> +                            manyTill (noneOf ">\n" <|> nl) (char '>') +  src <- try betweenAngles <|> sourceURL    tit <- option "" linkTitle    skipSpaces    eof @@ -1139,10 +1150,7 @@ link :: GenParser Char ParserState Inline  link = try $ do    lab <- reference    (src, tit) <- source <|> referenceLink lab -  sanitize <- getState >>= return . stateSanitizeHTML -  if sanitize && unsanitaryURI src -     then fail "Unsanitary URI" -     else return $ Link lab (src, tit) +  return $ Link lab (src, tit)  -- a link like [this][ref] or [this][] or [this]  referenceLink :: [Inline] @@ -1162,12 +1170,9 @@ autoLink = try $ do    (orig, src) <- uri <|> emailAddress    char '>'    st <- getState -  let sanitize = stateSanitizeHTML st -  if sanitize && unsanitaryURI src -     then fail "Unsanitary URI" -     else return $ if stateStrict st -                      then Link [Str orig] (src, "") -                      else Link [Code orig] (src, "") +  return $ if stateStrict st +              then Link [Str orig] (src, "") +              else Link [Code orig] (src, "")  image :: GenParser Char ParserState Inline  image = try $ do diff --git a/src/pandoc.hs b/src/pandoc.hs index e8220de34..52dfb731a 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -29,7 +29,6 @@ Parses command-line options and calls the appropriate readers and  writers.  -}  module Main where -import Text.JSON.Generic (encodeJSON, decodeJSON)  import Text.Pandoc  import Text.Pandoc.S5 (s5HeaderIncludes)  import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile, @@ -81,60 +80,6 @@ wrapWords c = wrap' c c where                                                         then ",\n" ++ x ++ wrap' cols (cols - length x) xs                                                         else ", "  ++ x ++ wrap' cols (remaining - (length x + 2)) xs --- | Association list of formats and readers. -readers :: [(String, ParserState -> String -> Pandoc)] -readers = [("native"       , readPandoc) -          ,("json"         , readJSON) -          ,("markdown"     , readMarkdown) -          ,("markdown+lhs" , readMarkdown) -          ,("rst"          , readRST) -          ,("textile"      , readTextile) -- TODO : textile+lhs  -          ,("rst+lhs"      , readRST) -          ,("html"         , readHtml) -          ,("latex"        , readLaTeX) -          ,("latex+lhs"    , readLaTeX) -          ] - --- | Reader for native Pandoc format. -readPandoc :: ParserState -> String -> Pandoc -readPandoc _ = read - --- | Reader for JSON version of Pandoc AST. -readJSON :: ParserState -> String -> Pandoc -readJSON _ = decodeJSON - --- | Writer for JSON version of Pandoc AST. -writeJSON :: WriterOptions -> Pandoc -> String -writeJSON _ = encodeJSON - --- | Association list of formats and writers. -writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] -writers = [("native"       , writeNative) -          ,("json"         , writeJSON) -          ,("html"         , writeHtmlString) -          ,("html+lhs"     , writeHtmlString) -          ,("s5"           , writeHtmlString) -          ,("slidy"        , writeHtmlString) -          ,("docbook"      , writeDocbook) -          ,("opendocument" , writeOpenDocument) -          ,("odt"          , \_ _ -> "") -          ,("epub"         , \_ _ -> "") -          ,("latex"        , writeLaTeX) -          ,("latex+lhs"    , writeLaTeX) -          ,("context"      , writeConTeXt) -          ,("texinfo"      , writeTexinfo) -          ,("man"          , writeMan) -          ,("markdown"     , writeMarkdown) -          ,("markdown+lhs" , writeMarkdown) -          ,("plain"        , writePlain) -          ,("rst"          , writeRST) -          ,("rst+lhs"      , writeRST) -          ,("mediawiki"    , writeMediaWiki) -          ,("textile"      , writeTextile) -          ,("rtf"          , writeRTF) -          ,("org"          , writeOrg) -          ] -  isNonTextOutput :: String -> Bool  isNonTextOutput = (`elem` ["odt","epub"]) @@ -166,7 +111,6 @@ data Opt = Opt      , optStrict            :: Bool    -- ^ Use strict markdown syntax      , optReferenceLinks    :: Bool    -- ^ Use reference links in writing markdown, rst      , optWrapText          :: Bool    -- ^ Wrap text -    , optSanitizeHTML      :: Bool    -- ^ Sanitize HTML      , optPlugins           :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply      , optEmailObfuscation  :: ObfuscationMethod      , optIdentifierPrefix  :: String @@ -205,7 +149,6 @@ defaultOpts = Opt      , optStrict            = False      , optReferenceLinks    = False      , optWrapText          = True -    , optSanitizeHTML      = False      , optPlugins           = []      , optEmailObfuscation  = JavascriptObfuscation      , optIdentifierPrefix  = "" @@ -223,13 +166,13 @@ options =                   (ReqArg                    (\arg opt -> return opt { optReader = map toLower arg })                    "FORMAT") -                 "" -- ("(" ++ (intercalate ", " $ map fst readers) ++ ")") +                 ""      , Option "tw" ["to","write"]                   (ReqArg                    (\arg opt -> return opt { optWriter = map toLower arg })                    "FORMAT") -                 "" -- ("(" ++ (intercalate ", " $ map fst writers) ++ ")") +                 ""      , Option "s" ["standalone"]                   (NoArg @@ -355,11 +298,6 @@ options =                    (\opt -> return opt { optWrapText = False }))                   "" -- "Do not wrap text in output" -    , Option "" ["sanitize-html"] -                 (NoArg -                  (\opt -> return opt { optSanitizeHTML = True })) -                 "" -- "Sanitize HTML" -      , Option "" ["email-obfuscation"]                   (ReqArg                    (\arg opt -> do @@ -579,7 +517,7 @@ usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String  usageMessage programName = usageInfo    (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats:  " ++    (intercalate ", " $ map fst readers) ++ "\nOutput formats:  " ++ -  (intercalate ", " $ map fst writers) ++ "\nOptions:") +  (intercalate ", " $ map fst writers ++ ["odt","epub"]) ++ "\nOptions:")  -- Determine default reader based on source file extensions  defaultReaderName :: String -> [FilePath] -> String @@ -684,7 +622,6 @@ main = do                , optStrict            = strict                , optReferenceLinks    = referenceLinks                , optWrapText          = wrap -              , optSanitizeHTML      = sanitize                , optEmailObfuscation  = obfuscationMethod                , optIdentifierPrefix  = idPrefix                , optIndentedCodeClasses = codeBlockClasses @@ -730,8 +667,8 @@ main = do       Nothing -> error ("Unknown reader: " ++ readerName')    let writer = case lookup writerName' writers of -                Just _ | writerName' == "epub" -> writeEPUB epubStylesheet -                Just _ | writerName' == "odt"  -> writeODT referenceODT +                Nothing | writerName' == "epub" -> writeEPUB epubStylesheet +                Nothing | writerName' == "odt"  -> writeODT referenceODT                  Just r                         -> \o ->                                                       return . fromString . r o                  Nothing                        -> error $ "Unknown writer: " ++ @@ -783,7 +720,6 @@ main = do    let startParserState =           defaultParserState { stateParseRaw        = parseRaw,                                stateTabStop         = tabStop, -                              stateSanitizeHTML    = sanitize,                                stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' ||                                                       lhsExtension sources,                                stateStandalone      = standalone', diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index 784b14ccc..6fce927c1 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -6,7 +6,7 @@ Pandoc (Meta {docTitle = [Str "Title",Space,Str "spanning",Space,Str "multiple",  , Para [TeX "\\placeformula",Space,TeX "\\startformula\n   L_{1} = L_{2}\n   \\stopformula"]  , Para [TeX "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]"]  , Header 2 [Str "URLs",Space,Str "with",Space,Str "spaces"] -, Para [Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20%20and%20%20baz",""),Space,Link [Str "foo"] ("bar%20baz","title")] +, Para [Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("bar%20baz","title")]  , Para [Link [Str "baz"] ("/foo%20foo",""),Space,Link [Str "bam"] ("/foo%20fee",""),Space,Link [Str "bork"] ("/foo/zee%20zob","title")]  , Header 2 [Str "Horizontal",Space,Str "rules",Space,Str "with",Space,Str "spaces",Space,Str "at",Space,Str "end"]  , HorizontalRule diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt index f4540d84d..258002b8a 100644 --- a/tests/markdown-reader-more.txt +++ b/tests/markdown-reader-more.txt @@ -31,7 +31,8 @@  ## URLs with spaces  [foo](/bar and baz) -[foo](/bar and baz ) +[foo](/bar + and baz )  [foo]( /bar  and  baz  )  [foo](bar baz  "title" ) | 
