11 files changed, 142 insertions, 215 deletions
diff --git a/Benchmark.hs b/Benchmark.hs
new file mode 100644
index 000000000..333b908be
--- /dev/null
+++ b/Benchmark.hs
@@ -0,0 +1,36 @@
+import Text.Pandoc
+import Text.Pandoc.Shared (readDataFile)
+import Criterion.Main
+import Data.List (isSuffixOf)
+
+readerBench :: Pandoc
+            -> (String, ParserState -> String -> Pandoc)
+            -> Benchmark
+readerBench doc (name, reader) =
+  let writer = case lookup name writers of
+                     Just w  -> w
+                     Nothing -> error $ "Could not find writer for " ++ name
+      inp = writer defaultWriterOptions{ writerWrapText = True
+                                       , writerLiterateHaskell =
+                                          "+lhs" `isSuffixOf` name } doc
+  in  bench (name ++ " reader") $ whnf
+        (reader defaultParserState{stateSmart = True
+                                  , stateStandalone = True
+                                  , stateLiterateHaskell =
+                                      "+lhs" `isSuffixOf` name }) inp
+
+writerBench :: Pandoc
+            -> (String, WriterOptions -> Pandoc -> a)
+            -> Benchmark
+writerBench doc (name, writer) = bench (name ++ " writer") $ whnf
+    (writer defaultWriterOptions{
+                   writerWrapText = True
+                  , writerLiterateHaskell = "+lhs" `isSuffixOf` name }) doc
+
+main = do
+  inp <- readDataFile (Just ".") "README"
+  let ps = defaultParserState{ stateSmart = True }
+  let doc = readMarkdown ps inp
+  let readerBs = map (readerBench doc) readers
+  defaultMain $ map (writerBench doc) writers ++ readerBs
+
diff --git a/README b/README
index 26859a81d..03fa84547 100644
--- a/README
+++ b/README
@@ -270,12 +270,6 @@ Options
 :   Disable text wrapping in output. By default, text is wrapped
     appropriately for the output format.
 
-`--sanitize-html`
-:   Sanitizes HTML (in markdown or HTML input) using a whitelist.
-    Unsafe tags are replaced by HTML comments; unsafe attributes
-    are omitted.  URIs in links and images are also checked against a
-    whitelist of URI schemes.
-
 `--email-obfuscation=`*none|javascript|references*
 :   Specify a method for obfuscating `mailto:` links in HTML documents.
     *none* leaves `mailto:` links as they are.  *javascript* obfuscates
diff --git a/pandoc.cabal b/pandoc.cabal
index c2cc7268a..727bae405 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -74,6 +74,8 @@ Extra-Source-Files:
                  -- code to create pandoc.1 man page
                  MakeManPage.hs,
                  manpage.template,
+                 -- benchmarking
+                 Benchmark.hs,
                  -- tests
                  tests/bodybg.gif,
                  tests/html-reader.html,
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index ab1e3cd03..7d3468461 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -57,6 +57,9 @@ module Text.Pandoc
                ( 
                -- * Definitions
                module Text.Pandoc.Definition
+               -- * Lists of readers and writers
+               , readers
+               , writers
                -- * Readers: converting /to/ Pandoc format
                , readMarkdown
                , readRST
@@ -127,8 +130,50 @@ import Text.Pandoc.Templates
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
 import Data.Version (showVersion)
+import Text.JSON.Generic
 import Paths_pandoc (version)
 
 -- | Version number of pandoc library.
 pandocVersion :: String
 pandocVersion = showVersion version
+
+-- | Association list of formats and readers.
+readers :: [(String, ParserState -> String -> Pandoc)]
+readers = [("native"       , \_ -> read)
+          ,("json"         , \_ -> decodeJSON)
+          ,("markdown"     , readMarkdown)
+          ,("markdown+lhs" , readMarkdown)
+          ,("rst"          , readRST)
+          ,("textile"      , readTextile) -- TODO : textile+lhs 
+          ,("rst+lhs"      , readRST)
+          ,("html"         , readHtml)
+          ,("latex"        , readLaTeX)
+          ,("latex+lhs"    , readLaTeX)
+          ]
+
+-- | Association list of formats and writers (omitting the
+-- binary writers, odt and epub).
+writers :: [ ( String, WriterOptions -> Pandoc -> String ) ]
+writers = [("native"       , writeNative)
+          ,("json"         , \_ -> encodeJSON)
+          ,("html"         , writeHtmlString)
+          ,("html+lhs"     , writeHtmlString)
+          ,("s5"           , writeHtmlString)
+          ,("slidy"        , writeHtmlString)
+          ,("docbook"      , writeDocbook)
+          ,("opendocument" , writeOpenDocument)
+          ,("latex"        , writeLaTeX)
+          ,("latex+lhs"    , writeLaTeX)
+          ,("context"      , writeConTeXt)
+          ,("texinfo"      , writeTexinfo)
+          ,("man"          , writeMan)
+          ,("markdown"     , writeMarkdown)
+          ,("markdown+lhs" , writeMarkdown)
+          ,("plain"        , writePlain)
+          ,("rst"          , writeRST)
+          ,("rst+lhs"      , writeRST)
+          ,("mediawiki"    , writeMediaWiki)
+          ,("textile"      , writeTextile)
+          ,("rtf"          , writeRTF)
+          ,("org"          , writeOrg)
+          ]
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 1b206e4c7..48c6aa70d 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -586,7 +586,6 @@ data ParserState = ParserState
     { stateParseRaw        :: Bool,          -- ^ Parse raw HTML and LaTeX?
       stateParserContext   :: ParserContext, -- ^ Inside list?
       stateQuoteContext    :: QuoteContext,  -- ^ Inside quoted environment?
-      stateSanitizeHTML    :: Bool,          -- ^ Sanitize HTML?
       stateKeys            :: KeyTable,      -- ^ List of reference keys
       stateCitations       :: [String],      -- ^ List of available citations
       stateNotes           :: NoteTable,     -- ^ List of notes
@@ -614,7 +613,6 @@ defaultParserState =
     ParserState { stateParseRaw        = False,
                   stateParserContext   = NullState,
                   stateQuoteContext    = NoQuote,
-                  stateSanitizeHTML    = False,
                   stateKeys            = M.empty,
                   stateCitations       = [],
                   stateNotes           = [],
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f05fdd57b..ed026eb49 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -40,7 +40,6 @@ module Text.Pandoc.Readers.HTML (
                                  extractTagType,
                                  htmlBlockElement,
                                  htmlComment,
-                                 unsanitaryURI
                                 ) where
 
 import Text.ParserCombinators.Parsec
@@ -51,7 +50,6 @@ import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
 import Data.Maybe ( fromMaybe )
 import Data.List ( isPrefixOf, isSuffixOf, intercalate )
 import Data.Char ( toLower, isAlphaNum )
-import Network.URI ( parseURIReference, URI (..) )
 import Control.Monad ( liftM, when )
 
 -- | Convert HTML-formatted string to 'Pandoc' document.
@@ -85,36 +83,6 @@ blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div",
                  "dt", "frameset", "li", "tbody", "td", "tfoot",
                  "th", "thead", "tr", "script", "style"]
 
-sanitaryTags :: [[Char]]
-sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big",
-                "blockquote", "br", "button", "caption", "center",
-                "cite", "code", "col", "colgroup", "dd", "del", "dfn",
-                "dir", "div", "dl", "dt", "em", "fieldset", "font",
-                "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
-                "i", "img", "input", "ins", "kbd", "label", "legend",
-                "li", "map", "menu", "ol", "optgroup", "option", "p",
-                "pre", "q", "s", "samp", "select", "small", "span",
-                "strike", "strong", "sub", "sup", "table", "tbody",
-                "td", "textarea", "tfoot", "th", "thead", "tr", "tt",
-                "u", "ul", "var"]
-
-sanitaryAttributes :: [[Char]]
-sanitaryAttributes = ["abbr", "accept", "accept-charset",
-                      "accesskey", "action", "align", "alt", "axis",
-                      "border", "cellpadding", "cellspacing", "char",
-                      "charoff", "charset", "checked", "cite", "class",
-                      "clear", "cols", "colspan", "color", "compact",
-                      "coords", "datetime", "dir", "disabled",
-                      "enctype", "for", "frame", "headers", "height",
-                      "href", "hreflang", "hspace", "id", "ismap",
-                      "label", "lang", "longdesc", "maxlength", "media",
-                      "method", "multiple", "name", "nohref", "noshade",
-                      "nowrap", "prompt", "readonly", "rel", "rev",
-                      "rows", "rowspan", "rules", "scope", "selected",
-                      "shape", "size", "span", "src", "start",
-                      "summary", "tabindex", "target", "title", "type",
-                      "usemap", "valign", "value", "vspace", "width"]
-
 -- taken from HXT and extended
 
 closes :: String -> String -> Bool
@@ -153,41 +121,6 @@ _ `closes` _ = False
 -- HTML utility functions
 --
 
--- | Returns @True@ if sanitization is specified and the specified tag is 
---  not on the sanitized tag list.
-unsanitaryTag :: [Char]
-              -> GenParser tok ParserState Bool
-unsanitaryTag tag = do
-  st <- getState
-  return $ stateSanitizeHTML st && tag `notElem` sanitaryTags
-
--- | returns @True@ if sanitization is specified and the specified attribute
---  is not on the sanitized attribute list.
-unsanitaryAttribute :: ([Char], String, t)
-                    -> GenParser tok ParserState Bool
-unsanitaryAttribute (attr, val, _) = do
-  st <- getState
-  return $ stateSanitizeHTML st &&
-           (attr `notElem` sanitaryAttributes ||
-             (attr `elem` ["href","src"] && unsanitaryURI val))
-
--- | Returns @True@ if the specified URI is potentially a security risk.
-unsanitaryURI :: String -> Bool
-unsanitaryURI u =
-  let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:",
-             "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:",
-             "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:",
-             "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:",
-             "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:",
-             "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:",
-             "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:",
-             "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:",
-             "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:",
-             "snews:", "webcal:", "ymsgr:"]
-  in  case parseURIReference (escapeURI u) of
-           Just p  -> (map toLower $ uriScheme p) `notElem` safeURISchemes
-           Nothing -> True
-
 -- | Read blocks until end tag.
 blocksTilEnd :: String -> GenParser Char ParserState [Block]
 blocksTilEnd tag = do
@@ -240,10 +173,7 @@ anyHtmlTag = try $ do
   char '>'
   let result = "<" ++ tag ++ 
                concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">"
-  unsanitary <- unsanitaryTag tag
-  if unsanitary
-     then return $ "<!-- unsafe HTML removed -->"
-     else return result
+  return result
 
 anyHtmlEndTag :: GenParser Char ParserState [Char]
 anyHtmlEndTag = try $ do
@@ -255,10 +185,7 @@ anyHtmlEndTag = try $ do
   spaces
   char '>'
   let result = "</" ++ tag ++ ">"
-  unsanitary <- unsanitaryTag tag
-  if unsanitary
-     then return $ "<!-- unsafe HTML removed -->"
-     else return result 
+  return result 
 
 htmlTag :: Bool
         -> String
@@ -294,16 +221,10 @@ quoted quoteChar = do
                     (many (noneOf [quoteChar]))
   return (result, [quoteChar])
 
-nullAttribute :: ([Char], [Char], [Char])
-nullAttribute = ("", "", "") 
-
 htmlAttribute :: GenParser Char ParserState ([Char], [Char], [Char])
 htmlAttribute = do
   attr <- htmlRegularAttribute <|> htmlMinimizedAttribute
-  unsanitary <- unsanitaryAttribute attr
-  if unsanitary
-     then return nullAttribute
-     else return attr
+  return attr
 
 -- minimized boolean attribute
 htmlMinimizedAttribute :: GenParser Char st ([Char], [Char], [Char])
@@ -364,10 +285,7 @@ htmlScript = try $ do
   lookAhead $ htmlOpenTag "script"
   open <- anyHtmlTag
   rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script")
-  st <- getState
-  if stateSanitizeHTML st && not ("script" `elem` sanitaryTags)
-     then return "<!-- unsafe HTML removed -->"
-     else return $ open ++ rest ++ "</script>"
+  return $ open ++ rest ++ "</script>"
 
 scriptChunk :: GenParser Char ParserState [Char]
 scriptChunk = jsComment <|> jsString <|> jsChars
@@ -399,10 +317,7 @@ htmlStyle = try $ do
   lookAhead $ htmlOpenTag "style"
   open <- anyHtmlTag
   rest <- manyTill anyChar (htmlEndTag "style")
-  st <- getState
-  if stateSanitizeHTML st && not ("style" `elem` sanitaryTags)
-     then return "<!-- unsafe HTML removed -->"
-     else return $ open ++ rest ++ "</style>"
+  return $ open ++ rest ++ "</style>"
 
 htmlBlockElement :: GenParser Char ParserState [Char]
 htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definition ]
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 406809dfc..0bc13d2dd 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -119,20 +119,15 @@ anyEnvironment =  try $ do
 
 -- | Process LaTeX preamble, extracting metadata.
 processLaTeXPreamble :: GenParser Char ParserState ()
-processLaTeXPreamble = try $ manyTill 
-  (choice [bibliographic, comment, unknownCommand, nullBlock]) 
-  (try (string "\\begin{document}")) >> 
-  spaces
+processLaTeXPreamble =
+  skipMany $ notFollowedBy' anyEnvironment >> block
 
 -- | Parse LaTeX and return 'Pandoc'.
 parseLaTeX :: GenParser Char ParserState Pandoc
 parseLaTeX = do
-  optional processLaTeXPreamble -- preamble might not be present (fragment)
-  spaces
-  blocks <- parseBlocks
   spaces
-  optional $ try (string "\\end{document}" >> many anyChar) 
-  -- might not be present (fragment)
+  blocks <- try (processLaTeXPreamble >> spaces >> environment "document")
+        <|> many block
   spaces
   eof
   state <- getState
@@ -420,8 +415,8 @@ ignore = try $ do
 
 unknownCommand :: GenParser Char ParserState Block
 unknownCommand = try $ do
-  notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
-                                     "document"]
+  spaces
+  notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"]
   state <- getState
   when (stateParserContext state == ListItemState) $
      notFollowedBy' (string "\\item")
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 59f825808..a68741bda 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -41,7 +41,7 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
 import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, 
                                   anyHtmlInlineTag, anyHtmlTag,
                                   anyHtmlEndTag, htmlEndTag, extractTagType,
-                                  htmlBlockElement, htmlComment, unsanitaryURI )
+                                  htmlBlockElement, htmlComment )
 import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
 import Text.ParserCombinators.Parsec
 import Control.Monad (when, liftM, guard)
@@ -202,9 +202,17 @@ referenceKey = try $ do
   lab <- reference
   char ':'
   skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
-  let sourceURL excludes = many $
-        optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' referenceTitle >> char ' '))
-  src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n"
+  let nl = char '\n' >> notFollowedBy blankline >> return ' '
+  let sourceURL = liftM unwords $ many $ try $ do
+                    notFollowedBy' referenceTitle
+                    skipMany (oneOf " \t")
+                    optional nl
+                    notFollowedBy' reference
+                    skipMany (oneOf " \t")
+                    many1 (noneOf " \t\n")
+  let betweenAngles = try $ char '<' >>
+                            manyTill (noneOf ">\n" <|> nl) (char '>')
+  src <- try betweenAngles <|> sourceURL
   tit <- option "" referenceTitle
   blanklines
   endPos <- getPosition
@@ -650,16 +658,14 @@ isHtmlOrBlank _              = False
 
 para :: GenParser Char ParserState Block
 para = try $ do 
-  result <- many1 inline
-  if all isHtmlOrBlank result
-     then fail "treat as raw HTML"
-     else return ()
-  newline
-  blanklines <|> do st <- getState
-                    if stateStrict st
-                       then lookAhead (blockQuote <|> header) >> return ""
-                       else pzero
-  return $ Para $ normalizeSpaces result
+  result <- liftM normalizeSpaces $ many1 inline
+  guard $ not . all isHtmlOrBlank $ result
+  option (Plain result) $ try $ do
+              newline
+              blanklines <|>
+                (getState >>= guard . stateStrict >>
+                 lookAhead (blockQuote <|> header) >> return "")
+              return $ Para result
 
 plain :: GenParser Char ParserState Block
 plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces
@@ -1085,15 +1091,13 @@ endline = try $ do
   newline
   notFollowedBy blankline
   st <- getState
-  if stateStrict st 
-    then do notFollowedBy emailBlockQuoteStart
-            notFollowedBy (char '#')  -- atx header
-    else return () 
+  when (stateStrict st) $ do
+    notFollowedBy emailBlockQuoteStart
+    notFollowedBy (char '#')  -- atx header
   -- parse potential list-starts differently if in a list:
-  if stateParserContext st == ListItemState
-     then notFollowedBy' (bulletListStart <|> 
-                          (anyOrderedListStart >> return ()))
-     else return ()
+  when (stateParserContext st == ListItemState) $ do
+     notFollowedBy' bulletListStart
+     notFollowedBy' anyOrderedListStart
   return Space
 
 --
@@ -1118,9 +1122,16 @@ source =
 source' :: GenParser Char st (String, [Char])
 source' = do
   skipSpaces
-  let sourceURL excludes = many $
-        optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' '))
-  src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n"
+  let nl = char '\n' >>~ notFollowedBy blankline
+  let sourceURL = liftM unwords $ many $ try $ do
+                    notFollowedBy' linkTitle
+                    skipMany (oneOf " \t")
+                    optional nl
+                    skipMany (oneOf " \t")
+                    many1 (noneOf " \t\n")
+  let betweenAngles = try $ char '<' >>
+                            manyTill (noneOf ">\n" <|> nl) (char '>')
+  src <- try betweenAngles <|> sourceURL
   tit <- option "" linkTitle
   skipSpaces
   eof
@@ -1139,10 +1150,7 @@ link :: GenParser Char ParserState Inline
 link = try $ do
   lab <- reference
   (src, tit) <- source <|> referenceLink lab
-  sanitize <- getState >>= return . stateSanitizeHTML
-  if sanitize && unsanitaryURI src
-     then fail "Unsanitary URI"
-     else return $ Link lab (src, tit)
+  return $ Link lab (src, tit)
 
 -- a link like [this][ref] or [this][] or [this]
 referenceLink :: [Inline]
@@ -1162,12 +1170,9 @@ autoLink = try $ do
   (orig, src) <- uri <|> emailAddress
   char '>'
   st <- getState
-  let sanitize = stateSanitizeHTML st
-  if sanitize && unsanitaryURI src
-     then fail "Unsanitary URI"
-     else return $ if stateStrict st
-                      then Link [Str orig] (src, "")
-                      else Link [Code orig] (src, "")
+  return $ if stateStrict st
+              then Link [Str orig] (src, "")
+              else Link [Code orig] (src, "")
 
 image :: GenParser Char ParserState Inline
 image = try $ do
diff --git a/src/pandoc.hs b/src/pandoc.hs
index e8220de34..52dfb731a 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -29,7 +29,6 @@ Parses command-line options and calls the appropriate readers and
 writers.
 -}
 module Main where
-import Text.JSON.Generic (encodeJSON, decodeJSON)
 import Text.Pandoc
 import Text.Pandoc.S5 (s5HeaderIncludes)
 import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile,
@@ -81,60 +80,6 @@ wrapWords c = wrap' c c where
                                                        then ",\n" ++ x ++ wrap' cols (cols - length x) xs
                                                        else ", "  ++ x ++ wrap' cols (remaining - (length x + 2)) xs
 
--- | Association list of formats and readers.
-readers :: [(String, ParserState -> String -> Pandoc)]
-readers = [("native"       , readPandoc)
-          ,("json"         , readJSON)
-          ,("markdown"     , readMarkdown)
-          ,("markdown+lhs" , readMarkdown)
-          ,("rst"          , readRST)
-          ,("textile"      , readTextile) -- TODO : textile+lhs 
-          ,("rst+lhs"      , readRST)
-          ,("html"         , readHtml)
-          ,("latex"        , readLaTeX)
-          ,("latex+lhs"    , readLaTeX)
-          ]
-
--- | Reader for native Pandoc format.
-readPandoc :: ParserState -> String -> Pandoc
-readPandoc _ = read
-
--- | Reader for JSON version of Pandoc AST.
-readJSON :: ParserState -> String -> Pandoc
-readJSON _ = decodeJSON
-
--- | Writer for JSON version of Pandoc AST.
-writeJSON :: WriterOptions -> Pandoc -> String
-writeJSON _ = encodeJSON
-
--- | Association list of formats and writers.
-writers :: [ ( String, WriterOptions -> Pandoc -> String ) ]
-writers = [("native"       , writeNative)
-          ,("json"         , writeJSON)
-          ,("html"         , writeHtmlString)
-          ,("html+lhs"     , writeHtmlString)
-          ,("s5"           , writeHtmlString)
-          ,("slidy"        , writeHtmlString)
-          ,("docbook"      , writeDocbook)
-          ,("opendocument" , writeOpenDocument)
-          ,("odt"          , \_ _ -> "")
-          ,("epub"         , \_ _ -> "")
-          ,("latex"        , writeLaTeX)
-          ,("latex+lhs"    , writeLaTeX)
-          ,("context"      , writeConTeXt)
-          ,("texinfo"      , writeTexinfo)
-          ,("man"          , writeMan)
-          ,("markdown"     , writeMarkdown)
-          ,("markdown+lhs" , writeMarkdown)
-          ,("plain"        , writePlain)
-          ,("rst"          , writeRST)
-          ,("rst+lhs"      , writeRST)
-          ,("mediawiki"    , writeMediaWiki)
-          ,("textile"      , writeTextile)
-          ,("rtf"          , writeRTF)
-          ,("org"          , writeOrg)
-          ]
-
 isNonTextOutput :: String -> Bool
 isNonTextOutput = (`elem` ["odt","epub"])
 
@@ -166,7 +111,6 @@ data Opt = Opt
     , optStrict            :: Bool    -- ^ Use strict markdown syntax
     , optReferenceLinks    :: Bool    -- ^ Use reference links in writing markdown, rst
     , optWrapText          :: Bool    -- ^ Wrap text
-    , optSanitizeHTML      :: Bool    -- ^ Sanitize HTML
     , optPlugins           :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply
     , optEmailObfuscation  :: ObfuscationMethod
     , optIdentifierPrefix  :: String
@@ -205,7 +149,6 @@ defaultOpts = Opt
     , optStrict            = False
     , optReferenceLinks    = False
     , optWrapText          = True
-    , optSanitizeHTML      = False
     , optPlugins           = []
     , optEmailObfuscation  = JavascriptObfuscation
     , optIdentifierPrefix  = ""
@@ -223,13 +166,13 @@ options =
                  (ReqArg
                   (\arg opt -> return opt { optReader = map toLower arg })
                   "FORMAT")
-                 "" -- ("(" ++ (intercalate ", " $ map fst readers) ++ ")")
+                 ""
 
     , Option "tw" ["to","write"]
                  (ReqArg
                   (\arg opt -> return opt { optWriter = map toLower arg })
                   "FORMAT")
-                 "" -- ("(" ++ (intercalate ", " $ map fst writers) ++ ")")
+                 ""
 
     , Option "s" ["standalone"]
                  (NoArg
@@ -355,11 +298,6 @@ options =
                   (\opt -> return opt { optWrapText = False }))
                  "" -- "Do not wrap text in output"
 
-    , Option "" ["sanitize-html"]
-                 (NoArg
-                  (\opt -> return opt { optSanitizeHTML = True }))
-                 "" -- "Sanitize HTML"
-
     , Option "" ["email-obfuscation"]
                  (ReqArg
                   (\arg opt -> do
@@ -579,7 +517,7 @@ usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
 usageMessage programName = usageInfo
   (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats:  " ++
   (intercalate ", " $ map fst readers) ++ "\nOutput formats:  " ++
-  (intercalate ", " $ map fst writers) ++ "\nOptions:")
+  (intercalate ", " $ map fst writers ++ ["odt","epub"]) ++ "\nOptions:")
 
 -- Determine default reader based on source file extensions
 defaultReaderName :: String -> [FilePath] -> String
@@ -684,7 +622,6 @@ main = do
               , optStrict            = strict
               , optReferenceLinks    = referenceLinks
               , optWrapText          = wrap
-              , optSanitizeHTML      = sanitize
               , optEmailObfuscation  = obfuscationMethod
               , optIdentifierPrefix  = idPrefix
               , optIndentedCodeClasses = codeBlockClasses
@@ -730,8 +667,8 @@ main = do
      Nothing -> error ("Unknown reader: " ++ readerName')
 
   let writer = case lookup writerName' writers of
-                Just _ | writerName' == "epub" -> writeEPUB epubStylesheet
-                Just _ | writerName' == "odt"  -> writeODT referenceODT
+                Nothing | writerName' == "epub" -> writeEPUB epubStylesheet
+                Nothing | writerName' == "odt"  -> writeODT referenceODT
                 Just r                         -> \o ->
                                                      return . fromString . r o
                 Nothing                        -> error $ "Unknown writer: " ++
@@ -783,7 +720,6 @@ main = do
   let startParserState =
          defaultParserState { stateParseRaw        = parseRaw,
                               stateTabStop         = tabStop,
-                              stateSanitizeHTML    = sanitize,
                               stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' ||
                                                      lhsExtension sources,
                               stateStandalone      = standalone',
diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native
index 784b14ccc..6fce927c1 100644
--- a/tests/markdown-reader-more.native
+++ b/tests/markdown-reader-more.native
@@ -6,7 +6,7 @@ Pandoc (Meta {docTitle = [Str "Title",Space,Str "spanning",Space,Str "multiple",
 , Para [TeX "\\placeformula",Space,TeX "\\startformula\n   L_{1} = L_{2}\n   \\stopformula"]
 , Para [TeX "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]"]
 , Header 2 [Str "URLs",Space,Str "with",Space,Str "spaces"]
-, Para [Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20%20and%20%20baz",""),Space,Link [Str "foo"] ("bar%20baz","title")]
+, Para [Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("bar%20baz","title")]
 , Para [Link [Str "baz"] ("/foo%20foo",""),Space,Link [Str "bam"] ("/foo%20fee",""),Space,Link [Str "bork"] ("/foo/zee%20zob","title")]
 , Header 2 [Str "Horizontal",Space,Str "rules",Space,Str "with",Space,Str "spaces",Space,Str "at",Space,Str "end"]
 , HorizontalRule
diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt
index f4540d84d..258002b8a 100644
--- a/tests/markdown-reader-more.txt
+++ b/tests/markdown-reader-more.txt
@@ -31,7 +31,8 @@
 ## URLs with spaces
 
 [foo](/bar and baz)
-[foo](/bar and baz )
+[foo](/bar
+ and baz )
 [foo]( /bar  and  baz  )
 [foo](bar baz  "title" )