1 files changed, 0 insertions, 675 deletions
diff --git a/Text/Pandoc/Readers/HTML.hs b/Text/Pandoc/Readers/HTML.hs
deleted file mode 100644
index 65e512b5e..000000000
--- a/Text/Pandoc/Readers/HTML.hs
+++ /dev/null
@@ -1,675 +0,0 @@
-{-
-Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.Readers.HTML
-   Copyright   : Copyright (C) 2006-8 John MacFarlane
-   License     : GNU GPL, version 2 or above 
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha 
-   Portability : portable
-
-Conversion of HTML to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.HTML ( 
-                                 readHtml, 
-                                 rawHtmlInline, 
-                                 rawHtmlBlock, 
-                                 anyHtmlBlockTag, 
-                                 anyHtmlInlineTag,  
-                                 anyHtmlTag,
-                                 anyHtmlEndTag,
-                                 htmlEndTag,
-                                 extractTagType,
-                                 htmlBlockElement,
-                                 unsanitaryURI
-                                ) where
-
-import Text.ParserCombinators.Parsec
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared 
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
-import Data.Maybe ( fromMaybe )
-import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf, intercalate )
-import Data.Char ( toLower, isAlphaNum )
-import Network.URI ( parseURIReference, URI (..) )
-
--- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: ParserState   -- ^ Parser state
-         -> String        -- ^ String to parse
-         -> Pandoc
-readHtml = readWith parseHtml
-
---
--- Constants
---
-
-eitherBlockOrInline :: [[Char]]
-eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
-                  "map", "area", "object"]
-
-{-
-inlineHtmlTags :: [[Char]]
-inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
-                  "br", "cite", "code", "dfn", "em", "font", "i", "img",
-                  "input", "kbd", "label", "q", "s", "samp", "select",
-                  "small", "span", "strike", "strong", "sub", "sup",
-                  "textarea", "tt", "u", "var"] ++ eitherBlockOrInline
--}
-
-blockHtmlTags :: [[Char]]
-blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div",
-                 "dl", "fieldset", "form", "h1", "h2", "h3", "h4",
-                 "h5", "h6", "hr", "html", "isindex", "menu", "noframes",
-                 "noscript", "ol", "p", "pre", "table", "ul", "dd",
-                 "dt", "frameset", "li", "tbody", "td", "tfoot",
-                 "th", "thead", "tr", "script"] ++ eitherBlockOrInline
-
-sanitaryTags :: [[Char]]
-sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big",
-                "blockquote", "br", "button", "caption", "center",
-                "cite", "code", "col", "colgroup", "dd", "del", "dfn",
-                "dir", "div", "dl", "dt", "em", "fieldset", "font",
-                "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
-                "i", "img", "input", "ins", "kbd", "label", "legend",
-                "li", "map", "menu", "ol", "optgroup", "option", "p",
-                "pre", "q", "s", "samp", "select", "small", "span",
-                "strike", "strong", "sub", "sup", "table", "tbody",
-                "td", "textarea", "tfoot", "th", "thead", "tr", "tt",
-                "u", "ul", "var"]
-
-sanitaryAttributes :: [[Char]]
-sanitaryAttributes = ["abbr", "accept", "accept-charset",
-                      "accesskey", "action", "align", "alt", "axis",
-                      "border", "cellpadding", "cellspacing", "char",
-                      "charoff", "charset", "checked", "cite", "class",
-                      "clear", "cols", "colspan", "color", "compact",
-                      "coords", "datetime", "dir", "disabled",
-                      "enctype", "for", "frame", "headers", "height",
-                      "href", "hreflang", "hspace", "id", "ismap",
-                      "label", "lang", "longdesc", "maxlength", "media",
-                      "method", "multiple", "name", "nohref", "noshade",
-                      "nowrap", "prompt", "readonly", "rel", "rev",
-                      "rows", "rowspan", "rules", "scope", "selected",
-                      "shape", "size", "span", "src", "start",
-                      "summary", "tabindex", "target", "title", "type",
-                      "usemap", "valign", "value", "vspace", "width"]
-
---
--- HTML utility functions
---
-
--- | Returns @True@ if sanitization is specified and the specified tag is 
---  not on the sanitized tag list.
-unsanitaryTag :: [Char]
-              -> GenParser tok ParserState Bool
-unsanitaryTag tag = do
-  st <- getState
-  return $ stateSanitizeHTML st && tag `notElem` sanitaryTags
-
--- | returns @True@ if sanitization is specified and the specified attribute
---  is not on the sanitized attribute list.
-unsanitaryAttribute :: ([Char], String, t)
-                    -> GenParser tok ParserState Bool
-unsanitaryAttribute (attr, val, _) = do
-  st <- getState
-  return $ stateSanitizeHTML st &&
-           (attr `notElem` sanitaryAttributes ||
-             (attr `elem` ["href","src"] && unsanitaryURI val))
-
--- | Returns @True@ if the specified URI is potentially a security risk.
-unsanitaryURI :: String -> Bool
-unsanitaryURI u =
-  let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:",
-             "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:",
-             "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:",
-             "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:",
-             "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:",
-             "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:",
-             "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:",
-             "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:",
-             "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:",
-             "snews:", "webcal:", "ymsgr:"]
-  in  case parseURIReference u of
-           Just p  -> (map toLower $ uriScheme p) `notElem` safeURISchemes
-           Nothing -> True
-
--- | Read blocks until end tag.
-blocksTilEnd :: String -> GenParser Char ParserState [Block]
-blocksTilEnd tag = do
-  blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
-  return $ filter (/= Null) blocks
-
--- | Read inlines until end tag.
-inlinesTilEnd :: String -> GenParser Char ParserState [Inline]
-inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
-
--- | Parse blocks between open and close tag.
-blocksIn :: String -> GenParser Char ParserState [Block]
-blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
-
--- | Parse inlines between open and close tag.
-inlinesIn :: String -> GenParser Char ParserState [Inline]
-inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
-
--- | Extract type from a tag:  e.g. @br@ from @\<br\>@
-extractTagType :: String -> String
-extractTagType ('<':rest) = 
-  let isSpaceOrSlash c = c `elem` "/ \n\t" in
-  map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
-extractTagType _ = ""
-
--- | Parse any HTML tag (opening or self-closing) and return text of tag
-anyHtmlTag :: GenParser Char ParserState [Char]
-anyHtmlTag = try $ do
-  char '<'
-  spaces
-  tag <- many1 alphaNum
-  attribs <- many htmlAttribute
-  spaces
-  ender <- option "" (string "/")
-  let ender' = if null ender then "" else " /"
-  spaces
-  char '>'
-  let result = "<" ++ tag ++ 
-               concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">"
-  unsanitary <- unsanitaryTag tag
-  if unsanitary
-     then return $ "<!-- unsafe HTML removed -->"
-     else return result
-
-anyHtmlEndTag :: GenParser Char ParserState [Char]
-anyHtmlEndTag = try $ do
-  char '<'   
-  spaces
-  char '/'
-  spaces
-  tag <- many1 alphaNum
-  spaces
-  char '>'
-  let result = "</" ++ tag ++ ">"
-  unsanitary <- unsanitaryTag tag
-  if unsanitary
-     then return $ "<!-- unsafe HTML removed -->"
-     else return result 
-
-htmlTag :: String -> GenParser Char ParserState (String, [(String, String)])
-htmlTag tag = try $ do
-  char '<'
-  spaces
-  stringAnyCase tag
-  attribs <- many htmlAttribute
-  spaces
-  optional (string "/")
-  spaces
-  char '>'
-  return (tag, (map (\(name, content, _) -> (name, content)) attribs))
-
--- parses a quoted html attribute value
-quoted :: Char -> GenParser Char st (String, String)
-quoted quoteChar = do
-  result <- between (char quoteChar) (char quoteChar) 
-                    (many (noneOf [quoteChar]))
-  return (result, [quoteChar])
-
-nullAttribute :: ([Char], [Char], [Char])
-nullAttribute = ("", "", "") 
-
-htmlAttribute :: GenParser Char ParserState ([Char], [Char], [Char])
-htmlAttribute = do
-  attr <- htmlRegularAttribute <|> htmlMinimizedAttribute
-  unsanitary <- unsanitaryAttribute attr
-  if unsanitary
-     then return nullAttribute
-     else return attr
-
--- minimized boolean attribute
-htmlMinimizedAttribute :: GenParser Char st ([Char], [Char], [Char])
-htmlMinimizedAttribute = try $ do
-  many1 space
-  name <- many1 (choice [letter, oneOf ".-_:"])
-  return (name, name, name)
-
-htmlRegularAttribute :: GenParser Char st ([Char], [Char], [Char])
-htmlRegularAttribute = try $ do
-  many1 space
-  name <- many1 (choice [letter, oneOf ".-_:"])
-  spaces
-  char '='
-  spaces
-  (content, quoteStr) <- choice [ (quoted '\''), 
-                                  (quoted '"'), 
-                                  (do
-                                     a <- many (alphaNum <|> (oneOf "-._:"))
-                                     return (a,"")) ]
-  return (name, content,
-          (name ++ "=" ++ quoteStr ++ content ++ quoteStr))
-
--- | Parse an end tag of type 'tag'
-htmlEndTag :: [Char] -> GenParser Char st [Char]
-htmlEndTag tag = try $ do
-  char '<'   
-  spaces
-  char '/'
-  spaces
-  stringAnyCase tag
-  spaces
-  char '>'
-  return $ "</" ++ tag ++ ">"
-
-{-
--- | Returns @True@ if the tag is (or can be) an inline tag.
-isInline :: String -> Bool
-isInline tag = (extractTagType tag) `elem` inlineHtmlTags
--}
-
--- | Returns @True@ if the tag is (or can be) a block tag.
-isBlock :: String -> Bool
-isBlock tag = (extractTagType tag) `elem` blockHtmlTags 
-
-anyHtmlBlockTag :: GenParser Char ParserState [Char]
-anyHtmlBlockTag = try $ do
-  tag <- anyHtmlTag <|> anyHtmlEndTag
-  if isBlock tag then return tag else fail "not a block tag"
-
-anyHtmlInlineTag :: GenParser Char ParserState [Char]
-anyHtmlInlineTag = try $ do
-  tag <- anyHtmlTag <|> anyHtmlEndTag
-  if not (isBlock tag) then return tag else fail "not an inline tag"
-
--- | Parses material between script tags.
--- Scripts must be treated differently, because they can contain '<>' etc.
-htmlScript :: GenParser Char ParserState [Char]
-htmlScript = try $ do
-  open <- string "<script"
-  rest <- manyTill anyChar (htmlEndTag "script")
-  st <- getState
-  if stateSanitizeHTML st && not ("script" `elem` sanitaryTags)
-     then return "<!-- unsafe HTML removed -->"
-     else return $ open ++ rest ++ "</script>"
-
--- | Parses material between style tags.
--- Style tags must be treated differently, because they can contain CSS
-htmlStyle :: GenParser Char ParserState [Char]
-htmlStyle = try $ do
-  open <- string "<style"
-  rest <- manyTill anyChar (htmlEndTag "style")
-  st <- getState
-  if stateSanitizeHTML st && not ("style" `elem` sanitaryTags)
-     then return "<!-- unsafe HTML removed -->"
-     else return $ open ++ rest ++ "</style>"
-
-htmlBlockElement :: GenParser Char ParserState [Char]
-htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definition ]
-
-rawHtmlBlock :: GenParser Char ParserState Block
-rawHtmlBlock = try $ do
-  body <- htmlBlockElement <|> rawVerbatimBlock <|> anyHtmlBlockTag
-  state <- getState
-  if stateParseRaw state then return (RawHtml body) else return Null
-
--- This is a block whose contents should be passed through verbatim, not interpreted.
-rawVerbatimBlock :: GenParser Char ParserState [Char]
-rawVerbatimBlock = try $ do
-  start <- anyHtmlBlockTag
-  let tagtype = extractTagType start
-  if tagtype `elem` ["pre"]
-     then do
-       contents <- many (notFollowedBy' (htmlEndTag tagtype) >> anyChar)
-       end <- htmlEndTag tagtype
-       return $ start ++ contents ++ end
-     else fail "Not a verbatim block"
-
--- We don't want to parse </body> or </html> as raw HTML, since these
--- are handled in parseHtml.
-rawHtmlBlock' :: GenParser Char ParserState Block
-rawHtmlBlock' = do notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
-                   rawHtmlBlock
-
--- | Parses an HTML comment.
-htmlComment :: GenParser Char st [Char]
-htmlComment = try $ do
-  string "<!--"
-  comment <- manyTill anyChar (try (string "-->"))
-  return $ "<!--" ++ comment ++ "-->"
-
---
--- parsing documents
---
-
-xmlDec :: GenParser Char st [Char]
-xmlDec = try $ do
-  string "<?"
-  rest <- manyTill anyChar (char '>')
-  return $ "<?" ++ rest ++ ">"
-
-definition :: GenParser Char st [Char]
-definition = try $ do
-  string "<!"
-  rest <- manyTill anyChar (char '>')
-  return $ "<!" ++ rest ++ ">"
-
-nonTitleNonHead :: GenParser Char ParserState Char
-nonTitleNonHead = try $ do
-  notFollowedBy $ (htmlTag "title" >> return ' ') <|> 
-                  (htmlEndTag "head" >> return ' ')
-  (rawHtmlBlock >> return ' ') <|> anyChar
-
-parseTitle :: GenParser Char ParserState [Inline]
-parseTitle = try $ do
-  (tag, _) <- htmlTag "title"
-  contents <- inlinesTilEnd tag
-  spaces
-  return contents
-
--- parse header and return meta-information (for now, just title)
-parseHead :: GenParser Char ParserState ([Inline], [a], [Char])
-parseHead = try $ do
-  htmlTag "head"
-  spaces
-  skipMany nonTitleNonHead
-  contents <- option [] parseTitle
-  skipMany nonTitleNonHead
-  htmlEndTag "head"
-  return (contents, [], "")
-
-skipHtmlTag :: String -> GenParser Char ParserState ()
-skipHtmlTag tag = optional (htmlTag tag)
-
--- h1 class="title" representation of title in body
-bodyTitle :: GenParser Char ParserState [Inline]
-bodyTitle = try $ do
-  (_, attribs) <- htmlTag "h1"  
-  case (extractAttribute "class" attribs) of
-       Just "title" -> return ""
-       _            -> fail "not title"
-  inlinesTilEnd "h1"
-
-parseHtml :: GenParser Char ParserState Pandoc
-parseHtml = do
-  sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
-  skipHtmlTag "html"
-  spaces
-  (title, authors, date) <- option ([], [], "") parseHead 
-  spaces
-  skipHtmlTag "body"
-  spaces
-  optional bodyTitle  -- skip title in body, because it's represented in meta
-  blocks <- parseBlocks
-  spaces
-  optional (htmlEndTag "body")
-  spaces
-  optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html>
-  eof
-  return $ Pandoc (Meta title authors date) blocks
-
---
--- parsing blocks
---
-
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
-
-block :: GenParser Char ParserState Block
-block = choice [ codeBlock
-               , header
-               , hrule
-               , list
-               , blockQuote
-               , para
-               , plain
-               , rawHtmlBlock'
-               ] <?> "block"
-
---
--- header blocks
---
-
-header :: GenParser Char ParserState Block
-header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
-
-headerLevel :: Int -> GenParser Char ParserState Block
-headerLevel n = try $ do
-    let level = "h" ++ show n
-    htmlTag level
-    contents <- inlinesTilEnd level
-    return $ Header n (normalizeSpaces contents)
-
---
--- hrule block
---
-
-hrule :: GenParser Char ParserState Block
-hrule = try  $ do
-  (_, attribs) <- htmlTag "hr"
-  state <- getState
-  if not (null attribs) && stateParseRaw state
-     then unexpected "attributes in hr" -- parse as raw in this case
-     else return HorizontalRule
-
---
--- code blocks
---
-
--- Note:  HTML tags in code blocks (e.g. for syntax highlighting) are 
--- skipped, because they are not portable to output formats other than HTML.
-codeBlock :: GenParser Char ParserState Block
-codeBlock = try $ do
-    htmlTag "pre" 
-    result <- manyTill 
-              (many1 (satisfy (/= '<')) <|> 
-               ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
-              (htmlEndTag "pre")
-    let result' = concat result
-    -- drop leading newline if any
-    let result'' = if "\n" `isPrefixOf` result'
-                      then drop 1 result'
-                      else result'
-    -- drop trailing newline if any
-    let result''' = if "\n" `isSuffixOf` result''
-                       then init result''
-                       else result''
-    return $ CodeBlock ("",[],[]) $ decodeCharacterReferences result'''
-
---
--- block quotes
---
-
-blockQuote :: GenParser Char ParserState Block
-blockQuote = try $ htmlTag "blockquote" >> spaces >> 
-                   blocksTilEnd "blockquote" >>= (return . BlockQuote)
-
---
--- list blocks
---
-
-list :: GenParser Char ParserState Block
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-orderedList :: GenParser Char ParserState Block
-orderedList = try $ do
-  (_, attribs) <- htmlTag "ol"
-  (start, style) <- option (1, DefaultStyle) $
-                           do failIfStrict
-                              let sta = fromMaybe "1" $ 
-                                        lookup "start" attribs
-                              let sty = fromMaybe (fromMaybe "" $
-                                        lookup "style" attribs) $
-                                        lookup "class" attribs
-                              let sty' = case sty of
-                                          "lower-roman"  -> LowerRoman
-                                          "upper-roman"  -> UpperRoman
-                                          "lower-alpha"  -> LowerAlpha
-                                          "upper-alpha"  -> UpperAlpha
-                                          "decimal"      -> Decimal
-                                          _              -> DefaultStyle
-                              return (read sta, sty')
-  spaces
-  items <- sepEndBy1 (blocksIn "li") spaces
-  htmlEndTag "ol"
-  return $ OrderedList (start, style, DefaultDelim) items
-
-bulletList :: GenParser Char ParserState Block
-bulletList = try $ do
-  htmlTag "ul"
-  spaces
-  items <- sepEndBy1 (blocksIn "li") spaces
-  htmlEndTag "ul"
-  return $ BulletList items
-
-definitionList :: GenParser Char ParserState Block
-definitionList = try $ do
-  failIfStrict  -- def lists not part of standard markdown
-  htmlTag "dl"
-  spaces
-  items <- sepEndBy1 definitionListItem spaces
-  htmlEndTag "dl"
-  return $ DefinitionList items
-
-definitionListItem :: GenParser Char ParserState ([Inline], [Block])
-definitionListItem = try $ do
-  terms <- sepEndBy1 (inlinesIn "dt") spaces
-  defs <- sepEndBy1 (blocksIn "dd") spaces
-  let term = intercalate [LineBreak] terms
-  return (term, concat defs)
-
---
--- paragraph block
---
-
-para :: GenParser Char ParserState Block
-para = try $ htmlTag "p" >> inlinesTilEnd "p" >>= 
-             return . Para . normalizeSpaces
-
--- 
--- plain block
---
-
-plain :: GenParser Char ParserState Block
-plain = many1 inline >>= return . Plain . normalizeSpaces
-
--- 
--- inline
---
-
-inline :: GenParser Char ParserState Inline
-inline = choice [ charRef
-                , strong
-                , emph
-                , superscript
-                , subscript
-                , strikeout
-                , spanStrikeout
-                , code
-                , str
-                , linebreak
-                , whitespace
-                , link
-                , image
-                , rawHtmlInline
-                ] <?> "inline"
-
-code :: GenParser Char ParserState Inline
-code = try $ do 
-  htmlTag "code"
-  result <- manyTill anyChar (htmlEndTag "code")
-  -- remove internal line breaks, leading and trailing space,
-  -- and decode character references
-  return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ 
-                  intercalate " " $ lines result
-
-rawHtmlInline :: GenParser Char ParserState Inline
-rawHtmlInline = do
-  result <- htmlScript <|> htmlStyle <|> htmlComment <|> anyHtmlInlineTag
-  state <- getState
-  if stateParseRaw state then return (HtmlInline result) else return (Str "")
-
-betweenTags :: [Char] -> GenParser Char ParserState [Inline]
-betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>= 
-                        return . normalizeSpaces
-
-emph :: GenParser Char ParserState Inline
-emph = (betweenTags "em" <|> betweenTags "i") >>= return . Emph
-
-strong :: GenParser Char ParserState Inline
-strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
-
-superscript :: GenParser Char ParserState Inline
-superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
-
-subscript :: GenParser Char ParserState Inline
-subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
-
-strikeout :: GenParser Char ParserState Inline
-strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
-            return . Strikeout
-
-spanStrikeout :: GenParser Char ParserState Inline
-spanStrikeout = try $ do
-  failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
-  (_, attributes) <- htmlTag "span" 
-  result <- case (extractAttribute "class" attributes) of
-              Just "strikeout" -> inlinesTilEnd "span"
-              _                -> fail "not a strikeout"
-  return $ Strikeout result
-
-whitespace :: GenParser Char st Inline
-whitespace = many1 space >> return Space
-
--- hard line break
-linebreak :: GenParser Char ParserState Inline
-linebreak = htmlTag "br" >> optional newline >> return LineBreak
-
-str :: GenParser Char st Inline
-str = many1 (noneOf "<& \t\n") >>= return . Str
-
---
--- links and images
---
-
--- extract contents of attribute (attribute names are case-insensitive)
-extractAttribute :: [Char] -> [([Char], String)] -> Maybe String
-extractAttribute _    [] = Nothing
-extractAttribute name ((attrName, contents):rest) = 
-  let name'     = map toLower name 
-      attrName' = map toLower attrName
-  in  if attrName' == name'
-         then Just (decodeCharacterReferences contents)
-         else extractAttribute name rest
-
-link :: GenParser Char ParserState Inline
-link = try $ do
-  (_, attributes) <- htmlTag "a"  
-  url <- case (extractAttribute "href" attributes) of
-           Just url -> return url
-           Nothing  -> fail "no href"
-  let title = fromMaybe "" $ extractAttribute "title" attributes
-  lab <- inlinesTilEnd "a"
-  return $ Link (normalizeSpaces lab) (url, title)
-
-image :: GenParser Char ParserState Inline
-image = try $ do
-  (_, attributes) <- htmlTag "img" 
-  url <- case (extractAttribute "src" attributes) of
-           Just url -> return url
-           Nothing  -> fail "no src"
-  let title = fromMaybe "" $ extractAttribute "title" attributes
-  let alt = fromMaybe "" (extractAttribute "alt" attributes)
-  return $ Image [Str alt] (url, title)
-