From d5b7257d7f926cac07aaa6af5721c567a5debb07 Mon Sep 17 00:00:00 2001
From: fiddlosopher
Date: Sat, 15 Sep 2007 00:44:32 +0000
Subject: Simplified HTML attribute parsing (HTML reader).

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1016 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 src/Text/Pandoc/Readers/HTML.hs | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 80ef01da7..b6aac2b48 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -105,13 +105,14 @@ anyHtmlTag = try $ do
   char '<'
   spaces
   tag <- many1 alphaNum
-  attribs <- htmlAttributes
+  attribs <- many htmlAttribute
   spaces
   ender <- option "" (string "/")
   let ender' = if null ender then "" else " /"
   spaces
   char '>'
-  return $ "<" ++ tag ++ attribs ++ ender' ++ ">"
+  return $ "<" ++ tag ++
+           concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">"
 
 anyHtmlEndTag = try $ do
   char '<'
@@ -141,19 +142,13 @@ quoted quoteChar = do
                     (many (noneOf [quoteChar]))
   return (result, [quoteChar])
 
-htmlAttributes = do
-  attrList <- many htmlAttribute
-  return $ concatMap (\(name, content, raw) -> raw) attrList
-
 htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute
 
 -- minimized boolean attribute
 htmlMinimizedAttribute = try $ do
   many1 space
   name <- many1 (choice [letter, oneOf ".-_:"])
-  notFollowedBy (spaces >> char '=')
-  let content = name
-  return (name, content, (" " ++ name))
+  return (name, name, name)
 
 htmlRegularAttribute = try $ do
   many1 space
@@ -167,7 +162,7 @@ htmlRegularAttribute = try $ do
                                      a <- many (alphaNum <|> (oneOf "-._:"))
                                      return (a,"")) ]
   return (name, content,
-          (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))
+          (name ++ "=" ++ quoteStr ++ content ++ quoteStr))
 
 -- | Parse an end tag of type 'tag'
 htmlEndTag tag = try $ do
-- 
cgit v1.2.3