diff options
-rw-r--r-- | pandoc.cabal | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/CSS.hs | 35 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 58 | ||||
-rw-r--r-- | tests/html-reader.html | 7 | ||||
-rw-r--r-- | tests/html-reader.native | 13 |
5 files changed, 92 insertions, 24 deletions
diff --git a/pandoc.cabal b/pandoc.cabal index d2bbff0c2..9c0d0c3ef 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -362,7 +362,8 @@ Library Text.Pandoc.Templates, Text.Pandoc.XML, Text.Pandoc.SelfContained, - Text.Pandoc.Process + Text.Pandoc.Process, + Text.Pandoc.CSS Other-Modules: Text.Pandoc.Readers.Docx.Lists, Text.Pandoc.Readers.Docx.Reducible, Text.Pandoc.Readers.Docx.Parse, diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs new file mode 100644 index 000000000..32a5ea129 --- /dev/null +++ b/src/Text/Pandoc/CSS.hs @@ -0,0 +1,35 @@ +module Text.Pandoc.CSS ( foldOrElse, + pickStyleAttrProps + ) +where + +import Text.Pandoc.Shared (trim) +import Text.Parsec +import Text.Parsec.String +import Control.Applicative ((<*)) + +ruleParser :: Parser (String, String) +ruleParser = do + p <- many1 (noneOf ":") <* char ':' + v <- many1 (noneOf ":;") <* char ';' <* spaces + return (trim p, trim v) + +styleAttrParser :: Parser [(String, String)] +styleAttrParser = do + p <- many1 ruleParser + return p + +orElse :: Eq a => a -> a -> a -> a +orElse v x y = if v == x then y else x + +foldOrElse :: Eq a => a -> [a] -> a +foldOrElse v xs = foldr (orElse v) v xs + +eitherToMaybe :: Either a b -> Maybe b +eitherToMaybe (Right x) = Just x +eitherToMaybe _ = Nothing + +pickStyleAttrProps :: [String] -> String -> Maybe String +pickStyleAttrProps lookupProps styleAttr = do + styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr + foldOrElse Nothing $ map (flip lookup styles) lookupProps diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index fcba16e04..02bfcb2bb 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -64,6 +64,7 @@ import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) import Network.URI (isURI) import Text.Pandoc.Error +import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Text.Parsec.Error @@ -252,6 +253,22 @@ pListItem nonItem = do let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr) (liDiv <>) <$> pInTags "li" block <* skipMany nonItem +parseListStyleType :: String -> ListNumberStyle +parseListStyleType "lower-roman" = LowerRoman +parseListStyleType "upper-roman" = UpperRoman +parseListStyleType "lower-alpha" = LowerAlpha +parseListStyleType "upper-alpha" = UpperAlpha +parseListStyleType "decimal" = Decimal +parseListStyleType _ = DefaultStyle + +parseTypeAttr :: String -> ListNumberStyle +parseTypeAttr "i" = LowerRoman +parseTypeAttr "I" = UpperRoman +parseTypeAttr "a" = LowerAlpha +parseTypeAttr "A" = UpperAlpha +parseTypeAttr "1" = Decimal +parseTypeAttr _ = DefaultStyle + pOrderedList :: TagParser Blocks pOrderedList = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) @@ -261,23 +278,19 @@ pOrderedList = try $ do sta' = if all isDigit sta then read sta else 1 - sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> - case lookup "type" attribs of - Just "1" -> Decimal - Just "I" -> UpperRoman - Just "i" -> LowerRoman - Just "A" -> UpperAlpha - Just "a" -> LowerAlpha - _ -> DefaultStyle + + pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"] + + typeAttr = fromMaybe "" $ lookup "type" attribs + classAttr = fromMaybe "" $ lookup "class" attribs + styleAttr = fromMaybe "" $ lookup "style" attribs + listStyle = fromMaybe "" $ pickListStyle styleAttr + + sty' = foldOrElse DefaultStyle + [ parseTypeAttr typeAttr + , parseListStyleType classAttr + , parseListStyleType listStyle + ] let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && not (t ~== TagClose "ol")) @@ -622,12 +635,11 @@ pSpan = try $ do guardEnabled Ext_native_spans TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) contents <- pInTags "span" inline - let attr' = mkAttr attr - return $ case attr' of - ("",[],[("style",s)]) - | filter (`notElem` " \t;") s == "font-variant:small-caps" -> - B.smallcaps contents - _ -> B.spanWith (mkAttr attr) contents + let isSmallCaps = fontVariant == "small-caps" + where styleAttr = fromMaybe "" $ lookup "style" attr + fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr + let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr) + return $ tag contents pRawHtmlInline :: TagParser Inlines pRawHtmlInline = do diff --git a/tests/html-reader.html b/tests/html-reader.html index b6dd50fcc..3bd5e4ce3 100644 --- a/tests/html-reader.html +++ b/tests/html-reader.html @@ -185,6 +185,13 @@ These should not be escaped: \$ \\ \> \[ \{ <li><p>Item 3.</p> </li> </ol> +<p>List styles:</p> +<ol></ol> +<ol type="i"></ol> +<ol class="lower-roman"></ol> +<ol style="lower-roman"></ol> +<ol style="list-style: lower-roman;"></ol> +<ol style="list-style-type: lower-roman;"></ol> <h2>Nested</h2> <ul> <li>Tab<ul> diff --git a/tests/html-reader.native b/tests/html-reader.native index b2d660fda..1abb0ba79 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -116,6 +116,19 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,Para [Str "Item",Space,Str "1.",Space,Str "graf",Space,Str "two.",Space,Str "The",Space,Str "quick",Space,Str "brown",Space,Str "fox",Space,Str "jumped",Space,Str "over",Space,Str "the",Space,Str "lazy",Space,Str "dog's",Space,Str "back."]] ,[Para [Str "Item",Space,Str "2."]] ,[Para [Str "Item",Space,Str "3."]]] +,Para [Str "List",Space,Str "styles:"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [] +,OrderedList (1,LowerRoman,DefaultDelim) + [] +,OrderedList (1,LowerRoman,DefaultDelim) + [] +,OrderedList (1,DefaultStyle,DefaultDelim) + [] +,OrderedList (1,LowerRoman,DefaultDelim) + [] +,OrderedList (1,LowerRoman,DefaultDelim) + [] ,Header 2 ("",[],[]) [Str "Nested"] ,BulletList [[Plain [Str "Tab"] |