aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2015-08-07 11:08:53 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2015-08-07 11:08:53 -0700
commit74c31abb1aa696760d20efb202369e1cb0ecfd20 (patch)
treeca577819929a6e6f52f8c270606c673ad2bc7d76 /src
parent3e8590d8a47be8e45fb20f25530399b2134a52ab (diff)
parent18b1b21a6af5638f16c9aca745f463bfd65e8417 (diff)
downloadpandoc-74c31abb1aa696760d20efb202369e1cb0ecfd20.tar.gz
Merge pull request #2327 from hftf/list-style
HTML Reader: Correctly parse inline list-style(-type) for <ol>
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/CSS.hs35
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs58
2 files changed, 70 insertions, 23 deletions
diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs
new file mode 100644
index 000000000..32a5ea129
--- /dev/null
+++ b/src/Text/Pandoc/CSS.hs
@@ -0,0 +1,35 @@
+module Text.Pandoc.CSS ( foldOrElse,
+ pickStyleAttrProps
+ )
+where
+
+import Text.Pandoc.Shared (trim)
+import Text.Parsec
+import Text.Parsec.String
+import Control.Applicative ((<*))
+
+ruleParser :: Parser (String, String)
+ruleParser = do
+ p <- many1 (noneOf ":") <* char ':'
+ v <- many1 (noneOf ":;") <* char ';' <* spaces
+ return (trim p, trim v)
+
+styleAttrParser :: Parser [(String, String)]
+styleAttrParser = do
+ p <- many1 ruleParser
+ return p
+
+orElse :: Eq a => a -> a -> a -> a
+orElse v x y = if v == x then y else x
+
+foldOrElse :: Eq a => a -> [a] -> a
+foldOrElse v xs = foldr (orElse v) v xs
+
+eitherToMaybe :: Either a b -> Maybe b
+eitherToMaybe (Right x) = Just x
+eitherToMaybe _ = Nothing
+
+pickStyleAttrProps :: [String] -> String -> Maybe String
+pickStyleAttrProps lookupProps styleAttr = do
+ styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr
+ foldOrElse Nothing $ map (flip lookup styles) lookupProps
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index fcba16e04..02bfcb2bb 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -64,6 +64,7 @@ import Data.Default (Default (..), def)
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
import Network.URI (isURI)
import Text.Pandoc.Error
+import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Parsec.Error
@@ -252,6 +253,22 @@ pListItem nonItem = do
let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
(liDiv <>) <$> pInTags "li" block <* skipMany nonItem
+parseListStyleType :: String -> ListNumberStyle
+parseListStyleType "lower-roman" = LowerRoman
+parseListStyleType "upper-roman" = UpperRoman
+parseListStyleType "lower-alpha" = LowerAlpha
+parseListStyleType "upper-alpha" = UpperAlpha
+parseListStyleType "decimal" = Decimal
+parseListStyleType _ = DefaultStyle
+
+parseTypeAttr :: String -> ListNumberStyle
+parseTypeAttr "i" = LowerRoman
+parseTypeAttr "I" = UpperRoman
+parseTypeAttr "a" = LowerAlpha
+parseTypeAttr "A" = UpperAlpha
+parseTypeAttr "1" = Decimal
+parseTypeAttr _ = DefaultStyle
+
pOrderedList :: TagParser Blocks
pOrderedList = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
@@ -261,23 +278,19 @@ pOrderedList = try $ do
sta' = if all isDigit sta
then read sta
else 1
- sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ ->
- case lookup "type" attribs of
- Just "1" -> Decimal
- Just "I" -> UpperRoman
- Just "i" -> LowerRoman
- Just "A" -> UpperAlpha
- Just "a" -> LowerAlpha
- _ -> DefaultStyle
+
+ pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"]
+
+ typeAttr = fromMaybe "" $ lookup "type" attribs
+ classAttr = fromMaybe "" $ lookup "class" attribs
+ styleAttr = fromMaybe "" $ lookup "style" attribs
+ listStyle = fromMaybe "" $ pickListStyle styleAttr
+
+ sty' = foldOrElse DefaultStyle
+ [ parseTypeAttr typeAttr
+ , parseListStyleType classAttr
+ , parseListStyleType listStyle
+ ]
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
not (t ~== TagClose "ol"))
@@ -622,12 +635,11 @@ pSpan = try $ do
guardEnabled Ext_native_spans
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
contents <- pInTags "span" inline
- let attr' = mkAttr attr
- return $ case attr' of
- ("",[],[("style",s)])
- | filter (`notElem` " \t;") s == "font-variant:small-caps" ->
- B.smallcaps contents
- _ -> B.spanWith (mkAttr attr) contents
+ let isSmallCaps = fontVariant == "small-caps"
+ where styleAttr = fromMaybe "" $ lookup "style" attr
+ fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
+ let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
+ return $ tag contents
pRawHtmlInline :: TagParser Inlines
pRawHtmlInline = do