From 7ef8700734ea8caae083e372b51cfe7bf2c51f9b Mon Sep 17 00:00:00 2001
From: Ophir Lifshitz <hangfromthefloor@gmail.com>
Date: Fri, 24 Jul 2015 02:53:17 -0400
Subject: HTML Reader: Parse <ol> type, class, and inline list-style(-type) CSS

---
 src/Text/Pandoc/CSS.hs          | 35 ++++++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/HTML.hs | 47 ++++++++++++++++++++++++++---------------
 2 files changed, 65 insertions(+), 17 deletions(-)
 create mode 100644 src/Text/Pandoc/CSS.hs

(limited to 'src')

diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs
new file mode 100644
index 000000000..32a5ea129
--- /dev/null
+++ b/src/Text/Pandoc/CSS.hs
@@ -0,0 +1,35 @@
+module Text.Pandoc.CSS ( foldOrElse,
+                         pickStyleAttrProps
+                       )
+where
+
+import Text.Pandoc.Shared (trim)
+import Text.Parsec
+import Text.Parsec.String
+import Control.Applicative ((<*))
+
+ruleParser :: Parser (String, String)
+ruleParser = do
+    p <- many1 (noneOf ":")  <* char ':'
+    v <- many1 (noneOf ":;") <* char ';' <* spaces
+    return (trim p, trim v)
+
+styleAttrParser :: Parser [(String, String)]
+styleAttrParser = do
+    p <- many1 ruleParser
+    return p
+
+orElse :: Eq a => a -> a -> a -> a
+orElse v x y = if v == x then y else x
+
+foldOrElse :: Eq a => a -> [a] -> a
+foldOrElse v xs = foldr (orElse v) v xs
+
+eitherToMaybe :: Either a b -> Maybe b
+eitherToMaybe (Right x) = Just x
+eitherToMaybe _ = Nothing
+
+pickStyleAttrProps :: [String] -> String -> Maybe String
+pickStyleAttrProps lookupProps styleAttr = do
+    styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr
+    foldOrElse Nothing $ map (flip lookup styles) lookupProps
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index fcba16e04..17296eb3d 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -64,6 +64,7 @@ import Data.Default (Default (..), def)
 import Control.Monad.Reader (Reader,ask, asks, local, runReader)
 import Network.URI (isURI)
 import Text.Pandoc.Error
+import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
 
 import Text.Parsec.Error
 
@@ -252,6 +253,22 @@ pListItem nonItem = do
   let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
   (liDiv <>) <$> pInTags "li" block <* skipMany nonItem
 
+parseListStyleType :: String -> ListNumberStyle
+parseListStyleType "lower-roman" = LowerRoman
+parseListStyleType "upper-roman" = UpperRoman
+parseListStyleType "lower-alpha" = LowerAlpha
+parseListStyleType "upper-alpha" = UpperAlpha
+parseListStyleType "decimal"     = Decimal
+parseListStyleType _             = DefaultStyle
+
+parseTypeAttr :: String -> ListNumberStyle
+parseTypeAttr "i" = LowerRoman
+parseTypeAttr "I" = UpperRoman
+parseTypeAttr "a" = LowerAlpha
+parseTypeAttr "A" = UpperAlpha
+parseTypeAttr "1" = Decimal
+parseTypeAttr _   = DefaultStyle
+
 pOrderedList :: TagParser Blocks
 pOrderedList = try $ do
   TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
@@ -261,23 +278,19 @@ pOrderedList = try $ do
                              sta' = if all isDigit sta
                                        then read sta
                                        else 1
-                             sty = fromMaybe (fromMaybe "" $
-                                   lookup "style" attribs) $
-                                   lookup "class" attribs
-                             sty' = case sty of
-                                     "lower-roman"  -> LowerRoman
-                                     "upper-roman"  -> UpperRoman
-                                     "lower-alpha"  -> LowerAlpha
-                                     "upper-alpha"  -> UpperAlpha
-                                     "decimal"      -> Decimal
-                                     _              ->
-                                       case lookup "type" attribs of
-                                               Just "1" -> Decimal
-                                               Just "I" -> UpperRoman
-                                               Just "i" -> LowerRoman
-                                               Just "A" -> UpperAlpha
-                                               Just "a" -> LowerAlpha
-                                               _        -> DefaultStyle
+
+                             pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"]
+
+                             typeAttr  = fromMaybe "" $ lookup "type"  attribs
+                             classAttr = fromMaybe "" $ lookup "class" attribs
+                             styleAttr = fromMaybe "" $ lookup "style" attribs
+                             listStyle = fromMaybe "" $ pickListStyle styleAttr
+
+                             sty' = foldOrElse DefaultStyle
+                                      [ parseTypeAttr      typeAttr
+                                      , parseListStyleType classAttr
+                                      , parseListStyleType listStyle
+                                      ]
   let nonItem = pSatisfy (\t ->
                   not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
                   not (t ~== TagClose "ol"))
-- 
cgit v1.2.3


From 18b1b21a6af5638f16c9aca745f463bfd65e8417 Mon Sep 17 00:00:00 2001
From: Ophir Lifshitz <hangfromthefloor@gmail.com>
Date: Mon, 27 Jul 2015 20:08:04 -0400
Subject: HTML Reader: Detect font-variant with pickStyleAttrProps

---
 src/Text/Pandoc/Readers/HTML.hs | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 17296eb3d..02bfcb2bb 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -635,12 +635,11 @@ pSpan = try $ do
   guardEnabled Ext_native_spans
   TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
   contents <- pInTags "span" inline
-  let attr' = mkAttr attr
-  return $ case attr' of
-                ("",[],[("style",s)])
-                  | filter (`notElem` " \t;") s == "font-variant:small-caps" ->
-                     B.smallcaps contents
-                _ -> B.spanWith (mkAttr attr) contents
+  let isSmallCaps = fontVariant == "small-caps"
+                    where styleAttr   = fromMaybe "" $ lookup "style" attr
+                          fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
+  let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
+  return $ tag contents
 
 pRawHtmlInline :: TagParser Inlines
 pRawHtmlInline = do
-- 
cgit v1.2.3