From 9d52ecdd422a6821c9f37f53e3d30d4be4e41e8f Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sat, 16 Aug 2014 22:57:00 -0700
Subject: HTML reader: Parse appropriately styled span as SmallCaps.

---
 src/Text/Pandoc/Readers/HTML.hs | 7 ++++++-
 tests/html-reader.html          | 1 +
 tests/html-reader.native        | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index cee7ea300..bd60a74fa 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -570,7 +570,12 @@ pSpan = try $ do
   guardEnabled Ext_native_spans
   TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
   contents <- pInTags "span" inline
-  return $ B.spanWith (mkAttr attr) contents
+  let attr' = mkAttr attr
+  return $ case attr' of
+                ("",[],[("style",s)])
+                  | filter (`notElem` " \t;") s == "font-variant:small-caps" ->
+                      B.smallcaps contents
+                _ -> B.spanWith (mkAttr attr) contents
 
 pRawHtmlInline :: TagParser Inlines
 pRawHtmlInline = do
diff --git a/tests/html-reader.html b/tests/html-reader.html
index d059d7b4b..14ad3ed54 100644
--- a/tests/html-reader.html
+++ b/tests/html-reader.html
@@ -309,6 +309,7 @@ These should not be escaped: \$ \\ \> \[ \{

This is strong and em.

So is this word.

This is code: >, $, \, \$, <html>.

+<p>This is <span style="font-variant: small-caps;">small caps</span>.</p>


Smart quotes, ellipses, dashes

"Hello," said the spider. "'Shelob' is my name."

diff --git a/tests/html-reader.native b/tests/html-reader.native
index c6ed36910..aef6e40fc 100644
--- a/tests/html-reader.native
+++ b/tests/html-reader.native
@@ -193,6 +193,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
 ,Para [Strong [Emph [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]]]
 ,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."]
 ,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",Space,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",Space,Code ("",[],[]) "<html>",Str "."]
+,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "small",Space,Str "caps"],Str "."]
 ,HorizontalRule
 ,Header 1 ("",[],[]) [Str "Smart",Space,Str "quotes,",Space,Str "ellipses,",Space,Str "dashes"]
 ,Para [Str "\"Hello,\"",Space,Str "said",Space,Str "the",Space,Str "spider.",Space,Str "\"'Shelob'",Space,Str "is",Space,Str "my",Space,Str "name.\""]
-- 
cgit v1.2.3