aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOle Martin Ruud <barskern@outlook.com>2019-10-25 07:27:49 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2019-10-24 22:27:49 -0700
commit45479114e88a351fc63bb8eff142bb10cfd2c661 (patch)
treec7942576b5f590b78bab5c74c88a6a843afa78bd
parent91c325c714050313429f6d553d7fa1bef15892a2 (diff)
downloadpandoc-45479114e88a351fc63bb8eff142bb10cfd2c661.tar.gz
HTML reader/writer: Better handling of <q> with cite attribute (#5837)
* HTML reader: Handle cite attribute for quotes. If a `<q>` tag has a `cite` attribute, we interpret it as a Quoted element with an inner Span. Closes #5798 * Refactor url canonicalization into a helper function * Modify HTML writer to handle quote with cite. [0]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/q
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs57
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs12
-rw-r--r--test/Tests/Writers/HTML.hs17
-rw-r--r--test/html-reader.html4
-rw-r--r--test/html-reader.native4
5 files changed, 69 insertions, 25 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b6f88d3a9..1ed61591b 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -684,19 +684,27 @@ pSelfClosing f g = do
return open
pQ :: PandocMonad m => TagParser m Inlines
-pQ = do
- context <- asks quoteContext
- let quoteType = case context of
- InDoubleQuote -> SingleQuote
- _ -> DoubleQuote
- let innerQuoteContext = if quoteType == SingleQuote
- then InSingleQuote
- else InDoubleQuote
- let constructor = case quoteType of
- SingleQuote -> B.singleQuoted
- DoubleQuote -> B.doubleQuoted
- withQuoteContext innerQuoteContext $
- pInlinesInTags "q" constructor
+pQ = choice $ map try [citedQuote, normalQuote]
+ where citedQuote = do
+ tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst))
+
+ url <- canonicalizeUrl $ T.unpack $ fromAttrib "cite" tag
+ let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
+ maybeFromAttrib "id" tag
+ let cls = words $ T.unpack $ fromAttrib "class" tag
+
+ makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)])
+ normalQuote = do
+ pSatisfy $ tagOpenLit "q" (const True)
+ makeQuote id
+ makeQuote wrapper = do
+ ctx <- asks quoteContext
+ let (constructor, innerContext) = case ctx of
+ InDoubleQuote -> (B.singleQuoted, InSingleQuote)
+ _ -> (B.doubleQuoted, InDoubleQuote)
+
+ content <- withQuoteContext innerContext (mconcat <$> manyTill inline (pCloses "q"))
+ return $ extractSpaces (constructor . wrapper) content
pEmph :: PandocMonad m => TagParser m Inlines
pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
@@ -757,21 +765,13 @@ pLink = try $ do
Nothing ->
return $ extractSpaces (B.spanWith (uid, cls, [])) lab
Just url' -> do
- mbBaseHref <- baseHref <$> getState
- let url = case (parseURIReference url', mbBaseHref) of
- (Just rel, Just bs) ->
- show (rel `nonStrictRelativeTo` bs)
- _ -> url'
+ url <- canonicalizeUrl url'
return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab
pImage :: PandocMonad m => TagParser m Inlines
pImage = do
tag <- pSelfClosing (=="img") (isJust . lookup "src")
- mbBaseHref <- baseHref <$> getState
- let url' = T.unpack $ fromAttrib "src" tag
- let url = case (parseURIReference url', mbBaseHref) of
- (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
- _ -> url'
+ url <- canonicalizeUrl $ T.unpack $ fromAttrib "src" tag
let title = T.unpack $ fromAttrib "title" tag
let alt = T.unpack $ fromAttrib "alt" tag
let uid = T.unpack $ fromAttrib "id" tag
@@ -1292,6 +1292,17 @@ isSpace '\n' = True
isSpace '\r' = True
isSpace _ = False
+-- Utilities
+
+-- | Adjusts a url according to the document's base URL.
+canonicalizeUrl :: PandocMonad m => String -> TagParser m String
+canonicalizeUrl url = do
+ mbBaseHref <- baseHref <$> getState
+ return $ case (parseURIReference url, mbBaseHref) of
+ (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
+ _ -> url
+
+
-- Instances
instance HasMacros HTMLState where
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index a1a617829..08d8345b0 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE CPP #-}
+{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
@@ -1047,10 +1048,17 @@ inlineToHtml opts inline = do
strToHtml "’")
DoubleQuote -> (strToHtml "“",
strToHtml "”")
- in if writerHtmlQTags opts
+
+ in if writerHtmlQTags opts
then do
modify $ \st -> st{ stQuotes = True }
- H.q `fmap` inlineListToHtml opts lst
+ let (maybeAttr, lst') = case lst of
+ [Span attr@(_, _, kvs) cs]
+ | any ((=="cite") . fst) kvs
+ -> (Just attr, cs)
+ cs -> (Nothing, cs)
+ H.q `fmap` inlineListToHtml opts lst'
+ >>= maybe return (addAttrs opts) maybeAttr
else (\x -> leftQuote >> x >> rightQuote)
`fmap` inlineListToHtml opts lst
(Math t str) -> do
diff --git a/test/Tests/Writers/HTML.hs b/test/Tests/Writers/HTML.hs
index de8b1ef17..94549e0d8 100644
--- a/test/Tests/Writers/HTML.hs
+++ b/test/Tests/Writers/HTML.hs
@@ -13,6 +13,11 @@ import Text.Pandoc.Builder
html :: (ToPandoc a) => a -> String
html = unpack . purely (writeHtml4String def{ writerWrapText = WrapNone }) . toPandoc
+htmlQTags :: (ToPandoc a) => a -> String
+htmlQTags = unpack
+ . purely (writeHtml4String def{ writerWrapText = WrapNone, writerHtmlQTags = True })
+ . toPandoc
+
{-
"my test" =: X =?> Y
@@ -48,4 +53,16 @@ tests = [ testGroup "inline code"
definitionList [(mempty, [para $ text "foo bar"])]
=?> "<dl><dt></dt><dd><p>foo bar</p></dd></dl>"
]
+ , testGroup "quotes"
+ [ "quote with cite attribute (without q-tags)" =:
+ doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples"))
+ =?> "“<span cite=\"http://example.org\">examples</span>”"
+ , tQ "quote with cite attribute (with q-tags)" $
+ doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples"))
+ =?> "<q cite=\"http://example.org\">examples</q>"
+ ]
]
+ where
+ tQ :: (ToString a, ToPandoc a)
+ => String -> (a, String) -> TestTree
+ tQ = test htmlQTags
diff --git a/test/html-reader.html b/test/html-reader.html
index a2bca5d2c..ae937af82 100644
--- a/test/html-reader.html
+++ b/test/html-reader.html
@@ -81,6 +81,10 @@ span.pandocNoteMarker { }
</blockquote>
<p>And a following paragraph.</p>
<hr />
+<h1>Inline quotes</h1>
+<p>Normal text but then a <q cite="https://www.imdb.com/title/tt0062622/quotes/qt0396921">inline quote</q>.</p>
+<p><q>Missing a cite attribute means its just normal text</q></p>
+<hr />
<h1>Code Blocks</h1>
<p>Code:</p>
<pre><code>---- (should be four hyphens)
diff --git a/test/html-reader.native b/test/html-reader.native
index acd0087ef..5643fb73f 100644
--- a/test/html-reader.native
+++ b/test/html-reader.native
@@ -51,6 +51,10 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
[Para [Str "Don't",Space,Str "quote",Space,Str "me."]]]
,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph."]
,HorizontalRule
+,Header 1 ("inline-quotes",[],[]) [Str "Inline",Space,Str "quotes"]
+,Para [Str "Normal",Space,Str "text",Space,Str "but",Space,Str "then",Space,Str "a",Space,Quoted DoubleQuote [Span ("",[],[("cite","https://www.imdb.com/title/tt0062622/quotes/qt0396921")]) [Str "inline",Space,Str "quote"]],Str "."]
+,Para [Quoted DoubleQuote [Str "Missing",Space,Str "a",Space,Str "cite",Space,Str "attribute",Space,Str "means",Space,Str "its",Space,Str "just",Space,Str "normal",Space,Str "text"]]
+,HorizontalRule
,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"]
,Para [Str "Code:"]
,CodeBlock ("",[],[]) "---- (should be four hyphens)\n\nsub status {\n print \"working\";\n}\n\nthis code block is indented by one tab"