diff options
author | Ole Martin Ruud <barskern@outlook.com> | 2019-10-25 07:27:49 +0200 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2019-10-24 22:27:49 -0700 |
commit | 45479114e88a351fc63bb8eff142bb10cfd2c661 (patch) | |
tree | c7942576b5f590b78bab5c74c88a6a843afa78bd | |
parent | 91c325c714050313429f6d553d7fa1bef15892a2 (diff) | |
download | pandoc-45479114e88a351fc63bb8eff142bb10cfd2c661.tar.gz |
HTML reader/writer: Better handling of <q> with cite attribute (#5837)
* HTML reader: Handle cite attribute for quotes. If a `<q>` tag has a `cite` attribute, we interpret it as a Quoted element with an inner Span. Closes #5798
* Refactor url canonicalization into a helper function
* Modify HTML writer to handle quote with cite.
[0]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/q
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 57 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 12 | ||||
-rw-r--r-- | test/Tests/Writers/HTML.hs | 17 | ||||
-rw-r--r-- | test/html-reader.html | 4 | ||||
-rw-r--r-- | test/html-reader.native | 4 |
5 files changed, 69 insertions, 25 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index b6f88d3a9..1ed61591b 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -684,19 +684,27 @@ pSelfClosing f g = do return open pQ :: PandocMonad m => TagParser m Inlines -pQ = do - context <- asks quoteContext - let quoteType = case context of - InDoubleQuote -> SingleQuote - _ -> DoubleQuote - let innerQuoteContext = if quoteType == SingleQuote - then InSingleQuote - else InDoubleQuote - let constructor = case quoteType of - SingleQuote -> B.singleQuoted - DoubleQuote -> B.doubleQuoted - withQuoteContext innerQuoteContext $ - pInlinesInTags "q" constructor +pQ = choice $ map try [citedQuote, normalQuote] + where citedQuote = do + tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst)) + + url <- canonicalizeUrl $ T.unpack $ fromAttrib "cite" tag + let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ + maybeFromAttrib "id" tag + let cls = words $ T.unpack $ fromAttrib "class" tag + + makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)]) + normalQuote = do + pSatisfy $ tagOpenLit "q" (const True) + makeQuote id + makeQuote wrapper = do + ctx <- asks quoteContext + let (constructor, innerContext) = case ctx of + InDoubleQuote -> (B.singleQuoted, InSingleQuote) + _ -> (B.doubleQuoted, InDoubleQuote) + + content <- withQuoteContext innerContext (mconcat <$> manyTill inline (pCloses "q")) + return $ extractSpaces (constructor . wrapper) content pEmph :: PandocMonad m => TagParser m Inlines pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph @@ -757,21 +765,13 @@ pLink = try $ do Nothing -> return $ extractSpaces (B.spanWith (uid, cls, [])) lab Just url' -> do - mbBaseHref <- baseHref <$> getState - let url = case (parseURIReference url', mbBaseHref) of - (Just rel, Just bs) -> - show (rel `nonStrictRelativeTo` bs) - _ -> url' + url <- canonicalizeUrl url' return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab pImage :: PandocMonad m => TagParser m Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") - mbBaseHref <- baseHref <$> getState - let url' = T.unpack $ fromAttrib "src" tag - let url = case (parseURIReference url', mbBaseHref) of - (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) - _ -> url' + url <- canonicalizeUrl $ T.unpack $ fromAttrib "src" tag let title = T.unpack $ fromAttrib "title" tag let alt = T.unpack $ fromAttrib "alt" tag let uid = T.unpack $ fromAttrib "id" tag @@ -1292,6 +1292,17 @@ isSpace '\n' = True isSpace '\r' = True isSpace _ = False +-- Utilities + +-- | Adjusts a url according to the document's base URL. +canonicalizeUrl :: PandocMonad m => String -> TagParser m String +canonicalizeUrl url = do + mbBaseHref <- baseHref <$> getState + return $ case (parseURIReference url, mbBaseHref) of + (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + _ -> url + + -- Instances instance HasMacros HTMLState where diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index a1a617829..08d8345b0 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} @@ -1047,10 +1048,17 @@ inlineToHtml opts inline = do strToHtml "’") DoubleQuote -> (strToHtml "“", strToHtml "”") - in if writerHtmlQTags opts + + in if writerHtmlQTags opts then do modify $ \st -> st{ stQuotes = True } - H.q `fmap` inlineListToHtml opts lst + let (maybeAttr, lst') = case lst of + [Span attr@(_, _, kvs) cs] + | any ((=="cite") . fst) kvs + -> (Just attr, cs) + cs -> (Nothing, cs) + H.q `fmap` inlineListToHtml opts lst' + >>= maybe return (addAttrs opts) maybeAttr else (\x -> leftQuote >> x >> rightQuote) `fmap` inlineListToHtml opts lst (Math t str) -> do diff --git a/test/Tests/Writers/HTML.hs b/test/Tests/Writers/HTML.hs index de8b1ef17..94549e0d8 100644 --- a/test/Tests/Writers/HTML.hs +++ b/test/Tests/Writers/HTML.hs @@ -13,6 +13,11 @@ import Text.Pandoc.Builder html :: (ToPandoc a) => a -> String html = unpack . purely (writeHtml4String def{ writerWrapText = WrapNone }) . toPandoc +htmlQTags :: (ToPandoc a) => a -> String +htmlQTags = unpack + . purely (writeHtml4String def{ writerWrapText = WrapNone, writerHtmlQTags = True }) + . toPandoc + {- "my test" =: X =?> Y @@ -48,4 +53,16 @@ tests = [ testGroup "inline code" definitionList [(mempty, [para $ text "foo bar"])] =?> "<dl><dt></dt><dd><p>foo bar</p></dd></dl>" ] + , testGroup "quotes" + [ "quote with cite attribute (without q-tags)" =: + doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples")) + =?> "“<span cite=\"http://example.org\">examples</span>”" + , tQ "quote with cite attribute (with q-tags)" $ + doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples")) + =?> "<q cite=\"http://example.org\">examples</q>" + ] ] + where + tQ :: (ToString a, ToPandoc a) + => String -> (a, String) -> TestTree + tQ = test htmlQTags diff --git a/test/html-reader.html b/test/html-reader.html index a2bca5d2c..ae937af82 100644 --- a/test/html-reader.html +++ b/test/html-reader.html @@ -81,6 +81,10 @@ span.pandocNoteMarker { } </blockquote> <p>And a following paragraph.</p> <hr /> +<h1>Inline quotes</h1> +<p>Normal text but then a <q cite="https://www.imdb.com/title/tt0062622/quotes/qt0396921">inline quote</q>.</p> +<p><q>Missing a cite attribute means its just normal text</q></p> +<hr /> <h1>Code Blocks</h1> <p>Code:</p> <pre><code>---- (should be four hyphens) diff --git a/test/html-reader.native b/test/html-reader.native index acd0087ef..5643fb73f 100644 --- a/test/html-reader.native +++ b/test/html-reader.native @@ -51,6 +51,10 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl [Para [Str "Don't",Space,Str "quote",Space,Str "me."]]] ,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph."] ,HorizontalRule +,Header 1 ("inline-quotes",[],[]) [Str "Inline",Space,Str "quotes"] +,Para [Str "Normal",Space,Str "text",Space,Str "but",Space,Str "then",Space,Str "a",Space,Quoted DoubleQuote [Span ("",[],[("cite","https://www.imdb.com/title/tt0062622/quotes/qt0396921")]) [Str "inline",Space,Str "quote"]],Str "."] +,Para [Quoted DoubleQuote [Str "Missing",Space,Str "a",Space,Str "cite",Space,Str "attribute",Space,Str "means",Space,Str "its",Space,Str "just",Space,Str "normal",Space,Str "text"]] +,HorizontalRule ,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"] ,Para [Str "Code:"] ,CodeBlock ("",[],[]) "---- (should be four hyphens)\n\nsub status {\n print \"working\";\n}\n\nthis code block is indented by one tab" |