aboutsummaryrefslogtreecommitdiff
path: root/test/Tests/Readers/HTML.hs
blob: 4ed1e44af957afaf72a6a872357bda3103393876 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Tests.Readers.HTML
   Copyright   : © 2006-2021 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Tests for the HTML reader.
-}
module Tests.Readers.HTML (tests) where

import Data.Text (Text)
import qualified Data.Text as T
import Test.Tasty
import Test.Tasty.QuickCheck
import Test.Tasty.Options (IsOption(defaultValue))
import Tests.Helpers
import Text.Pandoc
import Text.Pandoc.Shared (isHeaderBlock)
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder
import Text.Pandoc.Walk (walk)

html :: Text -> Pandoc
html = purely $ readHtml def

htmlNativeDivs :: Text -> Pandoc
htmlNativeDivs = purely $ readHtml def { readerExtensions = enableExtension Ext_native_divs $ readerExtensions def }

makeRoundTrip :: Block -> Block
makeRoundTrip CodeBlock{} = Para [Str "code block was here"]
makeRoundTrip LineBlock{} = Para [Str "line block was here"]
makeRoundTrip RawBlock{} = Para [Str "raw block was here"]
makeRoundTrip (Div attr bs) = Div attr $ filter (not . isHeaderBlock) bs
-- avoids round-trip failures related to makeSections
-- e.g. with [Div ("loc",[],[("a","11"),("b_2","a b c")]) [Header 3 ("",[],[]) []]]
makeRoundTrip Table{} = Para [Str "table block was here"]
makeRoundTrip x           = x

removeRawInlines :: Inline -> Inline
removeRawInlines RawInline{} = Str "raw inline was here"
removeRawInlines x           = x

roundTrip :: Blocks -> Bool
roundTrip b = d'' == d'''
  where d = walk removeRawInlines $
            walk makeRoundTrip $ Pandoc nullMeta $ toList b
        d' = rewrite d
        d'' = rewrite d'
        d''' = rewrite d''
        rewrite = html . (`T.snoc` '\n') .
                  purely (writeHtml5String def
                            { writerWrapText = WrapPreserve })

tests :: [TestTree]
tests = [ testGroup "base tag"
          [ test html "simple" $
            "<head><base href=\"http://www.w3schools.com/images/foo\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?>
            plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman"))
          , test html "slash at end of base" $
            "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?>
            plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman"))
          , test html "slash at beginning of href" $
            "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"/stickman.gif\" alt=\"Stickman\"></head>" =?>
            plain (image "http://www.w3schools.com/stickman.gif" "" (text "Stickman"))
          , test html "absolute URL" $
            "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"http://example.com/stickman.gif\" alt=\"Stickman\"></head>" =?>
            plain (image "http://example.com/stickman.gif" "" (text "Stickman"))
          ]
        , testGroup "anchors"
          [ test html "anchor without href" $ "<a name=\"anchor\"/>" =?>
            plain (spanWith ("anchor",[],[]) mempty)
          ]
        , testGroup "img"
          [ test html "data-external attribute" $ "<img data-external=\"1\" src=\"http://example.com/stickman.gif\">" =?>
            plain (imageWith ("", [], [("external", "1")]) "http://example.com/stickman.gif" "" "")
          , test html "title" $ "<img title=\"The title\" src=\"http://example.com/stickman.gif\">" =?>
            plain (imageWith ("", [], []) "http://example.com/stickman.gif" "The title" "")
          ]
        , testGroup "lang"
          [ test html "lang on <html>" $ "<html lang=\"es\">hola" =?>
            setMeta "lang" (text "es") (doc (plain (text "hola")))
          , test html "xml:lang on <html>" $ "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"es\"><head></head><body>hola</body></html>" =?>
            setMeta "lang" (text "es") (doc (plain (text "hola")))
          ]
        , testGroup "main"
          [ test htmlNativeDivs "<main> becomes <div role=main>" $ "<main>hello</main>" =?>
            doc (divWith ("", [], [("role", "main")]) (plain (text "hello")))
          , test htmlNativeDivs "<main role=X> becomes <div role=X>" $ "<main role=foobar>hello</main>" =?>
            doc (divWith ("", [], [("role", "foobar")]) (plain (text "hello")))
          , test htmlNativeDivs "<main> has attributes preserved" $ "<main id=foo class=bar data-baz=qux>hello</main>" =?>
            doc (divWith ("foo", ["bar"], [("role", "main"), ("baz", "qux")]) (plain (text "hello")))
          , test htmlNativeDivs "<main> closes <p>" $ "<p>hello<main>main content</main>" =?>
            doc (para (text "hello") <> divWith ("", [], [("role", "main")]) (plain (text "main content")))
          , test htmlNativeDivs "<main> followed by text" $ "<main>main content</main>non-main content" =?>
            doc (divWith ("", [], [("role", "main")]) (plain (text "main content")) <> plain (text "non-main content"))
          ]
        , testGroup "samp"
          [
            test html "inline samp block" $
            "<samp>Answer is 42</samp>" =?>
            plain (codeWith ("",["sample"],[]) "Answer is 42")
          ]
        , testGroup "var"
          [ test html "inline var block" $
            "<var>result</var>" =?>
            plain (codeWith ("",["variable"],[]) "result")
          ]
        , testGroup "header"
          [ test htmlNativeDivs "<header> is parsed as a div" $
            "<header id=\"title\">Title</header>" =?>
            divWith ("title", mempty, mempty) (plain "Title")
          ]
        , testGroup "code block"
          [ test html "attributes in pre > code element" $
            "<pre><code id=\"a\" class=\"python\">\nprint('hi')\n</code></pre>"
            =?>
            codeBlockWith ("a", ["python"], []) "print('hi')"

          , test html "attributes in pre take precendence" $
            "<pre id=\"c\"><code id=\"d\">\nprint('hi mom!')\n</code></pre>"
            =?>
            codeBlockWith ("c", [], []) "print('hi mom!')"
          ]
        , askOption $ \(QuickCheckTests numtests) ->
            testProperty "Round trip" $
              withMaxSuccess (if QuickCheckTests numtests == defaultValue
                                 then 25
                                 else numtests) roundTrip
        ]