1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{- |
Module : Text.Pandoc.Readers.HTML.Types
Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Types for pandoc's HTML reader.
-}
module Text.Pandoc.Readers.HTML.Types
( TagParser
, HTMLParser
, HTMLState (..)
, HTMLLocal (..)
)
where
import Control.Monad.Reader (ReaderT, asks, local)
import Data.Default (Default (def))
import Data.Map (Map)
import Data.Set (Set)
import Data.Text (Text)
import Network.URI (URI)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, HasMeta (..))
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Logging (LogMessage)
import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Parsing
( HasIdentifierList (..), HasLastStrPosition (..), HasLogMessages (..)
, HasMacros (..), HasQuoteContext (..), HasReaderOptions (..)
, ParserT, ParserState, QuoteContext (NoQuote)
)
import Text.Pandoc.Readers.LaTeX.Types (Macro)
-- | HTML parser type
type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m)
-- | HTML parser, expecting @Tag Text@ as tokens.
type TagParser m = HTMLParser m [Tag Text]
-- | Global HTML parser state
data HTMLState = HTMLState
{ parserState :: ParserState
, noteTable :: [(Text, Blocks)]
, baseHref :: Maybe URI
, identifiers :: Set Text
, logMessages :: [LogMessage]
, macros :: Map Text Macro
, readerOpts :: ReaderOptions
}
-- | Local HTML parser state
data HTMLLocal = HTMLLocal
{ quoteContext :: QuoteContext
, inChapter :: Bool -- ^ Set if in chapter section
, inPlain :: Bool -- ^ Set if in pPlain
}
-- Instances
instance HasMacros HTMLState where
extractMacros = macros
updateMacros f st = st{ macros = f $ macros st }
instance HasIdentifierList HTMLState where
extractIdentifierList = identifiers
updateIdentifierList f s = s{ identifiers = f (identifiers s) }
instance HasLogMessages HTMLState where
addLogMessage m s = s{ logMessages = m : logMessages s }
getLogMessages = reverse . logMessages
-- This signature should be more general
-- MonadReader HTMLLocal m => HasQuoteContext st m
instance PandocMonad m => HasQuoteContext HTMLState (ReaderT HTMLLocal m) where
getQuoteContext = asks quoteContext
withQuoteContext q = local (\s -> s{quoteContext = q})
instance HasReaderOptions HTMLState where
extractReaderOptions = extractReaderOptions . parserState
instance HasMeta HTMLState where
setMeta s b st = st {parserState = setMeta s b $ parserState st}
deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
instance Default HTMLLocal where
def = HTMLLocal NoQuote False False
instance HasLastStrPosition HTMLState where
setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
getLastStrPos = getLastStrPos . parserState
|