aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Parsing.hs93
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs79
2 files changed, 114 insertions, 58 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index eec4a3bc9..66ebca253 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -2,6 +2,7 @@
FlexibleContexts
, GeneralizedNewtypeDeriving
, TypeSynonymInstances
+, MultiParamTypeClasses
, FlexibleInstances #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -80,6 +81,7 @@ module Text.Pandoc.Parsing ( anyLine,
HeaderType (..),
ParserContext (..),
QuoteContext (..),
+ HasQuoteContext (..),
NoteTable,
NoteTable',
KeyTable,
@@ -88,7 +90,6 @@ module Text.Pandoc.Parsing ( anyLine,
toKey,
registerHeader,
smartPunctuation,
- withQuoteContext,
singleQuoteStart,
singleQuoteEnd,
doubleQuoteStart,
@@ -106,6 +107,7 @@ module Text.Pandoc.Parsing ( anyLine,
runF,
askF,
asksF,
+ token,
-- * Re-exports from Text.Pandoc.Parsec
Stream,
runParser,
@@ -160,7 +162,6 @@ module Text.Pandoc.Parsing ( anyLine,
setSourceColumn,
setSourceLine,
newPos,
- token
)
where
@@ -170,7 +171,7 @@ import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..))
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.XML (fromEntities)
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
-import Text.Parsec
+import Text.Parsec hiding (token)
import Text.Parsec.Pos (newPos)
import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
isHexDigit, isSpace )
@@ -484,7 +485,8 @@ mathDisplayWith op cl = try $ do
string op
many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl)
-mathDisplay :: Stream s m Char => ParserT s ParserState m String
+mathDisplay :: (HasReaderOptions st, Stream s m Char)
+ => ParserT s st m String
mathDisplay =
(guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
<|> (guardEnabled Ext_tex_math_single_backslash >>
@@ -492,7 +494,8 @@ mathDisplay =
<|> (guardEnabled Ext_tex_math_double_backslash >>
mathDisplayWith "\\\\[" "\\\\]")
-mathInline :: Stream s m Char => ParserT s ParserState m String
+mathInline :: (HasReaderOptions st , Stream s m Char)
+ => ParserT s st m String
mathInline =
(guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
<|> (guardEnabled Ext_tex_math_single_backslash >>
@@ -909,6 +912,21 @@ class HasReaderOptions st where
-- default
getOption f = (f . extractReaderOptions) <$> getState
+class HasQuoteContext st m where
+ getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
+ withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
+
+instance Monad m => HasQuoteContext ParserState m where
+ getQuoteContext = stateQuoteContext <$> getState
+ withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = stateQuoteContext oldState
+ setState oldState { stateQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { stateQuoteContext = oldQuoteContext }
+ return result
+
instance HasReaderOptions ParserState where
extractReaderOptions = stateOptions
@@ -1051,9 +1069,9 @@ registerHeader (ident,classes,kvs) header' = do
failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m ()
failUnlessSmart = getOption readerSmart >>= guard
-smartPunctuation :: Stream s m Char
- => ParserT s ParserState m Inlines
- -> ParserT s ParserState m Inlines
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
smartPunctuation inlineParser = do
failUnlessSmart
choice [ quoted inlineParser, apostrophe, dash, ellipses ]
@@ -1061,46 +1079,33 @@ smartPunctuation inlineParser = do
apostrophe :: Stream s m Char => ParserT s st m Inlines
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
-quoted :: Stream s m Char
- => ParserT s ParserState m Inlines
- -> ParserT s ParserState m Inlines
+quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
-withQuoteContext :: Stream s m t
- => QuoteContext
- -> ParserT s ParserState m a
- -> ParserT s ParserState m a
-withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = stateQuoteContext oldState
- setState oldState { stateQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { stateQuoteContext = oldQuoteContext }
- return result
-
-singleQuoted :: Stream s m Char
- => ParserT s ParserState m Inlines
- -> ParserT s ParserState m Inlines
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
singleQuoted inlineParser = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
return . B.singleQuoted . mconcat
-doubleQuoted :: Stream s m Char
- => ParserT s ParserState m Inlines
- -> ParserT s ParserState m Inlines
+doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
doubleQuoted inlineParser = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
return . B.doubleQuoted . mconcat
-failIfInQuoteContext :: Stream s m t
+failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
=> QuoteContext
- -> ParserT s ParserState m ()
+ -> ParserT s st m ()
failIfInQuoteContext context = do
- st <- getState
- if stateQuoteContext st == context
+ context' <- getQuoteContext
+ if context' == context
then fail "already inside quotes"
else return ()
@@ -1110,8 +1115,8 @@ charOrRef cs =
guard (c `elem` cs)
return c)
-singleQuoteStart :: Stream s m Char
- => ParserT s ParserState m ()
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
-- single quote start can't be right after str
@@ -1124,8 +1129,8 @@ singleQuoteEnd = try $ do
charOrRef "'\8217\146"
notFollowedBy alphaNum
-doubleQuoteStart :: Stream s m Char
- => ParserT s ParserState m ()
+doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
try $ do charOrRef "\"\8220\147"
@@ -1179,6 +1184,14 @@ citeKey = try $ do
let key = firstChar:rest
return (suppress_author, key)
+
+token :: (Stream s m t)
+ => (t -> String)
+ -> (t -> SourcePos)
+ -> (t -> Maybe a)
+ -> ParsecT s st m a
+token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
+
--
-- Macros
--
@@ -1200,9 +1213,9 @@ macro = do
else return $ rawBlock "latex" def'
-- | Apply current macros to string.
-applyMacros' :: Stream [Char] m Char
+applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
=> String
- -> ParserT [Char] ParserState m String
+ -> ParserT [Char] st m String
applyMacros' target = do
apply <- getOption readerApplyMacros
if apply
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 2414dfbf7..597156a5e 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -40,7 +41,7 @@ import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
+import Text.Pandoc.Builder (HasMeta (..), Blocks, Inlines, trimInlines)
import Text.Pandoc.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing
@@ -52,6 +53,8 @@ import Control.Applicative ( (<$>), (<$), (<*) )
import Data.Monoid
import Text.Printf (printf)
import Debug.Trace (trace)
+import Data.Default (Default (..))
+import Control.Monad.Reader (Reader, runReader, asks, local, ask)
isSpace :: Char -> Bool
isSpace ' ' = True
@@ -64,17 +67,26 @@ readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
readHtml opts inp =
- case runParser parseDoc def{ stateOptions = opts } "source" tags of
+ case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } ) "source" tags of
Left err' -> error $ "\nError at " ++ show err'
Right result -> result
where tags = canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
- meta <- stateMeta <$> getState
+ meta <- stateMeta . parserState <$> getState
return $ Pandoc meta (B.toList blocks)
-type TagParser = Parser [Tag String] ParserState
+data HTMLState =
+ HTMLState
+ { parserState :: ParserState
+ }
+
+data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext }
+
+type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal)
+
+type TagParser = HTMLParser [Tag String]
pBody :: TagParser Blocks
pBody = pInTags "body" block
@@ -115,7 +127,6 @@ block = do
(take 60 $ show $ B.toList res)) (return ())
return res
-
pList :: TagParser Blocks
pList = pBulletList <|> pOrderedList <|> pDefinitionList
@@ -365,8 +376,8 @@ pSelfClosing f g = do
pQ :: TagParser Inlines
pQ = do
- quoteContext <- stateQuoteContext `fmap` getState
- let quoteType = case quoteContext of
+ context <- asks quoteContext
+ let quoteType = case context of
InDoubleQuote -> SingleQuote
_ -> DoubleQuote
let innerQuoteContext = if quoteType == SingleQuote
@@ -477,7 +488,8 @@ pTagText :: TagParser Inlines
pTagText = try $ do
(TagText str) <- pSatisfy isTagText
st <- getState
- case runParser (many pTagContents) st "text" str of
+ qu <- ask
+ case flip runReader qu $ runParserT (many pTagContents) st "text" str of
Left _ -> fail $ "Could not parse `" ++ str ++ "'"
Right result -> return $ mconcat result
@@ -486,7 +498,9 @@ pBlank = try $ do
(TagText str) <- pSatisfy isTagText
guard $ all isSpace str
-pTagContents :: Parser [Char] ParserState Inlines
+type InlinesParser = HTMLParser String
+
+pTagContents :: InlinesParser Inlines
pTagContents =
B.displayMath <$> mathDisplay
<|> B.math <$> mathInline
@@ -496,12 +510,11 @@ pTagContents =
<|> pSymbol
<|> pBad
-pStr :: Parser [Char] ParserState Inlines
+pStr :: InlinesParser Inlines
pStr = do
result <- many1 $ satisfy $ \c ->
not (isSpace c) && not (isSpecial c) && not (isBad c)
- pos <- getPosition
- updateState $ \s -> s{ stateLastStrPos = Just pos }
+ updateLastStrPos
return $ B.str result
isSpecial :: Char -> Bool
@@ -516,13 +529,13 @@ isSpecial '\8220' = True
isSpecial '\8221' = True
isSpecial _ = False
-pSymbol :: Parser [Char] ParserState Inlines
+pSymbol :: InlinesParser Inlines
pSymbol = satisfy isSpecial >>= return . B.str . (:[])
isBad :: Char -> Bool
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
-pBad :: Parser [Char] ParserState Inlines
+pBad :: InlinesParser Inlines
pBad = do
c <- satisfy isBad
let c' = case c of
@@ -556,7 +569,7 @@ pBad = do
_ -> '?'
return $ B.str [c']
-pSpace :: Parser [Char] ParserState Inlines
+pSpace :: InlinesParser Inlines
pSpace = many1 (satisfy isSpace) >> return B.space
--
@@ -672,19 +685,23 @@ _ `closes` _ = False
--- parsers for use in markdown, textile readers
-- | Matches a stretch of HTML in balanced tags.
-htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String
+htmlInBalanced :: (Monad m)
+ => (Tag String -> Bool)
+ -> ParserT String st m String
htmlInBalanced f = try $ do
(TagOpen t _, tag) <- htmlTag f
guard $ '/' `notElem` tag -- not a self-closing tag
let stopper = htmlTag (~== TagClose t)
- let anytag = liftM snd $ htmlTag (const True)
+ let anytag = snd <$> htmlTag (const True)
contents <- many $ notFollowedBy' stopper >>
(htmlInBalanced f <|> anytag <|> count 1 anyChar)
endtag <- liftM snd stopper
return $ tag ++ concat contents ++ endtag
-- | Matches a tag meeting a certain condition.
-htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String)
+htmlTag :: Monad m
+ => (Tag String -> Bool)
+ -> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
@@ -707,3 +724,29 @@ mkAttr attr = (attribsId, attribsClasses, attribsKV)
attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+-- Instances
+
+-- This signature should be more general
+-- MonadReader HTMLLocal m => HasQuoteContext st m
+instance HasQuoteContext st (Reader HTMLLocal) where
+ getQuoteContext = asks quoteContext
+ withQuoteContext q = local (\s -> s{quoteContext = q})
+
+instance HasReaderOptions HTMLState where
+ extractReaderOptions = extractReaderOptions . parserState
+
+instance Default HTMLState where
+ def = HTMLState def
+
+instance HasMeta HTMLState where
+ setMeta s b st = st {parserState = setMeta s b $ parserState st}
+ deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
+
+instance Default HTMLLocal where
+ def = HTMLLocal NoQuote
+
+instance HasLastStrPosition HTMLState where
+ setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
+ getLastStrPos = getLastStrPos . parserState
+
+