Merge pull request #1457 from mpickering/generalstate

Generalised more in Parsing.hs to enable the use of custom state
author: John MacFarlane <jgm@berkeley.edu> 2014-07-26 19:01:26 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2014-07-26 19:01:26 -0700
commit: d9751f91c43ad96becb6aaf2f5abcd9c0b37754c (patch)
tree: 6ac474d328d39f0932ec008f0918846bf893a2d5
parent: 74690b191e53535ac53c519bf07c6e38c34175b2 (diff)
parent: 5e2d22a27e231f0ac62739b3bcd15b548c782f25 (diff)
download: pandoc-d9751f91c43ad96becb6aaf2f5abcd9c0b37754c.tar.gz
2 files changed, 114 insertions, 58 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index eec4a3bc9..66ebca253 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -2,6 +2,7 @@
   FlexibleContexts
 , GeneralizedNewtypeDeriving
 , TypeSynonymInstances
+, MultiParamTypeClasses
 , FlexibleInstances #-}
 {-
 Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -80,6 +81,7 @@ module Text.Pandoc.Parsing ( anyLine,
                              HeaderType (..),
                              ParserContext (..),
                              QuoteContext (..),
+                             HasQuoteContext (..),
                              NoteTable,
                              NoteTable',
                              KeyTable,
@@ -88,7 +90,6 @@ module Text.Pandoc.Parsing ( anyLine,
                              toKey,
                              registerHeader,
                              smartPunctuation,
-                             withQuoteContext,
                              singleQuoteStart,
                              singleQuoteEnd,
                              doubleQuoteStart,
@@ -106,6 +107,7 @@ module Text.Pandoc.Parsing ( anyLine,
                              runF,
                              askF,
                              asksF,
+                             token,
                              -- * Re-exports from Text.Pandoc.Parsec
                              Stream,
                              runParser,
@@ -160,7 +162,6 @@ module Text.Pandoc.Parsing ( anyLine,
                              setSourceColumn,
                              setSourceLine,
                              newPos,
-                             token
                              )
 where
 
@@ -170,7 +171,7 @@ import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..))
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.XML (fromEntities)
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
-import Text.Parsec
+import Text.Parsec hiding (token)
 import Text.Parsec.Pos (newPos)
 import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
                    isHexDigit, isSpace )
@@ -484,7 +485,8 @@ mathDisplayWith op cl = try $ do
   string op
   many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl)
 
-mathDisplay :: Stream s m Char => ParserT s ParserState m String
+mathDisplay :: (HasReaderOptions st, Stream s m Char)
+            => ParserT s st m String
 mathDisplay =
       (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
   <|> (guardEnabled Ext_tex_math_single_backslash >>
@@ -492,7 +494,8 @@ mathDisplay =
   <|> (guardEnabled Ext_tex_math_double_backslash >>
        mathDisplayWith "\\\\[" "\\\\]")
 
-mathInline :: Stream s m Char => ParserT s ParserState m String
+mathInline :: (HasReaderOptions st , Stream s m Char)
+           => ParserT s st m String
 mathInline =
       (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
   <|> (guardEnabled Ext_tex_math_single_backslash >>
@@ -909,6 +912,21 @@ class HasReaderOptions st where
   -- default
   getOption  f         = (f . extractReaderOptions) <$> getState
 
+class HasQuoteContext st m where
+  getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
+  withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
+
+instance Monad m => HasQuoteContext ParserState m where
+  getQuoteContext = stateQuoteContext <$> getState
+  withQuoteContext context parser = do
+    oldState <- getState
+    let oldQuoteContext = stateQuoteContext oldState
+    setState oldState { stateQuoteContext = context }
+    result <- parser
+    newState <- getState
+    setState newState { stateQuoteContext = oldQuoteContext }
+    return result
+
 instance HasReaderOptions ParserState where
   extractReaderOptions = stateOptions
 
@@ -1051,9 +1069,9 @@ registerHeader (ident,classes,kvs) header' = do
 failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m ()
 failUnlessSmart = getOption readerSmart >>= guard
 
-smartPunctuation :: Stream s m Char
-                 => ParserT s ParserState m Inlines
-                 -> ParserT s ParserState m Inlines
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+                 => ParserT s st m Inlines
+                 -> ParserT s st m Inlines
 smartPunctuation inlineParser = do
   failUnlessSmart
   choice [ quoted inlineParser, apostrophe, dash, ellipses ]
@@ -1061,46 +1079,33 @@ smartPunctuation inlineParser = do
 apostrophe :: Stream s m Char => ParserT s st m Inlines
 apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
 
-quoted :: Stream s m Char
-       => ParserT s  ParserState m Inlines
-       -> ParserT s  ParserState m Inlines
+quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+       => ParserT s st m Inlines
+       -> ParserT s st m Inlines
 quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
 
-withQuoteContext :: Stream s m t
-                 => QuoteContext
-                 -> ParserT s ParserState m a
-                 -> ParserT s ParserState m a
-withQuoteContext context parser = do
-  oldState <- getState
-  let oldQuoteContext = stateQuoteContext oldState
-  setState oldState { stateQuoteContext = context }
-  result <- parser
-  newState <- getState
-  setState newState { stateQuoteContext = oldQuoteContext }
-  return result
-
-singleQuoted :: Stream s m Char
-             => ParserT s ParserState m Inlines
-             -> ParserT s ParserState m Inlines
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+             => ParserT s st m Inlines
+             -> ParserT s st m Inlines
 singleQuoted inlineParser = try $ do
   singleQuoteStart
   withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
     return . B.singleQuoted . mconcat
 
-doubleQuoted :: Stream s m Char
-             => ParserT s ParserState m Inlines
-             -> ParserT s ParserState m Inlines
+doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
+             => ParserT s st m Inlines
+             -> ParserT s st m Inlines
 doubleQuoted inlineParser = try $ do
   doubleQuoteStart
   withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
     return . B.doubleQuoted . mconcat
 
-failIfInQuoteContext :: Stream s m t
+failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
                      => QuoteContext
-                     -> ParserT s ParserState m ()
+                     -> ParserT s st m ()
 failIfInQuoteContext context = do
-  st <- getState
-  if stateQuoteContext st == context
+  context' <- getQuoteContext
+  if context' == context
      then fail "already inside quotes"
      else return ()
 
@@ -1110,8 +1115,8 @@ charOrRef cs =
                        guard (c `elem` cs)
                        return c)
 
-singleQuoteStart :: Stream s m Char
-                 => ParserT s ParserState m ()
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+                 => ParserT s st m ()
 singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
   -- single quote start can't be right after str
@@ -1124,8 +1129,8 @@ singleQuoteEnd = try $ do
   charOrRef "'\8217\146"
   notFollowedBy alphaNum
 
-doubleQuoteStart :: Stream s m Char
-                 => ParserT s ParserState m ()
+doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
+                 => ParserT s st m ()
 doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
   try $ do charOrRef "\"\8220\147"
@@ -1179,6 +1184,14 @@ citeKey = try $ do
   let key = firstChar:rest
   return (suppress_author, key)
 
+
+token :: (Stream s m t)
+      => (t -> String)
+      -> (t -> SourcePos)
+      -> (t -> Maybe a)
+      -> ParsecT s st m a
+token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
+
 --
 -- Macros
 --
@@ -1200,9 +1213,9 @@ macro = do
                            else return $ rawBlock "latex" def'
 
 -- | Apply current macros to string.
-applyMacros' :: Stream [Char] m Char
+applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
              => String
-             -> ParserT [Char] ParserState m String
+             -> ParserT [Char] st m String
 applyMacros' target = do
   apply <- getOption readerApplyMacros
   if apply
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 2414dfbf7..597156a5e 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
 {-
 Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
@@ -40,7 +41,7 @@ import Text.HTML.TagSoup
 import Text.HTML.TagSoup.Match
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
+import Text.Pandoc.Builder (HasMeta (..), Blocks, Inlines, trimInlines)
 import Text.Pandoc.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
@@ -52,6 +53,8 @@ import Control.Applicative ( (<$>), (<$), (<*) )
 import Data.Monoid
 import Text.Printf (printf)
 import Debug.Trace (trace)
+import Data.Default (Default (..))
+import Control.Monad.Reader (Reader, runReader, asks, local, ask)
 
 isSpace :: Char -> Bool
 isSpace ' '  = True
@@ -64,17 +67,26 @@ readHtml :: ReaderOptions -- ^ Reader options
          -> String        -- ^ String to parse (assumes @'\n'@ line endings)
          -> Pandoc
 readHtml opts inp =
-  case runParser parseDoc def{ stateOptions = opts } "source" tags of
+  case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } )  "source" tags of
           Left err'    -> error $ "\nError at " ++ show  err'
           Right result -> result
     where tags = canonicalizeTags $
                    parseTagsOptions parseOptions{ optTagPosition = True } inp
           parseDoc = do
              blocks <- (fixPlains False) . mconcat <$> manyTill block eof
-             meta <- stateMeta <$> getState
+             meta <- stateMeta . parserState <$> getState
              return $ Pandoc meta (B.toList blocks)
 
-type TagParser = Parser [Tag String] ParserState
+data HTMLState =
+  HTMLState
+  {  parserState :: ParserState
+  }
+
+data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext }
+
+type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal)
+
+type TagParser = HTMLParser [Tag String]
 
 pBody :: TagParser Blocks
 pBody = pInTags "body" block
@@ -115,7 +127,6 @@ block = do
              (take 60 $ show $ B.toList res)) (return ())
   return res
 
-
 pList :: TagParser Blocks
 pList = pBulletList <|> pOrderedList <|> pDefinitionList
 
@@ -365,8 +376,8 @@ pSelfClosing f g = do
 
 pQ :: TagParser Inlines
 pQ = do
-  quoteContext <- stateQuoteContext `fmap` getState
-  let quoteType = case quoteContext of
+  context <- asks quoteContext
+  let quoteType = case context of
                        InDoubleQuote -> SingleQuote
                        _             -> DoubleQuote
   let innerQuoteContext = if quoteType == SingleQuote
@@ -477,7 +488,8 @@ pTagText :: TagParser Inlines
 pTagText = try $ do
   (TagText str) <- pSatisfy isTagText
   st <- getState
-  case runParser (many pTagContents) st "text" str of
+  qu <- ask
+  case flip runReader qu $ runParserT (many pTagContents) st "text" str of
        Left _        -> fail $ "Could not parse `" ++ str ++ "'"
        Right result  -> return $ mconcat result
 
@@ -486,7 +498,9 @@ pBlank = try $ do
   (TagText str) <- pSatisfy isTagText
   guard $ all isSpace str
 
-pTagContents :: Parser [Char] ParserState Inlines
+type InlinesParser = HTMLParser String
+
+pTagContents :: InlinesParser Inlines
 pTagContents =
       B.displayMath <$> mathDisplay
   <|> B.math        <$> mathInline
@@ -496,12 +510,11 @@ pTagContents =
   <|> pSymbol
   <|> pBad
 
-pStr :: Parser [Char] ParserState Inlines
+pStr :: InlinesParser Inlines
 pStr = do
   result <- many1 $ satisfy $ \c ->
                      not (isSpace c) && not (isSpecial c) && not (isBad c)
-  pos <- getPosition
-  updateState $ \s -> s{ stateLastStrPos = Just pos }
+  updateLastStrPos
   return $ B.str result
 
 isSpecial :: Char -> Bool
@@ -516,13 +529,13 @@ isSpecial '\8220' = True
 isSpecial '\8221' = True
 isSpecial _ = False
 
-pSymbol :: Parser [Char] ParserState Inlines
+pSymbol :: InlinesParser Inlines
 pSymbol = satisfy isSpecial >>= return . B.str . (:[])
 
 isBad :: Char -> Bool
 isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
 
-pBad :: Parser [Char] ParserState Inlines
+pBad :: InlinesParser Inlines
 pBad = do
   c <- satisfy isBad
   let c' = case c of
@@ -556,7 +569,7 @@ pBad = do
                 _      -> '?'
   return $ B.str [c']
 
-pSpace :: Parser [Char] ParserState Inlines
+pSpace :: InlinesParser Inlines
 pSpace = many1 (satisfy isSpace) >> return B.space
 
 --
@@ -672,19 +685,23 @@ _ `closes` _ = False
 --- parsers for use in markdown, textile readers
 
 -- | Matches a stretch of HTML in balanced tags.
-htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String
+htmlInBalanced :: (Monad m)
+               => (Tag String -> Bool)
+               -> ParserT String st m String
 htmlInBalanced f = try $ do
   (TagOpen t _, tag) <- htmlTag f
   guard $ '/' `notElem` tag      -- not a self-closing tag
   let stopper = htmlTag (~== TagClose t)
-  let anytag = liftM snd $ htmlTag (const True)
+  let anytag = snd <$> htmlTag (const True)
   contents <- many $ notFollowedBy' stopper >>
                      (htmlInBalanced f <|> anytag <|> count 1 anyChar)
   endtag <- liftM snd stopper
   return $ tag ++ concat contents ++ endtag
 
 -- | Matches a tag meeting a certain condition.
-htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String)
+htmlTag :: Monad m
+        => (Tag String -> Bool)
+        -> ParserT [Char] st m (Tag String, String)
 htmlTag f = try $ do
   lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
   (next : _) <- getInput >>= return . canonicalizeTags . parseTags
@@ -707,3 +724,29 @@ mkAttr attr = (attribsId, attribsClasses, attribsKV)
         attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
 
 
+-- Instances
+
+-- This signature should be more general
+-- MonadReader HTMLLocal m => HasQuoteContext st m
+instance HasQuoteContext st (Reader HTMLLocal) where
+  getQuoteContext = asks quoteContext
+  withQuoteContext q = local (\s -> s{quoteContext = q})
+
+instance HasReaderOptions HTMLState where
+    extractReaderOptions = extractReaderOptions . parserState
+
+instance Default HTMLState where
+  def = HTMLState def
+
+instance HasMeta HTMLState where
+  setMeta s b st = st {parserState = setMeta s b $ parserState st}
+  deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
+
+instance Default HTMLLocal where
+  def = HTMLLocal NoQuote
+
+instance HasLastStrPosition HTMLState where
+  setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
+  getLastStrPos = getLastStrPos . parserState
+
+
author	John MacFarlane <jgm@berkeley.edu>	2014-07-26 19:01:26 -0700
committer	John MacFarlane <jgm@berkeley.edu>	2014-07-26 19:01:26 -0700
commit	d9751f91c43ad96becb6aaf2f5abcd9c0b37754c (patch)
tree	6ac474d328d39f0932ec008f0918846bf893a2d5
parent	74690b191e53535ac53c519bf07c6e38c34175b2 (diff)
parent	5e2d22a27e231f0ac62739b3bcd15b548c782f25 (diff)
download	pandoc-d9751f91c43ad96becb6aaf2f5abcd9c0b37754c.tar.gz