aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2012-09-13 15:10:40 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2012-09-13 15:10:40 -0700
commit8cc5625bd6ef2bda13b156b381d238f21beb789e (patch)
treec89e477267b4cc6e6beb10eec1c10c6f9c87ba67 /src/Text
parent5b29f0f5983e03607712faf36d296b7b06cd1e61 (diff)
downloadpandoc-8cc5625bd6ef2bda13b156b381d238f21beb789e.tar.gz
MediaWiki reader: Use MWState instead of ParserState.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs2
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs29
2 files changed, 25 insertions, 6 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 90887bfae..295171ca8 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -597,7 +597,7 @@ htmlInBalanced f = try $ do
return $ tag ++ concat contents ++ endtag
-- | Matches a tag meeting a certain condition.
-htmlTag :: (Tag String -> Bool) -> Parser [Char] ParserState (Tag String, String)
+htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String)
htmlTag f = try $ do
lookAhead (char '<')
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 7e8e606e5..62660a1d2 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -34,6 +34,7 @@ _ support internal links http://www.mediawiki.org/wiki/Help:Links
_ support external links (partially implemented)
_ support images http://www.mediawiki.org/wiki/Help:Images
_ support tables http://www.mediawiki.org/wiki/Help:Tables
+- footnotes?
-}
module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where
@@ -44,7 +45,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag,
isBlockTag, isCommentTag )
import Text.Pandoc.XML ( fromEntities )
-import Text.Pandoc.Parsing
+import Text.Pandoc.Parsing hiding ( nested )
import Text.Pandoc.Generic ( bottomUp )
import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead )
import Data.Monoid (mconcat, mempty)
@@ -56,17 +57,35 @@ import Data.Sequence (viewl, ViewL(..), (<|))
-- | Read mediawiki from an input string and return a Pandoc document.
readMediaWiki :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Pandoc
readMediaWiki opts s =
- (readWith parseMediaWiki) def{ stateOptions = opts } (s ++ "\n\n")
+ case runParser parseMediaWiki MWState{ mwOptions = opts, mwMaxNestingLevel = 4 }
+ "source" (s ++ "\n") of
+ Left err' -> error $ "\nError:\n" ++ show err'
+ Right result -> result
-type MWParser = Parser [Char] ParserState
+data MWState = MWState { mwOptions :: ReaderOptions
+ , mwMaxNestingLevel :: Int
+ }
+
+type MWParser = Parser [Char] MWState
--
-- auxiliary functions
--
+-- This is used to prevent exponential blowups for things like:
+-- ''a'''a''a'''a''a'''a''a'''a
+nested :: MWParser a -> MWParser a
+nested p = do
+ nestlevel <- mwMaxNestingLevel `fmap` getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ mwMaxNestingLevel = mwMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ mwMaxNestingLevel = nestlevel }
+ return res
+
specialChars :: [Char]
specialChars = "'[]<=&*{}"