diff options
author | John MacFarlane <jgm@berkeley.edu> | 2012-09-13 15:10:40 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2012-09-13 15:10:40 -0700 |
commit | 8cc5625bd6ef2bda13b156b381d238f21beb789e (patch) | |
tree | c89e477267b4cc6e6beb10eec1c10c6f9c87ba67 /src/Text | |
parent | 5b29f0f5983e03607712faf36d296b7b06cd1e61 (diff) | |
download | pandoc-8cc5625bd6ef2bda13b156b381d238f21beb789e.tar.gz |
MediaWiki reader: Use MWState instead of ParserState.
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/MediaWiki.hs | 29 |
2 files changed, 25 insertions, 6 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 90887bfae..295171ca8 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -597,7 +597,7 @@ htmlInBalanced f = try $ do return $ tag ++ concat contents ++ endtag -- | Matches a tag meeting a certain condition. -htmlTag :: (Tag String -> Bool) -> Parser [Char] ParserState (Tag String, String) +htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String) htmlTag f = try $ do lookAhead (char '<') (next : _) <- getInput >>= return . canonicalizeTags . parseTags diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 7e8e606e5..62660a1d2 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -34,6 +34,7 @@ _ support internal links http://www.mediawiki.org/wiki/Help:Links _ support external links (partially implemented) _ support images http://www.mediawiki.org/wiki/Help:Images _ support tables http://www.mediawiki.org/wiki/Help:Tables +- footnotes? -} module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where @@ -44,7 +45,7 @@ import Text.Pandoc.Options import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) -import Text.Pandoc.Parsing +import Text.Pandoc.Parsing hiding ( nested ) import Text.Pandoc.Generic ( bottomUp ) import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead ) import Data.Monoid (mconcat, mempty) @@ -56,17 +57,35 @@ import Data.Sequence (viewl, ViewL(..), (<|)) -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc readMediaWiki opts s = - (readWith parseMediaWiki) def{ stateOptions = opts } (s ++ "\n\n") + case runParser parseMediaWiki MWState{ mwOptions = opts, mwMaxNestingLevel = 4 } + "source" (s ++ "\n") of + Left err' -> error $ "\nError:\n" ++ show err' + Right result -> result -type MWParser = Parser [Char] ParserState +data MWState = MWState { mwOptions :: ReaderOptions + , mwMaxNestingLevel :: Int + } + +type MWParser = Parser [Char] MWState -- -- auxiliary functions -- +-- This is used to prevent exponential blowups for things like: +-- ''a'''a''a'''a''a'''a''a'''a +nested :: MWParser a -> MWParser a +nested p = do + nestlevel <- mwMaxNestingLevel `fmap` getState + guard $ nestlevel > 0 + updateState $ \st -> st{ mwMaxNestingLevel = mwMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ mwMaxNestingLevel = nestlevel } + return res + specialChars :: [Char] specialChars = "'[]<=&*{}" |