From 4e294333b0ddca9d2ee6de408239de4ccf8eb637 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 12 Sep 2012 17:15:21 -0700 Subject: MediaWiki reader: Improvements to list parsing and HTML tag handling. --- src/Text/Pandoc/Readers/MediaWiki.hs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc/Readers/MediaWiki.hs') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index cb5782f0b..cfc991872 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -34,7 +34,6 @@ _ tests for lists _ support HTML lists _ support list style attributes and start values in ol lists, also value attribute on li -_ support

tags in lists (and out?) _ support :, ::, etc. for indent (treat as list continuation paras?) _ support preformatted text (lines starting with space) _ support preformatted text blocks @@ -69,10 +68,12 @@ import Text.Pandoc.XML ( fromEntities ) import Text.Pandoc.Parsing import Text.Pandoc.Shared ( stripTrailingNewlines ) import Data.Monoid (mconcat, mempty) +import qualified Data.Foldable as F import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad import Data.List (intersperse) import Text.HTML.TagSoup +import Data.Sequence (viewl, ViewL(..), (<|)) -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: ReaderOptions -- ^ Reader options @@ -140,11 +141,20 @@ block = header <|> haskell <|> mempty <$ skipMany1 blankline <|> mempty <$ try (spaces *> htmlComment) + <|> pTag + <|> blockHtml <|> para para :: MWParser Blocks para = B.para . trimInlines . mconcat <$> many1 inline +-- We can just skip pTags, as contents will be treated as paragraphs +pTag :: MWParser Blocks +pTag = mempty <$ (htmlTag (\t -> t ~== TagOpen "p" [] || t ~== TagClose "p")) + +blockHtml :: MWParser Blocks +blockHtml = (B.rawBlock "html" . snd <$> htmlTag isBlockTag) + hrule :: MWParser Blocks hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline) @@ -223,7 +233,11 @@ listItem' c = try $ do first <- manyTill anyChar newline rest <- many (try $ char c *> lookAhead listStartChar *> manyTill anyChar newline) - parseFromString (mconcat <$> many1 block) $ unlines $ first : rest + contents <- parseFromString (mconcat <$> many1 block) + $ unlines $ first : rest + case viewl (B.unMany contents) of + (Para xs) :< rest -> return $ B.Many $ (Plain xs) <| rest + _ -> return contents -- -- inline parsers -- cgit v1.2.3