aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/MediaWiki.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2012-09-12 17:15:21 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2012-09-12 17:16:05 -0700
commit4e294333b0ddca9d2ee6de408239de4ccf8eb637 (patch)
treeb7c7ed62f2dd6612c1e8e9844d22fe16a1631432 /src/Text/Pandoc/Readers/MediaWiki.hs
parentbf2666331d84fff400d75c63f0d8ed3a6d8fc855 (diff)
downloadpandoc-4e294333b0ddca9d2ee6de408239de4ccf8eb637.tar.gz
MediaWiki reader: Improvements to list parsing and HTML tag handling.
Diffstat (limited to 'src/Text/Pandoc/Readers/MediaWiki.hs')
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs18
1 files changed, 16 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index cb5782f0b..cfc991872 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -34,7 +34,6 @@ _ tests for lists
_ support HTML lists
_ support list style attributes and start values in ol lists, also
value attribute on li
-_ support <p> tags in lists (and out?)
_ support :, ::, etc. for indent (treat as list continuation paras?)
_ support preformatted text (lines starting with space)
_ support preformatted text blocks
@@ -69,10 +68,12 @@ import Text.Pandoc.XML ( fromEntities )
import Text.Pandoc.Parsing
import Text.Pandoc.Shared ( stripTrailingNewlines )
import Data.Monoid (mconcat, mempty)
+import qualified Data.Foldable as F
import Control.Applicative ((<$>), (<*), (*>), (<$))
import Control.Monad
import Data.List (intersperse)
import Text.HTML.TagSoup
+import Data.Sequence (viewl, ViewL(..), (<|))
-- | Read mediawiki from an input string and return a Pandoc document.
readMediaWiki :: ReaderOptions -- ^ Reader options
@@ -140,11 +141,20 @@ block = header
<|> haskell
<|> mempty <$ skipMany1 blankline
<|> mempty <$ try (spaces *> htmlComment)
+ <|> pTag
+ <|> blockHtml
<|> para
para :: MWParser Blocks
para = B.para . trimInlines . mconcat <$> many1 inline
+-- We can just skip pTags, as contents will be treated as paragraphs
+pTag :: MWParser Blocks
+pTag = mempty <$ (htmlTag (\t -> t ~== TagOpen "p" [] || t ~== TagClose "p"))
+
+blockHtml :: MWParser Blocks
+blockHtml = (B.rawBlock "html" . snd <$> htmlTag isBlockTag)
+
hrule :: MWParser Blocks
hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline)
@@ -223,7 +233,11 @@ listItem' c = try $ do
first <- manyTill anyChar newline
rest <- many (try $ char c *> lookAhead listStartChar *>
manyTill anyChar newline)
- parseFromString (mconcat <$> many1 block) $ unlines $ first : rest
+ contents <- parseFromString (mconcat <$> many1 block)
+ $ unlines $ first : rest
+ case viewl (B.unMany contents) of
+ (Para xs) :< rest -> return $ B.Many $ (Plain xs) <| rest
+ _ -> return contents
--
-- inline parsers