MediaWiki reader: Improvements to list parsing and HTML tag handling.

author: John MacFarlane <jgm@berkeley.edu> 2012-09-12 17:15:21 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2012-09-12 17:16:05 -0700
commit: 4e294333b0ddca9d2ee6de408239de4ccf8eb637 (patch)
tree: b7c7ed62f2dd6612c1e8e9844d22fe16a1631432 /src/Text/Pandoc/Readers/MediaWiki.hs
parent: bf2666331d84fff400d75c63f0d8ed3a6d8fc855 (diff)
download: pandoc-4e294333b0ddca9d2ee6de408239de4ccf8eb637.tar.gz
1 files changed, 16 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index cb5782f0b..cfc991872 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -34,7 +34,6 @@ _ tests for lists
 _ support HTML lists
 _ support list style attributes and start values in ol lists, also
   value attribute on li
-_ support <p> tags in lists (and out?)
 _ support :, ::, etc. for indent (treat as list continuation paras?)
 _ support preformatted text (lines starting with space)
 _ support preformatted text blocks
@@ -69,10 +68,12 @@ import Text.Pandoc.XML ( fromEntities )
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared ( stripTrailingNewlines )
 import Data.Monoid (mconcat, mempty)
+import qualified Data.Foldable as F
 import Control.Applicative ((<$>), (<*), (*>), (<$))
 import Control.Monad
 import Data.List (intersperse)
 import Text.HTML.TagSoup
+import Data.Sequence (viewl, ViewL(..), (<|))
 
 -- | Read mediawiki from an input string and return a Pandoc document.
 readMediaWiki :: ReaderOptions -- ^ Reader options
@@ -140,11 +141,20 @@ block = header
      <|> haskell
      <|> mempty <$ skipMany1 blankline
      <|> mempty <$ try (spaces *> htmlComment)
+     <|> pTag
+     <|> blockHtml
      <|> para
 
 para :: MWParser Blocks
 para = B.para . trimInlines . mconcat <$> many1 inline
 
+-- We can just skip pTags, as contents will be treated as paragraphs
+pTag :: MWParser Blocks
+pTag = mempty <$ (htmlTag (\t -> t ~== TagOpen "p" [] || t ~== TagClose "p"))
+
+blockHtml :: MWParser Blocks
+blockHtml = (B.rawBlock "html" . snd <$> htmlTag isBlockTag)
+
 hrule :: MWParser Blocks
 hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline)
 
@@ -223,7 +233,11 @@ listItem' c = try $ do
   first <- manyTill anyChar newline
   rest <- many (try $ char c *> lookAhead listStartChar *>
                    manyTill anyChar newline)
-  parseFromString (mconcat <$> many1 block) $ unlines $ first : rest
+  contents <- parseFromString (mconcat <$> many1 block)
+               $ unlines $ first : rest
+  case viewl (B.unMany contents) of
+       (Para xs) :< rest -> return $ B.Many $ (Plain xs) <| rest
+       _                 -> return contents
 
 --
 -- inline parsers
author	John MacFarlane <jgm@berkeley.edu>	2012-09-12 17:15:21 -0700
committer	John MacFarlane <jgm@berkeley.edu>	2012-09-12 17:16:05 -0700
commit	4e294333b0ddca9d2ee6de408239de4ccf8eb637 (patch)
tree	b7c7ed62f2dd6612c1e8e9844d22fe16a1631432 /src/Text/Pandoc/Readers/MediaWiki.hs
parent	bf2666331d84fff400d75c63f0d8ed3a6d8fc855 (diff)
download	pandoc-4e294333b0ddca9d2ee6de408239de4ccf8eb637.tar.gz