aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2012-09-15 13:44:59 -0400
committerJohn MacFarlane <jgm@berkeley.edu>2012-09-15 13:50:02 -0400
commiteca9eeab6bcb5b36f40ca54659bb22658cecad30 (patch)
treeee2776909d663f7f0932ecc9f0d20982ac086920
parentbc5fe70d155e1d91761da6d88662b1bb3d1d3aca (diff)
downloadpandoc-eca9eeab6bcb5b36f40ca54659bb22658cecad30.tar.gz
MediaWiki reader: Misc fixes, put category links at end.
-rw-r--r--Makefile2
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs63
-rw-r--r--tests/mediawiki-reader.native3
-rw-r--r--tests/mediawiki-reader.wiki4
4 files changed, 46 insertions, 26 deletions
diff --git a/Makefile b/Makefile
index de807334b..e5b4f3378 100644
--- a/Makefile
+++ b/Makefile
@@ -30,4 +30,4 @@ citeproc-hs: pandoc-types
cabal-dev add-source citeproc-hs
install:
- cabal-dev install
+ cabal-dev install --enable-tests --enable-benchmarks
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index db5252a29..751326bb6 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -43,8 +43,7 @@ import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import Text.Pandoc.Options
-import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag,
- isBlockTag, isCommentTag )
+import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
import Text.Pandoc.XML ( fromEntities )
import Text.Pandoc.Parsing hiding ( nested )
import Text.Pandoc.Generic ( bottomUp )
@@ -52,7 +51,7 @@ import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead )
import Data.Monoid (mconcat, mempty)
import Control.Applicative ((<$>), (<*), (*>), (<$))
import Control.Monad
-import Data.List (intersperse, intercalate )
+import Data.List (intersperse, intercalate, isPrefixOf )
import Text.HTML.TagSoup
import Data.Sequence (viewl, ViewL(..), (<|))
@@ -63,7 +62,9 @@ readMediaWiki :: ReaderOptions -- ^ Reader options
readMediaWiki opts s =
case runParser parseMediaWiki MWState{ mwOptions = opts
, mwMaxNestingLevel = 4
- , mwNextLinkNumber = 1 }
+ , mwNextLinkNumber = 1
+ , mwCategoryLinks = []
+ }
"source" (s ++ "\n") of
Left err' -> error $ "\nError:\n" ++ show err'
Right result -> result
@@ -71,6 +72,7 @@ readMediaWiki opts s =
data MWState = MWState { mwOptions :: ReaderOptions
, mwMaxNestingLevel :: Int
, mwNextLinkNumber :: Int
+ , mwCategoryLinks :: [Inlines]
}
type MWParser = Parser [Char] MWState
@@ -103,10 +105,20 @@ newBlockTags :: [String]
newBlockTags = ["haskell","syntaxhighlight","source","gallery"]
isBlockTag' :: Tag String -> Bool
-isBlockTag' tag@(TagOpen t _) = isBlockTag tag || t `elem` newBlockTags
-isBlockTag' tag@(TagClose t) = isBlockTag tag || t `elem` newBlockTags
+isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
+ t `notElem` eitherBlockOrInline
+isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
+ t `notElem` eitherBlockOrInline
isBlockTag' tag = isBlockTag tag
+isInlineTag' :: Tag String -> Bool
+isInlineTag' (TagComment _) = True
+isInlineTag' t = not (isBlockTag' t)
+
+eitherBlockOrInline :: [String]
+eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
+ "map", "area", "object"]
+
htmlComment :: MWParser ()
htmlComment = () <$ htmlTag isCommentTag
@@ -142,7 +154,11 @@ parseMediaWiki = do
bs <- mconcat <$> many block
spaces
eof
- return $ B.doc bs
+ categoryLinks <- reverse . mwCategoryLinks <$> getState
+ let categories = if null categoryLinks
+ then mempty
+ else B.para $ mconcat $ intersperse B.space categoryLinks
+ return $ B.doc $ bs <> categories
--
-- block parsers
@@ -159,7 +175,7 @@ block = mempty <$ skipMany1 blankline
<|> mempty <$ try (spaces *> htmlComment)
<|> preformatted
<|> blockTag
- <|> template
+ <|> (B.rawBlock "mediawiki" <$> template)
<|> para
para :: MWParser Blocks
@@ -229,20 +245,18 @@ tableCell :: MWParser Blocks
tableCell = try $ do
cellsep
skipMany spaceChar
- attrs <- (parseAttrs <$>
- manyTill (satisfy (/='\n'))
- (try $ char '|' <* notFollowedBy (char '|')))
+ attrs <- option [] $ try $ parseAttrs <$>
+ manyTill (satisfy (/='\n')) (char '|' <* notFollowedBy (char '|'))
skipMany spaceChar
ls <- many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> anyChar)
parseFromString (mconcat <$> many block) ls
-template :: MWParser Blocks
-template = B.rawBlock "mediawiki" <$> doublebrackets
- where doublebrackets = try $
- do string "{{"
- notFollowedBy (char '{')
- contents <- manyTill anyChar (try $ string "}}")
- return $ "{{" ++ contents ++ "}}"
+template :: MWParser String
+template = try $ do
+ string "{{"
+ notFollowedBy (char '{')
+ contents <- manyTill anyChar (try $ string "}}")
+ return $ "{{" ++ contents ++ "}}"
blockTag :: MWParser Blocks
blockTag = do
@@ -403,7 +417,7 @@ inline = whitespace
<|> B.singleton <$> charRef
<|> inlineHtml
<|> variable
- <|> (mempty <$ template)
+ <|> (B.rawInline "mediawiki" <$> template)
<|> special
str :: MWParser Inlines
@@ -418,7 +432,7 @@ variable = B.rawInline "mediawiki" <$> triplebrackets
inlineTag :: MWParser Inlines
inlineTag = do
- (tag, _) <- lookAhead $ htmlTag isInlineTag
+ (tag, _) <- lookAhead $ htmlTag isInlineTag'
case tag of
TagOpen "nowiki" _ -> try $ do
(_,raw) <- htmlTag (~== tag)
@@ -443,7 +457,7 @@ special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *>
oneOf specialChars)
inlineHtml :: MWParser Inlines
-inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag
+inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag'
whitespace :: MWParser Inlines
whitespace = B.space <$ (skipMany1 spaceChar <|> endline <|> htmlComment)
@@ -491,7 +505,12 @@ internalLink = try $ do
<|> (return $ B.text $ drop 1 $ dropWhile (/=':') pagename) )
sym "]]"
linktrail <- B.text <$> many (char '\'' <|> letter)
- return $ B.link (addUnderscores pagename) "wikilink" (label <> linktrail)
+ let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail)
+ if "Category:" `isPrefixOf` pagename
+ then do
+ updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st }
+ return mempty
+ else return link
externalLink :: MWParser Inlines
externalLink = try $ do
diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native
index 9954481fd..25c0eaa72 100644
--- a/tests/mediawiki-reader.native
+++ b/tests/mediawiki-reader.native
@@ -51,7 +51,8 @@ Pandoc (Meta {docTitle = [], docAuthors = [], docDate = []})
,Para [Str "bud"]
,Para [Str "another"]
,Header 2 [Str "raw",Space,Str "html"]
-,Para [Str "hi",Space,RawInline "html" "<span style=\"color:red\">",Emph [Str "there"],RawInline "html" "</span>",Str ".",Space,RawInline "html" "<ins>",Str "inserted",RawInline "html" "</ins>"]
+,Para [Str "hi",Space,RawInline "html" "<span style=\"color:red\">",Emph [Str "there"],RawInline "html" "</span>",Str "."]
+,Para [RawInline "html" "<ins>",Str "inserted",RawInline "html" "</ins>"]
,RawBlock "html" "<div class=\"special\">"
,Para [Str "hi",Space,Emph [Str "there"]]
,RawBlock "html" "</div>"
diff --git a/tests/mediawiki-reader.wiki b/tests/mediawiki-reader.wiki
index fd1159cd4..154248e54 100644
--- a/tests/mediawiki-reader.wiki
+++ b/tests/mediawiki-reader.wiki
@@ -91,6 +91,7 @@ another
== raw html ==
hi <span style="color:red">''there''</span>.
+
<ins>inserted</ins>
<div class="special">
@@ -99,8 +100,7 @@ hi ''there''
== sup, sub, del ==
-H<sub>2</sub>O
-base<sup>''exponent''</sup>
+H<sub>2</sub>O base<sup>''exponent''</sup>
<del>hello</del>
== inline code ==