diff options
author | despresc <christian.j.j.despres@gmail.com> | 2019-11-04 16:12:37 -0500 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2019-11-12 16:03:45 -0800 |
commit | 90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch) | |
tree | 4e7f0692f989643189f1fc6786050d95e239a0ea /src/Text/Pandoc/Readers/MediaWiki.hs | |
parent | d3966372f5049eea56213b069fc4d70d8af9144c (diff) | |
download | pandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz |
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884.
+ Use pandoc-types 1.20 and texmath 0.12.
+ Text is now used instead of String, with a few exceptions.
+ In the MediaBag module, some of the types using Strings
were switched to use FilePath instead (not Text).
+ In the Parsing module, new parsers `manyChar`, `many1Char`,
`manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`,
`mantyUntilChar` have been added: these are like their
unsuffixed counterparts but pack some or all of their output.
+ `glob` in Text.Pandoc.Class still takes String since it seems
to be intended as an interface to Glob, which uses strings.
It seems to be used only once in the package, in the EPUB writer,
so that is not hard to change.
Diffstat (limited to 'src/Text/Pandoc/Readers/MediaWiki.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/MediaWiki.hs | 198 |
1 files changed, 99 insertions, 99 deletions
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 35bb8e3eb..07240e951 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE RelaxedPolyRec #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RelaxedPolyRec #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- | Module : Text.Pandoc.Readers.MediaWiki @@ -24,11 +25,12 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isDigit, isSpace) import qualified Data.Foldable as F -import Data.List (intercalate, intersperse, isPrefixOf) +import Data.List (intersperse) import Data.Maybe (fromMaybe, maybeToList) import Data.Sequence (ViewL (..), viewl, (<|)) import qualified Data.Set as Set -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B @@ -39,7 +41,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines, - trim) + trim, splitTextBy, tshow) import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (fromEntities) @@ -57,7 +59,7 @@ readMediaWiki opts s = do , mwLogMessages = [] , mwInTT = False } - (unpack (crFilter s) ++ "\n") + (crFilter s <> "\n") case parsed of Right result -> return result Left e -> throwError e @@ -66,12 +68,12 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int , mwNextLinkNumber :: Int , mwCategoryLinks :: [Inlines] - , mwIdentifierList :: Set.Set String + , mwIdentifierList :: Set.Set Text , mwLogMessages :: [LogMessage] , mwInTT :: Bool } -type MWParser m = ParserT [Char] MWState m +type MWParser m = ParserT Text MWState m instance HasReaderOptions MWState where extractReaderOptions = mwOptions @@ -105,58 +107,58 @@ specialChars = "'[]<=&*{}|\":\\" spaceChars :: [Char] spaceChars = " \n\t" -sym :: PandocMonad m => String -> MWParser m () -sym s = () <$ try (string s) +sym :: PandocMonad m => Text -> MWParser m () +sym s = () <$ try (string $ T.unpack s) -newBlockTags :: [String] +newBlockTags :: [Text] newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"] -isBlockTag' :: Tag String -> Bool +isBlockTag' :: Tag Text -> Bool isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline isBlockTag' tag = isBlockTag tag -isInlineTag' :: Tag String -> Bool +isInlineTag' :: Tag Text -> Bool isInlineTag' (TagComment _) = True isInlineTag' t = not (isBlockTag' t) -eitherBlockOrInline :: [String] +eitherBlockOrInline :: [Text] eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", "map", "area", "object"] htmlComment :: PandocMonad m => MWParser m () htmlComment = () <$ htmlTag isCommentTag -inlinesInTags :: PandocMonad m => String -> MWParser m Inlines +inlinesInTags :: PandocMonad m => Text -> MWParser m Inlines inlinesInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return mempty else trimInlines . mconcat <$> manyTill inline (htmlTag (~== TagClose tag)) -blocksInTags :: PandocMonad m => String -> MWParser m Blocks +blocksInTags :: PandocMonad m => Text -> MWParser m Blocks blocksInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) let closer = if tag == "li" - then htmlTag (~== TagClose "li") + then htmlTag (~== TagClose ("li" :: Text)) <|> lookAhead ( - htmlTag (~== TagOpen "li" []) - <|> htmlTag (~== TagClose "ol") - <|> htmlTag (~== TagClose "ul")) + htmlTag (~== TagOpen ("li" :: Text) []) + <|> htmlTag (~== TagClose ("ol" :: Text)) + <|> htmlTag (~== TagClose ("ul" :: Text))) else htmlTag (~== TagClose tag) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return mempty else mconcat <$> manyTill block closer -charsInTags :: PandocMonad m => String -> MWParser m [Char] -charsInTags tag = try $ do +textInTags :: PandocMonad m => Text -> MWParser m Text +textInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return "" - else manyTill anyChar (htmlTag (~== TagClose tag)) + else T.pack <$> manyTill anyChar (htmlTag (~== TagClose tag)) -- -- main parser @@ -192,7 +194,7 @@ block = do <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res para :: PandocMonad m => MWParser m Blocks @@ -234,16 +236,16 @@ table = do else (replicate cols mempty, hdr:rows') return $ B.table caption cellspecs headers rows -parseAttrs :: PandocMonad m => MWParser m [(String,String)] +parseAttrs :: PandocMonad m => MWParser m [(Text,Text)] parseAttrs = many1 parseAttr -parseAttr :: PandocMonad m => MWParser m (String, String) +parseAttr :: PandocMonad m => MWParser m (Text, Text) parseAttr = try $ do skipMany spaceChar - k <- many1 letter + k <- many1Char letter char '=' - v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"')) - <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|') + v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"')) + <|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|') return (k,v) tableStart :: PandocMonad m => MWParser m () @@ -293,8 +295,8 @@ tableCell = try $ do notFollowedBy (char '|') skipMany spaceChar pos' <- getPosition - ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> - ((snd <$> withRaw table) <|> count 1 anyChar)) + ls <- T.concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> + ((snd <$> withRaw table) <|> countChar 1 anyChar)) bs <- parseFromString (do setPosition pos' mconcat <$> many block) ls let align = case lookup "align" attrs of @@ -307,48 +309,49 @@ tableCell = try $ do Nothing -> 0.0 return ((align, width), bs) -parseWidth :: String -> Maybe Double +parseWidth :: Text -> Maybe Double parseWidth s = - case reverse s of - ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds) - _ -> Nothing + case T.unsnoc s of + Just (ds, '%') | T.all isDigit ds -> safeRead $ "0." <> ds + _ -> Nothing -template :: PandocMonad m => MWParser m String +template :: PandocMonad m => MWParser m Text template = try $ do string "{{" notFollowedBy (char '{') lookAhead $ letter <|> digit <|> char ':' - let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar + let chunk = template <|> variable <|> many1Char (noneOf "{}") <|> countChar 1 anyChar contents <- manyTill chunk (try $ string "}}") - return $ "{{" ++ concat contents ++ "}}" + return $ "{{" <> T.concat contents <> "}}" blockTag :: PandocMonad m => MWParser m Blocks blockTag = do (tag, _) <- lookAhead $ htmlTag isBlockTag' case tag of TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote" - TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre" + TagOpen "pre" _ -> B.codeBlock . trimCode <$> textInTags "pre" TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs TagOpen "source" attrs -> syntaxhighlight "source" attrs TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$> - charsInTags "haskell" + textInTags "haskell" TagOpen "gallery" _ -> blocksInTags "gallery" TagOpen "p" _ -> mempty <$ htmlTag (~== tag) TagClose "p" -> mempty <$ htmlTag (~== tag) _ -> B.rawBlock "html" . snd <$> htmlTag (~== tag) -trimCode :: String -> String -trimCode ('\n':xs) = stripTrailingNewlines xs -trimCode xs = stripTrailingNewlines xs +trimCode :: Text -> Text +trimCode t = case T.uncons t of + Just ('\n', xs) -> stripTrailingNewlines xs + _ -> stripTrailingNewlines t -syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks +syntaxhighlight :: PandocMonad m => Text -> [Attribute Text] -> MWParser m Blocks syntaxhighlight tag attrs = try $ do let mblang = lookup "lang" attrs let mbstart = lookup "start" attrs let mbline = lookup "line" attrs let classes = maybeToList mblang ++ maybe [] (const ["numberLines"]) mbline let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart - contents <- charsInTags tag + contents <- textInTags tag return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents hrule :: PandocMonad m => MWParser m Blocks @@ -362,17 +365,17 @@ preformatted = try $ do guardColumnOne char ' ' let endline' = B.linebreak <$ try (newline <* char ' ') - let whitespace' = B.str <$> many1 ('\160' <$ spaceChar) + let whitespace' = B.str <$> many1Char ('\160' <$ spaceChar) let spToNbsp ' ' = '\160' spToNbsp x = x let nowiki' = mconcat . intersperse B.linebreak . map B.str . - lines . fromEntities . map spToNbsp <$> try - (htmlTag (~== TagOpen "nowiki" []) *> - manyTill anyChar (htmlTag (~== TagClose "nowiki"))) + T.lines . fromEntities . T.map spToNbsp <$> try + (htmlTag (~== TagOpen ("nowiki" :: Text) []) *> + manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text)))) let inline' = whitespace' <|> endline' <|> nowiki' <|> try (notFollowedBy newline *> inline) contents <- mconcat <$> many1 inline' - let spacesStr (Str xs) = all isSpace xs + let spacesStr (Str xs) = T.all isSpace xs spacesStr _ = False if F.all spacesStr contents then return mempty @@ -385,7 +388,7 @@ encode = B.fromList . normalizeCode . B.toList . walk strToCode strToCode x = x normalizeCode [] = [] normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 = - normalizeCode $ Code a1 (x ++ y) : zs + normalizeCode $ Code a1 (x <> y) : zs normalizeCode (x:xs) = x : normalizeCode xs header :: PandocMonad m => MWParser m Blocks @@ -400,22 +403,22 @@ header = try $ do -- See #4731: modifyIdentifier :: Attr -> Attr modifyIdentifier (ident,cl,kv) = (ident',cl,kv) - where ident' = map (\c -> if c == '-' then '_' else c) ident + where ident' = T.map (\c -> if c == '-' then '_' else c) ident bulletList :: PandocMonad m => MWParser m Blocks bulletList = B.bulletList <$> ( many1 (listItem '*') - <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <* - optional (htmlTag (~== TagClose "ul"))) ) + <|> (htmlTag (~== TagOpen ("ul" :: Text) []) *> spaces *> many (listItem '*' <|> li) <* + optional (htmlTag (~== TagClose ("ul" :: Text)))) ) orderedList :: PandocMonad m => MWParser m Blocks orderedList = (B.orderedList <$> many1 (listItem '#')) <|> try - (do (tag,_) <- htmlTag (~== TagOpen "ol" []) + (do (tag,_) <- htmlTag (~== TagOpen ("ol" :: Text) []) spaces items <- many (listItem '#' <|> li) - optional (htmlTag (~== TagClose "ol")) + optional (htmlTag (~== TagClose ("ol" :: Text))) let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) @@ -452,7 +455,7 @@ anyListStart :: PandocMonad m => MWParser m Char anyListStart = guardColumnOne >> oneOf "*#:;" li :: PandocMonad m => MWParser m Blocks -li = lookAhead (htmlTag (~== TagOpen "li" [])) *> +li = lookAhead (htmlTag (~== TagOpen ("li" :: Text) [])) *> (firstParaToPlain <$> blocksInTags "li") <* spaces listItem :: PandocMonad m => Char -> MWParser m Blocks @@ -464,13 +467,13 @@ listItem c = try $ do else do skipMany spaceChar pos' <- getPosition - first <- concat <$> manyTill listChunk newline + first <- T.concat <$> manyTill listChunk newline rest <- many (try $ string extras *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) + (T.concat <$> manyTill listChunk newline)) contents <- parseFromString (do setPosition pos' many1 $ listItem' c) - (unlines (first : rest)) + (T.unlines (first : rest)) case c of '*' -> return $ B.bulletList contents '#' -> return $ B.orderedList contents @@ -484,20 +487,20 @@ listItem c = try $ do -- }} -- * next list item -- which seems to be valid mediawiki. -listChunk :: PandocMonad m => MWParser m String -listChunk = template <|> count 1 anyChar +listChunk :: PandocMonad m => MWParser m Text +listChunk = template <|> countChar 1 anyChar listItem' :: PandocMonad m => Char -> MWParser m Blocks listItem' c = try $ do listStart c skipMany spaceChar pos' <- getPosition - first <- concat <$> manyTill listChunk newline + first <- T.concat <$> manyTill listChunk newline rest <- many (try $ char c *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) + (T.concat <$> manyTill listChunk newline)) parseFromString (do setPosition pos' firstParaToPlain . mconcat <$> many1 block) - $ unlines $ first : rest + $ T.unlines $ first : rest firstParaToPlain :: Blocks -> Blocks firstParaToPlain contents = @@ -528,23 +531,23 @@ inline = whitespace <|> special str :: PandocMonad m => MWParser m Inlines -str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) +str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) math :: PandocMonad m => MWParser m Inlines -math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math")) - <|> (B.math . trim <$> charsInTags "math") - <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd)) - <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd)) +math = (B.displayMath . trim <$> try (many1 (char ':') >> textInTags "math")) + <|> (B.math . trim <$> textInTags "math") + <|> (B.displayMath . trim <$> try (dmStart *> manyTillChar anyChar dmEnd)) + <|> (B.math . trim <$> try (mStart *> manyTillChar (satisfy (/='\n')) mEnd)) where dmStart = string "\\[" dmEnd = try (string "\\]") mStart = string "\\(" mEnd = try (string "\\)") -variable :: PandocMonad m => MWParser m String +variable :: PandocMonad m => MWParser m Text variable = try $ do string "{{{" - contents <- manyTill anyChar (try $ string "}}}") - return $ "{{{" ++ contents ++ "}}}" + contents <- manyTillChar anyChar (try $ string "}}}") + return $ "{{{" <> contents <> "}}}" inlineTag :: PandocMonad m => MWParser m Inlines inlineTag = do @@ -553,11 +556,11 @@ inlineTag = do TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref" TagOpen "nowiki" _ -> try $ do (_,raw) <- htmlTag (~== tag) - if '/' `elem` raw + if T.any (== '/') raw then return mempty else B.text . fromEntities <$> - manyTill anyChar (htmlTag (~== TagClose "nowiki")) - TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too + manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text))) + TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen ("br" :: Text) []) -- will get /> too *> optional blankline) TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike" TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" @@ -570,12 +573,12 @@ inlineTag = do result <- encode <$> inlinesInTags "tt" updateState $ \st -> st{ mwInTT = inTT } return result - TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" + TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> textInTags "hask" _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) special :: PandocMonad m => MWParser m Inlines -special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *> - oneOf specialChars) +special = B.str <$> countChar 1 (notFollowedBy' (htmlTag isBlockTag') *> + oneOf specialChars) inlineHtml :: PandocMonad m => MWParser m Inlines inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag' @@ -594,7 +597,7 @@ endline = () <$ try (newline <* notFollowedBy anyListStart) imageIdentifiers :: PandocMonad m => [MWParser m ()] -imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers] +imageIdentifiers = [sym (identifier <> ":") | identifier <- identifiers] where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier", "Bild"] @@ -602,9 +605,9 @@ image :: PandocMonad m => MWParser m Inlines image = try $ do sym "[[" choice imageIdentifiers - fname <- addUnderscores <$> many1 (noneOf "|]") + fname <- addUnderscores <$> many1Char (noneOf "|]") _ <- many imageOption - dims <- try (char '|' *> sepBy (many digit) (char 'x') <* string "px") + dims <- try (char '|' *> sepBy (manyChar digit) (char 'x') <* string "px") <|> return [] _ <- many imageOption let kvs = case dims of @@ -614,9 +617,9 @@ image = try $ do let attr = ("", [], kvs) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption + return $ B.imageWith attr fname ("fig:" <> stringify caption) caption -imageOption :: PandocMonad m => MWParser m String +imageOption :: PandocMonad m => MWParser m Text imageOption = try $ char '|' *> opt where opt = try (oneOfStrings [ "border", "thumbnail", "frameless" @@ -624,30 +627,27 @@ imageOption = try $ char '|' *> opt , "center", "none", "baseline", "sub" , "super", "top", "text-top", "middle" , "bottom", "text-bottom" ]) - <|> try (string "frame") + <|> try (textStr "frame") <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) -collapseUnderscores :: String -> String -collapseUnderscores [] = [] -collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs) -collapseUnderscores (x:xs) = x : collapseUnderscores xs - -addUnderscores :: String -> String -addUnderscores = collapseUnderscores . intercalate "_" . words +addUnderscores :: Text -> Text +addUnderscores = T.intercalate "_" . splitTextBy sep + where + sep c = isSpace c || c == '_' internalLink :: PandocMonad m => MWParser m Inlines internalLink = try $ do sym "[[" - pagename <- unwords . words <$> many (noneOf "|]") + pagename <- T.unwords . T.words <$> manyChar (noneOf "|]") label <- option (B.text pagename) $ char '|' *> ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) -- the "pipe trick" -- [[Help:Contents|] -> "Contents" - <|> return (B.text $ drop 1 $ dropWhile (/=':') pagename) ) + <|> return (B.text $ T.drop 1 $ T.dropWhile (/=':') pagename) ) sym "]]" - linktrail <- B.text <$> many letter + linktrail <- B.text <$> manyChar letter let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail) - if "Category:" `isPrefixOf` pagename + if "Category:" `T.isPrefixOf` pagename then do updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st } return mempty @@ -662,7 +662,7 @@ externalLink = try $ do <|> do char ']' num <- mwNextLinkNumber <$> getState updateState $ \st -> st{ mwNextLinkNumber = num + 1 } - return $ B.str $ show num + return $ B.str $ tshow num return $ B.link src "" lab url :: PandocMonad m => MWParser m Inlines |