aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/DokuWiki.hs
diff options
context:
space:
mode:
authordespresc <christian.j.j.despres@gmail.com>2019-11-04 16:12:37 -0500
committerJohn MacFarlane <jgm@berkeley.edu>2019-11-12 16:03:45 -0800
commit90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch)
tree4e7f0692f989643189f1fc6786050d95e239a0ea /src/Text/Pandoc/Readers/DokuWiki.hs
parentd3966372f5049eea56213b069fc4d70d8af9144c (diff)
downloadpandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884. + Use pandoc-types 1.20 and texmath 0.12. + Text is now used instead of String, with a few exceptions. + In the MediaBag module, some of the types using Strings were switched to use FilePath instead (not Text). + In the Parsing module, new parsers `manyChar`, `many1Char`, `manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`, `mantyUntilChar` have been added: these are like their unsuffixed counterparts but pack some or all of their output. + `glob` in Text.Pandoc.Class still takes String since it seems to be intended as an interface to Glob, which uses strings. It seems to be used only once in the package, in the EPUB writer, so that is not hard to change.
Diffstat (limited to 'src/Text/Pandoc/Readers/DokuWiki.hs')
-rw-r--r--src/Text/Pandoc/Readers/DokuWiki.hs187
1 files changed, 94 insertions, 93 deletions
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index 60d406df1..3a92cfa19 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -2,6 +2,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.DokuWiki
Copyright : Copyright (C) 2018-2019 Alexander Krotov
@@ -20,8 +21,7 @@ import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isAlphaNum, isDigit)
import qualified Data.Foldable as F
-import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf)
-import Data.List.Split (splitOn)
+import Data.List (transpose)
import Data.Maybe (fromMaybe, catMaybes)
import Data.Text (Text)
import qualified Data.Text as T
@@ -31,7 +31,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, underlineSpan)
+import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow)
-- | Read DokuWiki from an input string and return a Pandoc document.
readDokuWiki :: PandocMonad m
@@ -42,7 +42,7 @@ readDokuWiki opts s = do
let input = crFilter s
res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
case res of
- Left e -> throwError $ PandocParsecError (T.unpack input) e
+ Left e -> throwError $ PandocParsecError input e
Right d -> return d
type DWParser = ParserT Text ParserState
@@ -71,9 +71,9 @@ parseDokuWiki =
B.doc . mconcat <$> many block <* spaces <* eof
-- | Parse <code> and <file> attributes
-codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)])
+codeLanguage :: PandocMonad m => DWParser m (Text, [Text], [(Text, Text)])
codeLanguage = try $ do
- rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>')))
+ rawLang <- option "-" (spaceChar *> manyTillChar anyChar (lookAhead (spaceChar <|> char '>')))
let attr = case rawLang of
"-" -> []
l -> [l]
@@ -81,16 +81,16 @@ codeLanguage = try $ do
-- | Generic parser for <code> and <file> tags
codeTag :: PandocMonad m
- => ((String, [String], [(String, String)]) -> String -> a)
- -> String
+ => ((Text, [Text], [(Text, Text)]) -> Text -> a)
+ -> Text
-> DWParser m a
codeTag f tag = try $ f
<$ char '<'
- <* string tag
+ <* textStr tag
<*> codeLanguage
<* manyTill anyChar (char '>')
<* optional (manyTill spaceChar eol)
- <*> manyTill anyChar (try $ string "</" <* string tag <* char '>')
+ <*> manyTillChar anyChar (try $ string "</" <* textStr tag <* char '>')
-- * Inline parsers
@@ -167,19 +167,19 @@ underlined :: PandocMonad m => DWParser m B.Inlines
underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines
nowiki :: PandocMonad m => DWParser m B.Inlines
-nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>")
+nowiki = try $ B.text <$ string "<nowiki>" <*> manyTillChar anyChar (try $ string "</nowiki>")
percent :: PandocMonad m => DWParser m B.Inlines
-percent = try $ B.text <$> enclosed (string "%%") nestedString
+percent = try $ B.text <$> enclosed (string "%%") nestedText
-nestedString :: (Show a, PandocMonad m)
- => DWParser m a -> DWParser m String
-nestedString end = innerSpace <|> count 1 nonspaceChar
+nestedText :: (Show a, PandocMonad m)
+ => DWParser m a -> DWParser m Text
+nestedText end = innerSpace <|> countChar 1 nonspaceChar
where
- innerSpace = try $ many1 spaceChar <* notFollowedBy end
+ innerSpace = try $ many1Char spaceChar <* notFollowedBy end
monospaced :: PandocMonad m => DWParser m B.Inlines
-monospaced = try $ B.code <$> enclosed (string "''") nestedString
+monospaced = try $ B.code <$> enclosed (string "''") nestedText
subscript :: PandocMonad m => DWParser m B.Inlines
subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines
@@ -201,12 +201,12 @@ inlineFile :: PandocMonad m => DWParser m B.Inlines
inlineFile = codeTag B.codeWith "file"
inlineHtml :: PandocMonad m => DWParser m B.Inlines
-inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>")
+inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTillChar anyChar (try $ string "</html>")
inlinePhp :: PandocMonad m => DWParser m B.Inlines
-inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>")
+inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTillChar anyChar (try $ string "</php>")
-makeLink :: (String, String) -> B.Inlines
+makeLink :: (Text, Text) -> B.Inlines
makeLink (text, url) = B.link url "" $ B.str text
autoEmail :: PandocMonad m => DWParser m B.Inlines
@@ -220,7 +220,7 @@ autoLink = try $ do
state <- getState
guard $ stateAllowLinks state
(text, url) <- uri
- guard $ checkLink (last url)
+ guard $ checkLink (T.last url)
return $ makeLink (text, url)
where
checkLink c
@@ -234,10 +234,10 @@ nocache :: PandocMonad m => DWParser m B.Inlines
nocache = try $ mempty <$ string "~~NOCACHE~~"
str :: PandocMonad m => DWParser m B.Inlines
-str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
+str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference)
symbol :: PandocMonad m => DWParser m B.Inlines
-symbol = B.str <$> count 1 nonspaceChar
+symbol = B.str <$> countChar 1 nonspaceChar
link :: PandocMonad m => DWParser m B.Inlines
link = try $ do
@@ -248,77 +248,78 @@ link = try $ do
setState $ st{ stateAllowLinks = True }
return l
-isExternalLink :: String -> Bool
-isExternalLink s =
- case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of
- (':':'/':'/':_) -> True
- _ -> False
-
-isAbsolutePath :: String -> Bool
-isAbsolutePath ('.':_) = False
-isAbsolutePath s = ':' `elem` s
-
-normalizeDots :: String -> String
-normalizeDots path@('.':_) =
- case dropWhile (== '.') path of
- ':':_ -> path
- _ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path
-normalizeDots path = path
+isExternalLink :: Text -> Bool
+isExternalLink s = "://" `T.isPrefixOf` sSuff
+ where
+ sSuff = T.dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s
+
+isAbsolutePath :: Text -> Bool
+isAbsolutePath (T.uncons -> Just ('.', _)) = False
+isAbsolutePath s = T.any (== ':') s
+
+normalizeDots :: Text -> Text
+normalizeDots path
+ | not (T.null pref) = case T.uncons suff of
+ Just (':', _) -> path
+ _ -> pref <> ":" <> suff
+ | otherwise = path
+ where
+ (pref, suff) = T.span (== '.') path
-normalizeInternalPath :: String -> String
+normalizeInternalPath :: Text -> Text
normalizeInternalPath path =
if isAbsolutePath path
then ensureAbsolute normalizedPath
else normalizedPath
where
- normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path
- ensureAbsolute s@('/':_) = s
- ensureAbsolute s = '/':s
+ normalizedPath = T.intercalate "/" $ dropWhile (== ".") $ T.splitOn ":" $ normalizeDots path
+ ensureAbsolute s@(T.uncons -> Just ('/', _)) = s
+ ensureAbsolute s = "/" <> s
-normalizePath :: String -> String
+normalizePath :: Text -> Text
normalizePath path =
if isExternalLink path
then path
else normalizeInternalPath path
-urlToText :: String -> String
+urlToText :: Text -> Text
urlToText url =
if isExternalLink url
then url
- else reverse $ takeWhile (/= ':') $ reverse url
+ else T.takeWhileEnd (/= ':') url
-- Parse link or image
parseLink :: PandocMonad m
- => (String -> Maybe B.Inlines -> B.Inlines)
- -> String
- -> String
+ => (Text -> Maybe B.Inlines -> B.Inlines)
+ -> Text
+ -> Text
-> DWParser m B.Inlines
parseLink f l r = f
- <$ string l
- <*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r)))
- <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r)))
- <* string r
+ <$ textStr l
+ <*> many1TillChar anyChar (lookAhead (void (char '|') <|> try (void $ textStr r)))
+ <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ textStr r)))
+ <* textStr r
-- | Split Interwiki link into left and right part
-- | Return Nothing if it is not Interwiki link
-splitInterwiki :: String -> Maybe (String, String)
+splitInterwiki :: Text -> Maybe (Text, Text)
splitInterwiki path =
- case span (\c -> isAlphaNum c || c == '.') path of
- (l, '>':r) -> Just (l, r)
+ case T.span (\c -> isAlphaNum c || c == '.') path of
+ (l, T.uncons -> Just ('>', r)) -> Just (l, r)
_ -> Nothing
-interwikiToUrl :: String -> String -> String
-interwikiToUrl "callto" page = "callto://" ++ page
-interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page
-interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page
-interwikiToUrl "tel" page = "tel:" ++ page
-interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page
-interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky"
+interwikiToUrl :: Text -> Text -> Text
+interwikiToUrl "callto" page = "callto://" <> page
+interwikiToUrl "doku" page = "https://www.dokuwiki.org/" <> page
+interwikiToUrl "phpfn" page = "https://secure.php.net/" <> page
+interwikiToUrl "tel" page = "tel:" <> page
+interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page
+interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page
+interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky"
linkText :: PandocMonad m => DWParser m B.Inlines
linkText = parseLink fromRaw "[[" "]]"
@@ -338,23 +339,23 @@ linkText = parseLink fromRaw "[[" "]]"
Just (_, r) -> r
-- Matches strings like "100x100" (width x height) and "50" (width)
-isWidthHeightParameter :: String -> Bool
+isWidthHeightParameter :: Text -> Bool
isWidthHeightParameter s =
- case s of
- (x:xs) ->
- isDigit x && case dropWhile isDigit xs of
- ('x':ys@(_:_)) -> all isDigit ys
- "" -> True
+ case T.uncons s of
+ Just (x, xs) ->
+ isDigit x && case T.uncons $ T.dropWhile isDigit xs of
+ Just ('x', ys) | not (T.null ys) -> T.all isDigit ys
+ Nothing -> True
_ -> False
_ -> False
-parseWidthHeight :: String -> (Maybe String, Maybe String)
+parseWidthHeight :: Text -> (Maybe Text, Maybe Text)
parseWidthHeight s = (width, height)
where
- width = Just $ takeWhile isDigit s
+ width = Just $ T.takeWhile isDigit s
height =
- case dropWhile isDigit s of
- ('x':xs) -> Just xs
+ case T.uncons $ T.dropWhile isDigit s of
+ Just ('x', xs) -> Just xs
_ -> Nothing
image :: PandocMonad m => DWParser m B.Inlines
@@ -365,17 +366,17 @@ image = try $ parseLink fromRaw "{{" "}}"
then B.link normalizedPath "" (fromMaybe defaultDescription description)
else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description)
where
- (path', parameters) = span (/= '?') $ trim path
+ (path', parameters) = T.span (/= '?') $ trim path
normalizedPath = normalizePath path'
- leftPadding = " " `isPrefixOf` path
- rightPadding = " " `isSuffixOf` path
+ leftPadding = " " `T.isPrefixOf` path
+ rightPadding = " " `T.isSuffixOf` path
classes =
case (leftPadding, rightPadding) of
(False, False) -> []
(False, True) -> ["align-left"]
(True, False) -> ["align-right"]
(True, True) -> ["align-center"]
- parameterList = splitOn "&" $ drop 1 parameters
+ parameterList = T.splitOn "&" $ T.drop 1 parameters
linkOnly = "linkonly" `elem` parameterList
(width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList)
attributes = catMaybes [fmap ("width",) width, fmap ("height",) height]
@@ -389,7 +390,7 @@ block = do
<|> blockElements
<|> para
skipMany blankline
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
blockElements :: PandocMonad m => DWParser m B.Blocks
@@ -417,30 +418,30 @@ header = try $ do
attr <- registerHeader nullAttr contents
return $ B.headerWith attr (7 - lev) contents
-list :: PandocMonad m => String -> DWParser m B.Blocks
+list :: PandocMonad m => Text -> DWParser m B.Blocks
list prefix = bulletList prefix <|> orderedList prefix
-bulletList :: PandocMonad m => String -> DWParser m B.Blocks
+bulletList :: PandocMonad m => Text -> DWParser m B.Blocks
bulletList prefix = try $ B.bulletList <$> parseList prefix '*'
-orderedList :: PandocMonad m => String -> DWParser m B.Blocks
+orderedList :: PandocMonad m => Text -> DWParser m B.Blocks
orderedList prefix = try $ B.orderedList <$> parseList prefix '-'
parseList :: PandocMonad m
- => String
+ => Text
-> Char
-> DWParser m [B.Blocks]
parseList prefix marker =
many1 ((<>) <$> item <*> fmap mconcat (many continuation))
where
- continuation = try $ list (" " ++ prefix)
- item = try $ string prefix *> char marker *> char ' ' *> itemContents
+ continuation = try $ list (" " <> prefix)
+ item = try $ textStr prefix *> char marker *> char ' ' *> itemContents
itemContents = B.plain . mconcat <$> many1Till inline' eol
indentedCode :: PandocMonad m => DWParser m B.Blocks
-indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine
+indentedCode = try $ B.codeBlock . T.unlines <$> many1 indentedLine
where
- indentedLine = try $ string " " *> manyTill anyChar eol
+ indentedLine = try $ string " " *> manyTillChar anyChar eol
quote :: PandocMonad m => DWParser m B.Blocks
quote = try $ nestedQuote 0
@@ -456,13 +457,13 @@ blockHtml :: PandocMonad m => DWParser m B.Blocks
blockHtml = try $ B.rawBlock "html"
<$ string "<HTML>"
<* optional (manyTill spaceChar eol)
- <*> manyTill anyChar (try $ string "</HTML>")
+ <*> manyTillChar anyChar (try $ string "</HTML>")
blockPhp :: PandocMonad m => DWParser m B.Blocks
blockPhp = try $ B.codeBlockWith ("", ["php"], [])
<$ string "<PHP>"
<* optional (manyTill spaceChar eol)
- <*> manyTill anyChar (try $ string "</PHP>")
+ <*> manyTillChar anyChar (try $ string "</PHP>")
table :: PandocMonad m => DWParser m B.Blocks
table = do