aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANUAL.txt1
-rw-r--r--pandoc.cabal7
-rw-r--r--src/Text/Pandoc/Readers.hs3
-rw-r--r--src/Text/Pandoc/Readers/DokuWiki.hs529
-rw-r--r--test/Tests/Readers/DokuWiki.hs315
-rw-r--r--test/test-pandoc.hs2
6 files changed, 855 insertions, 2 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index fe16b6c67..417ec8d6c 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -238,6 +238,7 @@ General options {.options}
- `creole` ([Creole 1.0])
- `docbook` ([DocBook])
- `docx` ([Word docx])
+ - `dokuwiki` ([DokuWiki markup])
- `epub` ([EPUB])
- `fb2` ([FictionBook2] e-book)
- `gfm` ([GitHub-Flavored Markdown]),
diff --git a/pandoc.cabal b/pandoc.cabal
index 5efd740cd..b99a9a8e0 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -18,8 +18,9 @@ description: Pandoc is a Haskell library for converting from one markup
format to another, and a command-line tool that uses
this library. It can read several dialects of Markdown and
(subsets of) HTML, reStructuredText, LaTeX, DocBook, JATS,
- MediaWiki markup, TWiki markup, TikiWiki markup, Creole 1.0,
- Haddock markup, OPML, Emacs Org-Mode, Emacs Muse, txt2tags,
+ MediaWiki markup, DokuWiki markup, TWiki markup,
+ TikiWiki markup, Creole 1.0, Haddock markup, OPML,
+ Emacs Org-Mode, Emacs Muse, txt2tags,
Vimwiki, Word Docx, ODT, EPUB, FictionBook2, roff man,
and Textile, and it can write Markdown, reStructuredText,
XHTML, HTML 5, LaTeX, ConTeXt, DocBook, JATS, OPML, TEI,
@@ -467,6 +468,7 @@ library
Text.Pandoc.Readers.Muse,
Text.Pandoc.Readers.Man,
Text.Pandoc.Readers.FB2,
+ Text.Pandoc.Readers.DokuWiki,
Text.Pandoc.Writers,
Text.Pandoc.Writers.Native,
Text.Pandoc.Writers.Docbook,
@@ -696,6 +698,7 @@ test-suite test-pandoc
Tests.Readers.Creole
Tests.Readers.Man
Tests.Readers.FB2
+ Tests.Readers.DokuWiki
Tests.Writers.Native
Tests.Writers.ConTeXt
Tests.Writers.Docbook
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 630b8b858..434725887 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -47,6 +47,7 @@ module Text.Pandoc.Readers
, readMarkdown
, readCommonMark
, readCreole
+ , readDokuWiki
, readMediaWiki
, readVimwiki
, readRST
@@ -86,6 +87,7 @@ import Text.Pandoc.Readers.CommonMark
import Text.Pandoc.Readers.Creole
import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.Docx
+import Text.Pandoc.Readers.DokuWiki
import Text.Pandoc.Readers.EPUB
import Text.Pandoc.Readers.FB2
import Text.Pandoc.Readers.Haddock
@@ -123,6 +125,7 @@ readers = [ ("native" , TextReader readNative)
,("markdown_mmd", TextReader readMarkdown)
,("commonmark" , TextReader readCommonMark)
,("creole" , TextReader readCreole)
+ ,("dokuwiki" , TextReader readDokuWiki)
,("gfm" , TextReader readCommonMark)
,("rst" , TextReader readRST)
,("mediawiki" , TextReader readMediaWiki)
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
new file mode 100644
index 000000000..b9ef96675
--- /dev/null
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -0,0 +1,529 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TupleSections #-}
+
+{-
+ Copyright (C) 2018-2019 Alexander Krotov <ilabdsf@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.DokuWiki
+ Copyright : Copyright (C) 2018 Alexander Krotov
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Alexander Krotov <ilabdsf@gmail.com>
+ Stability : alpha
+ Portability : portable
+
+Conversion of DokuWiki text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.DokuWiki (readDokuWiki) where
+
+import Prelude
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Data.Char (isAlphaNum, isDigit)
+import qualified Data.Foldable as F
+import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf)
+import Data.List.Split (splitOn)
+import Data.Maybe (fromMaybe, catMaybes)
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad (..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError (PandocParsecError))
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (enclosed, nested)
+import Text.Pandoc.Shared (crFilter, trim, underlineSpan)
+
+-- | Read DokuWiki from an input string and return a Pandoc document.
+readDokuWiki :: PandocMonad m
+ => ReaderOptions
+ -> Text
+ -> m Pandoc
+readDokuWiki opts s = do
+ let input = crFilter s
+ res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
+ case res of
+ Left e -> throwError $ PandocParsecError (T.unpack input) e
+ Right d -> return d
+
+type DWParser = ParserT Text ParserState
+
+-- * Utility functions
+
+-- | Parse end-of-line, which can be either a newline or end-of-file.
+eol :: Stream s m Char => ParserT s st m ()
+eol = void newline <|> eof
+
+nested :: PandocMonad m => DWParser m a -> DWParser m a
+nested p = do
+ nestlevel <- stateMaxNestingLevel <$> getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
+guardColumnOne :: PandocMonad m => DWParser m ()
+guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1)
+
+-- | Parse DokuWiki document.
+parseDokuWiki :: PandocMonad m => DWParser m Pandoc
+parseDokuWiki =
+ B.doc . mconcat <$> many block <* spaces <* eof
+
+-- | Parse <code> and <file> attributes
+codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)])
+codeLanguage = try $ do
+ rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>')))
+ let attr = case rawLang of
+ "-" -> []
+ l -> [l]
+ return ("", attr, [])
+
+-- | Generic parser for <code> and <file> tags
+codeTag :: PandocMonad m
+ => ((String, [String], [(String, String)]) -> String -> a)
+ -> String
+ -> DWParser m a
+codeTag f tag = try $ f
+ <$ char '<'
+ <* string tag
+ <*> codeLanguage
+ <* manyTill anyChar (char '>')
+ <* optional (manyTill spaceChar eol)
+ <*> manyTill anyChar (try $ string "</" <* string tag <* char '>')
+
+-- * Inline parsers
+
+-- | Parse any inline element but softbreak.
+inline' :: PandocMonad m => DWParser m B.Inlines
+inline' = whitespace
+ <|> br
+ <|> bold
+ <|> italic
+ <|> underlined
+ <|> nowiki
+ <|> percent
+ <|> link
+ <|> image
+ <|> monospaced
+ <|> subscript
+ <|> superscript
+ <|> deleted
+ <|> footnote
+ <|> inlineCode
+ <|> inlineFile
+ <|> inlineHtml
+ <|> inlinePhp
+ <|> autoLink
+ <|> autoEmail
+ <|> notoc
+ <|> nocache
+ <|> str
+ <|> symbol
+ <?> "inline"
+
+-- | Parse any inline element, including soft break.
+inline :: PandocMonad m => DWParser m B.Inlines
+inline = endline <|> inline'
+
+endline :: PandocMonad m => DWParser m B.Inlines
+endline = try $ B.softbreak <$ skipMany spaceChar <* linebreak
+
+whitespace :: PandocMonad m => DWParser m B.Inlines
+whitespace = try $ B.space <$ skipMany1 spaceChar
+
+br :: PandocMonad m => DWParser m B.Inlines
+br = try $ B.linebreak <$ string "\\\\" <* space
+
+linebreak :: PandocMonad m => DWParser m B.Inlines
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+ where lastNewline = mempty <$ eof
+ innerNewline = pure B.space
+
+between :: (Monoid c, PandocMonad m, Show b)
+ => DWParser m a -> DWParser m b -> (DWParser m b -> DWParser m c)
+ -> DWParser m c
+between start end p =
+ mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
+
+enclosed :: (Monoid b, PandocMonad m, Show a)
+ => DWParser m a -> (DWParser m a -> DWParser m b) -> DWParser m b
+enclosed sep p = between sep (try sep) p
+
+nestedInlines :: (Show a, PandocMonad m)
+ => DWParser m a -> DWParser m B.Inlines
+nestedInlines end = innerSpace <|> nestedInline
+ where
+ innerSpace = try $ whitespace <* notFollowedBy end
+ nestedInline = notFollowedBy whitespace >> nested inline
+
+bold :: PandocMonad m => DWParser m B.Inlines
+bold = try $ B.strong <$> enclosed (string "**") nestedInlines
+
+italic :: PandocMonad m => DWParser m B.Inlines
+italic = try $ B.emph <$> enclosed (string "//") nestedInlines
+
+underlined :: PandocMonad m => DWParser m B.Inlines
+underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines
+
+nowiki :: PandocMonad m => DWParser m B.Inlines
+nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>")
+
+percent :: PandocMonad m => DWParser m B.Inlines
+percent = try $ B.text <$> enclosed (string "%%") nestedString
+
+nestedString :: (Show a, PandocMonad m)
+ => DWParser m a -> DWParser m String
+nestedString end = innerSpace <|> count 1 nonspaceChar
+ where
+ innerSpace = try $ many1 spaceChar <* notFollowedBy end
+
+monospaced :: PandocMonad m => DWParser m B.Inlines
+monospaced = try $ B.code <$> enclosed (string "''") nestedString
+
+subscript :: PandocMonad m => DWParser m B.Inlines
+subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines
+
+superscript :: PandocMonad m => DWParser m B.Inlines
+superscript = try $ B.superscript <$> between (string "<sup>") (try $ string "</sup>") nestedInlines
+
+deleted :: PandocMonad m => DWParser m B.Inlines
+deleted = try $ B.strikeout <$> between (string "<del>") (try $ string "</del>") nestedInlines
+
+-- | Parse a footnote.
+footnote :: PandocMonad m => DWParser m B.Inlines
+footnote = try $ B.note . B.para <$> between (string "((") (try $ string "))") nestedInlines
+
+inlineCode :: PandocMonad m => DWParser m B.Inlines
+inlineCode = codeTag B.codeWith "code"
+
+inlineFile :: PandocMonad m => DWParser m B.Inlines
+inlineFile = codeTag B.codeWith "file"
+
+inlineHtml :: PandocMonad m => DWParser m B.Inlines
+inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>")
+
+inlinePhp :: PandocMonad m => DWParser m B.Inlines
+inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>")
+
+makeLink :: (String, String) -> B.Inlines
+makeLink (text, url) = B.link url "" $ B.str text
+
+autoEmail :: PandocMonad m => DWParser m B.Inlines
+autoEmail = try $ do
+ state <- getState
+ guard $ stateAllowLinks state
+ makeLink <$ char '<' <*> emailAddress <* char '>'
+
+autoLink :: PandocMonad m => DWParser m B.Inlines
+autoLink = try $ do
+ state <- getState
+ guard $ stateAllowLinks state
+ (text, url) <- uri
+ guard $ checkLink (last url)
+ return $ makeLink (text, url)
+ where
+ checkLink c
+ | c == '/' = True
+ | otherwise = isAlphaNum c
+
+notoc :: PandocMonad m => DWParser m B.Inlines
+notoc = try $ mempty <$ string "~~NOTOC~~"
+
+nocache :: PandocMonad m => DWParser m B.Inlines
+nocache = try $ mempty <$ string "~~NOCACHE~~"
+
+str :: PandocMonad m => DWParser m B.Inlines
+str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
+
+symbol :: PandocMonad m => DWParser m B.Inlines
+symbol = B.str <$> count 1 nonspaceChar
+
+link :: PandocMonad m => DWParser m B.Inlines
+link = try $ do
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ l <- linkText
+ setState $ st{ stateAllowLinks = True }
+ return l
+
+isExternalLink :: String -> Bool
+isExternalLink s =
+ case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of
+ (':':'/':'/':_) -> True
+ _ -> False
+
+isAbsolutePath :: String -> Bool
+isAbsolutePath ('.':_) = False
+isAbsolutePath s = ':' `elem` s
+
+normalizeDots :: String -> String
+normalizeDots path@('.':_) =
+ case dropWhile (== '.') path of
+ ':':_ -> path
+ _ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path
+normalizeDots path = path
+
+normalizeInternalPath :: String -> String
+normalizeInternalPath path =
+ if isAbsolutePath path
+ then ensureAbsolute normalizedPath
+ else normalizedPath
+ where
+ normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path
+ ensureAbsolute s@('/':_) = s
+ ensureAbsolute s = '/':s
+
+normalizePath :: String -> String
+normalizePath path =
+ if isExternalLink path
+ then path
+ else normalizeInternalPath path
+
+urlToText :: String -> String
+urlToText url =
+ if isExternalLink url
+ then url
+ else reverse $ takeWhile (/= ':') $ reverse url
+
+-- Parse link or image
+parseLink :: PandocMonad m
+ => (String -> Maybe B.Inlines -> B.Inlines)
+ -> String
+ -> String
+ -> DWParser m B.Inlines
+parseLink f l r = f
+ <$ string l
+ <*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r)))
+ <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r)))
+ <* string r
+
+-- | Split Interwiki link into left and right part
+-- | Return Nothing if it is not Interwiki link
+splitInterwiki :: String -> Maybe (String, String)
+splitInterwiki path =
+ case span (\c -> isAlphaNum c || c == '.') path of
+ (l, '>':r) -> Just (l, r)
+ _ -> Nothing
+
+interwikiToUrl :: String -> String -> String
+interwikiToUrl "callto" page = "callto://" ++ page
+interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page
+interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page
+interwikiToUrl "tel" page = "tel:" ++ page
+interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page
+interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page
+interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page
+interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page
+interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page
+interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page
+interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky"
+
+linkText :: PandocMonad m => DWParser m B.Inlines
+linkText = parseLink fromRaw "[[" "]]"
+ where
+ fromRaw path description =
+ B.link normalizedPath "" (fromMaybe (B.str defaultDescription) description)
+ where
+ path' = trim path
+ interwiki = splitInterwiki path'
+ normalizedPath =
+ case interwiki of
+ Nothing -> normalizePath path'
+ Just (l, r) -> interwikiToUrl l r
+ defaultDescription =
+ case interwiki of
+ Nothing -> urlToText path'
+ Just (_, r) -> r
+
+-- Matches strings like "100x100" (width x height) and "50" (width)
+isWidthHeightParameter :: String -> Bool
+isWidthHeightParameter s =
+ case s of
+ (x:xs) ->
+ isDigit x && case dropWhile isDigit xs of
+ ('x':ys@(_:_)) -> all isDigit ys
+ "" -> True
+ _ -> False
+ _ -> False
+
+parseWidthHeight :: String -> (Maybe String, Maybe String)
+parseWidthHeight s = (width, height)
+ where
+ width = Just $ takeWhile isDigit s
+ height =
+ case dropWhile isDigit s of
+ ('x':xs) -> Just xs
+ _ -> Nothing
+
+image :: PandocMonad m => DWParser m B.Inlines
+image = parseLink fromRaw "{{" "}}"
+ where
+ fromRaw path description =
+ if linkOnly
+ then B.link normalizedPath "" (fromMaybe defaultDescription description)
+ else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description)
+ where
+ (path', parameters) = span (/= '?') $ trim path
+ normalizedPath = normalizePath path'
+ leftPadding = " " `isPrefixOf` path
+ rightPadding = " " `isSuffixOf` path
+ classes =
+ case (leftPadding, rightPadding) of
+ (False, False) -> []
+ (False, True) -> ["align-left"]
+ (True, False) -> ["align-right"]
+ (True, True) -> ["align-center"]
+ parameterList = splitOn "&" $ drop 1 parameters
+ linkOnly = "linkonly" `elem` parameterList
+ (width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList)
+ attributes = catMaybes [fmap ("width",) width, fmap ("height",) height]
+ defaultDescription = B.str $ urlToText path'
+
+-- * Block parsers
+
+block :: PandocMonad m => DWParser m B.Blocks
+block = do
+ res <- mempty <$ skipMany1 blankline
+ <|> blockElements
+ <|> para
+ skipMany blankline
+ trace (take 60 $ show $ B.toList res)
+ return res
+
+blockElements :: PandocMonad m => DWParser m B.Blocks
+blockElements = horizontalLine
+ <|> header
+ <|> list " "
+ <|> indentedCode
+ <|> quote
+ <|> blockCode
+ <|> blockFile
+ <|> blockHtml
+ <|> blockPhp
+ <|> table
+
+horizontalLine :: PandocMonad m => DWParser m B.Blocks
+horizontalLine = try $ B.horizontalRule <$ string "---" <* many1 (char '-') <* eol
+
+header :: PandocMonad m => DWParser m B.Blocks
+header = try $ do
+ guardColumnOne
+ eqs <- many1 (char '=')
+ let lev = length eqs
+ guard $ lev < 7
+ contents <- B.trimInlines . mconcat <$> manyTill inline (try $ char '=' *> many1 (char '='))
+ attr <- registerHeader nullAttr contents
+ return $ B.headerWith attr (7 - lev) contents
+
+list :: PandocMonad m => String -> DWParser m B.Blocks
+list prefix = bulletList prefix <|> orderedList prefix
+
+bulletList :: PandocMonad m => String -> DWParser m B.Blocks
+bulletList prefix = try $ B.bulletList <$> parseList prefix '*'
+
+orderedList :: PandocMonad m => String -> DWParser m B.Blocks
+orderedList prefix = try $ B.orderedList <$> parseList prefix '-'
+
+parseList :: PandocMonad m
+ => String
+ -> Char
+ -> DWParser m [B.Blocks]
+parseList prefix marker =
+ many1 ((<>) <$> item <*> fmap mconcat (many continuation))
+ where
+ continuation = try $ list (" " ++ prefix)
+ item = try $ string prefix *> char marker *> char ' ' *> itemContents
+ itemContents = B.plain . mconcat <$> many1Till inline' eol
+
+indentedCode :: PandocMonad m => DWParser m B.Blocks
+indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine
+ where
+ indentedLine = try $ string " " *> manyTill anyChar eol
+
+quote :: PandocMonad m => DWParser m B.Blocks
+quote = try $ nestedQuote 0
+ where
+ prefix level = count level (char '>')
+ contents level = nestedQuote level <|> quoteLine
+ quoteLine = try $ B.plain . B.trimInlines . mconcat <$> many1Till inline' eol
+ quoteContents level = (<>) <$> contents level <*> quoteContinuation level
+ quoteContinuation level = mconcat <$> many (try $ prefix level *> contents level)
+ nestedQuote level = B.blockQuote <$ char '>' <*> quoteContents (level + 1 :: Int)
+
+blockHtml :: PandocMonad m => DWParser m B.Blocks
+blockHtml = try $ B.rawBlock "html"
+ <$ string "<HTML>"
+ <* optional (manyTill spaceChar eol)
+ <*> manyTill anyChar (try $ string "</HTML>")
+
+blockPhp :: PandocMonad m => DWParser m B.Blocks
+blockPhp = try $ B.codeBlockWith ("", ["php"], [])
+ <$ string "<PHP>"
+ <* optional (manyTill spaceChar eol)
+ <*> manyTill anyChar (try $ string "</PHP>")
+
+table :: PandocMonad m => DWParser m B.Blocks
+table = do
+ firstSeparator <- lookAhead tableCellSeparator
+ rows <- tableRows
+ let (headerRow, body) = if firstSeparator == '^'
+ then (head rows, tail rows)
+ else ([], rows)
+ let attrs = const (AlignDefault, 0.0) <$> transpose rows
+ pure $ B.table mempty attrs headerRow body
+
+tableRows :: PandocMonad m => DWParser m [[B.Blocks]]
+tableRows = many1 tableRow
+
+tableRow :: PandocMonad m => DWParser m [B.Blocks]
+tableRow = many1Till tableCell tableRowEnd
+
+tableRowEnd :: PandocMonad m => DWParser m Char
+tableRowEnd = try $ tableCellSeparator <* manyTill spaceChar eol
+
+tableCellSeparator :: PandocMonad m => DWParser m Char
+tableCellSeparator = char '|' <|> char '^'
+
+tableCell :: PandocMonad m => DWParser m B.Blocks
+tableCell = try $ B.plain . B.trimInlines . mconcat <$> (normalCell <|> headerCell)
+ where
+ normalCell = char '|' *> manyTill inline' (lookAhead tableCellSeparator)
+ headerCell = char '^' *> manyTill inline' (lookAhead tableCellSeparator)
+
+blockCode :: PandocMonad m => DWParser m B.Blocks
+blockCode = codeTag B.codeBlockWith "code"
+
+blockFile :: PandocMonad m => DWParser m B.Blocks
+blockFile = codeTag B.codeBlockWith "file"
+
+para :: PandocMonad m => DWParser m B.Blocks
+para = result . mconcat <$> many1Till inline endOfParaElement
+ where
+ endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
+ endOfInput = try $ skipMany blankline >> skipSpaces >> eof
+ endOfPara = try $ blankline >> skipMany1 blankline
+ newBlockElement = try $ blankline >> void blockElements
+ result content = if F.all (==Space) content
+ then mempty
+ else B.para $ B.trimInlines content
diff --git a/test/Tests/Readers/DokuWiki.hs b/test/Tests/Readers/DokuWiki.hs
new file mode 100644
index 000000000..02fe1553b
--- /dev/null
+++ b/test/Tests/Readers/DokuWiki.hs
@@ -0,0 +1,315 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+module Tests.Readers.DokuWiki (tests) where
+
+import Prelude
+import Data.Text (Text)
+import qualified Data.Text as T
+import Test.Tasty
+import Tests.Helpers
+import Text.Pandoc
+import Text.Pandoc.Arbitrary ()
+import Text.Pandoc.Builder
+import Text.Pandoc.Shared (underlineSpan)
+
+dokuwiki :: Text -> Pandoc
+dokuwiki = purely $ readDokuWiki def{ readerStandalone = True }
+
+infix 4 =:
+(=:) :: ToString c
+ => String -> (Text, c) -> TestTree
+(=:) = test dokuwiki
+
+tests :: [TestTree]
+tests = [ testGroup "inlines"
+ [ "Bold" =:
+ "**bold**" =?>
+ para (strong "bold")
+ , "Italic" =:
+ "//italic//" =?>
+ para (emph "italic")
+ , "Underlined" =:
+ "__underlined__" =?>
+ para (underlineSpan "underlined")
+ , "Monospaced" =:
+ "''monospaced''" =?>
+ para (code "monospaced")
+ , "Combined" =:
+ "**__//''combine''//__**" =?>
+ para (strong $ underlineSpan $ emph $ code "combine")
+ , "Nowiki" =:
+ T.unlines [ "<nowiki>"
+ , "This is some text which contains addresses like this: http://www.splitbrain.org and **formatting**, but nothing is done with it."
+ , "</nowiki>"
+ ] =?>
+ para "This is some text which contains addresses like this: http://www.splitbrain.org and **formatting**, but nothing is done with it."
+ , "Percent" =:
+ "The same is true for %%//__this__ text// with a smiley ;-)%%." =?>
+ para "The same is true for //__this__ text// with a smiley ;-)."
+ , "Subscript" =:
+ "<sub>subscript</sub>" =?>
+ para (subscript "subscript")
+ , "Superscript" =:
+ "<sup>superscript</sup>" =?>
+ para (superscript "superscript")
+ , "Deleted" =:
+ "<del>deleted</del>" =?>
+ para (strikeout "deleted")
+ , "Inline code" =:
+ "foo <code java>public static void main</code> bar" =?>
+ para (text "foo " <> codeWith ("", ["java"], []) "public static void main" <> text " bar")
+ , "Inline file" =:
+ "foo <file></code></file> bar" =?>
+ para (text "foo " <> code "</code>" <> text " bar")
+ , "Inline HTML" =:
+ "<html>\nThis is some <span style=\"color:red;font-size:150%;\">inline HTML</span>\n</html>" =?>
+ para (rawInline "html" "\nThis is some <span style=\"color:red;font-size:150%;\">inline HTML</span>\n")
+ , "Inline PHP" =:
+ "<php>echo '<p>Hello World</p>';</php>" =?>
+ para (codeWith ("", ["php"], []) "echo '<p>Hello World</p>';")
+ , "Linebreak" =:
+ T.unlines [ "This is some text with some linebreaks\\\\ Note that the"
+ , "two backslashes are only recognized at the end of a line\\\\"
+ , "or followed by\\\\ a whitespace \\\\this happens without it."
+ ] =?>
+ para ("This is some text with some linebreaks" <> linebreak <> "Note that the\n" <>
+ "two backslashes are only recognized at the end of a line" <> linebreak <>
+ "or followed by" <> linebreak <> "a whitespace \\\\this happens without it.")
+ , testGroup "External links"
+ [ "Autolink" =:
+ "http://www.google.com" =?>
+ para (link "http://www.google.com" "" (str "http://www.google.com"))
+ , "Link without description" =:
+ "[[https://example.com]]" =?>
+ para (link "https://example.com" "" (str "https://example.com"))
+ , "Link with description" =:
+ "[[http://www.google.com|This Link points to google]]" =?>
+ para (link "http://www.google.com" "" (text "This Link points to google"))
+ , "Trim whitespace around link and description" =:
+ "[[ http://www.google.com | This Link points to google ]]" =?>
+ para (link "http://www.google.com" "" (text "This Link points to google"))
+ , "Email address" =:
+ "<andi@splitbrain.org>" =?>
+ para (link "mailto:andi@splitbrain.org" "" (str "andi@splitbrain.org"))
+ ]
+ , testGroup "Internal links"
+ [ "Current namespace" =:
+ "[[example]]" =?>
+ para (link "example" "" (str "example"))
+ , "Current namespace starting with dot" =:
+ "[[.example]]" =?>
+ para (link "example" "" (str ".example"))
+ , "Current namespace starting with dot and colon" =:
+ "[[.:example]]" =?>
+ para (link "example" "" (str "example"))
+ , "Root namespace" =:
+ "[[:example]]" =?>
+ para (link "/example" "" (str "example"))
+ , "Parent namespace" =:
+ "[[..example]]" =?>
+ para (link "../example" "" (str "..example"))
+ , "Parent namespace with colon" =:
+ "[[..:example]]" =?>
+ para (link "../example" "" (str "example"))
+ , "Beneath the root namespace" =:
+ "[[wiki:example]]" =?>
+ para (link "/wiki/example" "" (str "example"))
+ , "Explicitly beneath the root namespace" =:
+ "[[:wiki:example]]" =?>
+ para (link "/wiki/example" "" (str "example"))
+ ]
+ , testGroup "Interwiki links"
+ [ "Interwiki without description" =:
+ "[[doku>DokuWiki]]" =?>
+ para (link "https://www.dokuwiki.org/DokuWiki" "" (str "DokuWiki"))
+ , "Interwiki link with description" =:
+ "[[doku>toolbar|quickbuttons]]" =?>
+ para (link "https://www.dokuwiki.org/toolbar" "" (str "quickbuttons"))
+ ]
+ , "Footnote" =:
+ "((This is a footnote))" =?>
+ para (note (para "This is a footnote"))
+ , testGroup "Images"
+ [ "Image" =:
+ "{{image.jpg}}" =?>
+ para (image "image.jpg" "" (str "image.jpg"))
+ , "Image with caption" =:
+ "{{image.png|This is the caption}}" =?>
+ para (image "image.png" "" "This is the caption")
+ , "Image with } in caption" =:
+ "{{image.png|There is an } in the caption}}" =?>
+ para (image "image.png" "" "There is an } in the caption")
+ , "Wiki namespace starting with dot" =:
+ "{{.wiki:image.jpg}}" =?>
+ para (image "wiki/image.jpg" "" (str "image.jpg"))
+ , "Left aligned image" =:
+ "{{wiki:dokuwiki-128.png }}" =?>
+ para (imageWith ("", ["align-left"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+ , "Right aligned image" =:
+ "{{ wiki:dokuwiki-128.png}}" =?>
+ para (imageWith ("", ["align-right"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+ , "Centered image" =:
+ "{{ wiki:dokuwiki-128.png }}" =?>
+ para (imageWith ("", ["align-center"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+ , "Image with width" =:
+ "{{wiki:dokuwiki-128.png?50}}" =?>
+ para (imageWith ("", [], [("width", "50")]) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+ , "Image with width and height" =:
+ "{{wiki:dokuwiki-128.png?nocache&50x100}}" =?>
+ para (imageWith ("", [], [("width", "50"), ("height", "100")]) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+ , "Linkonly" =:
+ "{{wiki:dokuwiki-128.png?linkonly}}" =?>
+ para (link "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png"))
+ ]
+ , "Ignore ~~NOTOC~~" =:
+ "Here is a ~~NOTOC~~ macro" =?>
+ para "Here is a macro"
+ , "Ignore ~~NOCACHE~~" =:
+ "Here is a ~~NOCACHE~~ macro" =?>
+ para "Here is a macro"
+ ]
+ , testGroup "Sectioning"
+ [ "Headline level 1" =:
+ "====== Headline Level 1 ======" =?>
+ header 1 "Headline Level 1"
+ , "Headline level 2" =:
+ "===== Headline Level 2 =====" =?>
+ header 2 "Headline Level 2"
+ , "Headline level 3" =:
+ "==== Headline Level 3 ====" =?>
+ header 3 "Headline Level 3"
+ , "Headline level 4" =:
+ "=== Headline Level 4 ===" =?>
+ header 4 "Headline Level 4"
+ , "Headline level 5" =:
+ "== Headline Level 5 ==" =?>
+ header 5 "Headline Level 5"
+ , "Only two closing = are required" =:
+ "====== Headline Level 1 ==" =?>
+ header 1 "Headline Level 1"
+ , "One closing = is not enough" =:
+ "====== Headline Level 1 =" =?>
+ para "====== Headline Level 1 ="
+ , "One closing = is not enough" =:
+ "== Headline with = sign ==" =?>
+ header 5 "Headline with = sign"
+ ]
+ , "Horizontal line" =:
+ "----" =?>
+ horizontalRule
+ , testGroup "Lists"
+ [ "Unordered list" =:
+ T.unlines [ " * This is a list"
+ , " * The second item"
+ , " * You may have different levels"
+ , " * Another item"
+ ] =?>
+ bulletList [ plain "This is a list"
+ , plain "The second item" <>
+ bulletList [ plain "You may have different levels" ]
+ , plain "Another item"
+ ]
+ , "Ordered list" =:
+ T.unlines [ " - The same list but ordered"
+ , " - Another item"
+ , " - Just use indention for deeper levels"
+ , " - That's it"
+ ] =?>
+ orderedList [ plain "The same list but ordered"
+ , plain "Another item" <>
+ orderedList [ plain "Just use indention for deeper levels" ]
+ , plain "That's it"
+ ]
+ , "Multiline list items" =: -- https://www.dokuwiki.org/faq:lists
+ T.unlines [ " - first item"
+ , " - second item with linebreak\\\\ second line"
+ , " - third item with code: <code>"
+ , "some code"
+ , "comes here"
+ , "</code>"
+ , " - fourth item"
+ ] =?>
+ orderedList [ plain "first item"
+ , plain ("second item with linebreak" <> linebreak <> " second line")
+ , plain ("third item with code: " <> code "some code\ncomes here\n")
+ , plain "fourth item"
+ ]
+ ]
+ , "Block HTML" =:
+ T.unlines [ "<HTML>"
+ , "<p style=\"border:2px dashed red;\">And this is some block HTML</p>"
+ , "</HTML>"
+ ] =?>
+ rawBlock "html" "<p style=\"border:2px dashed red;\">And this is some block HTML</p>\n"
+ , "Block PHP" =:
+ T.unlines [ "<PHP>"
+ , "echo '<p>Hello World</p>';"
+ , "</PHP>"
+ ] =?>
+ codeBlockWith ("", ["php"], []) "echo '<p>Hello World</p>';\n"
+ , "Quote" =:
+ T.unlines [ "> foo"
+ , ">no space is required after >"
+ , "> bar"
+ , ">> baz"
+ , "> bat"
+ ] =?>
+ blockQuote (plain "foo" <>
+ plain "no space is required after >" <>
+ plain "bar" <>
+ blockQuote (plain "baz") <>
+ plain "bat")
+ , "Code block" =:
+ T.unlines [ "<code>"
+ , "foo bar baz"
+ , "</code>"
+ ] =?>
+ codeBlock "foo bar baz\n"
+ , "Java code block" =:
+ T.unlines [ "<code java>"
+ , "public static void main"
+ , "</code>"
+ ] =?>
+ codeBlockWith ("", ["java"], []) "public static void main\n"
+ , "File with filename and no language" =:
+ T.unlines [ "<file - foo.bar>"
+ , "file contents"
+ , "</file>"
+ ] =?>
+ codeBlock "file contents\n"
+ , "Table" =:
+ T.unlines [ "| foo | bar |"
+ , "| bat | baz |"
+ ] =?>
+ table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)]
+ []
+ [[plain "foo", plain "bar"]
+ ,[plain "bat", plain "baz"]]
+ , "Table with header" =:
+ T.unlines [ "^ foo ^ bar ^"
+ , "| bat | baz |"
+ ] =?>
+ table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)]
+ [plain "foo", plain "bar"]
+ [[plain "bat", plain "baz"]]
+ , "Table with colspan" =:
+ T.unlines [ "^ 0,0 ^ 0,1 ^ 0,2 ^"
+ , "| 1,0 | 1,1 ||"
+ , "| 2,0 | 2,1 | 2,2 |"
+ ] =?>
+ table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0), (AlignDefault, 0.0)]
+ [plain "0,0", plain "0,1", plain "0,2"]
+ [[plain "1,0", plain "1,1", mempty]
+ ,[plain "2,0", plain "2,1", plain "2,2"]
+ ]
+ , "Indented code block" =:
+ T.unlines [ "foo"
+ , " bar"
+ , " bat"
+ , "baz"
+ ] =?>
+ para "foo" <>
+ codeBlock "bar\n bat\n" <>
+ para "baz"
+ ]
diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs
index 63560936c..946a676e0 100644
--- a/test/test-pandoc.hs
+++ b/test/test-pandoc.hs
@@ -11,6 +11,7 @@ import qualified Tests.Lua
import qualified Tests.Old
import qualified Tests.Readers.Creole
import qualified Tests.Readers.Docx
+import qualified Tests.Readers.DokuWiki
import qualified Tests.Readers.EPUB
import qualified Tests.Readers.FB2
import qualified Tests.Readers.HTML
@@ -80,6 +81,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests
, testGroup "Creole" Tests.Readers.Creole.tests
, testGroup "Man" Tests.Readers.Man.tests
, testGroup "FB2" Tests.Readers.FB2.tests
+ , testGroup "DokuWiki" Tests.Readers.DokuWiki.tests
]
, testGroup "Lua filters" Tests.Lua.tests
]