diff options
-rw-r--r-- | MANUAL.txt | 1 | ||||
-rw-r--r-- | pandoc.cabal | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/DokuWiki.hs | 529 | ||||
-rw-r--r-- | test/Tests/Readers/DokuWiki.hs | 315 | ||||
-rw-r--r-- | test/test-pandoc.hs | 2 |
6 files changed, 855 insertions, 2 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index fe16b6c67..417ec8d6c 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -238,6 +238,7 @@ General options {.options} - `creole` ([Creole 1.0]) - `docbook` ([DocBook]) - `docx` ([Word docx]) + - `dokuwiki` ([DokuWiki markup]) - `epub` ([EPUB]) - `fb2` ([FictionBook2] e-book) - `gfm` ([GitHub-Flavored Markdown]), diff --git a/pandoc.cabal b/pandoc.cabal index 5efd740cd..b99a9a8e0 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -18,8 +18,9 @@ description: Pandoc is a Haskell library for converting from one markup format to another, and a command-line tool that uses this library. It can read several dialects of Markdown and (subsets of) HTML, reStructuredText, LaTeX, DocBook, JATS, - MediaWiki markup, TWiki markup, TikiWiki markup, Creole 1.0, - Haddock markup, OPML, Emacs Org-Mode, Emacs Muse, txt2tags, + MediaWiki markup, DokuWiki markup, TWiki markup, + TikiWiki markup, Creole 1.0, Haddock markup, OPML, + Emacs Org-Mode, Emacs Muse, txt2tags, Vimwiki, Word Docx, ODT, EPUB, FictionBook2, roff man, and Textile, and it can write Markdown, reStructuredText, XHTML, HTML 5, LaTeX, ConTeXt, DocBook, JATS, OPML, TEI, @@ -467,6 +468,7 @@ library Text.Pandoc.Readers.Muse, Text.Pandoc.Readers.Man, Text.Pandoc.Readers.FB2, + Text.Pandoc.Readers.DokuWiki, Text.Pandoc.Writers, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, @@ -696,6 +698,7 @@ test-suite test-pandoc Tests.Readers.Creole Tests.Readers.Man Tests.Readers.FB2 + Tests.Readers.DokuWiki Tests.Writers.Native Tests.Writers.ConTeXt Tests.Writers.Docbook diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 630b8b858..434725887 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -47,6 +47,7 @@ module Text.Pandoc.Readers , readMarkdown , readCommonMark , readCreole + , readDokuWiki , readMediaWiki , readVimwiki , readRST @@ -86,6 +87,7 @@ import Text.Pandoc.Readers.CommonMark import Text.Pandoc.Readers.Creole import Text.Pandoc.Readers.DocBook import Text.Pandoc.Readers.Docx +import Text.Pandoc.Readers.DokuWiki import Text.Pandoc.Readers.EPUB import Text.Pandoc.Readers.FB2 import Text.Pandoc.Readers.Haddock @@ -123,6 +125,7 @@ readers = [ ("native" , TextReader readNative) ,("markdown_mmd", TextReader readMarkdown) ,("commonmark" , TextReader readCommonMark) ,("creole" , TextReader readCreole) + ,("dokuwiki" , TextReader readDokuWiki) ,("gfm" , TextReader readCommonMark) ,("rst" , TextReader readRST) ,("mediawiki" , TextReader readMediaWiki) diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs new file mode 100644 index 000000000..b9ef96675 --- /dev/null +++ b/src/Text/Pandoc/Readers/DokuWiki.hs @@ -0,0 +1,529 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} + +{- + Copyright (C) 2018-2019 Alexander Krotov <ilabdsf@gmail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.DokuWiki + Copyright : Copyright (C) 2018 Alexander Krotov + License : GNU GPL, version 2 or above + + Maintainer : Alexander Krotov <ilabdsf@gmail.com> + Stability : alpha + Portability : portable + +Conversion of DokuWiki text to 'Pandoc' document. +-} +module Text.Pandoc.Readers.DokuWiki (readDokuWiki) where + +import Prelude +import Control.Monad +import Control.Monad.Except (throwError) +import Data.Char (isAlphaNum, isDigit) +import qualified Data.Foldable as F +import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf) +import Data.List.Split (splitOn) +import Data.Maybe (fromMaybe, catMaybes) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError (PandocParsecError)) +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (enclosed, nested) +import Text.Pandoc.Shared (crFilter, trim, underlineSpan) + +-- | Read DokuWiki from an input string and return a Pandoc document. +readDokuWiki :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readDokuWiki opts s = do + let input = crFilter s + res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input + case res of + Left e -> throwError $ PandocParsecError (T.unpack input) e + Right d -> return d + +type DWParser = ParserT Text ParserState + +-- * Utility functions + +-- | Parse end-of-line, which can be either a newline or end-of-file. +eol :: Stream s m Char => ParserT s st m () +eol = void newline <|> eof + +nested :: PandocMonad m => DWParser m a -> DWParser m a +nested p = do + nestlevel <- stateMaxNestingLevel <$> getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + +guardColumnOne :: PandocMonad m => DWParser m () +guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1) + +-- | Parse DokuWiki document. +parseDokuWiki :: PandocMonad m => DWParser m Pandoc +parseDokuWiki = + B.doc . mconcat <$> many block <* spaces <* eof + +-- | Parse <code> and <file> attributes +codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)]) +codeLanguage = try $ do + rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>'))) + let attr = case rawLang of + "-" -> [] + l -> [l] + return ("", attr, []) + +-- | Generic parser for <code> and <file> tags +codeTag :: PandocMonad m + => ((String, [String], [(String, String)]) -> String -> a) + -> String + -> DWParser m a +codeTag f tag = try $ f + <$ char '<' + <* string tag + <*> codeLanguage + <* manyTill anyChar (char '>') + <* optional (manyTill spaceChar eol) + <*> manyTill anyChar (try $ string "</" <* string tag <* char '>') + +-- * Inline parsers + +-- | Parse any inline element but softbreak. +inline' :: PandocMonad m => DWParser m B.Inlines +inline' = whitespace + <|> br + <|> bold + <|> italic + <|> underlined + <|> nowiki + <|> percent + <|> link + <|> image + <|> monospaced + <|> subscript + <|> superscript + <|> deleted + <|> footnote + <|> inlineCode + <|> inlineFile + <|> inlineHtml + <|> inlinePhp + <|> autoLink + <|> autoEmail + <|> notoc + <|> nocache + <|> str + <|> symbol + <?> "inline" + +-- | Parse any inline element, including soft break. +inline :: PandocMonad m => DWParser m B.Inlines +inline = endline <|> inline' + +endline :: PandocMonad m => DWParser m B.Inlines +endline = try $ B.softbreak <$ skipMany spaceChar <* linebreak + +whitespace :: PandocMonad m => DWParser m B.Inlines +whitespace = try $ B.space <$ skipMany1 spaceChar + +br :: PandocMonad m => DWParser m B.Inlines +br = try $ B.linebreak <$ string "\\\\" <* space + +linebreak :: PandocMonad m => DWParser m B.Inlines +linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) + where lastNewline = mempty <$ eof + innerNewline = pure B.space + +between :: (Monoid c, PandocMonad m, Show b) + => DWParser m a -> DWParser m b -> (DWParser m b -> DWParser m c) + -> DWParser m c +between start end p = + mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) + +enclosed :: (Monoid b, PandocMonad m, Show a) + => DWParser m a -> (DWParser m a -> DWParser m b) -> DWParser m b +enclosed sep p = between sep (try sep) p + +nestedInlines :: (Show a, PandocMonad m) + => DWParser m a -> DWParser m B.Inlines +nestedInlines end = innerSpace <|> nestedInline + where + innerSpace = try $ whitespace <* notFollowedBy end + nestedInline = notFollowedBy whitespace >> nested inline + +bold :: PandocMonad m => DWParser m B.Inlines +bold = try $ B.strong <$> enclosed (string "**") nestedInlines + +italic :: PandocMonad m => DWParser m B.Inlines +italic = try $ B.emph <$> enclosed (string "//") nestedInlines + +underlined :: PandocMonad m => DWParser m B.Inlines +underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines + +nowiki :: PandocMonad m => DWParser m B.Inlines +nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>") + +percent :: PandocMonad m => DWParser m B.Inlines +percent = try $ B.text <$> enclosed (string "%%") nestedString + +nestedString :: (Show a, PandocMonad m) + => DWParser m a -> DWParser m String +nestedString end = innerSpace <|> count 1 nonspaceChar + where + innerSpace = try $ many1 spaceChar <* notFollowedBy end + +monospaced :: PandocMonad m => DWParser m B.Inlines +monospaced = try $ B.code <$> enclosed (string "''") nestedString + +subscript :: PandocMonad m => DWParser m B.Inlines +subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines + +superscript :: PandocMonad m => DWParser m B.Inlines +superscript = try $ B.superscript <$> between (string "<sup>") (try $ string "</sup>") nestedInlines + +deleted :: PandocMonad m => DWParser m B.Inlines +deleted = try $ B.strikeout <$> between (string "<del>") (try $ string "</del>") nestedInlines + +-- | Parse a footnote. +footnote :: PandocMonad m => DWParser m B.Inlines +footnote = try $ B.note . B.para <$> between (string "((") (try $ string "))") nestedInlines + +inlineCode :: PandocMonad m => DWParser m B.Inlines +inlineCode = codeTag B.codeWith "code" + +inlineFile :: PandocMonad m => DWParser m B.Inlines +inlineFile = codeTag B.codeWith "file" + +inlineHtml :: PandocMonad m => DWParser m B.Inlines +inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>") + +inlinePhp :: PandocMonad m => DWParser m B.Inlines +inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>") + +makeLink :: (String, String) -> B.Inlines +makeLink (text, url) = B.link url "" $ B.str text + +autoEmail :: PandocMonad m => DWParser m B.Inlines +autoEmail = try $ do + state <- getState + guard $ stateAllowLinks state + makeLink <$ char '<' <*> emailAddress <* char '>' + +autoLink :: PandocMonad m => DWParser m B.Inlines +autoLink = try $ do + state <- getState + guard $ stateAllowLinks state + (text, url) <- uri + guard $ checkLink (last url) + return $ makeLink (text, url) + where + checkLink c + | c == '/' = True + | otherwise = isAlphaNum c + +notoc :: PandocMonad m => DWParser m B.Inlines +notoc = try $ mempty <$ string "~~NOTOC~~" + +nocache :: PandocMonad m => DWParser m B.Inlines +nocache = try $ mempty <$ string "~~NOCACHE~~" + +str :: PandocMonad m => DWParser m B.Inlines +str = B.str <$> (many1 alphaNum <|> count 1 characterReference) + +symbol :: PandocMonad m => DWParser m B.Inlines +symbol = B.str <$> count 1 nonspaceChar + +link :: PandocMonad m => DWParser m B.Inlines +link = try $ do + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + l <- linkText + setState $ st{ stateAllowLinks = True } + return l + +isExternalLink :: String -> Bool +isExternalLink s = + case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of + (':':'/':'/':_) -> True + _ -> False + +isAbsolutePath :: String -> Bool +isAbsolutePath ('.':_) = False +isAbsolutePath s = ':' `elem` s + +normalizeDots :: String -> String +normalizeDots path@('.':_) = + case dropWhile (== '.') path of + ':':_ -> path + _ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path +normalizeDots path = path + +normalizeInternalPath :: String -> String +normalizeInternalPath path = + if isAbsolutePath path + then ensureAbsolute normalizedPath + else normalizedPath + where + normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path + ensureAbsolute s@('/':_) = s + ensureAbsolute s = '/':s + +normalizePath :: String -> String +normalizePath path = + if isExternalLink path + then path + else normalizeInternalPath path + +urlToText :: String -> String +urlToText url = + if isExternalLink url + then url + else reverse $ takeWhile (/= ':') $ reverse url + +-- Parse link or image +parseLink :: PandocMonad m + => (String -> Maybe B.Inlines -> B.Inlines) + -> String + -> String + -> DWParser m B.Inlines +parseLink f l r = f + <$ string l + <*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r))) + <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r))) + <* string r + +-- | Split Interwiki link into left and right part +-- | Return Nothing if it is not Interwiki link +splitInterwiki :: String -> Maybe (String, String) +splitInterwiki path = + case span (\c -> isAlphaNum c || c == '.') path of + (l, '>':r) -> Just (l, r) + _ -> Nothing + +interwikiToUrl :: String -> String -> String +interwikiToUrl "callto" page = "callto://" ++ page +interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page +interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page +interwikiToUrl "tel" page = "tel:" ++ page +interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page +interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page +interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page +interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page +interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page +interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page +interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky" + +linkText :: PandocMonad m => DWParser m B.Inlines +linkText = parseLink fromRaw "[[" "]]" + where + fromRaw path description = + B.link normalizedPath "" (fromMaybe (B.str defaultDescription) description) + where + path' = trim path + interwiki = splitInterwiki path' + normalizedPath = + case interwiki of + Nothing -> normalizePath path' + Just (l, r) -> interwikiToUrl l r + defaultDescription = + case interwiki of + Nothing -> urlToText path' + Just (_, r) -> r + +-- Matches strings like "100x100" (width x height) and "50" (width) +isWidthHeightParameter :: String -> Bool +isWidthHeightParameter s = + case s of + (x:xs) -> + isDigit x && case dropWhile isDigit xs of + ('x':ys@(_:_)) -> all isDigit ys + "" -> True + _ -> False + _ -> False + +parseWidthHeight :: String -> (Maybe String, Maybe String) +parseWidthHeight s = (width, height) + where + width = Just $ takeWhile isDigit s + height = + case dropWhile isDigit s of + ('x':xs) -> Just xs + _ -> Nothing + +image :: PandocMonad m => DWParser m B.Inlines +image = parseLink fromRaw "{{" "}}" + where + fromRaw path description = + if linkOnly + then B.link normalizedPath "" (fromMaybe defaultDescription description) + else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description) + where + (path', parameters) = span (/= '?') $ trim path + normalizedPath = normalizePath path' + leftPadding = " " `isPrefixOf` path + rightPadding = " " `isSuffixOf` path + classes = + case (leftPadding, rightPadding) of + (False, False) -> [] + (False, True) -> ["align-left"] + (True, False) -> ["align-right"] + (True, True) -> ["align-center"] + parameterList = splitOn "&" $ drop 1 parameters + linkOnly = "linkonly" `elem` parameterList + (width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList) + attributes = catMaybes [fmap ("width",) width, fmap ("height",) height] + defaultDescription = B.str $ urlToText path' + +-- * Block parsers + +block :: PandocMonad m => DWParser m B.Blocks +block = do + res <- mempty <$ skipMany1 blankline + <|> blockElements + <|> para + skipMany blankline + trace (take 60 $ show $ B.toList res) + return res + +blockElements :: PandocMonad m => DWParser m B.Blocks +blockElements = horizontalLine + <|> header + <|> list " " + <|> indentedCode + <|> quote + <|> blockCode + <|> blockFile + <|> blockHtml + <|> blockPhp + <|> table + +horizontalLine :: PandocMonad m => DWParser m B.Blocks +horizontalLine = try $ B.horizontalRule <$ string "---" <* many1 (char '-') <* eol + +header :: PandocMonad m => DWParser m B.Blocks +header = try $ do + guardColumnOne + eqs <- many1 (char '=') + let lev = length eqs + guard $ lev < 7 + contents <- B.trimInlines . mconcat <$> manyTill inline (try $ char '=' *> many1 (char '=')) + attr <- registerHeader nullAttr contents + return $ B.headerWith attr (7 - lev) contents + +list :: PandocMonad m => String -> DWParser m B.Blocks +list prefix = bulletList prefix <|> orderedList prefix + +bulletList :: PandocMonad m => String -> DWParser m B.Blocks +bulletList prefix = try $ B.bulletList <$> parseList prefix '*' + +orderedList :: PandocMonad m => String -> DWParser m B.Blocks +orderedList prefix = try $ B.orderedList <$> parseList prefix '-' + +parseList :: PandocMonad m + => String + -> Char + -> DWParser m [B.Blocks] +parseList prefix marker = + many1 ((<>) <$> item <*> fmap mconcat (many continuation)) + where + continuation = try $ list (" " ++ prefix) + item = try $ string prefix *> char marker *> char ' ' *> itemContents + itemContents = B.plain . mconcat <$> many1Till inline' eol + +indentedCode :: PandocMonad m => DWParser m B.Blocks +indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine + where + indentedLine = try $ string " " *> manyTill anyChar eol + +quote :: PandocMonad m => DWParser m B.Blocks +quote = try $ nestedQuote 0 + where + prefix level = count level (char '>') + contents level = nestedQuote level <|> quoteLine + quoteLine = try $ B.plain . B.trimInlines . mconcat <$> many1Till inline' eol + quoteContents level = (<>) <$> contents level <*> quoteContinuation level + quoteContinuation level = mconcat <$> many (try $ prefix level *> contents level) + nestedQuote level = B.blockQuote <$ char '>' <*> quoteContents (level + 1 :: Int) + +blockHtml :: PandocMonad m => DWParser m B.Blocks +blockHtml = try $ B.rawBlock "html" + <$ string "<HTML>" + <* optional (manyTill spaceChar eol) + <*> manyTill anyChar (try $ string "</HTML>") + +blockPhp :: PandocMonad m => DWParser m B.Blocks +blockPhp = try $ B.codeBlockWith ("", ["php"], []) + <$ string "<PHP>" + <* optional (manyTill spaceChar eol) + <*> manyTill anyChar (try $ string "</PHP>") + +table :: PandocMonad m => DWParser m B.Blocks +table = do + firstSeparator <- lookAhead tableCellSeparator + rows <- tableRows + let (headerRow, body) = if firstSeparator == '^' + then (head rows, tail rows) + else ([], rows) + let attrs = const (AlignDefault, 0.0) <$> transpose rows + pure $ B.table mempty attrs headerRow body + +tableRows :: PandocMonad m => DWParser m [[B.Blocks]] +tableRows = many1 tableRow + +tableRow :: PandocMonad m => DWParser m [B.Blocks] +tableRow = many1Till tableCell tableRowEnd + +tableRowEnd :: PandocMonad m => DWParser m Char +tableRowEnd = try $ tableCellSeparator <* manyTill spaceChar eol + +tableCellSeparator :: PandocMonad m => DWParser m Char +tableCellSeparator = char '|' <|> char '^' + +tableCell :: PandocMonad m => DWParser m B.Blocks +tableCell = try $ B.plain . B.trimInlines . mconcat <$> (normalCell <|> headerCell) + where + normalCell = char '|' *> manyTill inline' (lookAhead tableCellSeparator) + headerCell = char '^' *> manyTill inline' (lookAhead tableCellSeparator) + +blockCode :: PandocMonad m => DWParser m B.Blocks +blockCode = codeTag B.codeBlockWith "code" + +blockFile :: PandocMonad m => DWParser m B.Blocks +blockFile = codeTag B.codeBlockWith "file" + +para :: PandocMonad m => DWParser m B.Blocks +para = result . mconcat <$> many1Till inline endOfParaElement + where + endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement + endOfInput = try $ skipMany blankline >> skipSpaces >> eof + endOfPara = try $ blankline >> skipMany1 blankline + newBlockElement = try $ blankline >> void blockElements + result content = if F.all (==Space) content + then mempty + else B.para $ B.trimInlines content diff --git a/test/Tests/Readers/DokuWiki.hs b/test/Tests/Readers/DokuWiki.hs new file mode 100644 index 000000000..02fe1553b --- /dev/null +++ b/test/Tests/Readers/DokuWiki.hs @@ -0,0 +1,315 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +module Tests.Readers.DokuWiki (tests) where + +import Prelude +import Data.Text (Text) +import qualified Data.Text as T +import Test.Tasty +import Tests.Helpers +import Text.Pandoc +import Text.Pandoc.Arbitrary () +import Text.Pandoc.Builder +import Text.Pandoc.Shared (underlineSpan) + +dokuwiki :: Text -> Pandoc +dokuwiki = purely $ readDokuWiki def{ readerStandalone = True } + +infix 4 =: +(=:) :: ToString c + => String -> (Text, c) -> TestTree +(=:) = test dokuwiki + +tests :: [TestTree] +tests = [ testGroup "inlines" + [ "Bold" =: + "**bold**" =?> + para (strong "bold") + , "Italic" =: + "//italic//" =?> + para (emph "italic") + , "Underlined" =: + "__underlined__" =?> + para (underlineSpan "underlined") + , "Monospaced" =: + "''monospaced''" =?> + para (code "monospaced") + , "Combined" =: + "**__//''combine''//__**" =?> + para (strong $ underlineSpan $ emph $ code "combine") + , "Nowiki" =: + T.unlines [ "<nowiki>" + , "This is some text which contains addresses like this: http://www.splitbrain.org and **formatting**, but nothing is done with it." + , "</nowiki>" + ] =?> + para "This is some text which contains addresses like this: http://www.splitbrain.org and **formatting**, but nothing is done with it." + , "Percent" =: + "The same is true for %%//__this__ text// with a smiley ;-)%%." =?> + para "The same is true for //__this__ text// with a smiley ;-)." + , "Subscript" =: + "<sub>subscript</sub>" =?> + para (subscript "subscript") + , "Superscript" =: + "<sup>superscript</sup>" =?> + para (superscript "superscript") + , "Deleted" =: + "<del>deleted</del>" =?> + para (strikeout "deleted") + , "Inline code" =: + "foo <code java>public static void main</code> bar" =?> + para (text "foo " <> codeWith ("", ["java"], []) "public static void main" <> text " bar") + , "Inline file" =: + "foo <file></code></file> bar" =?> + para (text "foo " <> code "</code>" <> text " bar") + , "Inline HTML" =: + "<html>\nThis is some <span style=\"color:red;font-size:150%;\">inline HTML</span>\n</html>" =?> + para (rawInline "html" "\nThis is some <span style=\"color:red;font-size:150%;\">inline HTML</span>\n") + , "Inline PHP" =: + "<php>echo '<p>Hello World</p>';</php>" =?> + para (codeWith ("", ["php"], []) "echo '<p>Hello World</p>';") + , "Linebreak" =: + T.unlines [ "This is some text with some linebreaks\\\\ Note that the" + , "two backslashes are only recognized at the end of a line\\\\" + , "or followed by\\\\ a whitespace \\\\this happens without it." + ] =?> + para ("This is some text with some linebreaks" <> linebreak <> "Note that the\n" <> + "two backslashes are only recognized at the end of a line" <> linebreak <> + "or followed by" <> linebreak <> "a whitespace \\\\this happens without it.") + , testGroup "External links" + [ "Autolink" =: + "http://www.google.com" =?> + para (link "http://www.google.com" "" (str "http://www.google.com")) + , "Link without description" =: + "[[https://example.com]]" =?> + para (link "https://example.com" "" (str "https://example.com")) + , "Link with description" =: + "[[http://www.google.com|This Link points to google]]" =?> + para (link "http://www.google.com" "" (text "This Link points to google")) + , "Trim whitespace around link and description" =: + "[[ http://www.google.com | This Link points to google ]]" =?> + para (link "http://www.google.com" "" (text "This Link points to google")) + , "Email address" =: + "<andi@splitbrain.org>" =?> + para (link "mailto:andi@splitbrain.org" "" (str "andi@splitbrain.org")) + ] + , testGroup "Internal links" + [ "Current namespace" =: + "[[example]]" =?> + para (link "example" "" (str "example")) + , "Current namespace starting with dot" =: + "[[.example]]" =?> + para (link "example" "" (str ".example")) + , "Current namespace starting with dot and colon" =: + "[[.:example]]" =?> + para (link "example" "" (str "example")) + , "Root namespace" =: + "[[:example]]" =?> + para (link "/example" "" (str "example")) + , "Parent namespace" =: + "[[..example]]" =?> + para (link "../example" "" (str "..example")) + , "Parent namespace with colon" =: + "[[..:example]]" =?> + para (link "../example" "" (str "example")) + , "Beneath the root namespace" =: + "[[wiki:example]]" =?> + para (link "/wiki/example" "" (str "example")) + , "Explicitly beneath the root namespace" =: + "[[:wiki:example]]" =?> + para (link "/wiki/example" "" (str "example")) + ] + , testGroup "Interwiki links" + [ "Interwiki without description" =: + "[[doku>DokuWiki]]" =?> + para (link "https://www.dokuwiki.org/DokuWiki" "" (str "DokuWiki")) + , "Interwiki link with description" =: + "[[doku>toolbar|quickbuttons]]" =?> + para (link "https://www.dokuwiki.org/toolbar" "" (str "quickbuttons")) + ] + , "Footnote" =: + "((This is a footnote))" =?> + para (note (para "This is a footnote")) + , testGroup "Images" + [ "Image" =: + "{{image.jpg}}" =?> + para (image "image.jpg" "" (str "image.jpg")) + , "Image with caption" =: + "{{image.png|This is the caption}}" =?> + para (image "image.png" "" "This is the caption") + , "Image with } in caption" =: + "{{image.png|There is an } in the caption}}" =?> + para (image "image.png" "" "There is an } in the caption") + , "Wiki namespace starting with dot" =: + "{{.wiki:image.jpg}}" =?> + para (image "wiki/image.jpg" "" (str "image.jpg")) + , "Left aligned image" =: + "{{wiki:dokuwiki-128.png }}" =?> + para (imageWith ("", ["align-left"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png")) + , "Right aligned image" =: + "{{ wiki:dokuwiki-128.png}}" =?> + para (imageWith ("", ["align-right"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png")) + , "Centered image" =: + "{{ wiki:dokuwiki-128.png }}" =?> + para (imageWith ("", ["align-center"], []) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png")) + , "Image with width" =: + "{{wiki:dokuwiki-128.png?50}}" =?> + para (imageWith ("", [], [("width", "50")]) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png")) + , "Image with width and height" =: + "{{wiki:dokuwiki-128.png?nocache&50x100}}" =?> + para (imageWith ("", [], [("width", "50"), ("height", "100")]) "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png")) + , "Linkonly" =: + "{{wiki:dokuwiki-128.png?linkonly}}" =?> + para (link "/wiki/dokuwiki-128.png" "" (str "dokuwiki-128.png")) + ] + , "Ignore ~~NOTOC~~" =: + "Here is a ~~NOTOC~~ macro" =?> + para "Here is a macro" + , "Ignore ~~NOCACHE~~" =: + "Here is a ~~NOCACHE~~ macro" =?> + para "Here is a macro" + ] + , testGroup "Sectioning" + [ "Headline level 1" =: + "====== Headline Level 1 ======" =?> + header 1 "Headline Level 1" + , "Headline level 2" =: + "===== Headline Level 2 =====" =?> + header 2 "Headline Level 2" + , "Headline level 3" =: + "==== Headline Level 3 ====" =?> + header 3 "Headline Level 3" + , "Headline level 4" =: + "=== Headline Level 4 ===" =?> + header 4 "Headline Level 4" + , "Headline level 5" =: + "== Headline Level 5 ==" =?> + header 5 "Headline Level 5" + , "Only two closing = are required" =: + "====== Headline Level 1 ==" =?> + header 1 "Headline Level 1" + , "One closing = is not enough" =: + "====== Headline Level 1 =" =?> + para "====== Headline Level 1 =" + , "One closing = is not enough" =: + "== Headline with = sign ==" =?> + header 5 "Headline with = sign" + ] + , "Horizontal line" =: + "----" =?> + horizontalRule + , testGroup "Lists" + [ "Unordered list" =: + T.unlines [ " * This is a list" + , " * The second item" + , " * You may have different levels" + , " * Another item" + ] =?> + bulletList [ plain "This is a list" + , plain "The second item" <> + bulletList [ plain "You may have different levels" ] + , plain "Another item" + ] + , "Ordered list" =: + T.unlines [ " - The same list but ordered" + , " - Another item" + , " - Just use indention for deeper levels" + , " - That's it" + ] =?> + orderedList [ plain "The same list but ordered" + , plain "Another item" <> + orderedList [ plain "Just use indention for deeper levels" ] + , plain "That's it" + ] + , "Multiline list items" =: -- https://www.dokuwiki.org/faq:lists + T.unlines [ " - first item" + , " - second item with linebreak\\\\ second line" + , " - third item with code: <code>" + , "some code" + , "comes here" + , "</code>" + , " - fourth item" + ] =?> + orderedList [ plain "first item" + , plain ("second item with linebreak" <> linebreak <> " second line") + , plain ("third item with code: " <> code "some code\ncomes here\n") + , plain "fourth item" + ] + ] + , "Block HTML" =: + T.unlines [ "<HTML>" + , "<p style=\"border:2px dashed red;\">And this is some block HTML</p>" + , "</HTML>" + ] =?> + rawBlock "html" "<p style=\"border:2px dashed red;\">And this is some block HTML</p>\n" + , "Block PHP" =: + T.unlines [ "<PHP>" + , "echo '<p>Hello World</p>';" + , "</PHP>" + ] =?> + codeBlockWith ("", ["php"], []) "echo '<p>Hello World</p>';\n" + , "Quote" =: + T.unlines [ "> foo" + , ">no space is required after >" + , "> bar" + , ">> baz" + , "> bat" + ] =?> + blockQuote (plain "foo" <> + plain "no space is required after >" <> + plain "bar" <> + blockQuote (plain "baz") <> + plain "bat") + , "Code block" =: + T.unlines [ "<code>" + , "foo bar baz" + , "</code>" + ] =?> + codeBlock "foo bar baz\n" + , "Java code block" =: + T.unlines [ "<code java>" + , "public static void main" + , "</code>" + ] =?> + codeBlockWith ("", ["java"], []) "public static void main\n" + , "File with filename and no language" =: + T.unlines [ "<file - foo.bar>" + , "file contents" + , "</file>" + ] =?> + codeBlock "file contents\n" + , "Table" =: + T.unlines [ "| foo | bar |" + , "| bat | baz |" + ] =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)] + [] + [[plain "foo", plain "bar"] + ,[plain "bat", plain "baz"]] + , "Table with header" =: + T.unlines [ "^ foo ^ bar ^" + , "| bat | baz |" + ] =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)] + [plain "foo", plain "bar"] + [[plain "bat", plain "baz"]] + , "Table with colspan" =: + T.unlines [ "^ 0,0 ^ 0,1 ^ 0,2 ^" + , "| 1,0 | 1,1 ||" + , "| 2,0 | 2,1 | 2,2 |" + ] =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0), (AlignDefault, 0.0)] + [plain "0,0", plain "0,1", plain "0,2"] + [[plain "1,0", plain "1,1", mempty] + ,[plain "2,0", plain "2,1", plain "2,2"] + ] + , "Indented code block" =: + T.unlines [ "foo" + , " bar" + , " bat" + , "baz" + ] =?> + para "foo" <> + codeBlock "bar\n bat\n" <> + para "baz" + ] diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index 63560936c..946a676e0 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -11,6 +11,7 @@ import qualified Tests.Lua import qualified Tests.Old import qualified Tests.Readers.Creole import qualified Tests.Readers.Docx +import qualified Tests.Readers.DokuWiki import qualified Tests.Readers.EPUB import qualified Tests.Readers.FB2 import qualified Tests.Readers.HTML @@ -80,6 +81,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests , testGroup "Creole" Tests.Readers.Creole.tests , testGroup "Man" Tests.Readers.Man.tests , testGroup "FB2" Tests.Readers.FB2.tests + , testGroup "DokuWiki" Tests.Readers.DokuWiki.tests ] , testGroup "Lua filters" Tests.Lua.tests ] |