aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/TWiki.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/TWiki.hs')
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs525
1 files changed, 0 insertions, 525 deletions
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
deleted file mode 100644
index 3b89f2ee9..000000000
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ /dev/null
@@ -1,525 +0,0 @@
-{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances, FlexibleContexts #-}
--- RelaxedPolyRec needed for inlinesBetween on GHC < 7
-{-
- Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.TWiki
- Copyright : Copyright (C) 2014 Alexander Sulfrian
- License : GNU GPL, version 2 or above
-
- Maintainer : Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
- Stability : alpha
- Portability : portable
-
-Conversion of twiki text to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.TWiki ( readTWiki
- ) where
-
-import Text.Pandoc.Definition
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Options
-import Text.Pandoc.Logging
-import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
-import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Control.Monad
-import Text.Pandoc.XML (fromEntities)
-import Data.Maybe (fromMaybe)
-import Text.HTML.TagSoup
-import Data.Char (isAlphaNum)
-import qualified Data.Foldable as F
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Class (PandocMonad, report)
-
--- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: PandocMonad m
- => ReaderOptions
- -> String
- -> m Pandoc
-readTWiki opts s = do
- res <- readWithM parseTWiki def{ stateOptions = opts } (s ++ "\n\n")
- case res of
- Left e -> throwError e
- Right d -> return d
-
-type TWParser = ParserT [Char] ParserState
-
---
--- utility functions
---
-
-tryMsg :: String -> TWParser m a -> TWParser m a
-tryMsg msg p = try p <?> msg
-
-skip :: TWParser m a -> TWParser m ()
-skip parser = parser >> return ()
-
-nested :: PandocMonad m => TWParser m a -> TWParser m a
-nested p = do
- nestlevel <- stateMaxNestingLevel <$> getState
- guard $ nestlevel > 0
- updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
- res <- p
- updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
- return res
-
-htmlElement :: PandocMonad m => String -> TWParser m (Attr, String)
-htmlElement tag = tryMsg tag $ do
- (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
- content <- manyTill anyChar (endtag <|> endofinput)
- return (htmlAttrToPandoc attr, trim content)
- where
- endtag = skip $ htmlTag (~== TagClose tag)
- endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
- trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
-
-htmlAttrToPandoc :: [Attribute String] -> Attr
-htmlAttrToPandoc attrs = (ident, classes, keyvals)
- where
- ident = fromMaybe "" $ lookup "id" attrs
- classes = maybe [] words $ lookup "class" attrs
- keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-
-parseHtmlContentWithAttrs :: PandocMonad m
- => String -> TWParser m a -> TWParser m (Attr, [a])
-parseHtmlContentWithAttrs tag parser = do
- (attr, content) <- htmlElement tag
- parsedContent <- try $ parseContent content
- return (attr, parsedContent)
- where
- parseContent = parseFromString $ nested $ manyTill parser endOfContent
- endOfContent = try $ skipMany blankline >> skipSpaces >> eof
-
-parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a]
-parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd
-
---
--- main parser
---
-
-parseTWiki :: PandocMonad m => TWParser m Pandoc
-parseTWiki = do
- bs <- mconcat <$> many block
- spaces
- eof
- return $ B.doc bs
-
-
---
--- block parsers
---
-
-block :: PandocMonad m => TWParser m B.Blocks
-block = do
- pos <- getPosition
- res <- mempty <$ skipMany1 blankline
- <|> blockElements
- <|> para
- skipMany blankline
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
- return res
-
-blockElements :: PandocMonad m => TWParser m B.Blocks
-blockElements = choice [ separator
- , header
- , verbatim
- , literal
- , list ""
- , table
- , blockQuote
- , noautolink
- ]
-
-separator :: PandocMonad m => TWParser m B.Blocks
-separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule
-
-header :: PandocMonad m => TWParser m B.Blocks
-header = tryMsg "header" $ do
- string "---"
- level <- many1 (char '+') >>= return . length
- guard $ level <= 6
- classes <- option [] $ string "!!" >> return ["unnumbered"]
- skipSpaces
- content <- B.trimInlines . mconcat <$> manyTill inline newline
- attr <- registerHeader ("", classes, []) content
- return $ B.headerWith attr level $ content
-
-verbatim :: PandocMonad m => TWParser m B.Blocks
-verbatim = (htmlElement "verbatim" <|> htmlElement "pre")
- >>= return . (uncurry B.codeBlockWith)
-
-literal :: PandocMonad m => TWParser m B.Blocks
-literal = htmlElement "literal" >>= return . rawBlock
- where
- format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
- rawBlock (attrs, content) = B.rawBlock (format attrs) content
-
-list :: PandocMonad m => String -> TWParser m B.Blocks
-list prefix = choice [ bulletList prefix
- , orderedList prefix
- , definitionList prefix]
-
-definitionList :: PandocMonad m => String -> TWParser m B.Blocks
-definitionList prefix = tryMsg "definitionList" $ do
- indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ "
- elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
- return $ B.definitionList elements
- where
- parseDefinitionListItem :: PandocMonad m
- => String -> TWParser m (B.Inlines, [B.Blocks])
- parseDefinitionListItem indent = do
- string (indent ++ "$ ") >> skipSpaces
- term <- many1Till inline $ string ": "
- line <- listItemLine indent $ string "$ "
- return $ (mconcat term, [line])
-
-bulletList :: PandocMonad m => String -> TWParser m B.Blocks
-bulletList prefix = tryMsg "bulletList" $
- parseList prefix (char '*') (char ' ')
-
-orderedList :: PandocMonad m => String -> TWParser m B.Blocks
-orderedList prefix = tryMsg "orderedList" $
- parseList prefix (oneOf "1iIaA") (string ". ")
-
-parseList :: PandocMonad m
- => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks
-parseList prefix marker delim = do
- (indent, style) <- lookAhead $ string prefix *> listStyle <* delim
- blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim)
- return $ case style of
- '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks
- 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks
- 'I' -> B.orderedListWith (1, UpperRoman, DefaultDelim) blocks
- 'a' -> B.orderedListWith (1, LowerAlpha, DefaultDelim) blocks
- 'A' -> B.orderedListWith (1, UpperAlpha, DefaultDelim) blocks
- _ -> B.bulletList blocks
- where
- listStyle = do
- indent <- many1 $ string " "
- style <- marker
- return (concat indent, style)
-
-parseListItem :: (PandocMonad m, Show a)
- => String -> TWParser m a -> TWParser m B.Blocks
-parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker
-
-listItemLine :: (PandocMonad m, Show a)
- => String -> TWParser m a -> TWParser m B.Blocks
-listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat
- where
- lineContent = do
- content <- anyLine
- continuation <- optionMaybe listContinuation
- return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation)
- filterSpaces = reverse . dropWhile (== ' ') . reverse
- listContinuation = notFollowedBy (string prefix >> marker) >>
- string " " >> lineContent
- parseContent = parseFromString $ many1 $ nestedList <|> parseInline
- parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>=
- return . B.plain . mconcat
- nestedList = list prefix
- lastNewline = try $ char '\n' <* eof
- newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList
-
-table :: PandocMonad m => TWParser m B.Blocks
-table = try $ do
- tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip
- rows <- many1 tableParseRow
- return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead
- where
- buildTable caption rows (aligns, heads)
- = B.table caption aligns heads rows
- align rows = replicate (columCount rows) (AlignDefault, 0)
- columns rows = replicate (columCount rows) mempty
- columCount rows = length $ head rows
-
-tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks)
-tableParseHeader = try $ do
- char '|'
- leftSpaces <- many spaceChar >>= return . length
- char '*'
- content <- tableColumnContent (char '*' >> skipSpaces >> char '|')
- char '*'
- rightSpaces <- many spaceChar >>= return . length
- optional tableEndOfRow
- return (tableAlign leftSpaces rightSpaces, content)
- where
- tableAlign left right
- | left >= 2 && left == right = (AlignCenter, 0)
- | left > right = (AlignRight, 0)
- | otherwise = (AlignLeft, 0)
-
-tableParseRow :: PandocMonad m => TWParser m [B.Blocks]
-tableParseRow = many1Till tableParseColumn newline
-
-tableParseColumn :: PandocMonad m => TWParser m B.Blocks
-tableParseColumn = char '|' *> skipSpaces *>
- tableColumnContent (skipSpaces >> char '|')
- <* skipSpaces <* optional tableEndOfRow
-
-tableEndOfRow :: PandocMonad m => TWParser m Char
-tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|'
-
-tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks
-tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat
- where
- content = continuation <|> inline
- continuation = try $ char '\\' >> newline >> return mempty
-
-blockQuote :: PandocMonad m => TWParser m B.Blocks
-blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat
-
-noautolink :: PandocMonad m => TWParser m B.Blocks
-noautolink = do
- (_, content) <- htmlElement "noautolink"
- st <- getState
- setState $ st{ stateAllowLinks = False }
- blocks <- try $ parseContent content
- setState $ st{ stateAllowLinks = True }
- return $ mconcat blocks
- where
- parseContent = parseFromString $ many $ block
-
-para :: PandocMonad m => TWParser m B.Blocks
-para = many1Till inline endOfParaElement >>= return . result . mconcat
- where
- endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
- endOfInput = try $ skipMany blankline >> skipSpaces >> eof
- endOfPara = try $ blankline >> skipMany1 blankline
- newBlockElement = try $ blankline >> skip blockElements
- result content = if F.all (==Space) content
- then mempty
- else B.para $ B.trimInlines content
-
-
---
--- inline parsers
---
-
-inline :: PandocMonad m => TWParser m B.Inlines
-inline = choice [ whitespace
- , br
- , macro
- , strong
- , strongHtml
- , strongAndEmph
- , emph
- , emphHtml
- , boldCode
- , smart
- , link
- , htmlComment
- , code
- , codeHtml
- , nop
- , autoLink
- , str
- , symbol
- ] <?> "inline"
-
-whitespace :: PandocMonad m => TWParser m B.Inlines
-whitespace = (lb <|> regsp) >>= return
- where lb = try $ skipMany spaceChar >> linebreak >> return B.space
- regsp = try $ skipMany1 spaceChar >> return B.space
-
-br :: PandocMonad m => TWParser m B.Inlines
-br = try $ string "%BR%" >> return B.linebreak
-
-linebreak :: PandocMonad m => TWParser m B.Inlines
-linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
- where lastNewline = eof >> return mempty
- innerNewline = return B.space
-
-between :: (Monoid c, PandocMonad m)
- => TWParser m a -> TWParser m b -> (TWParser m b -> TWParser m c)
- -> TWParser m c
-between start end p =
- mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
-
-enclosed :: (Monoid b, PandocMonad m)
- => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b
-enclosed sep p = between sep (try $ sep <* endMarker) p
- where
- endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof
- endSpace = (spaceChar <|> newline) >> return B.space
-
-macro :: PandocMonad m => TWParser m B.Inlines
-macro = macroWithParameters <|> withoutParameters
- where
- withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan
- emptySpan name = buildSpan name [] mempty
-
-macroWithParameters :: PandocMonad m => TWParser m B.Inlines
-macroWithParameters = try $ do
- char '%'
- name <- macroName
- (content, kvs) <- attributes
- char '%'
- return $ buildSpan name kvs $ B.str content
-
-buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines
-buildSpan className kvs = B.spanWith attrs
- where
- attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses)
- additionalClasses = maybe [] words $ lookup "class" kvs
- kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"]
-
-macroName :: PandocMonad m => TWParser m String
-macroName = do
- first <- letter
- rest <- many $ alphaNum <|> char '_'
- return (first:rest)
-
-attributes :: PandocMonad m => TWParser m (String, [(String, String)])
-attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>=
- return . foldr (either mkContent mkKvs) ([], [])
- where
- spnl = skipMany (spaceChar <|> newline)
- mkContent c ([], kvs) = (c, kvs)
- mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
- mkKvs kv (cont, rest) = (cont, (kv : rest))
-
-attribute :: PandocMonad m => TWParser m (Either String (String, String))
-attribute = withKey <|> withoutKey
- where
- withKey = try $ do
- key <- macroName
- char '='
- parseValue False >>= return . (curry Right key)
- withoutKey = try $ parseValue True >>= return . Left
- parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities
- withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"'])
- withoutQuotes allowSpaces
- | allowSpaces == True = many1 $ noneOf "}"
- | otherwise = many1 $ noneOf " }"
-
-nestedInlines :: (Show a, PandocMonad m)
- => TWParser m a -> TWParser m B.Inlines
-nestedInlines end = innerSpace <|> nestedInline
- where
- innerSpace = try $ whitespace <* (notFollowedBy end)
- nestedInline = notFollowedBy whitespace >> nested inline
-
-strong :: PandocMonad m => TWParser m B.Inlines
-strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong
-
-strongHtml :: PandocMonad m => TWParser m B.Inlines
-strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
- >>= return . B.strong . mconcat
-
-strongAndEmph :: PandocMonad m => TWParser m B.Inlines
-strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong
-
-emph :: PandocMonad m => TWParser m B.Inlines
-emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph
-
-emphHtml :: PandocMonad m => TWParser m B.Inlines
-emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
- >>= return . B.emph . mconcat
-
-nestedString :: (Show a, PandocMonad m)
- => TWParser m a -> TWParser m String
-nestedString end = innerSpace <|> (count 1 nonspaceChar)
- where
- innerSpace = try $ many1 spaceChar <* notFollowedBy end
-
-boldCode :: PandocMonad m => TWParser m B.Inlines
-boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities
-
-htmlComment :: PandocMonad m => TWParser m B.Inlines
-htmlComment = htmlTag isCommentTag >> return mempty
-
-code :: PandocMonad m => TWParser m B.Inlines
-code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities
-
-codeHtml :: PandocMonad m => TWParser m B.Inlines
-codeHtml = do
- (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
- return $ B.codeWith attrs $ fromEntities content
-
-autoLink :: PandocMonad m => TWParser m B.Inlines
-autoLink = try $ do
- state <- getState
- guard $ stateAllowLinks state
- (text, url) <- parseLink
- guard $ checkLink (head $ reverse url)
- return $ makeLink (text, url)
- where
- parseLink = notFollowedBy nop >> (uri <|> emailAddress)
- makeLink (text, url) = B.link url "" $ B.str text
- checkLink c
- | c == '/' = True
- | otherwise = isAlphaNum c
-
-str :: PandocMonad m => TWParser m B.Inlines
-str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str
-
-nop :: PandocMonad m => TWParser m B.Inlines
-nop = try $ (skip exclamation <|> skip nopTag) >> followContent
- where
- exclamation = char '!'
- nopTag = stringAnyCase "<nop>"
- followContent = many1 nonspaceChar >>= return . B.str . fromEntities
-
-symbol :: PandocMonad m => TWParser m B.Inlines
-symbol = count 1 nonspaceChar >>= return . B.str
-
-smart :: PandocMonad m => TWParser m B.Inlines
-smart = do
- guardEnabled Ext_smart
- doubleQuoted <|> singleQuoted <|>
- choice [ apostrophe
- , dash
- , ellipses
- ]
-
-singleQuoted :: PandocMonad m => TWParser m B.Inlines
-singleQuoted = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $
- many1Till inline singleQuoteEnd >>=
- (return . B.singleQuoted . B.trimInlines . mconcat)
-
-doubleQuoted :: PandocMonad m => TWParser m B.Inlines
-doubleQuoted = try $ do
- doubleQuoteStart
- contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ doubleQuoteEnd >>
- return (B.doubleQuoted $ B.trimInlines contents))
- <|> (return $ (B.str "\8220") B.<> contents)
-
-link :: PandocMonad m => TWParser m B.Inlines
-link = try $ do
- st <- getState
- guard $ stateAllowLinks st
- setState $ st{ stateAllowLinks = False }
- (url, title, content) <- linkText
- setState $ st{ stateAllowLinks = True }
- return $ B.link url title content
-
-linkText :: PandocMonad m => TWParser m (String, String, B.Inlines)
-linkText = do
- string "[["
- url <- many1Till anyChar (char ']')
- content <- option [B.str url] linkContent
- char ']'
- return (url, "", mconcat content)
- where
- linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent
- parseLinkContent = parseFromString $ many1 inline