diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/TWiki.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/TWiki.hs | 525 |
1 files changed, 0 insertions, 525 deletions
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs deleted file mode 100644 index 3b89f2ee9..000000000 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ /dev/null @@ -1,525 +0,0 @@ -{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances, FlexibleContexts #-} --- RelaxedPolyRec needed for inlinesBetween on GHC < 7 -{- - Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.TWiki - Copyright : Copyright (C) 2014 Alexander Sulfrian - License : GNU GPL, version 2 or above - - Maintainer : Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> - Stability : alpha - Portability : portable - -Conversion of twiki text to 'Pandoc' document. --} -module Text.Pandoc.Readers.TWiki ( readTWiki - ) where - -import Text.Pandoc.Definition -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Options -import Text.Pandoc.Logging -import Text.Pandoc.Parsing hiding (enclosed, macro, nested) -import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Control.Monad -import Text.Pandoc.XML (fromEntities) -import Data.Maybe (fromMaybe) -import Text.HTML.TagSoup -import Data.Char (isAlphaNum) -import qualified Data.Foldable as F -import Control.Monad.Except (throwError) -import Text.Pandoc.Class (PandocMonad, report) - --- | Read twiki from an input string and return a Pandoc document. -readTWiki :: PandocMonad m - => ReaderOptions - -> String - -> m Pandoc -readTWiki opts s = do - res <- readWithM parseTWiki def{ stateOptions = opts } (s ++ "\n\n") - case res of - Left e -> throwError e - Right d -> return d - -type TWParser = ParserT [Char] ParserState - --- --- utility functions --- - -tryMsg :: String -> TWParser m a -> TWParser m a -tryMsg msg p = try p <?> msg - -skip :: TWParser m a -> TWParser m () -skip parser = parser >> return () - -nested :: PandocMonad m => TWParser m a -> TWParser m a -nested p = do - nestlevel <- stateMaxNestingLevel <$> getState - guard $ nestlevel > 0 - updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } - res <- p - updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } - return res - -htmlElement :: PandocMonad m => String -> TWParser m (Attr, String) -htmlElement tag = tryMsg tag $ do - (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) - content <- manyTill anyChar (endtag <|> endofinput) - return (htmlAttrToPandoc attr, trim content) - where - endtag = skip $ htmlTag (~== TagClose tag) - endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof - trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse - -htmlAttrToPandoc :: [Attribute String] -> Attr -htmlAttrToPandoc attrs = (ident, classes, keyvals) - where - ident = fromMaybe "" $ lookup "id" attrs - classes = maybe [] words $ lookup "class" attrs - keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - -parseHtmlContentWithAttrs :: PandocMonad m - => String -> TWParser m a -> TWParser m (Attr, [a]) -parseHtmlContentWithAttrs tag parser = do - (attr, content) <- htmlElement tag - parsedContent <- try $ parseContent content - return (attr, parsedContent) - where - parseContent = parseFromString $ nested $ manyTill parser endOfContent - endOfContent = try $ skipMany blankline >> skipSpaces >> eof - -parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a] -parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd - --- --- main parser --- - -parseTWiki :: PandocMonad m => TWParser m Pandoc -parseTWiki = do - bs <- mconcat <$> many block - spaces - eof - return $ B.doc bs - - --- --- block parsers --- - -block :: PandocMonad m => TWParser m B.Blocks -block = do - pos <- getPosition - res <- mempty <$ skipMany1 blankline - <|> blockElements - <|> para - skipMany blankline - report $ ParsingTrace (take 60 $ show $ B.toList res) pos - return res - -blockElements :: PandocMonad m => TWParser m B.Blocks -blockElements = choice [ separator - , header - , verbatim - , literal - , list "" - , table - , blockQuote - , noautolink - ] - -separator :: PandocMonad m => TWParser m B.Blocks -separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule - -header :: PandocMonad m => TWParser m B.Blocks -header = tryMsg "header" $ do - string "---" - level <- many1 (char '+') >>= return . length - guard $ level <= 6 - classes <- option [] $ string "!!" >> return ["unnumbered"] - skipSpaces - content <- B.trimInlines . mconcat <$> manyTill inline newline - attr <- registerHeader ("", classes, []) content - return $ B.headerWith attr level $ content - -verbatim :: PandocMonad m => TWParser m B.Blocks -verbatim = (htmlElement "verbatim" <|> htmlElement "pre") - >>= return . (uncurry B.codeBlockWith) - -literal :: PandocMonad m => TWParser m B.Blocks -literal = htmlElement "literal" >>= return . rawBlock - where - format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs - rawBlock (attrs, content) = B.rawBlock (format attrs) content - -list :: PandocMonad m => String -> TWParser m B.Blocks -list prefix = choice [ bulletList prefix - , orderedList prefix - , definitionList prefix] - -definitionList :: PandocMonad m => String -> TWParser m B.Blocks -definitionList prefix = tryMsg "definitionList" $ do - indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ " - elements <- many $ parseDefinitionListItem (prefix ++ concat indent) - return $ B.definitionList elements - where - parseDefinitionListItem :: PandocMonad m - => String -> TWParser m (B.Inlines, [B.Blocks]) - parseDefinitionListItem indent = do - string (indent ++ "$ ") >> skipSpaces - term <- many1Till inline $ string ": " - line <- listItemLine indent $ string "$ " - return $ (mconcat term, [line]) - -bulletList :: PandocMonad m => String -> TWParser m B.Blocks -bulletList prefix = tryMsg "bulletList" $ - parseList prefix (char '*') (char ' ') - -orderedList :: PandocMonad m => String -> TWParser m B.Blocks -orderedList prefix = tryMsg "orderedList" $ - parseList prefix (oneOf "1iIaA") (string ". ") - -parseList :: PandocMonad m - => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks -parseList prefix marker delim = do - (indent, style) <- lookAhead $ string prefix *> listStyle <* delim - blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim) - return $ case style of - '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks - 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks - 'I' -> B.orderedListWith (1, UpperRoman, DefaultDelim) blocks - 'a' -> B.orderedListWith (1, LowerAlpha, DefaultDelim) blocks - 'A' -> B.orderedListWith (1, UpperAlpha, DefaultDelim) blocks - _ -> B.bulletList blocks - where - listStyle = do - indent <- many1 $ string " " - style <- marker - return (concat indent, style) - -parseListItem :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks -parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker - -listItemLine :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks -listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat - where - lineContent = do - content <- anyLine - continuation <- optionMaybe listContinuation - return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation) - filterSpaces = reverse . dropWhile (== ' ') . reverse - listContinuation = notFollowedBy (string prefix >> marker) >> - string " " >> lineContent - parseContent = parseFromString $ many1 $ nestedList <|> parseInline - parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>= - return . B.plain . mconcat - nestedList = list prefix - lastNewline = try $ char '\n' <* eof - newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList - -table :: PandocMonad m => TWParser m B.Blocks -table = try $ do - tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip - rows <- many1 tableParseRow - return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead - where - buildTable caption rows (aligns, heads) - = B.table caption aligns heads rows - align rows = replicate (columCount rows) (AlignDefault, 0) - columns rows = replicate (columCount rows) mempty - columCount rows = length $ head rows - -tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks) -tableParseHeader = try $ do - char '|' - leftSpaces <- many spaceChar >>= return . length - char '*' - content <- tableColumnContent (char '*' >> skipSpaces >> char '|') - char '*' - rightSpaces <- many spaceChar >>= return . length - optional tableEndOfRow - return (tableAlign leftSpaces rightSpaces, content) - where - tableAlign left right - | left >= 2 && left == right = (AlignCenter, 0) - | left > right = (AlignRight, 0) - | otherwise = (AlignLeft, 0) - -tableParseRow :: PandocMonad m => TWParser m [B.Blocks] -tableParseRow = many1Till tableParseColumn newline - -tableParseColumn :: PandocMonad m => TWParser m B.Blocks -tableParseColumn = char '|' *> skipSpaces *> - tableColumnContent (skipSpaces >> char '|') - <* skipSpaces <* optional tableEndOfRow - -tableEndOfRow :: PandocMonad m => TWParser m Char -tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|' - -tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks -tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat - where - content = continuation <|> inline - continuation = try $ char '\\' >> newline >> return mempty - -blockQuote :: PandocMonad m => TWParser m B.Blocks -blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat - -noautolink :: PandocMonad m => TWParser m B.Blocks -noautolink = do - (_, content) <- htmlElement "noautolink" - st <- getState - setState $ st{ stateAllowLinks = False } - blocks <- try $ parseContent content - setState $ st{ stateAllowLinks = True } - return $ mconcat blocks - where - parseContent = parseFromString $ many $ block - -para :: PandocMonad m => TWParser m B.Blocks -para = many1Till inline endOfParaElement >>= return . result . mconcat - where - endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement - endOfInput = try $ skipMany blankline >> skipSpaces >> eof - endOfPara = try $ blankline >> skipMany1 blankline - newBlockElement = try $ blankline >> skip blockElements - result content = if F.all (==Space) content - then mempty - else B.para $ B.trimInlines content - - --- --- inline parsers --- - -inline :: PandocMonad m => TWParser m B.Inlines -inline = choice [ whitespace - , br - , macro - , strong - , strongHtml - , strongAndEmph - , emph - , emphHtml - , boldCode - , smart - , link - , htmlComment - , code - , codeHtml - , nop - , autoLink - , str - , symbol - ] <?> "inline" - -whitespace :: PandocMonad m => TWParser m B.Inlines -whitespace = (lb <|> regsp) >>= return - where lb = try $ skipMany spaceChar >> linebreak >> return B.space - regsp = try $ skipMany1 spaceChar >> return B.space - -br :: PandocMonad m => TWParser m B.Inlines -br = try $ string "%BR%" >> return B.linebreak - -linebreak :: PandocMonad m => TWParser m B.Inlines -linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) - where lastNewline = eof >> return mempty - innerNewline = return B.space - -between :: (Monoid c, PandocMonad m) - => TWParser m a -> TWParser m b -> (TWParser m b -> TWParser m c) - -> TWParser m c -between start end p = - mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) - -enclosed :: (Monoid b, PandocMonad m) - => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b -enclosed sep p = between sep (try $ sep <* endMarker) p - where - endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof - endSpace = (spaceChar <|> newline) >> return B.space - -macro :: PandocMonad m => TWParser m B.Inlines -macro = macroWithParameters <|> withoutParameters - where - withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan - emptySpan name = buildSpan name [] mempty - -macroWithParameters :: PandocMonad m => TWParser m B.Inlines -macroWithParameters = try $ do - char '%' - name <- macroName - (content, kvs) <- attributes - char '%' - return $ buildSpan name kvs $ B.str content - -buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines -buildSpan className kvs = B.spanWith attrs - where - attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses) - additionalClasses = maybe [] words $ lookup "class" kvs - kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"] - -macroName :: PandocMonad m => TWParser m String -macroName = do - first <- letter - rest <- many $ alphaNum <|> char '_' - return (first:rest) - -attributes :: PandocMonad m => TWParser m (String, [(String, String)]) -attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>= - return . foldr (either mkContent mkKvs) ([], []) - where - spnl = skipMany (spaceChar <|> newline) - mkContent c ([], kvs) = (c, kvs) - mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) - mkKvs kv (cont, rest) = (cont, (kv : rest)) - -attribute :: PandocMonad m => TWParser m (Either String (String, String)) -attribute = withKey <|> withoutKey - where - withKey = try $ do - key <- macroName - char '=' - parseValue False >>= return . (curry Right key) - withoutKey = try $ parseValue True >>= return . Left - parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities - withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"']) - withoutQuotes allowSpaces - | allowSpaces == True = many1 $ noneOf "}" - | otherwise = many1 $ noneOf " }" - -nestedInlines :: (Show a, PandocMonad m) - => TWParser m a -> TWParser m B.Inlines -nestedInlines end = innerSpace <|> nestedInline - where - innerSpace = try $ whitespace <* (notFollowedBy end) - nestedInline = notFollowedBy whitespace >> nested inline - -strong :: PandocMonad m => TWParser m B.Inlines -strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong - -strongHtml :: PandocMonad m => TWParser m B.Inlines -strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline) - >>= return . B.strong . mconcat - -strongAndEmph :: PandocMonad m => TWParser m B.Inlines -strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong - -emph :: PandocMonad m => TWParser m B.Inlines -emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph - -emphHtml :: PandocMonad m => TWParser m B.Inlines -emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) - >>= return . B.emph . mconcat - -nestedString :: (Show a, PandocMonad m) - => TWParser m a -> TWParser m String -nestedString end = innerSpace <|> (count 1 nonspaceChar) - where - innerSpace = try $ many1 spaceChar <* notFollowedBy end - -boldCode :: PandocMonad m => TWParser m B.Inlines -boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities - -htmlComment :: PandocMonad m => TWParser m B.Inlines -htmlComment = htmlTag isCommentTag >> return mempty - -code :: PandocMonad m => TWParser m B.Inlines -code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities - -codeHtml :: PandocMonad m => TWParser m B.Inlines -codeHtml = do - (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar - return $ B.codeWith attrs $ fromEntities content - -autoLink :: PandocMonad m => TWParser m B.Inlines -autoLink = try $ do - state <- getState - guard $ stateAllowLinks state - (text, url) <- parseLink - guard $ checkLink (head $ reverse url) - return $ makeLink (text, url) - where - parseLink = notFollowedBy nop >> (uri <|> emailAddress) - makeLink (text, url) = B.link url "" $ B.str text - checkLink c - | c == '/' = True - | otherwise = isAlphaNum c - -str :: PandocMonad m => TWParser m B.Inlines -str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str - -nop :: PandocMonad m => TWParser m B.Inlines -nop = try $ (skip exclamation <|> skip nopTag) >> followContent - where - exclamation = char '!' - nopTag = stringAnyCase "<nop>" - followContent = many1 nonspaceChar >>= return . B.str . fromEntities - -symbol :: PandocMonad m => TWParser m B.Inlines -symbol = count 1 nonspaceChar >>= return . B.str - -smart :: PandocMonad m => TWParser m B.Inlines -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice [ apostrophe - , dash - , ellipses - ] - -singleQuoted :: PandocMonad m => TWParser m B.Inlines -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ - many1Till inline singleQuoteEnd >>= - (return . B.singleQuoted . B.trimInlines . mconcat) - -doubleQuoted :: PandocMonad m => TWParser m B.Inlines -doubleQuoted = try $ do - doubleQuoteStart - contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> - return (B.doubleQuoted $ B.trimInlines contents)) - <|> (return $ (B.str "\8220") B.<> contents) - -link :: PandocMonad m => TWParser m B.Inlines -link = try $ do - st <- getState - guard $ stateAllowLinks st - setState $ st{ stateAllowLinks = False } - (url, title, content) <- linkText - setState $ st{ stateAllowLinks = True } - return $ B.link url title content - -linkText :: PandocMonad m => TWParser m (String, String, B.Inlines) -linkText = do - string "[[" - url <- many1Till anyChar (char ']') - content <- option [B.str url] linkContent - char ']' - return (url, "", mconcat content) - where - linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent - parseLinkContent = parseFromString $ many1 inline |