aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorSascha Wilde <wilde@sha-bang.de>2017-10-27 01:19:28 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2017-10-26 19:19:28 -0400
commit66fd3247eac9a564aca179ca9fe4fd007ee471e7 (patch)
tree42b7db75af17a55e4191b25d40b5001adc5f7b68 /src/Text/Pandoc
parent76886678a6be4db6e8e60ade514710bfe3e62b28 (diff)
downloadpandoc-66fd3247eac9a564aca179ca9fe4fd007ee471e7.tar.gz
Creole reader (#3994)
This is feature complete but not very thoroughly tested yet.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Readers.hs3
-rw-r--r--src/Text/Pandoc/Readers/Creole.hs316
2 files changed, 319 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 92a185e0d..9fc9c3d18 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -45,6 +45,7 @@ module Text.Pandoc.Readers
, readOdt
, readMarkdown
, readCommonMark
+ , readCreole
, readMediaWiki
, readVimwiki
, readRST
@@ -76,6 +77,7 @@ import Text.Pandoc.Error
import Text.Pandoc.Extensions
import Text.Pandoc.Options
import Text.Pandoc.Readers.CommonMark
+import Text.Pandoc.Readers.Creole
import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Readers.EPUB
@@ -117,6 +119,7 @@ readers = [ ("native" , TextReader readNative)
,("markdown_github" , TextReader readMarkdown)
,("markdown_mmd", TextReader readMarkdown)
,("commonmark" , TextReader readCommonMark)
+ ,("creole" , TextReader readCreole)
,("gfm" , TextReader readCommonMark)
,("rst" , TextReader readRST)
,("mediawiki" , TextReader readMediaWiki)
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
new file mode 100644
index 000000000..ab90772ef
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -0,0 +1,316 @@
+{-
+ Copyright (C) 2017 Sascha Wilde <wilde@sha-bang.de>
+
+ partly based on all the other readers, especialy the work by
+ John MacFarlane <jgm@berkeley.edu> and
+ Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
+ all bugs are solely created by me.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Creole
+ Copyright : Copyright (C) 2017 Sascha Wilde
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Sascha Wilde <wilde@sha-bang.de>
+ Stability : WIP
+ Portability : portable
+
+Conversion of creole text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Creole ( readCreole
+ ) where
+
+import Control.Monad.Except (liftM2, throwError, guard)
+import qualified Data.Foldable as F
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad(..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (enclosed)
+import Text.Pandoc.Shared (crFilter)
+import Data.Maybe (fromMaybe)
+import Data.Monoid
+import Data.Text (Text)
+import qualified Data.Text as T
+
+
+-- | Read creole from an input string and return a Pandoc document.
+readCreole :: PandocMonad m
+ => ReaderOptions
+ -> Text
+ -> m Pandoc
+readCreole opts s = do
+ res <- readWithM parseCreole def{ stateOptions = opts }
+ (T.unpack (crFilter s) ++ "\n\n")
+ case res of
+ Left e -> throwError e
+ Right d -> return d
+
+type CRLParser = ParserT [Char] ParserState
+
+--
+-- Utility funcitons
+--
+
+(<+>) :: (Monad m, Monoid a) => m a -> m a -> m a
+(<+>) = liftM2 (<>)
+
+-- we have to redefine `enclosed' from Text.Pandoc.Parsing, because it
+-- assumes, that there can't be a space after the start parser, but
+-- with creole this is possible.
+enclosed :: (Show end, PandocMonad m) => CRLParser m start -- ^ start parser
+ -> CRLParser m end -- ^ end parser
+ -> CRLParser m a -- ^ content parser (to be used repeatedly)
+ -> CRLParser m [a]
+enclosed start end parser = try $ start >> many1Till parser end
+
+--
+-- main parser
+--
+
+specialChars :: [Char]
+specialChars = "*/~{}\\|[]()<>\"'"
+
+parseCreole :: PandocMonad m => CRLParser m Pandoc
+parseCreole = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ return $ B.doc bs
+
+
+--
+-- block parsers
+--
+
+block :: PandocMonad m => CRLParser m B.Blocks
+block = do
+ res <- mempty <$ skipMany1 blankline
+ <|> nowiki
+ <|> header
+ <|> horizontalRule
+ <|> anyList 1
+ <|> table
+ <|> para
+ skipMany blankline
+ return res
+
+nowiki :: PandocMonad m => CRLParser m B.Blocks
+nowiki = try $ nowikiStart >> manyTill content nowikiEnd >>= return . B.codeBlock . mconcat
+ where
+ content = brackets <|> line
+ brackets = try $ option "" ((:[]) <$> newline)
+ <+> (char ' ' >> (many (char ' ') <+> string "}}}") <* eol)
+ line = option "" ((:[]) <$> newline) <+> manyTill anyChar eol
+ eol = lookAhead $ try $ nowikiEnd <|> newline
+ nowikiStart = optional newline >> string "{{{" >> skipMany spaceChar >> newline
+ nowikiEnd = try $ linebreak >> string "}}}" >> skipMany spaceChar >> newline
+
+header :: PandocMonad m => CRLParser m B.Blocks
+header = try $ do
+ skipSpaces
+ level <- many1 (char '=') >>= return . length
+ guard $ level <= 6
+ skipSpaces
+ content <- B.str <$> manyTill (noneOf "\n") headerEnd
+ return $ B.header level content
+ where
+ headerEnd = try $ skipSpaces >> many (char '=') >> skipSpaces >> newline
+
+unorderedList :: PandocMonad m => Int -> CRLParser m B.Blocks
+unorderedList = list '*' B.bulletList
+
+orderedList :: PandocMonad m => Int -> CRLParser m B.Blocks
+orderedList = list '#' B.orderedList
+
+anyList :: PandocMonad m => Int -> CRLParser m B.Blocks
+anyList n = unorderedList n <|> orderedList n
+
+anyListItem :: PandocMonad m => Int -> CRLParser m B.Blocks
+anyListItem n = listItem '*' n <|> listItem '#' n
+
+list :: PandocMonad m => Char -> ([B.Blocks] -> B.Blocks) -> Int -> CRLParser m B.Blocks
+list c f n = many1 (itemPlusSublist <|> listItem c n)
+ >>= return . f
+ where itemPlusSublist = try $ listItem c n <+> anyList (n+1)
+
+listItem :: PandocMonad m => Char -> Int -> CRLParser m B.Blocks
+listItem c n = (listStart >> many1Till inline itemEnd)
+ >>= return . B.plain . B.trimInlines .mconcat
+ where
+ listStart = try $ optional newline >> skipSpaces >> count n (char c)
+ >> (lookAhead $ noneOf [c]) >> skipSpaces
+ itemEnd = endOfParaElement <|> nextItem n
+ <|> if n < 3 then nextItem (n+1)
+ else nextItem (n+1) <|> nextItem (n-1)
+ nextItem x = lookAhead $ try $ blankline >> anyListItem x >> return mempty
+
+table :: PandocMonad m => CRLParser m B.Blocks
+table = try $ do
+ headers <- optionMaybe headerRow
+ rows <- many1 row
+ return $ B.simpleTable (fromMaybe [mempty] headers) rows
+ where
+ headerRow = try $ skipSpaces >> many1Till headerCell rowEnd
+ headerCell = B.plain . B.trimInlines . mconcat
+ <$> (string "|=" >> many1Till inline cellEnd)
+ row = try $ skipSpaces >> many1Till cell rowEnd
+ cell = B.plain . B.trimInlines . mconcat
+ <$> (char '|' >> many1Till inline cellEnd)
+ rowEnd = try $ optional (char '|') >> skipSpaces >> newline
+ cellEnd = lookAhead $ try $ char '|' <|> rowEnd
+
+para :: PandocMonad m => CRLParser m B.Blocks
+para = many1Till inline endOfParaElement >>= return . result . mconcat
+ where
+ result content = if F.all (==Space) content
+ then mempty
+ else B.para $ B.trimInlines content
+
+endOfParaElement :: PandocMonad m => CRLParser m ()
+endOfParaElement = lookAhead $ endOfInput <|> endOfPara
+ <|> startOfList <|> startOfTable
+ <|> startOfHeader <|> hr <|> startOfNowiki
+ where
+ endOfInput = try $ skipMany blankline >> skipSpaces >> eof
+ endOfPara = try $ blankline >> skipMany1 blankline
+ startOf :: PandocMonad m => CRLParser m a -> CRLParser m ()
+ startOf p = try $ blankline >> p >> return mempty
+ startOfList = startOf $ anyList 1
+ startOfTable = startOf $ table
+ startOfHeader = startOf header
+ startOfNowiki = startOf nowiki
+ hr = startOf horizontalRule
+
+horizontalRule :: PandocMonad m => CRLParser m B.Blocks
+horizontalRule = try $ skipSpaces >> string "----" >> skipSpaces >> newline
+ >> return B.horizontalRule
+
+--
+-- inline parsers
+--
+
+inline :: PandocMonad m => CRLParser m B.Inlines
+inline = choice [ whitespace
+ , escapedLink
+ , escapedChar
+ , link
+ , inlineNowiki
+ , placeholder
+ , image
+ , forcedLinebreak
+ , bold
+ , finalBold
+ , italics
+ , finalItalics
+ , str
+ , symbol
+ ] <?> "inline"
+
+escapedChar :: PandocMonad m => CRLParser m B.Inlines
+escapedChar = (try $ char '~' >> noneOf "\t\n ") >>= return . B.str . (:[])
+
+escapedLink :: PandocMonad m => CRLParser m B.Inlines
+escapedLink = try $ do
+ char '~'
+ (orig, _) <- uri
+ return $ B.str orig
+
+image :: PandocMonad m => CRLParser m B.Inlines
+image = try $ do
+ (orig, src) <- wikiImg
+ return $ B.image src "" (B.str $ orig)
+ where
+ linkSrc = many $ noneOf "|}\n\r\t"
+ linkDsc = char '|' >> many (noneOf "}\n\r\t")
+ wikiImg = try $ do
+ string "{{"
+ src <- linkSrc
+ dsc <- option "" linkDsc
+ string "}}"
+ return (dsc, src)
+
+link :: PandocMonad m => CRLParser m B.Inlines
+link = try $ do
+ (orig, src) <- uriLink <|> wikiLink
+ return $ B.link src "" orig
+ where
+ linkSrc = many $ noneOf "|]\n\r\t"
+ linkDsc :: PandocMonad m => String -> CRLParser m B.Inlines
+ linkDsc otxt = B.str
+ <$> (try $ option otxt
+ (char '|' >> many (noneOf "]\n\r\t")))
+ linkImg = try $ char '|' >> image
+ wikiLink = try $ do
+ string "[["
+ src <- linkSrc
+ dsc <- linkImg <|> linkDsc src
+ string "]]"
+ return (dsc, src)
+ uriLink = try $ do
+ (orig, src) <- uri
+ return (B.str orig, src)
+
+inlineNowiki :: PandocMonad m => CRLParser m B.Inlines
+inlineNowiki = B.code <$> (start >> manyTill (noneOf "\n\r") end)
+ where
+ start = try $ string "{{{"
+ end = try $ string "}}}" >> (lookAhead $ noneOf "}")
+
+placeholder :: PandocMonad m => CRLParser m B.Inlines
+-- The semantics of the placeholder is basicallly implementation
+-- dependent, so there is no way to DTRT for all cases.
+-- So for now we just drop them.
+placeholder = B.text <$> (try $ string "<<<" >> manyTill anyChar (string ">>>")
+ >> return "")
+
+whitespace :: PandocMonad m => CRLParser m B.Inlines
+whitespace = (lb <|> regsp) >>= return
+ where lb = try $ skipMany spaceChar >> linebreak >> return B.space
+ regsp = try $ skipMany1 spaceChar >> return B.space
+
+linebreak :: PandocMonad m => CRLParser m B.Inlines
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+ where lastNewline = eof >> return mempty
+ innerNewline = return B.space
+
+symbol :: PandocMonad m => CRLParser m B.Inlines
+symbol = oneOf specialChars >>= return . B.str . (:[])
+
+str :: PandocMonad m => CRLParser m B.Inlines
+str = let strChar = noneOf ("\t\n " ++ specialChars) in
+ many1 strChar >>= return . B.str
+
+bold :: PandocMonad m => CRLParser m B.Inlines
+bold = B.strong . mconcat <$>
+ enclosed (string "**") (try $ string "**") inline
+
+italics :: PandocMonad m => CRLParser m B.Inlines
+italics = B.emph . mconcat <$>
+ enclosed (string "//") (try $ string "//") inline
+
+finalBold :: PandocMonad m => CRLParser m B.Inlines
+finalBold = B.strong . mconcat <$>
+ try (string "**" >> many1Till inline endOfParaElement)
+
+finalItalics :: PandocMonad m => CRLParser m B.Inlines
+finalItalics = B.emph . mconcat <$>
+ try (string "//" >> many1Till inline endOfParaElement)
+
+forcedLinebreak :: PandocMonad m => CRLParser m B.Inlines
+forcedLinebreak = try $ string "\\\\" >> return B.linebreak