diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/App.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Class.hs | 77 | ||||
-rw-r--r-- | src/Text/Pandoc/MediaBag.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 49 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 20 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 188 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Muse.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/TikiWiki.hs | 658 | ||||
-rw-r--r-- | src/Text/Pandoc/Templates.hs | 20 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Shared.hs | 2 |
12 files changed, 884 insertions, 159 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 68bdc1432..0d4a82b70 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -218,7 +218,8 @@ convertWithOpts opts = do templ <- case optTemplate opts of _ | not standalone -> return Nothing Nothing -> do - deftemp <- getDefaultTemplate datadir format + deftemp <- runIO $ + getDefaultTemplate datadir format case deftemp of Left e -> E.throwIO e Right t -> return (Just t) @@ -991,10 +992,10 @@ options = , Option "D" ["print-default-template"] (ReqArg (\arg _ -> do - templ <- getDefaultTemplate Nothing arg + templ <- runIO $ getDefaultTemplate Nothing arg case templ of Right t -> UTF8.hPutStr stdout t - Left e -> E.throwIO $ PandocAppError (show e) + Left e -> E.throwIO e exitSuccess) "FORMAT") "" -- "Print default template for FORMAT" diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 120ba8fee..46e300953 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -5,7 +5,8 @@ {-# LANGUAGE FlexibleContexts #-} {- -Copyright (C) 2016 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2016-17 Jesse Rosenthal <jrosenthal@jhu.edu> +and John MacFarlane. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Class - Copyright : Copyright (C) 2016 Jesse Rosenthal + Copyright : Copyright (C) 2016-17 Jesse Rosenthal, John MacFarlane License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> @@ -60,6 +61,7 @@ module Text.Pandoc.Class ( PandocMonad(..) , PandocPure(..) , FileTree(..) , FileInfo(..) + , addToFileTree , runIO , runIOorExplode , runPure @@ -101,7 +103,8 @@ import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified System.Environment as IO (lookupEnv) import System.FilePath.Glob (match, compile) -import System.Directory (createDirectoryIfMissing) +import System.Directory (createDirectoryIfMissing, getDirectoryContents, + doesDirectoryExist) import System.FilePath ((</>), (<.>), takeDirectory, takeExtension, dropExtension, isRelative, normalise) import qualified System.FilePath.Glob as IO (glob) @@ -120,36 +123,64 @@ import qualified Data.Map as M import Text.Pandoc.Error import qualified Debug.Trace +-- | The PandocMonad typeclass contains all the potentially +-- IO-related functions used in pandoc's readers and writers. +-- Instances of this typeclass may implement these functions +-- in IO (as in 'PandocIO') or using an internal state that +-- represents a file system, time, and so on (as in 'PandocPure'). class (Functor m, Applicative m, Monad m, MonadError PandocError m) => PandocMonad m where + -- | Lookup an environment variable. lookupEnv :: String -> m (Maybe String) + -- | Get the current (UTC) time. getCurrentTime :: m UTCTime + -- | Get the locale's time zone. getCurrentTimeZone :: m TimeZone + -- | Return a new generator for random numbers. newStdGen :: m StdGen + -- | Return a new unique integer. newUniqueHash :: m Int + -- | Retrieve contents and mime type from a URL, raising + -- an error on failure. openURL :: String -> m (B.ByteString, Maybe MimeType) + -- | Read the lazy ByteString contents from a file path, + -- raising an error on failure. readFileLazy :: FilePath -> m BL.ByteString + -- | Read the strict ByteString contents from a file path, + -- raising an error on failure. readFileStrict :: FilePath -> m B.ByteString + -- | Read file from specified user data directory or, + -- if not found there, from Cabal data directory. readDataFile :: Maybe FilePath -> FilePath -> m B.ByteString + -- | Return a list of paths that match a glob, relative to + -- the working directory. See 'System.FilePath.Glob' for + -- the glob syntax. glob :: String -> m [FilePath] + -- | Return the modification time of a file. getModificationTime :: FilePath -> m UTCTime + -- | Get the value of the 'CommonState' used by all instances + -- of 'PandocMonad'. getCommonState :: m CommonState + -- | Set the value of the 'CommonState' used by all instances + -- of 'PandocMonad'. + -- | Get the value of a specific field of 'CommonState'. putCommonState :: CommonState -> m () - + -- | Get the value of a specific field of 'CommonState'. getsCommonState :: (CommonState -> a) -> m a getsCommonState f = f <$> getCommonState - + -- | Modify the 'CommonState'. modifyCommonState :: (CommonState -> CommonState) -> m () modifyCommonState f = getCommonState >>= putCommonState . f - + -- Output a log message. + logOutput :: LogMessage -> m () + -- Output a debug message to sterr, using 'Debug.Trace.trace'. + -- Note: this writes to stderr even in pure instances. trace :: String -> m () trace msg = do tracing <- getsCommonState stTrace when tracing $ Debug.Trace.trace ("[trace] " ++ msg) (return ()) - logOutput :: LogMessage -> m () - --- Functions defined for all PandocMonad instances +-- * Functions defined for all PandocMonad instances setVerbosity :: PandocMonad m => Verbosity -> m () setVerbosity verbosity = @@ -192,10 +223,10 @@ setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps} getResourcePath :: PandocMonad m => m [FilePath] getResourcePath = getsCommonState stResourcePath -getPOSIXTime :: (PandocMonad m) => m POSIXTime +getPOSIXTime :: PandocMonad m => m POSIXTime getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime -getZonedTime :: (PandocMonad m) => m ZonedTime +getZonedTime :: PandocMonad m => m ZonedTime getZonedTime = do t <- getCurrentTime tz <- getCurrentTimeZone @@ -437,7 +468,6 @@ data PureState = PureState { stStdGen :: StdGen , stFiles :: FileTree , stUserDataDir :: FileTree , stCabalDataDir :: FileTree - , stFontFiles :: [FilePath] } instance Default PureState where @@ -452,7 +482,6 @@ instance Default PureState where , stFiles = mempty , stUserDataDir = mempty , stCabalDataDir = mempty - , stFontFiles = [] } @@ -479,6 +508,24 @@ newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo} getFileInfo :: FilePath -> FileTree -> Maybe FileInfo getFileInfo fp tree = M.lookup fp $ unFileTree tree +-- | Add the specified file to the FileTree. If file +-- is a directory, add its contents recursively. +addToFileTree :: FileTree -> FilePath -> IO FileTree +addToFileTree (FileTree treemap) fp = do + isdir <- doesDirectoryExist fp + if isdir + then do -- recursively add contents of directories + let isSpecial ".." = True + isSpecial "." = True + isSpecial _ = False + fs <- (map (fp </>) . filter (not . isSpecial)) <$> getDirectoryContents fp + foldM addToFileTree (FileTree treemap) fs + else do + contents <- B.readFile fp + mtime <- IO.getModificationTime fp + return $ FileTree $ + M.insert fp FileInfo{ infoFileMTime = mtime + , infoFileContents = contents } treemap newtype PandocPure a = PandocPure { unPandocPure :: ExceptT PandocError @@ -542,8 +589,8 @@ instance PandocMonad PandocPure where Nothing -> readDataFile Nothing fname glob s = do - fontFiles <- getsPureState stFontFiles - return (filter (match (compile s)) fontFiles) + FileTree ftmap <- getsPureState stFiles + return $ filter (match (compile s)) $ M.keys ftmap getModificationTime fp = do fps <- getsPureState stFiles diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index d8d6da345..f89c60c9e 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -79,5 +79,5 @@ lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(String, MimeType, Int)] mediaDirectory (MediaBag mediamap) = - M.foldWithKey (\fp (mime,contents) -> + M.foldrWithKey (\fp (mime,contents) -> (((Posix.joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 549042d14..0c97d4060 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -247,8 +247,9 @@ instance Monoid a => Monoid (Future s a) where mconcat = liftM mconcat . sequence -- | Parse characters while a predicate is true. -takeWhileP :: Stream [Char] m Char - => (Char -> Bool) -> ParserT [Char] st m [Char] +takeWhileP :: Monad m + => (Char -> Bool) + -> ParserT [Char] st m [Char] takeWhileP f = do -- faster than 'many (satisfy f)' inp <- getInput @@ -262,7 +263,7 @@ takeWhileP f = do -- Parse n characters of input (or the rest of the input if -- there aren't n characters). -takeP :: Stream [Char] m Char => Int -> ParserT [Char] st m [Char] +takeP :: Monad m => Int -> ParserT [Char] st m [Char] takeP n = do guard (n > 0) -- faster than 'count n anyChar' @@ -276,7 +277,7 @@ takeP n = do return xs -- | Parse any line of text -anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char] +anyLine :: Monad m => ParserT [Char] st m [Char] anyLine = do -- This is much faster than: -- manyTill anyChar newline @@ -292,13 +293,13 @@ anyLine = do _ -> mzero -- | Parse any line, include the final newline in the output -anyLineNewline :: Stream [Char] m Char => ParserT [Char] st m [Char] +anyLineNewline :: Monad m => ParserT [Char] st m [Char] anyLineNewline = (++ "\n") <$> anyLine -- | Parse indent by specified number of spaces (or equiv. tabs) -indentWith :: Stream [Char] m Char +indentWith :: Stream s m Char => HasReaderOptions st - => Int -> ParserT [Char] st m [Char] + => Int -> ParserT s st m [Char] indentWith num = do tabStop <- getOption readerTabStop if (num < tabStop) @@ -394,9 +395,9 @@ stringAnyCase (x:xs) = do -- | Parse contents of 'str' using 'parser' and return result. parseFromString :: Monad m - => ParserT String st m a + => ParserT [Char] st m a -> String - -> ParserT String st m a + -> ParserT [Char] st m a parseFromString parser str = do oldPos <- getPosition setPosition $ initialPos "chunk" @@ -422,9 +423,9 @@ parseFromString' parser str = do return res -- | Parse raw line block up to and including blank lines. -lineClump :: Stream [Char] m Char => ParserT [Char] st m String +lineClump :: Monad m => ParserT [Char] st m String lineClump = blanklines - <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) + <|> (unlines <$> many1 (notFollowedBy blankline >> anyLine)) -- | Parse a string of characters between an open character -- and a close character, including text between balanced @@ -520,7 +521,7 @@ uriScheme :: Stream s m Char => ParserT s st m String uriScheme = oneOfStringsCI (Set.toList schemes) -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: Stream [Char] m Char => ParserT [Char] st m (String, String) +uri :: Monad m => ParserT [Char] st m (String, String) uri = try $ do scheme <- uriScheme char ':' @@ -625,7 +626,9 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. -withRaw :: Stream [Char] m Char => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char]) +withRaw :: Monad m + => ParsecT [Char] st m a + -> ParsecT [Char] st m (a, [Char]) withRaw parser = do pos1 <- getPosition inp <- getInput @@ -786,7 +789,7 @@ charRef = do c <- characterReference return $ Str [c] -lineBlockLine :: Stream [Char] m Char => ParserT [Char] st m String +lineBlockLine :: Monad m => ParserT [Char] st m String lineBlockLine = try $ do char '|' char ' ' @@ -796,11 +799,11 @@ lineBlockLine = try $ do continuations <- many (try $ char ' ' >> anyLine) return $ white ++ unwords (line : continuations) -blankLineBlockLine :: Stream [Char] m Char => ParserT [Char] st m Char +blankLineBlockLine :: Stream s m Char => ParserT s st m Char blankLineBlockLine = try (char '|' >> blankline) -- | Parses an RST-style line block and returns a list of strings. -lineBlockLines :: Stream [Char] m Char => ParserT [Char] st m [String] +lineBlockLines :: Monad m => ParserT [Char] st m [String] lineBlockLines = try $ do lines' <- many1 (lineBlockLine <|> ((:[]) <$> blankLineBlockLine)) skipMany1 $ blankline <|> blankLineBlockLine @@ -870,7 +873,7 @@ widthsFromIndices numColumns' indices = -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTableWith :: (Stream [Char] m Char, HasReaderOptions st, +gridTableWith :: (Monad m, HasReaderOptions st, Functor mf, Applicative mf, Monad mf) => ParserT [Char] st m (mf Blocks) -- ^ Block list parser -> Bool -- ^ Headerless table @@ -879,7 +882,7 @@ gridTableWith blocks headless = tableWith (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter -gridTableWith' :: (Stream [Char] m Char, HasReaderOptions st, +gridTableWith' :: (Monad m, HasReaderOptions st, Functor mf, Applicative mf, Monad mf) => ParserT [Char] st m (mf Blocks) -- ^ Block list parser -> Bool -- ^ Headerless table @@ -919,7 +922,7 @@ gridTableSep :: Stream s m Char => Char -> ParserT s st m Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. -gridTableHeader :: (Stream [Char] m Char, Functor mf, Applicative mf, Monad mf) +gridTableHeader :: (Monad m, Functor mf, Applicative mf, Monad mf) => Bool -- ^ Headerless table -> ParserT [Char] st m (mf Blocks) -> ParserT [Char] st m (mf [Blocks], [Alignment], [Int]) @@ -952,7 +955,7 @@ gridTableRawLine indices = do return (gridTableSplitLine indices line) -- | Parse row of grid table. -gridTableRow :: (Stream [Char] m Char, Functor mf, Applicative mf, Monad mf) +gridTableRow :: (Monad m, Functor mf, Applicative mf, Monad mf) => ParserT [Char] st m (mf Blocks) -> [Int] -> ParserT [Char] st m (mf [Blocks]) @@ -981,8 +984,8 @@ gridTableFooter = blanklines --- -- | Removes the ParsecT layer from the monad transformer stack -readWithM :: (Monad m) - => ParserT [Char] st m a -- ^ parser +readWithM :: Monad m + => ParserT [Char] st m a -- ^ parser -> st -- ^ initial state -> String -- ^ input -> m (Either PandocError a) @@ -998,7 +1001,7 @@ readWith :: Parser [Char] st a readWith p t inp = runIdentity $ readWithM p t inp -- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) +testStringWith :: Show a => ParserT [Char] ParserState Identity a -> [Char] -> IO () diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 0374d27d5..78a2038a4 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -58,6 +58,7 @@ module Text.Pandoc.Readers , readNative , readJSON , readTWiki + , readTikiWiki , readTxt2Tags , readEPUB , readMuse @@ -92,6 +93,7 @@ import Text.Pandoc.Readers.Org import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.TWiki +import Text.Pandoc.Readers.TikiWiki import Text.Pandoc.Readers.Txt2Tags import Text.Pandoc.Shared (mapLeft) import Text.Parsec.Error @@ -126,6 +128,7 @@ readers = [ ("native" , TextReader readNative) ,("latex" , TextReader readLaTeX) ,("haddock" , TextReader readHaddock) ,("twiki" , TextReader readTWiki) + ,("tikiwiki" , TextReader readTikiWiki) ,("docx" , ByteStringReader readDocx) ,("odt" , ByteStringReader readOdt) ,("t2t" , TextReader readTxt2Tags) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 734973e33..3a0d6eb14 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -188,6 +188,7 @@ block = do , pBody , pDiv , pPlain + , pFigure , pRawHtmlBlock ] trace (take 60 $ show $ B.toList res) @@ -553,6 +554,25 @@ pPara = do contents <- trimInlines <$> pInTags "p" inline return $ B.para contents +pFigure :: PandocMonad m => TagParser m Blocks +pFigure = do + TagOpen _ _ <- pSatisfy (matchTagOpen "figure" []) + skipMany pBlank + let pImg = pOptInTag "p" pImage <* skipMany pBlank + pCapt = option mempty $ pInTags "figcaption" inline <* skipMany pBlank + pImgCapt = do + img <- pImg + cap <- pCapt + return (img, cap) + pCaptImg = do + cap <- pCapt + img <- pImg + return (img, cap) + (imgMany, caption) <- pImgCapt <|> pCaptImg + TagClose _ <- pSatisfy (matchTagClose "figure") + let (Image attr _ (url, tit)):_ = B.toList imgMany + return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption + pCodeBlock :: PandocMonad m => TagParser m Blocks pCodeBlock = try $ do TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" []) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index a9bafb03b..5877bbbe1 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -37,13 +37,13 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, applyMacros, rawLaTeXInline, rawLaTeXBlock, - macro, inlineCommand ) where import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) +import Control.Monad.Trans (lift) import Data.Char (chr, isAlphaNum, isLetter, ord, isDigit) import Data.Default import Data.Text (Text) @@ -199,77 +199,45 @@ withVerbatimMode parser = do updateState $ \st -> st{ sVerbatimMode = False } return result -rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT String s m String -rawLaTeXBlock = do - lookAhead (try (char '\\' >> letter)) +rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => LP m a -> ParserT String s m String +rawLaTeXParser parser = do inp <- getInput let toks = tokenize $ T.pack inp - let rawblock = do - (_, raw) <- try $ - withRaw (environment <|> macroDef <|> blockCommand) - return raw pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } - res <- runParserT rawblock lstate "source" toks + res <- lift $ runParserT ((,) <$> try (snd <$> withRaw parser) <*> getState) + lstate "source" toks case res of Left _ -> mzero - Right raw -> takeP (T.length (untokenize raw)) - -macro :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT String s m Blocks -macro = do - guardEnabled Ext_latex_macros - lookAhead (char '\\' *> oneOfStrings ["new", "renew", "provide"] *> - oneOfStrings ["command", "environment"]) - inp <- getInput - let toks = tokenize $ T.pack inp - let rawblock = do - (_, raw) <- withRaw $ try macroDef - st <- getState - return (raw, st) - pstate <- getState - let lstate = def{ sOptions = extractReaderOptions pstate - , sMacros = extractMacros pstate } - res <- runParserT rawblock lstate "source" toks - case res of - Left _ -> mzero Right (raw, st) -> do - updateState (updateMacros (const $ sMacros st)) - mempty <$ takeP (T.length (untokenize raw)) + updateState (updateMacros ((sMacros st) <>)) + takeP (T.length (untokenize raw)) applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) => String -> ParserT String s m String -applyMacros s = do - (guardEnabled Ext_latex_macros >> - do let retokenize = doMacros 0 *> (toksToString <$> getInput) +applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> + do let retokenize = doMacros 0 *> + (toksToString <$> many (satisfyTok (const True))) pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate , sMacros = extractMacros pstate } res <- runParserT retokenize lstate "math" (tokenize (T.pack s)) case res of Left e -> fail (show e) - Right s' -> return s') <|> return s + Right s' -> return s' -rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) +rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) => ParserT String s m String +rawLaTeXBlock = do + lookAhead (try (char '\\' >> letter)) + rawLaTeXParser (environment <|> macroDef <|> blockCommand) + +rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m String rawLaTeXInline = do lookAhead (try (char '\\' >> letter) <|> char '$') - inp <- getInput - let toks = tokenize $ T.pack inp - let rawinline = do - (_, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand') - st <- getState - return (raw, st) - pstate <- getState - let lstate = def{ sOptions = extractReaderOptions pstate - , sMacros = extractMacros pstate } - res <- runParserT rawinline lstate "source" toks - case res of - Left _ -> mzero - Right (raw, s) -> do - updateState $ updateMacros (const $ sMacros s) - takeP (T.length (untokenize raw)) + rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines inlineCommand = do @@ -607,6 +575,16 @@ mkImage options src = do return $ imageWith attr (addExtension src defaultExt) "" alt _ -> return $ imageWith attr src "" alt +doxspace :: PandocMonad m => LP m Inlines +doxspace = do + (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty + where startsWithLetter (Tok _ Word t) = + case T.uncons t of + Just (c, _) | isLetter c -> True + _ -> False + startsWithLetter _ = False + + -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" dosiunitx :: PandocMonad m => LP m Inlines dosiunitx = do @@ -1339,13 +1317,28 @@ inlineCommands = M.fromList $ -- fontawesome , ("faCheck", lit "\10003") , ("faClose", lit "\10007") + -- xspace + , ("xspace", doxspace) + -- etoolbox + , ("ifstrequal", ifstrequal) ] +ifstrequal :: PandocMonad m => LP m Inlines +ifstrequal = do + str1 <- tok + str2 <- tok + ifequal <- braced + ifnotequal <- braced + if str1 == str2 + then getInput >>= setInput . (ifequal ++) + else getInput >>= setInput . (ifnotequal ++) + return mempty + coloredInline :: PandocMonad m => String -> LP m Inlines coloredInline stylename = do - skipopts - color <- braced - spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok + skipopts + color <- braced + spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok ttfamily :: PandocMonad m => LP m Inlines ttfamily = (code . stringify . toList) <$> tok @@ -1359,14 +1352,20 @@ rawInlineOr name' fallback = do getRawCommand :: PandocMonad m => Text -> LP m String getRawCommand txt = do - (_, rawargs) <- withRaw - ((if txt == "\\write" - then () <$ satisfyTok isWordTok -- digits - else return ()) *> - skipangles *> - skipopts *> - option "" (try (optional sp *> dimenarg)) *> - many braced) + (_, rawargs) <- withRaw $ + case txt of + "\\write" -> do + void $ satisfyTok isWordTok -- digits + void braced + "\\titleformat" -> do + void braced + skipopts + void $ count 4 braced + _ -> do + skipangles + skipopts + option "" (try (optional sp *> dimenarg)) + void $ many braced return $ T.unpack (txt <> untokenize rawargs) isBlockCommand :: Text -> Bool @@ -1394,6 +1393,7 @@ treatAsBlock = Set.fromList , "newpage" , "clearpage" , "pagebreak" + , "titleformat" ] isInlineCommand :: Text -> Bool @@ -1453,22 +1453,14 @@ begin_ :: PandocMonad m => Text -> LP m () begin_ t = (try $ do controlSeq "begin" spaces - symbol '{' - spaces - Tok _ Word txt <- satisfyTok isWordTok - spaces - symbol '}' + txt <- untokenize <$> braced guard (t == txt)) <?> ("\\begin{" ++ T.unpack t ++ "}") end_ :: PandocMonad m => Text -> LP m () end_ t = (try $ do controlSeq "end" spaces - symbol '{' - spaces - Tok _ Word txt <- satisfyTok isWordTok - spaces - symbol '}' + txt <- untokenize <$> braced guard $ t == txt) <?> ("\\end{" ++ T.unpack t ++ "}") preamble :: PandocMonad m => LP m Blocks @@ -1523,17 +1515,18 @@ authors = try $ do macroDef :: PandocMonad m => LP m Blocks macroDef = do - guardEnabled Ext_latex_macros mempty <$ ((commandDef <|> environmentDef) <* doMacros 0) where commandDef = do (name, macro') <- newcommand - updateState $ \s -> s{ sMacros = M.insert name macro' (sMacros s) } + guardDisabled Ext_latex_macros <|> + updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) }) environmentDef = do (name, macro1, macro2) <- newenvironment - updateState $ \s -> s{ sMacros = - M.insert name macro1 (sMacros s) } - updateState $ \s -> s{ sMacros = - M.insert ("end" <> name) macro2 (sMacros s) } + guardDisabled Ext_latex_macros <|> + do updateState $ \s -> s{ sMacros = + M.insert name macro1 (sMacros s) } + updateState $ \s -> s{ sMacros = + M.insert ("end" <> name) macro2 (sMacros s) } -- @\newenvironment{envname}[n-args][default]{begin}{end}@ -- is equivalent to -- @\newcommand{\envname}[n-args][default]{begin}@ @@ -1568,11 +1561,8 @@ newenvironment = do controlSeq "renewenvironment" <|> controlSeq "provideenvironment" optional $ symbol '*' - symbol '{' - spaces - Tok _ Word name <- satisfyTok isWordTok spaces - symbol '}' + name <- untokenize <$> braced spaces numargs <- option 0 $ try bracketedNum spaces @@ -1640,9 +1630,25 @@ blockCommand = try $ do star <- option "" ("*" <$ symbol '*' <* optional sp) let name' = name <> star let names = ordNub [name', name] - let raw = do - guard $ isBlockCommand name || not (isInlineCommand name) + let rawDefiniteBlock = do + guard $ isBlockCommand name rawBlock "latex" <$> getRawCommand (txt <> star) + -- heuristic: if it could be either block or inline, we + -- treat it if block if we have a sequence of block + -- commands followed by a newline. But we stop if we + -- hit a \startXXX, since this might start a raw ConTeXt + -- environment (this is important because this parser is + -- used by the Markdown reader). + let startCommand = try $ do + Tok _ (CtrlSeq n) _ <- anyControlSeq + guard $ "start" `T.isPrefixOf` n + let rawMaybeBlock = try $ do + guard $ not $ isInlineCommand name + curr <- rawBlock "latex" <$> getRawCommand (txt <> star) + rest <- many $ notFollowedBy startCommand *> blockCommand + lookAhead $ blankline <|> startCommand + return $ curr <> mconcat rest + let raw = rawDefiniteBlock <|> rawMaybeBlock lookupListDefault raw names blockCommands closing :: PandocMonad m => LP m Blocks @@ -1879,16 +1885,12 @@ addImageCaption = walkM go go x = return x coloredBlock :: PandocMonad m => String -> LP m Blocks -coloredBlock stylename = do - skipopts +coloredBlock stylename = try $ do + skipopts color <- braced + notFollowedBy (grouped inline) let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) - inlineContents <|> constructor <$> blockContents - where inlineContents = do - ils <- grouped inline - rest <- inlines - return (para (ils <> rest)) - blockContents = grouped block + constructor <$> grouped block graphicsPath :: PandocMonad m => LP m Blocks graphicsPath = do diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ab6a32b78..d7e59c7fd 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -61,8 +61,8 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, isCommentTag, isInlineTag, isTextTag) -import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline, applyMacros, - macro) +import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, + rawLaTeXInline, applyMacros) import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) @@ -507,7 +507,6 @@ block = do , htmlBlock , table , codeBlockIndented - , latexMacro , rawTeXBlock , lineBlock , blockQuote @@ -1096,13 +1095,6 @@ rawVerbatimBlock = htmlInBalanced isVerbTag isVerbTag (TagOpen "script" _) = True isVerbTag _ = False -latexMacro :: PandocMonad m => MarkdownParser m (F Blocks) -latexMacro = try $ do - guardEnabled Ext_latex_macros - skipNonindentSpaces - res <- macro - return $ return res - rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 1ae73c148..9d967a9de 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -115,11 +115,10 @@ htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String) htmlElement tag = try $ do (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) content <- manyTill anyChar (endtag <|> endofinput) - return (htmlAttrToPandoc attr, trim content) + return (htmlAttrToPandoc attr, content) where endtag = void $ htmlTag (~== TagClose tag) endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof - trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse htmlAttrToPandoc :: [Attribute String] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) @@ -132,7 +131,7 @@ parseHtmlContentWithAttrs :: PandocMonad m => String -> MuseParser m a -> MuseParser m (Attr, [a]) parseHtmlContentWithAttrs tag parser = do (attr, content) <- htmlElement tag - parsedContent <- try $ parseContent content + parsedContent <- try $ parseContent (content ++ "\n") return (attr, parsedContent) where parseContent = parseFromString $ nested $ manyTill parser endOfContent diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs new file mode 100644 index 000000000..4acbaa30b --- /dev/null +++ b/src/Text/Pandoc/Readers/TikiWiki.hs @@ -0,0 +1,658 @@ +{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} + +{- | + Module : Text.Pandoc.Readers.TikiWiki + Copyright : Copyright (C) 2017 Robin Lee Powell + License : GPLv2 + + Maintainer : Robin Lee Powell <robinleepowell@gmail.com> + Stability : alpha + Portability : portable + +Conversion of TikiWiki text to 'Pandoc' document. +-} + +module Text.Pandoc.Readers.TikiWiki ( readTikiWiki + ) where + +import Control.Monad +import Control.Monad.Except (throwError) +import Text.Pandoc.Definition +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (enclosed, nested) +import Text.Printf (printf) +import Text.Pandoc.XML (fromEntities) +import Text.Pandoc.Class (PandocMonad(..), CommonState(..)) +import Text.Pandoc.Shared (crFilter) +import Text.Pandoc.Logging (Verbosity(..)) +import Data.Maybe (fromMaybe) +import Data.List (intercalate) +import qualified Data.Foldable as F +import Data.Text (Text) +import qualified Data.Text as T + +-- | Read TikiWiki from an input string and return a Pandoc document. +readTikiWiki :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readTikiWiki opts s = do + res <- readWithM parseTikiWiki def{ stateOptions = opts } + (T.unpack (crFilter s) ++ "\n\n") + case res of + Left e -> throwError e + Right d -> return d + +type TikiWikiParser = ParserT [Char] ParserState + +-- +-- utility functions +-- + +tryMsg :: PandocMonad m => String -> TikiWikiParser m a -> TikiWikiParser m a +tryMsg msg p = try p <?> msg + +skip :: PandocMonad m => TikiWikiParser m a -> TikiWikiParser m () +skip parser = parser >> return () + +nested :: PandocMonad m => TikiWikiParser m a -> TikiWikiParser m a +nested p = do + nestlevel <- stateMaxNestingLevel <$> getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + +-- +-- main parser +-- + +parseTikiWiki :: PandocMonad m => TikiWikiParser m Pandoc +parseTikiWiki = do + bs <- mconcat <$> many block + spaces + eof + return $ B.doc bs + +block :: PandocMonad m => TikiWikiParser m B.Blocks +block = do + verbosity <- getsCommonState stVerbosity + pos <- getPosition + res <- mempty <$ skipMany1 blankline + <|> blockElements + <|> para + skipMany blankline + when (verbosity >= INFO) $ do + trace (printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res)) + return res + +blockElements :: PandocMonad m => TikiWikiParser m B.Blocks +blockElements = choice [ table + , hr + , header + , mixedList + , definitionList + , codeMacro + ] + +-- top +-- ---- +-- bottom +-- +-- ---- +-- +hr :: PandocMonad m => TikiWikiParser m B.Blocks +hr = try $ do + string "----" + many (char '-') + newline + return $ B.horizontalRule + +-- ! header +-- +-- !! header level two +-- +-- !!! header level 3 +-- +header :: PandocMonad m => TikiWikiParser m B.Blocks +header = tryMsg "header" $ do + level <- many1 (char '!') >>= return . length + guard $ level <= 6 + skipSpaces + content <- B.trimInlines . mconcat <$> manyTill inline newline + attr <- registerHeader nullAttr content + return $ B.headerWith attr level $ content + +tableRow :: PandocMonad m => TikiWikiParser m [B.Blocks] +tableRow = try $ do +-- row <- sepBy1 (many1Till inline $ oneOf "\n|") (try $ string "|" <* notFollowedBy (oneOf "|\n")) +-- return $ map (B.plain . mconcat) row + row <- sepBy1 ((many1 $ noneOf "\n|") >>= parseColumn) (try $ string "|" <* notFollowedBy (oneOf "|\n")) + return $ map B.plain row + where + parseColumn x = do + parsed <- parseFromString (many1 inline) x + return $ mconcat parsed + + + +-- Tables: +-- +-- ||foo|| +-- +-- ||row1-column1|row1-column2||row2-column1|row2-column2|| +-- +-- ||row1-column1|row1-column2 +-- row2-column1|row2-column2|| +-- +-- ||row1-column1|row1-column2 +-- row2-column1|row2-column2||row3-column1|row3-column2|| +-- +-- || Orange | Apple | more +-- Bread | Pie | more +-- Butter | Ice cream | and more || +-- +table :: PandocMonad m => TikiWikiParser m B.Blocks +table = try $ do + string "||" + rows <- sepBy1 tableRow (try $ string "\n" <|> (string "||" <* notFollowedBy (string "\n"))) + string "||" + newline + -- return $ B.simpleTable (headers rows) $ trace ("rows: " ++ (show rows)) rows + return $ B.simpleTable (headers rows) $ rows + where + -- The headers are as many empty srings as the number of columns + -- in the first row + headers rows = map (B.plain . B.str) $ take (length $ rows !! 0) $ repeat "" + +para :: PandocMonad m => TikiWikiParser m B.Blocks +para = many1Till inline endOfParaElement >>= return . result . mconcat + where + endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement + endOfInput = try $ skipMany blankline >> skipSpaces >> eof + endOfPara = try $ blankline >> skipMany1 blankline + newBlockElement = try $ blankline >> skip blockElements + result content = if F.all (==Space) content + then mempty + else B.para $ B.trimInlines content + +-- ;item 1: definition 1 +-- ;item 2: definition 2-1 +-- + definition 2-2 +-- ;item ''3'': definition ''3'' +-- +definitionList :: PandocMonad m => TikiWikiParser m B.Blocks +definitionList = tryMsg "definitionList" $ do + elements <- many1 $ parseDefinitionListItem + return $ B.definitionList elements + where + parseDefinitionListItem :: PandocMonad m => TikiWikiParser m (B.Inlines, [B.Blocks]) + parseDefinitionListItem = do + skipSpaces >> char ';' <* skipSpaces + term <- many1Till inline $ char ':' <* skipSpaces + line <- listItemLine 1 + return $ (mconcat term, [B.plain line]) + +data ListType = None | Numbered | Bullet deriving (Ord, Eq, Show) + +data ListNesting = LN { lntype :: ListType, lnnest :: Int } deriving (Ord, Eq, Show) + +-- The first argument is a stack (most recent == head) of our list +-- nesting status; the list type and the nesting level; if we're in +-- a number list in a bullet list it'd be +-- [LN Numbered 2, LN Bullet 1] +-- +-- Mixed list example: +-- +-- # one +-- # two +-- ** two point one +-- ** two point two +-- # three +-- # four +-- +mixedList :: PandocMonad m => TikiWikiParser m B.Blocks +mixedList = try $ do + items <- try $ many1 listItem + return $ mconcat $ fixListNesting $ spanFoldUpList (LN None 0) items + +-- See the "Handling Lists" section of DESIGN-CODE for why this +-- function exists. It's to post-process the lists and do some +-- mappends. +-- +-- We need to walk the tree two items at a time, so we can see what +-- we're going to join *to* before we get there. +-- +-- Because of that, it seemed easier to do it by hand than to try to +-- figre out a fold or something. +fixListNesting :: [B.Blocks] -> [B.Blocks] +fixListNesting [] = [] +fixListNesting (first:[]) = [recurseOnList first] +-- fixListNesting nestall | trace ("\n\nfixListNesting: " ++ (show nestall)) False = undefined +-- fixListNesting nestall@(first:second:rest) = +fixListNesting (first:second:rest) = + let secondBlock = head $ B.toList second in + case secondBlock of + BulletList _ -> fixListNesting $ [(mappend (recurseOnList first) (recurseOnList second))] ++ rest + OrderedList _ _ -> fixListNesting $ [(mappend (recurseOnList first) (recurseOnList second))] ++ rest + _ -> [recurseOnList first] ++ fixListNesting (second:rest) + +-- This function walks the Block structure for fixListNesting, +-- because it's a bit complicated, what with converting to and from +-- lists and so on. +recurseOnList :: B.Blocks -> B.Blocks +-- recurseOnList item | trace ("rOL: " ++ (show $ length $ B.toList item) ++ ", " ++ (show $ B.toList item)) False = undefined +recurseOnList items + | (length $ B.toList items) == 1 = + let itemBlock = head $ B.toList items in + case itemBlock of + BulletList listItems -> B.bulletList $ fixListNesting $ map B.fromList listItems + OrderedList _ listItems -> B.orderedList $ fixListNesting $ map B.fromList listItems + _ -> items + + -- The otherwise works because we constructed the blocks, and we + -- know for a fact that no mappends have been run on them; each + -- Blocks consists of exactly one Block. + -- + -- Anything that's not like that has already been processed by + -- fixListNesting; don't bother to process it again. + | otherwise = items + + +-- Turn the list if list items into a tree by breaking off the first +-- item, splitting the remainder of the list into items that are in +-- the tree of the first item and those that aren't, wrapping the +-- tree of the first item in its list time, and recursing on both +-- sections. +spanFoldUpList :: ListNesting -> [(ListNesting, B.Blocks)] -> [B.Blocks] +spanFoldUpList _ [] = [] +spanFoldUpList ln (first:[]) = + listWrap ln (fst first) [snd first] +spanFoldUpList ln (first:rest) = + let (span1, span2) = span (splitListNesting (fst first)) rest + newTree1 = listWrap ln (fst first) $ [snd first] ++ spanFoldUpList (fst first) span1 + newTree2 = spanFoldUpList ln span2 + in + newTree1 ++ newTree2 + +-- Decide if the second item should be in the tree of the first +-- item, which is true if the second item is at a deeper nesting +-- level and of the same type. +splitListNesting :: ListNesting -> (ListNesting, B.Blocks) -> Bool +splitListNesting ln1 (ln2, _) = + if (lnnest ln1) < (lnnest ln2) then + True + else + if ln1 == ln2 then + True + else + False + +-- If we've moved to a deeper nesting level, wrap the new level in +-- the appropriate type of list. +listWrap :: ListNesting -> ListNesting -> [B.Blocks] -> [B.Blocks] +listWrap upperLN curLN retTree = + if upperLN == curLN then + retTree + else + case lntype curLN of + None -> [] + Bullet -> [B.bulletList retTree] + Numbered -> [B.orderedList retTree] + +listItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks) +listItem = choice [ + bulletItem + , numberedItem + ] + + +-- * Start each line +-- * with an asterisk (*). +-- ** More asterisks gives deeper +-- *** and deeper levels. +-- +bulletItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks) +bulletItem = try $ do + prefix <- many1 $ char '*' + many1 $ char ' ' + content <- listItemLine (length prefix) + return $ (LN Bullet (length prefix), B.plain content) + +-- # Start each line +-- # with a number (1.). +-- ## More number signs gives deeper +-- ### and deeper +-- +numberedItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks) +numberedItem = try $ do + prefix <- many1 $ char '#' + many1 $ char ' ' + content <- listItemLine (length prefix) + return $ (LN Numbered (length prefix), B.plain content) + +listItemLine :: PandocMonad m => Int -> TikiWikiParser m B.Inlines +listItemLine nest = lineContent >>= parseContent >>= return + where + lineContent = do + content <- anyLine + continuation <- optionMaybe listContinuation + return $ filterSpaces content ++ "\n" ++ (maybe "" id continuation) + filterSpaces = reverse . dropWhile (== ' ') . reverse + listContinuation = string (take nest (repeat '+')) >> lineContent + parseContent x = do + parsed <- parseFromString (many1 inline) x + return $ mconcat parsed + +-- Turn the CODE macro attributes into Pandoc code block attributes. +mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)]) +mungeAttrs rawAttrs = ("", classes, rawAttrs) + where + -- "colors" is TikiWiki CODE macro for "name of language to do + -- highlighting for"; turn the value into a class + color = fromMaybe "" $ lookup "colors" rawAttrs + -- ln = 1 means line numbering. It's also the default. So we + -- emit numberLines as a class unless ln = 0 + lnRaw = fromMaybe "1" $ lookup "ln" rawAttrs + ln = if lnRaw == "0" then + "" + else + "numberLines" + classes = filter (/= "") [color, ln] + +codeMacro :: PandocMonad m => TikiWikiParser m B.Blocks +codeMacro = try $ do + string "{CODE(" + rawAttrs <- macroAttrs + string ")}" + body <- manyTill anyChar (try (string "{CODE}")) + newline + if length rawAttrs > 0 + then + return $ B.codeBlockWith (mungeAttrs rawAttrs) body + else + return $ B.codeBlock body + + +-- +-- inline parsers +-- + +inline :: PandocMonad m => TikiWikiParser m B.Inlines +inline = choice [ whitespace + , noparse + , strong + , emph + , nbsp + , image + , htmlComment + , strikeout + , code + , wikiLink + , notExternalLink + , externalLink + , superTag + , superMacro + , subTag + , subMacro + , escapedChar + , colored + , centered + , underlined + , boxed + , breakChars + , str + , symbol + ] <?> "inline" + +whitespace :: PandocMonad m => TikiWikiParser m B.Inlines +whitespace = (lb <|> regsp) >>= return + where lb = try $ skipMany spaceChar >> linebreak >> return B.space + regsp = try $ skipMany1 spaceChar >> return B.space + +-- UNSUPPORTED, as there doesn't seem to be any facility in calibre +-- for this +nbsp :: PandocMonad m => TikiWikiParser m B.Inlines +nbsp = try $ do + string "~hs~" + return $ B.str " NOT SUPPORTED BEGIN: ~hs~ (non-breaking space) :END " + +-- UNSUPPORTED, as the desired behaviour (that the data be +-- *retained* and stored as a comment) doesn't exist in calibre, and +-- silently throwing data out seemed bad. +htmlComment :: PandocMonad m => TikiWikiParser m B.Inlines +htmlComment = try $ do + string "~hc~" + inner <- many1 $ noneOf "~" + string "~/hc~" + return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " ++ inner ++ " ~/hc~ :END " + +linebreak :: PandocMonad m => TikiWikiParser m B.Inlines +linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) + where lastNewline = eof >> return mempty + innerNewline = return B.space + +between :: (Monoid c, PandocMonad m, Show b) => TikiWikiParser m a -> TikiWikiParser m b -> (TikiWikiParser m b -> TikiWikiParser m c) -> TikiWikiParser m c +between start end p = + mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) + +enclosed :: (Monoid b, PandocMonad m, Show a) => TikiWikiParser m a -> (TikiWikiParser m a -> TikiWikiParser m b) -> TikiWikiParser m b +enclosed sep p = between sep (try $ sep <* endMarker) p + where + endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|'_") <|> eof + endSpace = (spaceChar <|> newline) >> return B.space + + +nestedInlines :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m B.Inlines +nestedInlines end = innerSpace <|> nestedInline + where + innerSpace = try $ whitespace <* (notFollowedBy end) + nestedInline = notFollowedBy whitespace >> nested inline + +-- {img attId="39" imalign="right" link="http://info.tikiwiki.org" alt="Panama Hat"} +-- +-- {img attId="37", thumb="mouseover", styleimage="border", desc="150"} +-- +-- {img src="img/wiki_up/393px-Pears.jpg" thumb="y" imalign="center" stylebox="border" button="y" desc="Pretty pears" max="200" rel="box"} +-- +image :: PandocMonad m => TikiWikiParser m B.Inlines +image = try $ do + string "{img " + rawAttrs <- sepEndBy1 imageAttr spaces + string "}" + let src = fromMaybe "" $ lookup "src" rawAttrs + let title = fromMaybe src $ lookup "desc" rawAttrs + let alt = fromMaybe title $ lookup "alt" rawAttrs + let classes = map fst $ filter (\(_,b) -> b == "" || b == "y") rawAttrs + if length src > 0 + then + return $ B.imageWith ("", classes, rawAttrs) src title (B.str alt) + else + return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " ++ (printAttrs rawAttrs) ++ "} :END " + where + printAttrs attrs = intercalate " " $ map (\(a, b) -> a ++ "=\"" ++ b ++ "\"") attrs + +imageAttr :: PandocMonad m => TikiWikiParser m (String, String) +imageAttr = try $ do + key <- many1 (noneOf "=} \t\n") + char '=' + optional $ char '"' + value <- many1 (noneOf "}\"\n") + optional $ char '"' + optional $ char ',' + return (key, value) + + +-- __strong__ +strong :: PandocMonad m => TikiWikiParser m B.Inlines +strong = try $ enclosed (string "__") nestedInlines >>= return . B.strong + +-- ''emph'' +emph :: PandocMonad m => TikiWikiParser m B.Inlines +emph = try $ enclosed (string "''") nestedInlines >>= return . B.emph + +-- ~246~ +escapedChar :: PandocMonad m => TikiWikiParser m B.Inlines +escapedChar = try $ do + string "~" + inner <- many1 $ oneOf "0123456789" + string "~" + return $ B.str $ [(toEnum ((read inner) :: Int)) :: Char] + +-- UNSUPPORTED, as there doesn't seem to be any facility in calibre +-- for this +centered :: PandocMonad m => TikiWikiParser m B.Inlines +centered = try $ do + string "::" + inner <- many1 $ noneOf ":\n" + string "::" + return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" ++ inner ++ ":: :END " + +-- UNSUPPORTED, as there doesn't seem to be any facility in calibre +-- for this +colored :: PandocMonad m => TikiWikiParser m B.Inlines +colored = try $ do + string "~~" + inner <- many1 $ noneOf "~\n" + string "~~" + return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" ++ inner ++ "~~ :END " + +-- UNSUPPORTED, as there doesn't seem to be any facility in calibre +-- for this +underlined :: PandocMonad m => TikiWikiParser m B.Inlines +underlined = try $ do + string "===" + inner <- many1 $ noneOf "=\n" + string "===" + return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" ++ inner ++ "=== :END " + +-- UNSUPPORTED, as there doesn't seem to be any facility in calibre +-- for this +boxed :: PandocMonad m => TikiWikiParser m B.Inlines +boxed = try $ do + string "^" + inner <- many1 $ noneOf "^\n" + string "^" + return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" ++ inner ++ "^ :END " + +-- --text-- +strikeout :: PandocMonad m => TikiWikiParser m B.Inlines +strikeout = try $ enclosed (string "--") nestedInlines >>= return . B.strikeout + +nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m String +nestedString end = innerSpace <|> (count 1 nonspaceChar) + where + innerSpace = try $ many1 spaceChar <* notFollowedBy end + +breakChars :: PandocMonad m => TikiWikiParser m B.Inlines +breakChars = try $ string "%%%" >> return B.linebreak + +-- superscript: foo{TAG(tag=>sup)}super{TAG}foo / bar{SUP()}super2{SUP}bar +superTag :: PandocMonad m => TikiWikiParser m B.Inlines +superTag = try $ between (string "{TAG(tag=>sup)}") (string "{TAG}") nestedString >>= return . B.superscript . B.text . fromEntities + +superMacro :: PandocMonad m => TikiWikiParser m B.Inlines +superMacro = try $ do + string "{SUP(" + manyTill anyChar (string ")}") + body <- manyTill anyChar (string "{SUP}") + return $ B.superscript $ B.text body + +-- subscript: baz{TAG(tag=>sub)}sub{TAG}qux / qux{SUB()}sub2{SUB}qux +subTag :: PandocMonad m => TikiWikiParser m B.Inlines +subTag = try $ between (string "{TAG(tag=>sub)}") (string "{TAG}") nestedString >>= return . B.subscript . B.text . fromEntities + +subMacro :: PandocMonad m => TikiWikiParser m B.Inlines +subMacro = try $ do + string "{SUB(" + manyTill anyChar (string ")}") + body <- manyTill anyChar (string "{SUB}") + return $ B.subscript $ B.text body + +-- -+text+- +code :: PandocMonad m => TikiWikiParser m B.Inlines +code = try $ between (string "-+") (string "+-") nestedString >>= return . B.code . fromEntities + +macroAttr :: PandocMonad m => TikiWikiParser m (String, String) +macroAttr = try $ do + key <- many1 (noneOf "=)") + char '=' + optional $ char '"' + value <- many1 (noneOf " )\"") + optional $ char '"' + return (key, value) + +macroAttrs :: PandocMonad m => TikiWikiParser m [(String, String)] +macroAttrs = try $ do + attrs <- sepEndBy macroAttr spaces + return attrs + +-- ~np~ __not bold__ ~/np~ +noparse :: PandocMonad m => TikiWikiParser m B.Inlines +noparse = try $ do + string "~np~" + body <- manyTill anyChar (string "~/np~") + return $ B.str body + +str :: PandocMonad m => TikiWikiParser m B.Inlines +str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str + +symbol :: PandocMonad m => TikiWikiParser m B.Inlines +symbol = count 1 nonspaceChar >>= return . B.str + +-- [[not a link] +notExternalLink :: PandocMonad m => TikiWikiParser m B.Inlines +notExternalLink = try $ do + start <- string "[[" + body <- many (noneOf "\n[]") + end <- string "]" + return $ B.text (start ++ body ++ end) + +-- [http://www.somesite.org url|Some Site title] +-- ((internal link)) +-- +-- The ((...)) wiki links and [...] external links are handled +-- exactly the same; this abstracts that out +makeLink :: PandocMonad m => String -> String -> String -> TikiWikiParser m B.Inlines +makeLink start middle end = try $ do + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + (url, title, anchor) <- wikiLinkText start middle end + parsedTitle <- parseFromString (many1 inline) title + setState $ st{ stateAllowLinks = True } + return $ B.link (url++anchor) "" $ mconcat $ parsedTitle + +wikiLinkText :: PandocMonad m => String -> String -> String -> TikiWikiParser m (String, String, String) +wikiLinkText start middle end = do + string start + url <- many1 (noneOf $ middle ++ "\n") + seg1 <- option url linkContent + seg2 <- option "" linkContent + string end + if seg2 /= "" + then + return (url, seg2, seg1) + else + return (url, seg1, "") + where + linkContent = do + (char '|') + mystr <- many (noneOf middle) + return $ mystr + +externalLink :: PandocMonad m => TikiWikiParser m B.Inlines +externalLink = makeLink "[" "]|" "]" + +-- NB: this wiki linking is unlikely to work for anyone besides me +-- (rlpowell); it happens to work for me because my Hakyll code has +-- post-processing that treats pandoc .md titles as valid link +-- targets, so something like +-- [see also this other post](My Other Page) is perfectly valid. +wikiLink :: PandocMonad m => TikiWikiParser m B.Inlines +wikiLink = makeLink "((" ")|" "))" + diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 1a26b7168..516cc4b2f 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -38,28 +38,28 @@ module Text.Pandoc.Templates ( module Text.DocTemplates , getDefaultTemplate ) where -import qualified Control.Exception as E (IOException, try) import Control.Monad.Except (throwError) import Data.Aeson (ToJSON (..)) import qualified Data.Text as T import System.FilePath ((<.>), (</>)) import Text.DocTemplates (Template, TemplateTarget, applyTemplate, compileTemplate, renderTemplate, varListToJSON) -import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Class (PandocMonad(readDataFile)) import Text.Pandoc.Error -import Text.Pandoc.Shared (readDataFileUTF8) +import qualified Text.Pandoc.UTF8 as UTF8 -- | Get default template for the specified writer. -getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first +getDefaultTemplate :: PandocMonad m + => (Maybe FilePath) -- ^ User data directory to search 1st -> String -- ^ Name of writer - -> IO (Either E.IOException String) + -> m String getDefaultTemplate user writer = do let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions case format of - "native" -> return $ Right "" - "json" -> return $ Right "" - "docx" -> return $ Right "" - "fb2" -> return $ Right "" + "native" -> return "" + "json" -> return "" + "docx" -> return "" + "fb2" -> return "" "odt" -> getDefaultTemplate user "opendocument" "html" -> getDefaultTemplate user "html5" "docbook" -> getDefaultTemplate user "docbook5" @@ -70,7 +70,7 @@ getDefaultTemplate user writer = do "markdown_mmd" -> getDefaultTemplate user "markdown" "markdown_phpextra" -> getDefaultTemplate user "markdown" _ -> let fname = "templates" </> "default" <.> format - in E.try $ readDataFileUTF8 user fname + in UTF8.toString <$> readDataFile user fname -- | Like 'applyTemplate', but runs in PandocMonad and -- raises an error if compilation fails. diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 2047285eb..3f612f40a 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -84,7 +84,7 @@ metaToJSON' blockWriter inlineWriter (Meta metamap) = do renderedMap <- Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap - return $ M.foldWithKey defField (Object H.empty) renderedMap + return $ M.foldrWithKey defField (Object H.empty) renderedMap -- | Add variables to JSON object, replacing any existing values. -- Also include @meta-json@, a field containing a string representation |