diff options
Diffstat (limited to 'src')
68 files changed, 4316 insertions, 3415 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index d83fa85e7..3d28dbfb9 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-} +{-# LANGUAGE ScopedTypeVariables, FlexibleInstances, GADTs #-} {- Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> @@ -38,12 +38,11 @@ inline links: > module Main where > import Text.Pandoc > -> markdownToRST :: String -> String +> markdownToRST :: String -> Either PandocError String > markdownToRST = -> writeRST def {writerReferenceLinks = True} . -> handleError . readMarkdown def +> writeRST def {writerReferenceLinks = True} . readMarkdown def > -> main = getContents >>= putStrLn . markdownToRST +> main = getContents >>= either error return markdownToRST >>= putStrLn Note: all of the readers assume that the input text has @'\n'@ line endings. So if you get your input text from a web form, @@ -59,14 +58,19 @@ module Text.Pandoc , module Text.Pandoc.Generic -- * Options , module Text.Pandoc.Options + -- * Typeclass + , PandocMonad + , runIO + , runPure + , runIOorExplode -- * Error handling , module Text.Pandoc.Error -- * Lists of readers and writers , readers + -- , writers , writers -- * Readers: converting /to/ Pandoc format , Reader (..) - , mkStringReader , readDocx , readOdt , readMarkdown @@ -84,22 +88,30 @@ module Text.Pandoc , readJSON , readTWiki , readTxt2Tags - , readTxt2TagsNoMacros , readEPUB -- * Writers: converting /from/ Pandoc format - , Writer (..) + , Writer(..) , writeNative , writeJSON , writeMarkdown , writePlain , writeRST , writeLaTeX + , writeBeamer , writeConTeXt , writeTexinfo - , writeHtml - , writeHtmlString + , writeHtml4 + , writeHtml4String + , writeHtml5 + , writeHtml5String + , writeRevealJs + , writeS5 + , writeSlidy + , writeSlideous + , writeDZSlides , writeICML - , writeDocbook + , writeDocbook4 + , writeDocbook5 , writeOPML , writeOpenDocument , writeMan @@ -110,7 +122,8 @@ module Text.Pandoc , writeRTF , writeODT , writeDocx - , writeEPUB + , writeEPUB2 + , writeEPUB3 , writeFB2 , writeOrg , writeAsciiDoc @@ -124,13 +137,11 @@ module Text.Pandoc , getReader , getWriter , getDefaultExtensions - , ToJsonFilter(..) , pandocVersion ) where import Text.Pandoc.Definition import Text.Pandoc.Generic -import Text.Pandoc.JSON import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.CommonMark import Text.Pandoc.Readers.MediaWiki @@ -177,20 +188,19 @@ import Text.Pandoc.Writers.Custom import Text.Pandoc.Writers.TEI import Text.Pandoc.Templates import Text.Pandoc.Options -import Text.Pandoc.Shared (safeRead, warn, mapLeft, pandocVersion) -import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.Shared (safeRead, mapLeft, pandocVersion) import Text.Pandoc.Error +import Text.Pandoc.Class import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate) -import Data.Set (Set) -import qualified Data.Set as Set import Text.Parsec import Text.Parsec.Error import qualified Text.Pandoc.UTF8 as UTF8 +import Control.Monad.Except (throwError) parseFormatSpec :: String - -> Either ParseError (String, Set Extension -> Set Extension) + -> Either ParseError (String, Extensions -> Extensions) parseFormatSpec = parse formatSpec "" where formatSpec = do name <- formatName @@ -206,146 +216,131 @@ parseFormatSpec = parse formatSpec "" | name == "lhs" -> return Ext_literate_haskell | otherwise -> fail $ "Unknown extension: " ++ name return $ case polarity of - '-' -> Set.delete ext - _ -> Set.insert ext + '-' -> disableExtension ext + _ -> enableExtension ext - -data Reader = StringReader (ReaderOptions -> String -> IO (Either PandocError Pandoc)) - | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Either PandocError (Pandoc,MediaBag))) - -mkStringReader :: (ReaderOptions -> String -> Either PandocError Pandoc) -> Reader -mkStringReader r = StringReader (\o s -> return $ r o s) - -mkStringReaderWithWarnings :: (ReaderOptions -> String -> Either PandocError (Pandoc, [String])) -> Reader -mkStringReaderWithWarnings r = StringReader $ \o s -> - case r o s of - Left err -> return $ Left err - Right (doc, warnings) -> do - mapM_ warn warnings - return (Right doc) - -mkBSReader :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)) -> Reader -mkBSReader r = ByteStringReader (\o s -> return $ r o s) - -mkBSReaderWithWarnings :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag, [String])) -> Reader -mkBSReaderWithWarnings r = ByteStringReader $ \o s -> - case r o s of - Left err -> return $ Left err - Right (doc, mediaBag, warnings) -> do - mapM_ warn warnings - return $ Right (doc, mediaBag) +data Reader m = StringReader (ReaderOptions -> String -> m Pandoc) + | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc) -- | Association list of formats and readers. -readers :: [(String, Reader)] -readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) - ,("json" , mkStringReader readJSON ) - ,("markdown" , mkStringReaderWithWarnings readMarkdownWithWarnings) - ,("markdown_strict" , mkStringReaderWithWarnings readMarkdownWithWarnings) - ,("markdown_phpextra" , mkStringReaderWithWarnings readMarkdownWithWarnings) - ,("markdown_github" , mkStringReaderWithWarnings readMarkdownWithWarnings) - ,("markdown_mmd", mkStringReaderWithWarnings readMarkdownWithWarnings) - ,("commonmark" , mkStringReader readCommonMark) - ,("rst" , mkStringReaderWithWarnings readRSTWithWarnings ) - ,("mediawiki" , mkStringReader readMediaWiki) - ,("docbook" , mkStringReader readDocBook) - ,("opml" , mkStringReader readOPML) - ,("org" , mkStringReader readOrg) - ,("textile" , mkStringReader readTextile) -- TODO : textile+lhs - ,("html" , mkStringReader readHtml) - ,("latex" , mkStringReader readLaTeX) - ,("haddock" , mkStringReader readHaddock) - ,("twiki" , mkStringReader readTWiki) - ,("docx" , mkBSReaderWithWarnings readDocxWithWarnings) - ,("odt" , mkBSReader readOdt) - ,("t2t" , mkStringReader readTxt2TagsNoMacros) - ,("epub" , mkBSReader readEPUB) +readers :: PandocMonad m => [(String, Reader m)] +readers = [ ("native" , StringReader readNative) + ,("json" , StringReader $ \o s -> + case readJSON o s of + Right doc -> return doc + Left _ -> throwError $ PandocParseError "JSON parse error") + ,("markdown" , StringReader readMarkdown) + ,("markdown_strict" , StringReader readMarkdown) + ,("markdown_phpextra" , StringReader readMarkdown) + ,("markdown_github" , StringReader readMarkdown) + ,("markdown_mmd", StringReader readMarkdown) + ,("commonmark" , StringReader readCommonMark) + ,("rst" , StringReader readRST) + ,("mediawiki" , StringReader readMediaWiki) + ,("docbook" , StringReader readDocBook) + ,("opml" , StringReader readOPML) + ,("org" , StringReader readOrg) + ,("textile" , StringReader readTextile) -- TODO : textile+lhs + ,("html" , StringReader readHtml) + ,("latex" , StringReader readLaTeX) + ,("haddock" , StringReader readHaddock) + ,("twiki" , StringReader readTWiki) + ,("docx" , ByteStringReader readDocx) + ,("odt" , ByteStringReader readOdt) + ,("t2t" , StringReader readTxt2Tags) + ,("epub" , ByteStringReader readEPUB) ] -data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) - | IOStringWriter (WriterOptions -> Pandoc -> IO String) - | IOByteStringWriter (WriterOptions -> Pandoc -> IO BL.ByteString) +data Writer m = StringWriter (WriterOptions -> Pandoc -> m String) + | ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString) -- | Association list of formats and writers. -writers :: [ ( String, Writer ) ] +writers :: PandocMonad m => [ ( String, Writer m) ] writers = [ - ("native" , PureStringWriter writeNative) - ,("json" , PureStringWriter writeJSON) - ,("docx" , IOByteStringWriter writeDocx) - ,("odt" , IOByteStringWriter writeODT) - ,("epub" , IOByteStringWriter $ \o -> - writeEPUB o{ writerEpubVersion = Just EPUB2 }) - ,("epub3" , IOByteStringWriter $ \o -> - writeEPUB o{ writerEpubVersion = Just EPUB3 }) - ,("fb2" , IOStringWriter writeFB2) - ,("html" , PureStringWriter writeHtmlString) - ,("html5" , PureStringWriter $ \o -> - writeHtmlString o{ writerHtml5 = True }) - ,("icml" , IOStringWriter writeICML) - ,("s5" , PureStringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = S5Slides - , writerTableOfContents = False }) - ,("slidy" , PureStringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = SlidySlides }) - ,("slideous" , PureStringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = SlideousSlides }) - ,("dzslides" , PureStringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = DZSlides - , writerHtml5 = True }) - ,("revealjs" , PureStringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = RevealJsSlides - , writerHtml5 = True }) - ,("docbook" , PureStringWriter writeDocbook) - ,("docbook5" , PureStringWriter $ \o -> - writeDocbook o{ writerDocbook5 = True }) - ,("opml" , PureStringWriter writeOPML) - ,("opendocument" , PureStringWriter writeOpenDocument) - ,("latex" , PureStringWriter writeLaTeX) - ,("beamer" , PureStringWriter $ \o -> - writeLaTeX o{ writerBeamer = True }) - ,("context" , PureStringWriter writeConTeXt) - ,("texinfo" , PureStringWriter writeTexinfo) - ,("man" , PureStringWriter writeMan) - ,("markdown" , PureStringWriter writeMarkdown) - ,("markdown_strict" , PureStringWriter writeMarkdown) - ,("markdown_phpextra" , PureStringWriter writeMarkdown) - ,("markdown_github" , PureStringWriter writeMarkdown) - ,("markdown_mmd" , PureStringWriter writeMarkdown) - ,("plain" , PureStringWriter writePlain) - ,("rst" , PureStringWriter writeRST) - ,("mediawiki" , PureStringWriter writeMediaWiki) - ,("dokuwiki" , PureStringWriter writeDokuWiki) - ,("zimwiki" , PureStringWriter writeZimWiki) - ,("textile" , PureStringWriter writeTextile) - ,("rtf" , IOStringWriter writeRTFWithEmbeddedImages) - ,("org" , PureStringWriter writeOrg) - ,("asciidoc" , PureStringWriter writeAsciiDoc) - ,("haddock" , PureStringWriter writeHaddock) - ,("commonmark" , PureStringWriter writeCommonMark) - ,("tei" , PureStringWriter writeTEI) + ("native" , StringWriter writeNative) + ,("json" , StringWriter $ \o d -> return $ writeJSON o d) + ,("docx" , ByteStringWriter writeDocx) + ,("odt" , ByteStringWriter writeODT) + ,("epub" , ByteStringWriter writeEPUB2) + ,("epub2" , ByteStringWriter writeEPUB2) + ,("epub3" , ByteStringWriter writeEPUB3) + ,("fb2" , StringWriter writeFB2) + ,("html" , StringWriter writeHtml5String) + ,("html4" , StringWriter writeHtml4String) + ,("html5" , StringWriter writeHtml5String) + ,("icml" , StringWriter writeICML) + ,("s5" , StringWriter writeS5) + ,("slidy" , StringWriter writeSlidy) + ,("slideous" , StringWriter writeSlideous) + ,("dzslides" , StringWriter writeDZSlides) + ,("revealjs" , StringWriter writeRevealJs) + ,("docbook" , StringWriter writeDocbook5) + ,("docbook4" , StringWriter writeDocbook4) + ,("docbook5" , StringWriter writeDocbook5) + ,("opml" , StringWriter writeOPML) + ,("opendocument" , StringWriter writeOpenDocument) + ,("latex" , StringWriter writeLaTeX) + ,("beamer" , StringWriter writeBeamer) + ,("context" , StringWriter writeConTeXt) + ,("texinfo" , StringWriter writeTexinfo) + ,("man" , StringWriter writeMan) + ,("markdown" , StringWriter writeMarkdown) + ,("markdown_strict" , StringWriter writeMarkdown) + ,("markdown_phpextra" , StringWriter writeMarkdown) + ,("markdown_github" , StringWriter writeMarkdown) + ,("markdown_mmd" , StringWriter writeMarkdown) + ,("plain" , StringWriter writePlain) + ,("rst" , StringWriter writeRST) + ,("mediawiki" , StringWriter writeMediaWiki) + ,("dokuwiki" , StringWriter writeDokuWiki) + ,("zimwiki" , StringWriter writeZimWiki) + ,("textile" , StringWriter writeTextile) + ,("rtf" , StringWriter writeRTF) + ,("org" , StringWriter writeOrg) + ,("asciidoc" , StringWriter writeAsciiDoc) + ,("haddock" , StringWriter writeHaddock) + ,("commonmark" , StringWriter writeCommonMark) + ,("tei" , StringWriter writeTEI) ] -getDefaultExtensions :: String -> Set Extension +getDefaultExtensions :: String -> Extensions getDefaultExtensions "markdown_strict" = strictExtensions getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions getDefaultExtensions "markdown_mmd" = multimarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "plain" = plainExtensions -getDefaultExtensions "org" = Set.fromList [Ext_citations, - Ext_auto_identifiers] -getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers] -getDefaultExtensions "html" = Set.fromList [Ext_auto_identifiers, - Ext_native_divs, - Ext_native_spans] +getDefaultExtensions "org" = extensionsFromList + [Ext_citations, + Ext_auto_identifiers] +getDefaultExtensions "html" = extensionsFromList + [Ext_auto_identifiers, + Ext_native_divs, + Ext_native_spans] +getDefaultExtensions "html4" = getDefaultExtensions "html" getDefaultExtensions "html5" = getDefaultExtensions "html" -getDefaultExtensions "epub" = Set.fromList [Ext_raw_html, - Ext_native_divs, - Ext_native_spans, - Ext_epub_html_exts] -getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] +getDefaultExtensions "epub" = extensionsFromList + [Ext_raw_html, + Ext_native_divs, + Ext_native_spans, + Ext_epub_html_exts] +getDefaultExtensions "epub2" = getDefaultExtensions "epub" +getDefaultExtensions "epub3" = getDefaultExtensions "epub" +getDefaultExtensions "latex" = extensionsFromList + [Ext_smart, + Ext_auto_identifiers] +getDefaultExtensions "context" = extensionsFromList + [Ext_smart, + Ext_auto_identifiers] +getDefaultExtensions "textile" = extensionsFromList + [Ext_old_dashes, + Ext_smart, + Ext_auto_identifiers] +getDefaultExtensions _ = extensionsFromList + [Ext_auto_identifiers] -- | Retrieve reader based on formatSpec (format+extensions). -getReader :: String -> Either String Reader +getReader :: PandocMonad m => String -> Either String (Reader m) getReader s = case parseFormatSpec s of Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] @@ -359,32 +354,22 @@ getReader s = r o{ readerExtensions = setExts $ getDefaultExtensions readerName } --- | Retrieve writer based on formatSpec (format+extensions). -getWriter :: String -> Either String Writer +getWriter :: PandocMonad m => String -> Either String (Writer m) getWriter s = case parseFormatSpec s of Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] Right (writerName, setExts) -> case lookup writerName writers of Nothing -> Left $ "Unknown writer: " ++ writerName - Just (PureStringWriter r) -> Right $ PureStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - Just (IOStringWriter r) -> Right $ IOStringWriter $ + Just (StringWriter r) -> Right $ StringWriter $ \o -> r o{ writerExtensions = setExts $ getDefaultExtensions writerName } - Just (IOByteStringWriter r) -> Right $ IOByteStringWriter $ + Just (ByteStringWriter r) -> Right $ ByteStringWriter $ \o -> r o{ writerExtensions = setExts $ getDefaultExtensions writerName } -{-# DEPRECATED toJsonFilter "Use 'toJSONFilter' from 'Text.Pandoc.JSON' instead" #-} --- | Deprecated. Use @toJSONFilter@ from @Text.Pandoc.JSON@ instead. -class ToJSONFilter a => ToJsonFilter a - where toJsonFilter :: a -> IO () - toJsonFilter = toJSONFilter - readJSON :: ReaderOptions -> String -> Either PandocError Pandoc -readJSON _ = mapLeft ParseFailure . eitherDecode' . UTF8.fromStringLazy +readJSON _ = mapLeft PandocParseError . eitherDecode' . UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs new file mode 100644 index 000000000..348da71ba --- /dev/null +++ b/src/Text/Pandoc/Class.hs @@ -0,0 +1,585 @@ +{-# LANGUAGE DeriveFunctor, DeriveDataTypeable, TypeSynonymInstances, +FlexibleInstances, GeneralizedNewtypeDeriving, FlexibleContexts #-} + +{- +Copyright (C) 2016 Jesse Rosenthal <jrosenthal@jhu.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Class + Copyright : Copyright (C) 2016 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable + +Typeclass for pandoc readers and writers, allowing both IO and pure instances. +-} + +module Text.Pandoc.Class ( PandocMonad(..) + , CommonState(..) + , PureState(..) + , getPureState + , getsPureState + , putPureState + , modifyPureState + , getPOSIXTime + , getZonedTime + , warning + , warningWithPos + , report + , getLog + , setVerbosity + , getMediaBag + , setMediaBag + , insertMedia + , insertDeferredMedia + , fetchItem + , getInputFiles + , getOutputFile + , PandocIO(..) + , PandocPure(..) + , FileTree(..) + , FileInfo(..) + , runIO + , runIOorExplode + , runPure + , withMediaBag + ) where + +import Prelude hiding (readFile) +import System.Random (StdGen, next, mkStdGen) +import qualified System.Random as IO (newStdGen) +import Codec.Archive.Zip (Archive, fromArchive, emptyArchive) +import Data.Unique (hashUnique) +import qualified Data.Unique as IO (newUnique) +import qualified Text.Pandoc.Shared as IO ( readDataFile + , openURL ) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Compat.Time (UTCTime) +import Text.Pandoc.Options (Verbosity(..)) +import Text.Pandoc.Parsing (ParserT, SourcePos) +import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime) +import Text.Pandoc.MIME (MimeType, getMimeType) +import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds + , posixSecondsToUTCTime + , POSIXTime ) +import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc) +import Network.URI ( escapeURIString, nonStrictRelativeTo, + unEscapeString, parseURIReference, isAllowedInURI, + parseURI, URI(..) ) +import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia) +import qualified Text.Pandoc.MediaBag as MB +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL +import qualified System.Environment as IO (lookupEnv) +import System.FilePath.Glob (match, compile) +import System.FilePath ((</>), takeExtension, dropExtension) +import qualified System.FilePath.Glob as IO (glob) +import qualified System.Directory as IO (getModificationTime) +import Control.Monad as M (fail) +import Control.Monad.Reader (ReaderT) +import Control.Monad.State +import Control.Monad.Except +import Control.Monad.Writer (WriterT) +import Control.Monad.RWS (RWST) +import Data.Word (Word8) +import Data.Default +import System.IO.Error +import System.IO (stderr) +import qualified Data.Map as M +import Text.Pandoc.Error +import Data.Monoid +import Data.Maybe (catMaybes) +import Text.Printf (printf) + +class (Functor m, Applicative m, Monad m, MonadError PandocError m) + => PandocMonad m where + lookupEnv :: String -> m (Maybe String) + getCurrentTime :: m UTCTime + getCurrentTimeZone :: m TimeZone + newStdGen :: m StdGen + newUniqueHash :: m Int + openURL :: String -> m (B.ByteString, Maybe MimeType) + readFileLazy :: FilePath -> m BL.ByteString + readFileStrict :: FilePath -> m B.ByteString + readDataFile :: Maybe FilePath + -> FilePath + -> m B.ByteString + glob :: String -> m [FilePath] + getModificationTime :: FilePath -> m UTCTime + getCommonState :: m CommonState + putCommonState :: CommonState -> m () + + getsCommonState :: (CommonState -> a) -> m a + getsCommonState f = f <$> getCommonState + + modifyCommonState :: (CommonState -> CommonState) -> m () + modifyCommonState f = getCommonState >>= putCommonState . f + + logOutput :: Verbosity -> String -> m () + +-- Functions defined for all PandocMonad instances + +setVerbosity :: PandocMonad m => Verbosity -> m () +setVerbosity verbosity = + modifyCommonState $ \st -> st{ stVerbosity = verbosity } + +getLog :: PandocMonad m => m [(Verbosity, String)] +getLog = reverse <$> getsCommonState stLog + +warning :: PandocMonad m => String -> m () +warning msg = report WARNING msg + +warningWithPos :: PandocMonad m + => SourcePos + -> String + -> ParserT s st m () +warningWithPos pos msg = lift $ warning $ msg ++ " " ++ show pos + +report :: PandocMonad m => Verbosity -> String -> m () +report level msg = do + verbosity <- getsCommonState stVerbosity + when (level <= verbosity) $ do + logOutput verbosity msg + unless (level == DEBUG) $ + modifyCommonState $ \st -> st{ stLog = (level, msg) : stLog st } + +setMediaBag :: PandocMonad m => MediaBag -> m () +setMediaBag mb = modifyCommonState $ + \st -> st{stDeferredMediaBag = DeferredMediaBag mb mempty} + +getMediaBag :: PandocMonad m => m MediaBag +getMediaBag = do + fetchDeferredMedia + DeferredMediaBag mb' _ <- getsCommonState stDeferredMediaBag + return mb' + +fetchDeferredMedia :: PandocMonad m => m () +fetchDeferredMedia = do + (DeferredMediaBag mb defMedia) <- getsCommonState stDeferredMediaBag + fetchedMedia <- catMaybes <$> mapM fetchMediaItem defMedia + setMediaBag $ foldr + (\(fp, bs, mbMime) mb' -> MB.insertMedia fp mbMime (BL.fromStrict bs) mb') + mb fetchedMedia + +insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m () +insertMedia fp mime bs = do + (DeferredMediaBag mb dm) <- getsCommonState stDeferredMediaBag + let mb' = MB.insertMedia fp mime bs mb + modifyCommonState $ \st -> st{stDeferredMediaBag =DeferredMediaBag mb' dm } + +insertDeferredMedia :: PandocMonad m => FilePath -> m () +insertDeferredMedia fp = do + (DeferredMediaBag mb dm) <- getsCommonState stDeferredMediaBag + modifyCommonState $ + \st -> st{stDeferredMediaBag = DeferredMediaBag mb ((DeferredMediaPath fp) : dm)} + +getInputFiles :: PandocMonad m => m (Maybe [FilePath]) +getInputFiles = getsCommonState stInputFiles + +getOutputFile :: PandocMonad m => m (Maybe FilePath) +getOutputFile = getsCommonState stOutputFile + +getPOSIXTime :: (PandocMonad m) => m POSIXTime +getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime + +getZonedTime :: (PandocMonad m) => m ZonedTime +getZonedTime = do + t <- getCurrentTime + tz <- getCurrentTimeZone + return $ utcToZonedTime tz t + +-- + +newtype DeferredMediaPath = DeferredMediaPath {unDefer :: String} + deriving (Show, Eq) + +data DeferredMediaBag = DeferredMediaBag MediaBag [DeferredMediaPath] + deriving (Show) + +instance Monoid DeferredMediaBag where + mempty = DeferredMediaBag mempty mempty + mappend (DeferredMediaBag mb lst) (DeferredMediaBag mb' lst') = + DeferredMediaBag (mb <> mb') (lst <> lst') + +-- the internal function for downloading individual items. We want to +-- catch errors and return a Nothing with a warning, so we can +-- continue without erroring out. +fetchMediaItem :: PandocMonad m + => DeferredMediaPath + -> m (Maybe (FilePath, B.ByteString, Maybe MimeType)) +fetchMediaItem dfp = + (do (bs, mbmime) <- downloadOrRead Nothing (unDefer dfp) + return $ Just $ (unDefer dfp, bs, mbmime)) + `catchError` + (const $ do warning ("Couldn't access media at " ++ unDefer dfp) + return Nothing) + +data CommonState = CommonState { stLog :: [(Verbosity, String)] + , stDeferredMediaBag :: DeferredMediaBag + , stInputFiles :: Maybe [FilePath] + , stOutputFile :: Maybe FilePath + , stVerbosity :: Verbosity + } + +instance Default CommonState where + def = CommonState { stLog = [] + , stDeferredMediaBag = mempty + , stInputFiles = Nothing + , stOutputFile = Nothing + , stVerbosity = WARNING + } + +runIO :: PandocIO a -> IO (Either PandocError a) +runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma + +withMediaBag :: PandocMonad m => m a -> m (a, MediaBag) +withMediaBag ma = ((,)) <$> ma <*> getMediaBag + +runIOorExplode :: PandocIO a -> IO a +runIOorExplode ma = runIO ma >>= handleError + +newtype PandocIO a = PandocIO { + unPandocIO :: ExceptT PandocError (StateT CommonState IO) a + } deriving ( MonadIO + , Functor + , Applicative + , Monad + , MonadError PandocError + ) + +instance PandocMonad PandocIO where + lookupEnv = liftIO . IO.lookupEnv + getCurrentTime = liftIO IO.getCurrentTime + getCurrentTimeZone = liftIO IO.getCurrentTimeZone + newStdGen = liftIO IO.newStdGen + newUniqueHash = hashUnique <$> (liftIO IO.newUnique) + openURL u = do + eitherRes <- liftIO $ (tryIOError $ IO.openURL u) + case eitherRes of + Right (Right res) -> return res + Right (Left _) -> throwError $ PandocFileReadError u + Left _ -> throwError $ PandocFileReadError u + readFileLazy s = do + eitherBS <- liftIO (tryIOError $ BL.readFile s) + case eitherBS of + Right bs -> return bs + Left _ -> throwError $ PandocFileReadError s + readFileStrict s = do + eitherBS <- liftIO (tryIOError $ B.readFile s) + case eitherBS of + Right bs -> return bs + Left _ -> throwError $ PandocFileReadError s + -- TODO: Make this more sensitive to the different sorts of failure + readDataFile mfp fname = do + eitherBS <- liftIO (tryIOError $ IO.readDataFile mfp fname) + case eitherBS of + Right bs -> return bs + Left _ -> throwError $ PandocFileReadError fname + glob = liftIO . IO.glob + getModificationTime fp = do + eitherMtime <- liftIO (tryIOError $ IO.getModificationTime fp) + case eitherMtime of + Right mtime -> return mtime + Left _ -> throwError $ PandocFileReadError fp + getCommonState = PandocIO $ lift get + putCommonState x = PandocIO $ lift $ put x + logOutput level msg = + liftIO $ UTF8.hPutStrLn stderr $ printf "%-7s %s" (show level) msg + +-- | Specialized version of parseURIReference that disallows +-- single-letter schemes. Reason: these are usually windows absolute +-- paths. +parseURIReference' :: String -> Maybe URI +parseURIReference' s = + case parseURIReference s of + Just u + | length (uriScheme u) > 2 -> Just u + | null (uriScheme u) -> Just u -- protocol-relative + _ -> Nothing + +-- | Fetch an image or other item from the local filesystem or the net. +-- Returns raw content and maybe mime type. +fetchItem :: PandocMonad m + => Maybe String + -> String + -> m (B.ByteString, Maybe MimeType) +fetchItem sourceURL s = do + mediabag <- getMediaBag + case lookupMedia s mediabag of + Just (mime, bs) -> return $ (BL.toStrict bs, Just mime) + Nothing -> downloadOrRead sourceURL s + +downloadOrRead :: PandocMonad m + => Maybe String + -> String + -> m (B.ByteString, Maybe MimeType) +downloadOrRead sourceURL s = do + case (sourceURL >>= parseURIReference' . + ensureEscaped, ensureEscaped s) of + (Just u, s') -> -- try fetching from relative path at source + case parseURIReference' s' of + Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u + Nothing -> openURL s' -- will throw error + (Nothing, s'@('/':'/':_)) -> -- protocol-relative URI + case parseURIReference' s' of + Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon + Nothing -> openURL s' -- will throw error + (Nothing, s') -> + case parseURI s' of -- requires absolute URI + -- We don't want to treat C:/ as a scheme: + Just u' | length (uriScheme u') > 2 -> openURL (show u') + Just u' | uriScheme u' == "file:" -> + readLocalFile $ dropWhile (=='/') (uriPath u') + _ -> readLocalFile fp -- get from local file system + where readLocalFile f = do + cont <- readFileStrict f + return (cont, mime) + httpcolon = URI{ uriScheme = "http:", + uriAuthority = Nothing, + uriPath = "", + uriQuery = "", + uriFragment = "" } + dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#') + fp = unEscapeString $ dropFragmentAndQuery s + mime = case takeExtension fp of + ".gz" -> getMimeType $ dropExtension fp + ".svgz" -> getMimeType $ dropExtension fp ++ ".svg" + x -> getMimeType x + ensureEscaped = escapeURIString isAllowedInURI . map convertSlash + convertSlash '\\' = '/' + convertSlash x = x + +data PureState = PureState { stStdGen :: StdGen + , stWord8Store :: [Word8] -- should be + -- inifinite, + -- i.e. [1..] + , stUniqStore :: [Int] -- should be + -- inifinite and + -- contain every + -- element at most + -- once, e.g. [1..] + , stEnv :: [(String, String)] + , stTime :: UTCTime + , stTimeZone :: TimeZone + , stReferenceDocx :: Archive + , stReferenceODT :: Archive + , stFiles :: FileTree + , stUserDataDir :: FileTree + , stCabalDataDir :: FileTree + , stFontFiles :: [FilePath] + } + +instance Default PureState where + def = PureState { stStdGen = mkStdGen 1848 + , stWord8Store = [1..] + , stUniqStore = [1..] + , stEnv = [("USER", "pandoc-user")] + , stTime = posixSecondsToUTCTime 0 + , stTimeZone = utc + , stReferenceDocx = emptyArchive + , stReferenceODT = emptyArchive + , stFiles = mempty + , stUserDataDir = mempty + , stCabalDataDir = mempty + , stFontFiles = [] + } + + +getPureState :: PandocPure PureState +getPureState = PandocPure $ lift $ lift $ get + +getsPureState :: (PureState -> a) -> PandocPure a +getsPureState f = f <$> getPureState + +putPureState :: PureState -> PandocPure () +putPureState ps= PandocPure $ lift $ lift $ put ps + +modifyPureState :: (PureState -> PureState) -> PandocPure () +modifyPureState f = PandocPure $ lift $ lift $ modify f + + +data FileInfo = FileInfo { infoFileMTime :: UTCTime + , infoFileContents :: B.ByteString + } + +newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo} + deriving (Monoid) + +getFileInfo :: FilePath -> FileTree -> Maybe FileInfo +getFileInfo fp tree = M.lookup fp $ unFileTree tree + + +newtype PandocPure a = PandocPure { + unPandocPure :: ExceptT PandocError + (StateT CommonState (State PureState)) a + } deriving ( Functor + , Applicative + , Monad + , MonadError PandocError + ) + +runPure :: PandocPure a -> Either PandocError a +runPure x = flip evalState def $ + flip evalStateT def $ + runExceptT $ + unPandocPure x + +instance PandocMonad PandocPure where + lookupEnv s = do + env <- getsPureState stEnv + return (lookup s env) + + getCurrentTime = getsPureState stTime + + getCurrentTimeZone = getsPureState stTimeZone + + newStdGen = do + g <- getsPureState stStdGen + let (_, nxtGen) = next g + modifyPureState $ \st -> st { stStdGen = nxtGen } + return g + + newUniqueHash = do + uniqs <- getsPureState stUniqStore + case uniqs of + u : us -> do + modifyPureState $ \st -> st { stUniqStore = us } + return u + _ -> M.fail "uniq store ran out of elements" + openURL _ = throwError $ PandocSomeError "Cannot open URL in PandocPure" + readFileLazy fp = do + fps <- getsPureState stFiles + case infoFileContents <$> getFileInfo fp fps of + Just bs -> return (BL.fromStrict bs) + Nothing -> throwError $ PandocFileReadError fp + readFileStrict fp = do + fps <- getsPureState stFiles + case infoFileContents <$> getFileInfo fp fps of + Just bs -> return bs + Nothing -> throwError $ PandocFileReadError fp + readDataFile Nothing "reference.docx" = do + (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx + readDataFile Nothing "reference.odt" = do + (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT + readDataFile Nothing fname = do + let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname + readFileStrict fname' + readDataFile (Just userDir) fname = do + userDirFiles <- getsPureState stUserDataDir + case infoFileContents <$> (getFileInfo (userDir </> fname) userDirFiles) of + Just bs -> return bs + Nothing -> readDataFile Nothing fname + + glob s = do + fontFiles <- getsPureState stFontFiles + return (filter (match (compile s)) fontFiles) + + getModificationTime fp = do + fps <- getsPureState stFiles + case infoFileMTime <$> (getFileInfo fp fps) of + Just tm -> return tm + Nothing -> throwError $ PandocFileReadError fp + + getCommonState = PandocPure $ lift $ get + putCommonState x = PandocPure $ lift $ put x + + logOutput _level _msg = return () + +instance PandocMonad m => PandocMonad (ParserT s st m) where + lookupEnv = lift . lookupEnv + getCurrentTime = lift getCurrentTime + getCurrentTimeZone = lift getCurrentTimeZone + newStdGen = lift newStdGen + newUniqueHash = lift newUniqueHash + openURL = lift . openURL + readFileLazy = lift . readFileLazy + readFileStrict = lift . readFileStrict + readDataFile mbuserdir = lift . readDataFile mbuserdir + glob = lift . glob + getModificationTime = lift . getModificationTime + getCommonState = lift getCommonState + putCommonState = lift . putCommonState + logOutput lvl = lift . logOutput lvl + +instance PandocMonad m => PandocMonad (ReaderT r m) where + lookupEnv = lift . lookupEnv + getCurrentTime = lift getCurrentTime + getCurrentTimeZone = lift getCurrentTimeZone + newStdGen = lift newStdGen + newUniqueHash = lift newUniqueHash + openURL = lift . openURL + readFileLazy = lift . readFileLazy + readFileStrict = lift . readFileStrict + readDataFile mbuserdir = lift . readDataFile mbuserdir + glob = lift . glob + getModificationTime = lift . getModificationTime + getCommonState = lift getCommonState + putCommonState = lift . putCommonState + logOutput lvl = lift . logOutput lvl + +instance (PandocMonad m, Monoid w) => PandocMonad (WriterT w m) where + lookupEnv = lift . lookupEnv + getCurrentTime = lift getCurrentTime + getCurrentTimeZone = lift getCurrentTimeZone + newStdGen = lift newStdGen + newUniqueHash = lift newUniqueHash + openURL = lift . openURL + readFileLazy = lift . readFileLazy + readFileStrict = lift . readFileStrict + readDataFile mbuserdir = lift . readDataFile mbuserdir + glob = lift . glob + getModificationTime = lift . getModificationTime + getCommonState = lift getCommonState + putCommonState = lift . putCommonState + logOutput lvl = lift . logOutput lvl + +instance (PandocMonad m, Monoid w) => PandocMonad (RWST r w st m) where + lookupEnv = lift . lookupEnv + getCurrentTime = lift getCurrentTime + getCurrentTimeZone = lift getCurrentTimeZone + newStdGen = lift newStdGen + newUniqueHash = lift newUniqueHash + openURL = lift . openURL + readFileLazy = lift . readFileLazy + readFileStrict = lift . readFileStrict + readDataFile mbuserdir = lift . readDataFile mbuserdir + glob = lift . glob + getModificationTime = lift . getModificationTime + getCommonState = lift getCommonState + putCommonState = lift . putCommonState + logOutput lvl = lift . logOutput lvl + +instance PandocMonad m => PandocMonad (StateT st m) where + lookupEnv = lift . lookupEnv + getCurrentTime = lift getCurrentTime + getCurrentTimeZone = lift getCurrentTimeZone + newStdGen = lift newStdGen + newUniqueHash = lift newUniqueHash + openURL = lift . openURL + readFileLazy = lift . readFileLazy + readFileStrict = lift . readFileStrict + readDataFile mbuserdir = lift . readDataFile mbuserdir + glob = lift . glob + getModificationTime = lift . getModificationTime + getCommonState = lift getCommonState + putCommonState = lift . putCommonState + logOutput lvl = lift . logOutput lvl + diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 5e26771fe..b624f4cb0 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -33,27 +33,39 @@ module Text.Pandoc.Error (PandocError(..), handleError) where import Text.Parsec.Error import Text.Parsec.Pos hiding (Line) -import GHC.Generics (Generic) import Data.Generics (Typeable) +import GHC.Generics (Generic) import Control.Exception (Exception) +import Text.Pandoc.Shared (err) type Input = String -data PandocError = -- | Generic parse failure - ParseFailure String - -- | Error thrown by a Parsec parser - | ParsecError Input ParseError +data PandocError = PandocFileReadError FilePath + | PandocShouldNeverHappenError String + | PandocSomeError String + | PandocParseError String + | PandocParsecError Input ParseError deriving (Show, Typeable, Generic) + +-- data PandocError = -- | Generic parse failure +-- ParseFailure String +-- -- | Error thrown by a Parsec parser +-- | ParsecError Input ParseError +-- deriving (Show, Typeable, Generic) + instance Exception PandocError --- | An unsafe method to handle `PandocError`s. -handleError :: Either PandocError a -> a -handleError (Right r) = r -handleError (Left err) = - case err of - ParseFailure string -> error string - ParsecError input err' -> +-- | Handle PandocError by exiting with an error message. +handleError :: Either PandocError a -> IO a +handleError (Right r) = return r +handleError (Left e) = + case e of + PandocFileReadError fp -> err 61 $ "problem reading " ++ fp + PandocShouldNeverHappenError s -> err 62 s + PandocSomeError s -> err 63 s + PandocParseError s -> err 64 s + PandocParsecError input err' -> let errPos = errorPos err' errLine = sourceLine errPos errColumn = sourceColumn errPos @@ -63,6 +75,5 @@ handleError (Left err) = ,"\n", replicate (errColumn - 1) ' ' ,"^"] else "" - in error $ "\nError at " ++ show err' - ++ errorInFile + in err 65 $ "\nError at " ++ show err' ++ errorInFile diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs new file mode 100644 index 000000000..d5e59e8e1 --- /dev/null +++ b/src/Text/Pandoc/Extensions.hs @@ -0,0 +1,267 @@ +{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} +{- +Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Extensions + Copyright : Copyright (C) 2012-2016 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Data structures and functions for representing markup extensions. +-} +module Text.Pandoc.Extensions ( Extension(..) + , Extensions + , emptyExtensions + , extensionsFromList + , extensionEnabled + , enableExtension + , disableExtension + , pandocExtensions + , plainExtensions + , strictExtensions + , phpMarkdownExtraExtensions + , githubMarkdownExtensions + , multimarkdownExtensions ) +where +import Data.Bits (testBit, setBit, clearBit) +import Data.Data (Data) +import Data.Typeable (Typeable) +import GHC.Generics (Generic) + +newtype Extensions = Extensions Integer + deriving (Show, Read, Eq, Ord, Data, Typeable, Generic) + +extensionsFromList :: [Extension] -> Extensions +extensionsFromList = foldr enableExtension emptyExtensions + +emptyExtensions :: Extensions +emptyExtensions = Extensions 0 + +extensionEnabled :: Extension -> Extensions -> Bool +extensionEnabled x (Extensions exts) = testBit exts (fromEnum x) + +enableExtension :: Extension -> Extensions -> Extensions +enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x)) + +disableExtension :: Extension -> Extensions -> Extensions +disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x)) + +-- | Individually selectable syntax extensions. +data Extension = + Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes + | Ext_inline_notes -- ^ Pandoc-style inline notes + | Ext_pandoc_title_block -- ^ Pandoc title block + | Ext_yaml_metadata_block -- ^ YAML metadata block + | Ext_mmd_title_block -- ^ Multimarkdown metadata block + | Ext_table_captions -- ^ Pandoc-style table captions + | Ext_implicit_figures -- ^ A paragraph with just an image is a figure + | Ext_simple_tables -- ^ Pandoc-style simple tables + | Ext_multiline_tables -- ^ Pandoc-style multiline tables + | Ext_grid_tables -- ^ Grid tables (pandoc, reST) + | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra) + | Ext_citations -- ^ Pandoc/citeproc citations + | Ext_raw_tex -- ^ Allow raw TeX (other than math) + | Ext_raw_html -- ^ Allow raw HTML + | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$ + | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\] + | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\] + | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) + | Ext_fenced_code_blocks -- ^ Parse fenced code blocks + | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks + | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks + | Ext_inline_code_attributes -- ^ Allow attributes on inline code + | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks + | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags + | Ext_native_spans -- ^ Use Span inlines for contents of <span> + | Ext_bracketed_spans -- ^ Bracketed spans with attributes + | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown + -- iff container has attribute 'markdown' + | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak + | Ext_link_attributes -- ^ link and image attributes + | Ext_mmd_link_attributes -- ^ MMD style reference link attributes + | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links + | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters + | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank + | Ext_startnum -- ^ Make start number of ordered list significant + | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php + | Ext_compact_definition_lists -- ^ Definition lists without + -- space between items, and disallow laziness + | Ext_example_lists -- ^ Markdown-style numbered examples + | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable + | Ext_angle_brackets_escapable -- ^ Make < and > escapable + | Ext_intraword_underscores -- ^ Treat underscore inside word as literal + | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote + | Ext_blank_before_header -- ^ Require blank line before a header + | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax + | Ext_superscript -- ^ Superscript using ^this^ syntax + | Ext_subscript -- ^ Subscript using ~this~ syntax + | Ext_hard_line_breaks -- ^ All newlines become hard line breaks + | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored + | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between + -- East Asian wide characters + | Ext_literate_haskell -- ^ Enable literate Haskell conventions + | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions + | Ext_emoji -- ^ Support emoji like :smile: + | Ext_auto_identifiers -- ^ Automatic identifiers for headers + | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers + | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} + | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] + | Ext_implicit_header_references -- ^ Implicit reference links for headers + | Ext_line_blocks -- ^ RST style line blocks + | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML + | Ext_shortcut_reference_links -- ^ Shortcut reference links + | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes + | Ext_old_dashes -- ^ -- = em, - before number = en + deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) + +pandocExtensions :: Extensions +pandocExtensions = extensionsFromList + [ Ext_footnotes + , Ext_inline_notes + , Ext_pandoc_title_block + , Ext_yaml_metadata_block + , Ext_table_captions + , Ext_implicit_figures + , Ext_simple_tables + , Ext_multiline_tables + , Ext_grid_tables + , Ext_pipe_tables + , Ext_citations + , Ext_raw_tex + , Ext_raw_html + , Ext_tex_math_dollars + , Ext_latex_macros + , Ext_fenced_code_blocks + , Ext_fenced_code_attributes + , Ext_backtick_code_blocks + , Ext_inline_code_attributes + , Ext_markdown_in_html_blocks + , Ext_native_divs + , Ext_native_spans + , Ext_bracketed_spans + , Ext_escaped_line_breaks + , Ext_fancy_lists + , Ext_startnum + , Ext_definition_lists + , Ext_example_lists + , Ext_all_symbols_escapable + , Ext_intraword_underscores + , Ext_blank_before_blockquote + , Ext_blank_before_header + , Ext_strikeout + , Ext_superscript + , Ext_subscript + , Ext_auto_identifiers + , Ext_header_attributes + , Ext_link_attributes + , Ext_implicit_header_references + , Ext_line_blocks + , Ext_shortcut_reference_links + , Ext_smart + ] + +plainExtensions :: Extensions +plainExtensions = extensionsFromList + [ Ext_table_captions + , Ext_implicit_figures + , Ext_simple_tables + , Ext_multiline_tables + , Ext_grid_tables + , Ext_latex_macros + , Ext_fancy_lists + , Ext_startnum + , Ext_definition_lists + , Ext_example_lists + , Ext_intraword_underscores + , Ext_blank_before_blockquote + , Ext_blank_before_header + , Ext_strikeout + ] + +phpMarkdownExtraExtensions :: Extensions +phpMarkdownExtraExtensions = extensionsFromList + [ Ext_footnotes + , Ext_pipe_tables + , Ext_raw_html + , Ext_markdown_attribute + , Ext_fenced_code_blocks + , Ext_definition_lists + , Ext_intraword_underscores + , Ext_header_attributes + , Ext_link_attributes + , Ext_abbreviations + , Ext_shortcut_reference_links + ] + +githubMarkdownExtensions :: Extensions +githubMarkdownExtensions = extensionsFromList + [ Ext_angle_brackets_escapable + , Ext_pipe_tables + , Ext_raw_html + , Ext_fenced_code_blocks + , Ext_auto_identifiers + , Ext_ascii_identifiers + , Ext_backtick_code_blocks + , Ext_autolink_bare_uris + , Ext_intraword_underscores + , Ext_strikeout + , Ext_hard_line_breaks + , Ext_emoji + , Ext_lists_without_preceding_blankline + , Ext_shortcut_reference_links + ] + +multimarkdownExtensions :: Extensions +multimarkdownExtensions = extensionsFromList + [ Ext_pipe_tables + , Ext_raw_html + , Ext_markdown_attribute + , Ext_mmd_link_attributes + -- , Ext_raw_tex + -- Note: MMD's raw TeX syntax requires raw TeX to be + -- enclosed in HTML comment + , Ext_tex_math_double_backslash + , Ext_intraword_underscores + , Ext_mmd_title_block + , Ext_footnotes + , Ext_definition_lists + , Ext_all_symbols_escapable + , Ext_implicit_header_references + , Ext_auto_identifiers + , Ext_mmd_header_identifiers + , Ext_implicit_figures + -- Note: MMD's syntax for superscripts and subscripts + -- is a bit more permissive than pandoc's, allowing + -- e^2 and a~1 instead of e^2^ and a~1~, so even with + -- these options we don't have full support for MMD + -- superscripts and subscripts, but there's no reason + -- not to include these: + , Ext_superscript + , Ext_subscript + ] + +strictExtensions :: Extensions +strictExtensions = extensionsFromList + [ Ext_raw_html + , Ext_shortcut_reference_links + ] + diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index e46c91eda..cc22c06ca 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -53,7 +53,7 @@ import Control.Monad import Data.Bits import Data.Binary import Data.Binary.Get -import Text.Pandoc.Shared (safeRead, hush) +import Text.Pandoc.Shared (safeRead) import Data.Default (Default) import Numeric (showFFloat) import Text.Pandoc.Definition @@ -240,7 +240,7 @@ pngSize img = do ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4, (shift h1 24) + (shift h2 16) + (shift h3 8) + h4) - _ -> (hush . Left) "PNG parse error" + _ -> Nothing -- "PNG parse error" let (dpix, dpiy) = findpHYs rest'' return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } @@ -269,7 +269,7 @@ gifSize img = do dpiX = 72, dpiY = 72 } - _ -> (hush . Left) "GIF parse error" + _ -> Nothing -- "GIF parse error" jpegSize :: ByteString -> Either String ImageSize jpegSize img = diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index eea25fadf..fe99be5fe 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -42,6 +42,7 @@ import System.Directory (createDirectoryIfMissing) import qualified Data.Map as M import qualified Data.ByteString.Lazy as BL import Control.Monad (when) +import Control.Monad.Trans (MonadIO(..)) import Text.Pandoc.MIME (MimeType, getMimeTypeDef) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Maybe (fromMaybe) @@ -88,11 +89,14 @@ mediaDirectory (MediaBag mediamap) = -- | Extract contents of MediaBag to a given directory. Print informational -- messages if 'verbose' is true. -extractMediaBag :: Bool +-- TODO: eventually we may want to put this into PandocMonad +-- In PandocPure, it could write to the fake file system... +extractMediaBag :: MonadIO m + => Bool -> FilePath -> MediaBag - -> IO () -extractMediaBag verbose dir (MediaBag mediamap) = do + -> m () +extractMediaBag verbose dir (MediaBag mediamap) = liftIO $ do sequence_ $ M.foldWithKey (\fp (_ ,contents) -> ((writeMedia verbose dir (Posix.joinPath fp, contents)):)) [] mediamap diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 48bc5f4eb..02ae9f771 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -29,13 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Data structures and functions for representing parser and writer options. -} -module Text.Pandoc.Options ( Extension(..) - , pandocExtensions - , plainExtensions - , strictExtensions - , phpMarkdownExtraExtensions - , githubMarkdownExtensions - , multimarkdownExtensions +module Text.Pandoc.Options ( module Text.Pandoc.Extensions , ReaderOptions(..) , HTMLMathMethod (..) , CiteMethod (..) @@ -43,6 +37,7 @@ module Text.Pandoc.Options ( Extension(..) , HTMLSlideVariant (..) , EPUBVersion (..) , WrapOption (..) + , Verbosity (..) , TopLevelDivision (..) , WriterOptions (..) , TrackChanges (..) @@ -50,246 +45,37 @@ module Text.Pandoc.Options ( Extension(..) , def , isEnabled ) where -import Data.Set (Set) -import qualified Data.Set as Set +import Text.Pandoc.Extensions import Data.Default import Text.Pandoc.Highlighting (Style, pygments) -import Text.Pandoc.MediaBag (MediaBag) import Data.Data (Data) import Data.Typeable (Typeable) import GHC.Generics (Generic) --- | Individually selectable syntax extensions. -data Extension = - Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes - | Ext_inline_notes -- ^ Pandoc-style inline notes - | Ext_pandoc_title_block -- ^ Pandoc title block - | Ext_yaml_metadata_block -- ^ YAML metadata block - | Ext_mmd_title_block -- ^ Multimarkdown metadata block - | Ext_table_captions -- ^ Pandoc-style table captions - | Ext_implicit_figures -- ^ A paragraph with just an image is a figure - | Ext_simple_tables -- ^ Pandoc-style simple tables - | Ext_multiline_tables -- ^ Pandoc-style multiline tables - | Ext_grid_tables -- ^ Grid tables (pandoc, reST) - | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra) - | Ext_citations -- ^ Pandoc/citeproc citations - | Ext_raw_tex -- ^ Allow raw TeX (other than math) - | Ext_raw_html -- ^ Allow raw HTML - | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$ - | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\] - | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\] - | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) - | Ext_fenced_code_blocks -- ^ Parse fenced code blocks - | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks - | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks - | Ext_inline_code_attributes -- ^ Allow attributes on inline code - | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks - | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags - | Ext_native_spans -- ^ Use Span inlines for contents of <span> - | Ext_bracketed_spans -- ^ Bracketed spans with attributes - | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown - -- iff container has attribute 'markdown' - | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak - | Ext_link_attributes -- ^ link and image attributes - | Ext_mmd_link_attributes -- ^ MMD style reference link attributes - | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links - | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters - | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank - | Ext_startnum -- ^ Make start number of ordered list significant - | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php - | Ext_compact_definition_lists -- ^ Definition lists without - -- space between items, and disallow laziness - | Ext_example_lists -- ^ Markdown-style numbered examples - | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable - | Ext_angle_brackets_escapable -- ^ Make < and > escapable - | Ext_intraword_underscores -- ^ Treat underscore inside word as literal - | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote - | Ext_blank_before_header -- ^ Require blank line before a header - | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax - | Ext_superscript -- ^ Superscript using ^this^ syntax - | Ext_subscript -- ^ Subscript using ~this~ syntax - | Ext_hard_line_breaks -- ^ All newlines become hard line breaks - | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored - | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between - -- East Asian wide characters - | Ext_literate_haskell -- ^ Enable literate Haskell conventions - | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions - | Ext_emoji -- ^ Support emoji like :smile: - | Ext_auto_identifiers -- ^ Automatic identifiers for headers - | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers - | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} - | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] - | Ext_implicit_header_references -- ^ Implicit reference links for headers - | Ext_line_blocks -- ^ RST style line blocks - | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML - | Ext_shortcut_reference_links -- ^ Shortcut reference links - deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) - -pandocExtensions :: Set Extension -pandocExtensions = Set.fromList - [ Ext_footnotes - , Ext_inline_notes - , Ext_pandoc_title_block - , Ext_yaml_metadata_block - , Ext_table_captions - , Ext_implicit_figures - , Ext_simple_tables - , Ext_multiline_tables - , Ext_grid_tables - , Ext_pipe_tables - , Ext_citations - , Ext_raw_tex - , Ext_raw_html - , Ext_tex_math_dollars - , Ext_latex_macros - , Ext_fenced_code_blocks - , Ext_fenced_code_attributes - , Ext_backtick_code_blocks - , Ext_inline_code_attributes - , Ext_markdown_in_html_blocks - , Ext_native_divs - , Ext_native_spans - , Ext_bracketed_spans - , Ext_escaped_line_breaks - , Ext_fancy_lists - , Ext_startnum - , Ext_definition_lists - , Ext_example_lists - , Ext_all_symbols_escapable - , Ext_intraword_underscores - , Ext_blank_before_blockquote - , Ext_blank_before_header - , Ext_strikeout - , Ext_superscript - , Ext_subscript - , Ext_auto_identifiers - , Ext_header_attributes - , Ext_link_attributes - , Ext_implicit_header_references - , Ext_line_blocks - , Ext_shortcut_reference_links - ] - -plainExtensions :: Set Extension -plainExtensions = Set.fromList - [ Ext_table_captions - , Ext_implicit_figures - , Ext_simple_tables - , Ext_multiline_tables - , Ext_grid_tables - , Ext_latex_macros - , Ext_fancy_lists - , Ext_startnum - , Ext_definition_lists - , Ext_example_lists - , Ext_intraword_underscores - , Ext_blank_before_blockquote - , Ext_blank_before_header - , Ext_strikeout - ] - -phpMarkdownExtraExtensions :: Set Extension -phpMarkdownExtraExtensions = Set.fromList - [ Ext_footnotes - , Ext_pipe_tables - , Ext_raw_html - , Ext_markdown_attribute - , Ext_fenced_code_blocks - , Ext_definition_lists - , Ext_intraword_underscores - , Ext_header_attributes - , Ext_link_attributes - , Ext_abbreviations - , Ext_shortcut_reference_links - ] - -githubMarkdownExtensions :: Set Extension -githubMarkdownExtensions = Set.fromList - [ Ext_angle_brackets_escapable - , Ext_pipe_tables - , Ext_raw_html - , Ext_fenced_code_blocks - , Ext_auto_identifiers - , Ext_ascii_identifiers - , Ext_backtick_code_blocks - , Ext_autolink_bare_uris - , Ext_intraword_underscores - , Ext_strikeout - , Ext_hard_line_breaks - , Ext_emoji - , Ext_lists_without_preceding_blankline - , Ext_shortcut_reference_links - ] - -multimarkdownExtensions :: Set Extension -multimarkdownExtensions = Set.fromList - [ Ext_pipe_tables - , Ext_raw_html - , Ext_markdown_attribute - , Ext_mmd_link_attributes - -- , Ext_raw_tex - -- Note: MMD's raw TeX syntax requires raw TeX to be - -- enclosed in HTML comment - , Ext_tex_math_double_backslash - , Ext_intraword_underscores - , Ext_mmd_title_block - , Ext_footnotes - , Ext_definition_lists - , Ext_all_symbols_escapable - , Ext_implicit_header_references - , Ext_auto_identifiers - , Ext_mmd_header_identifiers - , Ext_implicit_figures - -- Note: MMD's syntax for superscripts and subscripts - -- is a bit more permissive than pandoc's, allowing - -- e^2 and a~1 instead of e^2^ and a~1~, so even with - -- these options we don't have full support for MMD - -- superscripts and subscripts, but there's no reason - -- not to include these: - , Ext_superscript - , Ext_subscript - ] - -strictExtensions :: Set Extension -strictExtensions = Set.fromList - [ Ext_raw_html - , Ext_shortcut_reference_links - ] - data ReaderOptions = ReaderOptions{ - readerExtensions :: Set Extension -- ^ Syntax extensions - , readerSmart :: Bool -- ^ Smart punctuation + readerExtensions :: Extensions -- ^ Syntax extensions , readerStandalone :: Bool -- ^ Standalone document with header , readerParseRaw :: Bool -- ^ Parse raw HTML, LaTeX , readerColumns :: Int -- ^ Number of columns in terminal , readerTabStop :: Int -- ^ Tab stop - , readerOldDashes :: Bool -- ^ Use pandoc <= 1.8.2.1 behavior - -- in parsing dashes; -- is em-dash; - -- - before numerial is en-dash , readerApplyMacros :: Bool -- ^ Apply macros to TeX math , readerIndentedCodeClasses :: [String] -- ^ Default classes for -- indented code blocks , readerDefaultImageExtension :: String -- ^ Default extension for images - , readerTrace :: Bool -- ^ Print debugging info , readerTrackChanges :: TrackChanges - , readerFileScope :: Bool -- ^ Parse before combining } deriving (Show, Read, Data, Typeable, Generic) instance Default ReaderOptions where def = ReaderOptions{ - readerExtensions = pandocExtensions - , readerSmart = False + readerExtensions = emptyExtensions , readerStandalone = False , readerParseRaw = False , readerColumns = 80 , readerTabStop = 4 - , readerOldDashes = False , readerApplyMacros = True , readerIndentedCodeClasses = [] , readerDefaultImageExtension = "" - , readerTrace = False , readerTrackChanges = AcceptChanges - , readerFileScope = False } -- @@ -354,20 +140,22 @@ data ReferenceLocation = EndOfBlock -- ^ End of block | EndOfDocument -- ^ at end of document deriving (Show, Read, Eq, Data, Typeable, Generic) +-- | Verbosity level. +data Verbosity = ERROR | WARNING | INFO | DEBUG + deriving (Show, Read, Eq, Data, Enum, Ord, Bounded, Typeable, Generic) + -- | Options for writers data WriterOptions = WriterOptions { writerTemplate :: Maybe String -- ^ Template to use , writerVariables :: [(String, String)] -- ^ Variables to set in template , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs , writerTableOfContents :: Bool -- ^ Include table of contents - , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5, Slidy or Slideous? , writerIncremental :: Bool -- ^ True if lists should be incremental , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML - , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) , writerNumberSections :: Bool -- ^ Number sections in LaTeX , writerNumberOffset :: [Int] -- ^ Starting number for section, subsection, ... , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML - , writerExtensions :: Set Extension -- ^ Markdown extensions that can be used + , writerExtensions :: Extensions -- ^ Markdown extensions that can be used , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , writerDpi :: Int -- ^ Dpi for pixel to/from inch/cm conversions , writerWrapText :: WrapOption -- ^ Option for wrapping text @@ -378,27 +166,19 @@ data WriterOptions = WriterOptions , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory , writerCiteMethod :: CiteMethod -- ^ How to print cites - , writerDocbook5 :: Bool -- ^ Produce DocBook5 - , writerHtml5 :: Bool -- ^ Produce HTML5 , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML - , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show , writerSlideLevel :: Maybe Int -- ^ Force header level of slides , writerTopLevelDivision :: TopLevelDivision -- ^ Type of top-level divisions , writerListings :: Bool -- ^ Use listings package for code - , writerHighlight :: Bool -- ^ Highlight source code - , writerHighlightStyle :: Style -- ^ Style to use for highlighting + , writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting + -- (Nothing = no highlighting) , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown - , writerTeXLigatures :: Bool -- ^ Use tex ligatures quotes, dashes in latex - , writerEpubVersion :: Maybe EPUBVersion -- ^ Nothing or EPUB version , writerEpubMetadata :: String -- ^ Metadata to include in EPUB , writerEpubStylesheet :: Maybe String -- ^ EPUB stylesheet specified at command line , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files) , writerTOCDepth :: Int -- ^ Number of levels to include in TOC - , writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified - , writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified - , writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader - , writerVerbose :: Bool -- ^ Verbose debugging output + , writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine , writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown } deriving (Show, Data, Typeable, Generic) @@ -408,14 +188,12 @@ instance Default WriterOptions where , writerVariables = [] , writerTabStop = 4 , writerTableOfContents = False - , writerSlideVariant = NoSlides , writerIncremental = False , writerHTMLMathMethod = PlainMath - , writerIgnoreNotes = False , writerNumberSections = False , writerNumberOffset = [0,0,0,0,0,0] , writerSectionDivs = False - , writerExtensions = pandocExtensions + , writerExtensions = emptyExtensions , writerReferenceLinks = False , writerDpi = 96 , writerWrapText = WrapAuto @@ -425,31 +203,22 @@ instance Default WriterOptions where , writerSourceURL = Nothing , writerUserDataDir = Nothing , writerCiteMethod = Citeproc - , writerDocbook5 = False - , writerHtml5 = False , writerHtmlQTags = False - , writerBeamer = False , writerSlideLevel = Nothing , writerTopLevelDivision = TopLevelDefault , writerListings = False - , writerHighlight = False - , writerHighlightStyle = pygments + , writerHighlightStyle = Just pygments , writerSetextHeaders = True - , writerTeXLigatures = True - , writerEpubVersion = Nothing , writerEpubMetadata = "" , writerEpubStylesheet = Nothing , writerEpubFonts = [] , writerEpubChapterLevel = 1 , writerTOCDepth = 3 - , writerReferenceODT = Nothing - , writerReferenceDocx = Nothing - , writerMediaBag = mempty - , writerVerbose = False + , writerReferenceDoc = Nothing , writerLaTeXArgs = [] , writerReferenceLocation = EndOfDocument } -- | Returns True if the given extension is enabled. isEnabled :: Extension -> WriterOptions -> Bool -isEnabled ext opts = ext `Set.member` (writerExtensions opts) +isEnabled ext opts = ext `extensionEnabled` (writerExtensions opts) diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 9faff1816..b3bbcb4f5 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -37,7 +37,7 @@ import qualified Data.ByteString as BS import Data.Monoid ((<>)) import System.Exit (ExitCode (..)) import System.FilePath -import System.IO (stderr, stdout) +import System.IO (stdout) import System.IO.Temp (withTempFile) import System.Directory import Data.Digest.Pure.SHA (showDigest, sha1) @@ -48,31 +48,37 @@ import Data.List (isInfixOf) import Data.Maybe (fromMaybe) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition +import Text.Pandoc.MediaBag import Text.Pandoc.Walk (walkM) -import Text.Pandoc.Shared (fetchItem', warn, withTempDir, inDirectory, - stringify) +import Text.Pandoc.Shared (warn, withTempDir, inDirectory, stringify) import Text.Pandoc.Writers.Shared (getField, metaToJSON) -import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..)) +import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), + Verbosity(..)) import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) import Text.Pandoc.Process (pipeProcess) +import Control.Monad.Trans (MonadIO(..)) import qualified Data.ByteString.Lazy as BL import qualified Codec.Picture as JP #ifdef _WINDOWS import Data.List (intercalate) #endif +import Text.Pandoc.Class (PandocIO, runIOorExplode, fetchItem, setMediaBag, runIO) #ifdef _WINDOWS changePathSeparators :: FilePath -> FilePath changePathSeparators = intercalate "/" . splitDirectories #endif -makePDF :: String -- ^ pdf creator (pdflatex, lualatex, +makePDF :: MonadIO m + => String -- ^ pdf creator (pdflatex, lualatex, -- xelatex, context, wkhtmltopdf) - -> (WriterOptions -> Pandoc -> String) -- ^ writer + -> (WriterOptions -> Pandoc -> PandocIO String) -- ^ writer -> WriterOptions -- ^ options + -> Verbosity -- ^ verbosity level + -> MediaBag -- ^ media -> Pandoc -- ^ document - -> IO (Either ByteString ByteString) -makePDF "wkhtmltopdf" writer opts doc@(Pandoc meta _) = do + -> m (Either ByteString ByteString) +makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = liftIO $ do let mathArgs = case writerHTMLMathMethod opts of -- with MathJax, wait til all math is rendered: MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });", @@ -93,34 +99,40 @@ makePDF "wkhtmltopdf" writer opts doc@(Pandoc meta _) = do ,("margin-left", fromMaybe (Just "1.25in") (getField "margin-left" meta')) ] - let source = writer opts doc - html2pdf (writerVerbose opts) args source -makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do - doc' <- handleImages opts tmpdir doc - let source = writer opts doc' - args = writerLaTeXArgs opts - case takeBaseName program of - "context" -> context2pdf (writerVerbose opts) tmpdir source - prog | prog `elem` ["pdflatex", "lualatex", "xelatex"] - -> tex2pdf' (writerVerbose opts) args tmpdir program source - _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program + source <- runIOorExplode $ writer opts doc + html2pdf verbosity args source +makePDF program writer opts verbosity mediabag doc = + liftIO $ withTempDir "tex2pdf." $ \tmpdir -> do + doc' <- handleImages opts mediabag tmpdir doc + source <- runIOorExplode $ writer opts doc' + let args = writerLaTeXArgs opts + case takeBaseName program of + "context" -> context2pdf verbosity tmpdir source + prog | prog `elem` ["pdflatex", "lualatex", "xelatex"] + -> tex2pdf' verbosity args tmpdir program source + _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program handleImages :: WriterOptions + -> MediaBag -> FilePath -- ^ temp dir to store images -> Pandoc -- ^ document -> IO Pandoc -handleImages opts tmpdir = walkM (convertImages tmpdir) <=< walkM (handleImage' opts tmpdir) +handleImages opts mediabag tmpdir = + walkM (convertImages tmpdir) <=< walkM (handleImage' opts mediabag tmpdir) handleImage' :: WriterOptions + -> MediaBag -> FilePath -> Inline -> IO Inline -handleImage' opts tmpdir (Image attr ils (src,tit)) = do +handleImage' opts mediabag tmpdir (Image attr ils (src,tit)) = do exists <- doesFileExist src if exists then return $ Image attr ils (src,tit) else do - res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src + res <- runIO $ do + setMediaBag mediabag + fetchItem (writerSourceURL opts) src case res of Right (contents, Just mime) -> do let ext = fromMaybe (takeExtension src) $ @@ -133,7 +145,7 @@ handleImage' opts tmpdir (Image attr ils (src,tit)) = do warn $ "Could not find image `" ++ src ++ "', skipping..." -- return alt text return $ Emph ils -handleImage' _ _ x = return x +handleImage' _ _ _ x = return x convertImages :: FilePath -> Inline -> IO Inline convertImages tmpdir (Image attr ils (src, tit)) = do @@ -164,17 +176,17 @@ convertImage tmpdir fname = mime = getMimeType fname doNothing = return (Right fname) -tex2pdf' :: Bool -- ^ Verbose output +tex2pdf' :: Verbosity -- ^ Verbosity level -> [String] -- ^ Arguments to the latex-engine -> FilePath -- ^ temp directory for output -> String -- ^ tex program -> String -- ^ tex source -> IO (Either ByteString ByteString) -tex2pdf' verbose args tmpDir program source = do +tex2pdf' verbosity args tmpDir program source = do let numruns = if "\\tableofcontents" `isInfixOf` source then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks - (exit, log', mbPdf) <- runTeXProgram verbose program args 1 numruns tmpDir source + (exit, log', mbPdf) <- runTeXProgram verbosity program args 1 numruns tmpDir source case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' @@ -212,9 +224,9 @@ extractConTeXtMsg log' = do -- Run a TeX program on an input bytestring and return (exit code, -- contents of stdout, contents of produced PDF if any). Rerun -- a fixed number of times to resolve references. -runTeXProgram :: Bool -> String -> [String] -> Int -> Int -> FilePath -> String - -> IO (ExitCode, ByteString, Maybe ByteString) -runTeXProgram verbose program args runNumber numRuns tmpDir source = do +runTeXProgram :: Verbosity -> String -> [String] -> Int -> Int -> FilePath + -> String -> IO (ExitCode, ByteString, Maybe ByteString) +runTeXProgram verbosity program args runNumber numRuns tmpDir source = do let file = tmpDir </> "input.tex" exists <- doesFileExist file unless exists $ UTF8.writeFile file source @@ -234,7 +246,7 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do $ lookup "TEXINPUTS" env' let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] - when (verbose && runNumber == 1) $ do + when (verbosity >= INFO && runNumber == 1) $ do putStrLn "[makePDF] temp dir:" putStrLn tmpDir' putStrLn "[makePDF] Command line:" @@ -246,14 +258,13 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do putStrLn $ "[makePDF] Contents of " ++ file' ++ ":" B.readFile file' >>= B.putStr putStr "\n" - (exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty - when verbose $ do + (exit, out) <- pipeProcess (Just env'') program programArgs BL.empty + when (verbosity >= INFO) $ do putStrLn $ "[makePDF] Run #" ++ show runNumber B.hPutStr stdout out - B.hPutStr stderr err putStr "\n" if runNumber <= numRuns - then runTeXProgram verbose program args (runNumber + 1) numRuns tmpDir source + then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source else do let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir pdfExists <- doesFileExist pdfFile @@ -263,19 +274,19 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do -- See https://github.com/jgm/pandoc/issues/1192. then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile else return Nothing - return (exit, out <> err, pdf) + return (exit, out, pdf) -html2pdf :: Bool -- ^ Verbose output +html2pdf :: Verbosity -- ^ Verbosity level -> [String] -- ^ Args to wkhtmltopdf -> String -- ^ HTML5 source -> IO (Either ByteString ByteString) -html2pdf verbose args source = do +html2pdf verbosity args source = do file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp UTF8.writeFile file source let programArgs = args ++ [file, pdfFile] env' <- getEnvironment - when verbose $ do + when (verbosity >= INFO) $ do putStrLn "[makePDF] Command line:" putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs) putStr "\n" @@ -285,12 +296,10 @@ html2pdf verbose args source = do putStrLn $ "[makePDF] Contents of " ++ file ++ ":" B.readFile file >>= B.putStr putStr "\n" - (exit, out, err) <- pipeProcess (Just env') "wkhtmltopdf" - programArgs BL.empty + (exit, out) <- pipeProcess (Just env') "wkhtmltopdf" programArgs BL.empty removeFile file - when verbose $ do + when (verbosity >= INFO) $ do B.hPutStr stdout out - B.hPutStr stderr err putStr "\n" pdfExists <- doesFileExist pdfFile mbPdf <- if pdfExists @@ -302,17 +311,16 @@ html2pdf verbose args source = do removeFile pdfFile return res else return Nothing - let log' = out <> err return $ case (exit, mbPdf) of - (ExitFailure _, _) -> Left log' + (ExitFailure _, _) -> Left out (ExitSuccess, Nothing) -> Left "" (ExitSuccess, Just pdf) -> Right pdf -context2pdf :: Bool -- ^ Verbose output +context2pdf :: Verbosity -- ^ Verbosity level -> FilePath -- ^ temp directory for output -> String -- ^ ConTeXt source -> IO (Either ByteString ByteString) -context2pdf verbose tmpDir source = inDirectory tmpDir $ do +context2pdf verbosity tmpDir source = inDirectory tmpDir $ do let file = "input.tex" UTF8.writeFile file source #ifdef _WINDOWS @@ -328,7 +336,7 @@ context2pdf verbose tmpDir source = inDirectory tmpDir $ do $ lookup "TEXINPUTS" env' let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] - when verbose $ do + when (verbosity >= INFO) $ do putStrLn "[makePDF] temp dir:" putStrLn tmpDir' putStrLn "[makePDF] Command line:" @@ -340,10 +348,9 @@ context2pdf verbose tmpDir source = inDirectory tmpDir $ do putStrLn $ "[makePDF] Contents of " ++ file ++ ":" B.readFile file >>= B.putStr putStr "\n" - (exit, out, err) <- pipeProcess (Just env'') "context" programArgs BL.empty - when verbose $ do + (exit, out) <- pipeProcess (Just env'') "context" programArgs BL.empty + when (verbosity >= INFO) $ do B.hPutStr stdout out - B.hPutStr stderr err putStr "\n" let pdfFile = replaceExtension file ".pdf" pdfExists <- doesFileExist pdfFile @@ -353,10 +360,9 @@ context2pdf verbose tmpDir source = inDirectory tmpDir $ do -- See https://github.com/jgm/pandoc/issues/1192. then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile else return Nothing - let log' = out <> err case (exit, mbPdf) of (ExitFailure _, _) -> do - let logmsg = extractConTeXtMsg log' + let logmsg = extractConTeXtMsg out return $ Left logmsg (ExitSuccess, Nothing) -> return $ Left "" (ExitSuccess, Just pdf) -> return $ Right pdf diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 110e34c6a..5e9ff7fd1 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -3,7 +3,9 @@ , GeneralizedNewtypeDeriving , TypeSynonymInstances , MultiParamTypeClasses -, FlexibleInstances #-} +, FlexibleInstances +, IncoherentInstances #-} + {- Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> @@ -65,7 +67,6 @@ module Text.Pandoc.Parsing ( anyLine, widthsFromIndices, gridTableWith, readWith, - readWithWarnings, readWithM, testStringWith, guardEnabled, @@ -163,7 +164,6 @@ module Text.Pandoc.Parsing ( anyLine, setSourceColumn, setSourceLine, newPos, - addWarning, (<+?>), extractIdClass ) @@ -740,11 +740,11 @@ lineBlockLines = try $ do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. tableWith :: Stream s m Char - => ParserT s ParserState m ([[Block]], [Alignment], [Int]) - -> ([Int] -> ParserT s ParserState m [[Block]]) + => ParserT s ParserState m ([Blocks], [Alignment], [Int]) + -> ([Int] -> ParserT s ParserState m [Blocks]) -> ParserT s ParserState m sep -> ParserT s ParserState m end - -> ParserT s ParserState m Block + -> ParserT s ParserState m Blocks tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser lines' <- rowParser indices `sepEndBy1` lineParser @@ -753,7 +753,7 @@ tableWith headerParser rowParser lineParser footerParser = try $ do let widths = if (indices == []) then replicate (length aligns) 0.0 else widthsFromIndices numColumns indices - return $ Table [] aligns widths heads lines' + return $ B.table mempty (zip aligns widths) heads lines' -- Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal @@ -787,9 +787,9 @@ widthsFromIndices numColumns' indices = -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). gridTableWith :: Stream [Char] m Char - => ParserT [Char] ParserState m [Block] -- ^ Block list parser + => ParserT [Char] ParserState m Blocks -- ^ Block list parser -> Bool -- ^ Headerless table - -> ParserT [Char] ParserState m Block + -> ParserT [Char] ParserState m Blocks gridTableWith blocks headless = tableWith (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter @@ -818,8 +818,8 @@ gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. gridTableHeader :: Stream [Char] m Char => Bool -- ^ Headerless table - -> ParserT [Char] ParserState m [Block] - -> ParserT [Char] ParserState m ([[Block]], [Alignment], [Int]) + -> ParserT [Char] ParserState m Blocks + -> ParserT [Char] ParserState m ([Blocks], [Alignment], [Int]) gridTableHeader headless blocks = try $ do optional blanklines dashes <- gridDashedLines '-' @@ -850,9 +850,9 @@ gridTableRawLine indices = do -- | Parse row of grid table. gridTableRow :: Stream [Char] m Char - => ParserT [Char] ParserState m [Block] + => ParserT [Char] ParserState m Blocks -> [Int] - -> ParserT [Char] ParserState m [[Block]] + -> ParserT [Char] ParserState m [Blocks] gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ @@ -867,7 +867,7 @@ removeOneLeadingSpace xs = where startsWithSpace "" = True startsWithSpace (y:_) = y == ' ' -compactifyCell :: [Block] -> [Block] +compactifyCell :: Blocks -> Blocks compactifyCell bs = head $ compactify [bs] -- | Parse footer for a grid table. @@ -883,7 +883,7 @@ readWithM :: (Monad m) -> String -- ^ input -> m (Either PandocError a) readWithM parser state input = - mapLeft (ParsecError input) `liftM` runParserT parser state "source" input + mapLeft (PandocParsecError input) `liftM` runParserT parser state "source" input -- | Parse a string with a given parser and state @@ -893,15 +893,6 @@ readWith :: Parser [Char] st a -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp -readWithWarnings :: Parser [Char] ParserState a - -> ParserState - -> String - -> Either PandocError (a, [String]) -readWithWarnings p = readWith $ do - doc <- p - warnings <- stateWarnings <$> getState - return (doc, warnings) - -- | Parse a string with @parser@ (for testing). testStringWith :: (Show a) => ParserT [Char] ParserState Identity a @@ -938,8 +929,8 @@ data ParserState = ParserState -- roles), 3) Additional classes (rest of Attr is unused)). stateCaption :: Maybe Inlines, -- ^ Caption in current environment stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed - stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context - stateWarnings :: [String] -- ^ Warnings generated by the parser + stateContainers :: [String], -- ^ parent include files + stateMarkdownAttribute :: Bool -- ^ True if in markdown=1 context } instance Default ParserState where @@ -1034,16 +1025,17 @@ defaultParserState = stateRstCustomRoles = M.empty, stateCaption = Nothing, stateInHtmlBlock = Nothing, - stateMarkdownAttribute = False, - stateWarnings = []} + stateContainers = [], + stateMarkdownAttribute = False + } -- | Succeed only if the extension is enabled. guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () -guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext +guardEnabled ext = getOption readerExtensions >>= guard . extensionEnabled ext -- | Succeed only if the extension is disabled. guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () -guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext +guardDisabled ext = getOption readerExtensions >>= guard . not . extensionEnabled ext -- | Update the position on which the last string ended. updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m () @@ -1098,10 +1090,10 @@ registerHeader (ident,classes,kvs) header' = do ids <- extractIdentifierList <$> getState exts <- getOption readerExtensions let insert' = M.insertWith (\_new old -> old) - if null ident && Ext_auto_identifiers `Set.member` exts + if null ident && Ext_auto_identifiers `extensionEnabled` exts then do let id' = uniqueIdent (B.toList header') ids - let id'' = if Ext_ascii_identifiers `Set.member` exts + let id'' = if Ext_ascii_identifiers `extensionEnabled` exts then catMaybes $ map toAsciiChar id' else id' updateState $ updateIdentifierList $ Set.insert id' @@ -1113,15 +1105,11 @@ registerHeader (ident,classes,kvs) header' = do updateState $ updateHeaderMap $ insert' header' ident return (ident,classes,kvs) --- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m () -failUnlessSmart = getOption readerSmart >>= guard - smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) => ParserT s st m Inlines -> ParserT s st m Inlines smartPunctuation inlineParser = do - failUnlessSmart + guardEnabled Ext_smart choice [ quoted inlineParser, apostrophe, dash, ellipses ] apostrophe :: Stream s m Char => ParserT s st m Inlines @@ -1195,7 +1183,7 @@ ellipses = try (string "..." >> return (B.str "\8230")) dash :: (HasReaderOptions st, Stream s m Char) => ParserT s st m Inlines dash = try $ do - oldDashes <- getOption readerOldDashes + oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions if oldDashes then do char '-' @@ -1272,12 +1260,6 @@ applyMacros' target = do return $ applyMacros macros target else return target --- | Append a warning to the log. -addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState () -addWarning mbpos msg = - updateState $ \st -> st{ - stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) : - stateWarnings st } infixr 5 <+?> (<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs index bc71f1392..294a38a1b 100644 --- a/src/Text/Pandoc/Process.hs +++ b/src/Text/Pandoc/Process.hs @@ -42,9 +42,9 @@ Version of 'System.Process.readProcessWithExitCode' that uses lazy bytestrings instead of strings and allows setting environment variables. @readProcessWithExitCode@ creates an external process, reads its -standard output and standard error strictly, waits until the process -terminates, and then returns the 'ExitCode' of the process, -the standard output, and the standard error. +standard output strictly, waits until the process +terminates, and then returns the 'ExitCode' of the process +and the standard output. stderr is inherited from the parent. If an asynchronous exception is thrown to the thread executing @readProcessWithExitCode@, the forked process will be terminated and @@ -57,25 +57,21 @@ pipeProcess -> FilePath -- ^ Filename of the executable (see 'proc' for details) -> [String] -- ^ any arguments -> BL.ByteString -- ^ standard input - -> IO (ExitCode,BL.ByteString,BL.ByteString) -- ^ exitcode, stdout, stderr + -> IO (ExitCode,BL.ByteString) -- ^ exitcode, stdout pipeProcess mbenv cmd args input = mask $ \restore -> do - (Just inh, Just outh, Just errh, pid) <- createProcess (proc cmd args) + (Just inh, Just outh, Nothing, pid) <- createProcess (proc cmd args) { env = mbenv, std_in = CreatePipe, std_out = CreatePipe, - std_err = CreatePipe } + std_err = Inherit } flip onException - (do hClose inh; hClose outh; hClose errh; + (do hClose inh; hClose outh; terminateProcess pid; waitForProcess pid) $ restore $ do -- fork off a thread to start consuming stdout out <- BL.hGetContents outh waitOut <- forkWait $ evaluate $ BL.length out - -- fork off a thread to start consuming stderr - err <- BL.hGetContents errh - waitErr <- forkWait $ evaluate $ BL.length err - -- now write and flush any input let writeInput = do unless (BL.null input) $ do @@ -87,15 +83,13 @@ pipeProcess mbenv cmd args input = -- wait on the output waitOut - waitErr hClose outh - hClose errh -- wait on the process ex <- waitForProcess pid - return (ex, out, err) + return (ex, out) forkWait :: IO a -> IO (IO a) forkWait a = do diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index d20d386e7..b0bcbd580 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -37,12 +37,13 @@ import Data.Text (unpack, pack) import Data.List (groupBy) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Error +import Text.Pandoc.Class (PandocMonad) -- | Parse a CommonMark formatted string into a 'Pandoc' structure. -readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc -readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack - where opts' = if readerSmart opts +readCommonMark :: PandocMonad m => ReaderOptions -> String -> m Pandoc +readCommonMark opts s = return $ + nodeToPandoc $ commonmarkToNode opts' $ pack s + where opts' = if extensionEnabled Ext_smart (readerExtensions opts) then [optNormalize, optSmart] else [optNormalize] diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 68552ccb3..bef256a93 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -13,10 +13,9 @@ import Control.Monad.State import Data.List (intersperse) import Data.Maybe (fromMaybe) import Text.TeXMath (readMathML, writeTeX) -import Text.Pandoc.Error (PandocError) -import Control.Monad.Except import Data.Default import Data.Foldable (asum) +import Text.Pandoc.Class (PandocMonad) {- @@ -502,7 +501,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] ?asciidoc-br? - line break from asciidoc docbook output -} -type DB = ExceptT PandocError (State DBState) +type DB m = StateT DBState m data DBState = DBState{ dbSectionLevel :: Int , dbQuoteType :: QuoteType @@ -523,10 +522,11 @@ instance Default DBState where , dbContent = [] } -readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc -readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs - where (bs , st') = flip runState (def{ dbContent = tree }) . runExceptT . mapM parseBlock $ tree - tree = normalizeTree . parseXML . handleInstructions $ inp +readDocBook :: PandocMonad m => ReaderOptions -> String -> m Pandoc +readDocBook _ inp = do + let tree = normalizeTree . parseXML . handleInstructions $ inp + (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree + return $ Pandoc (dbMeta st') (toList . mconcat $ bs) -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. @@ -538,7 +538,7 @@ handleInstructions xs = case break (=='<') xs of ([], '<':zs) -> '<' : handleInstructions zs (ys, zs) -> ys ++ handleInstructions zs -getFigure :: Element -> DB Blocks +getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do tit <- case filterChild (named "title") e of Just t -> getInlines t @@ -579,20 +579,20 @@ named s e = qName (elName e) == s -- -acceptingMetadata :: DB a -> DB a +acceptingMetadata :: PandocMonad m => DB m a -> DB m a acceptingMetadata p = do modify (\s -> s { dbAcceptsMeta = True } ) res <- p modify (\s -> s { dbAcceptsMeta = False }) return res -checkInMeta :: Monoid a => DB () -> DB a +checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a checkInMeta p = do accepts <- dbAcceptsMeta <$> get when accepts p return mempty -addMeta :: ToMetaValue a => String -> a -> DB () +addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m () addMeta field val = modify (setMeta field val) instance HasMeta DBState where @@ -631,7 +631,7 @@ addToStart toadd bs = -- function that is used by both mediaobject (in parseBlock) -- and inlinemediaobject (in parseInline) -- A DocBook mediaobject is a wrapper around a set of alternative presentations -getMediaobject :: Element -> DB Inlines +getMediaobject :: PandocMonad m => Element -> DB m Inlines getMediaobject e = do (imageUrl, attr) <- case filterChild (named "imageobject") e of @@ -658,11 +658,11 @@ getMediaobject e = do else (return figTitle, "fig:") liftM (imageWith attr imageUrl title) caption -getBlocks :: Element -> DB Blocks +getBlocks :: PandocMonad m => Element -> DB m Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) -parseBlock :: Content -> DB Blocks +parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s then return mempty @@ -902,7 +902,7 @@ parseBlock (Elem e) = lineItems = mapM getInlines $ filterChildren (named "line") e metaBlock = acceptingMetadata (getBlocks e) >> return mempty -getInlines :: Element -> DB Inlines +getInlines :: PandocMonad m => Element -> DB m Inlines getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e') strContentRecursive :: Element -> String @@ -913,7 +913,7 @@ elementToStr :: Content -> Content elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x -parseInline :: Content -> DB Inlines +parseInline :: PandocMonad m => Content -> DB m Inlines parseInline (Text (CData _ s _)) = return $ text s parseInline (CRef ref) = return $ maybe (text $ map toUpper ref) (text) $ lookupEntity ref diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 595c805bf..490fdf878 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -64,7 +64,7 @@ implemented, [-] means partially implemented): - [X] Math - [X] Link (links to an arbitrary bookmark create a span with the target as id and "anchor" class) - - [X] Image + - [X] Image - [X] Note (Footnotes and Endnotes are silently combined.) -} @@ -82,7 +82,7 @@ import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Combine import Text.Pandoc.Shared -import Text.Pandoc.MediaBag (insertMedia, MediaBag) +import Text.Pandoc.MediaBag (MediaBag) import Data.List (delete, intersect) import Text.TeXMath (writeTeX) import Data.Default (Default) @@ -96,27 +96,29 @@ import qualified Data.Sequence as Seq (null) #if !(MIN_VERSION_base(4,8,0)) import Data.Traversable (traverse) #endif - import Text.Pandoc.Error -import Control.Monad.Except +import Control.Monad.Except (throwError) +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P -readDocxWithWarnings :: ReaderOptions - -> B.ByteString - -> Either PandocError (Pandoc, MediaBag, [String]) -readDocxWithWarnings opts bytes +readDocx :: PandocMonad m + => ReaderOptions + -> B.ByteString + -> m Pandoc +readDocx opts bytes | Right archive <- toArchiveOrFail bytes , Right (docx, parserWarnings) <- archiveToDocxWithWarnings archive = do - (meta, blks, mediaBag, warnings) <- docxToOutput opts docx - return (Pandoc meta blks, mediaBag, parserWarnings ++ warnings) -readDocxWithWarnings _ _ = - Left (ParseFailure "couldn't parse docx file") - -readDocx :: ReaderOptions + mapM_ P.warning parserWarnings + (meta, blks) <- docxToOutput opts docx + return $ Pandoc meta blks +readDocx _ _ = + throwError $ PandocSomeError "couldn't parse docx file" + +readDocxWithWarnings :: PandocMonad m + => ReaderOptions -> B.ByteString - -> Either PandocError (Pandoc, MediaBag) -readDocx opts bytes = do - (pandoc, mediaBag, _) <- readDocxWithWarnings opts bytes - return (pandoc, mediaBag) + -> m Pandoc +readDocxWithWarnings = readDocx data DState = DState { docxAnchorMap :: M.Map String String , docxMediaBag :: MediaBag @@ -137,15 +139,10 @@ data DEnv = DEnv { docxOptions :: ReaderOptions instance Default DEnv where def = DEnv def False -type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState)) +type DocxContext m = ReaderT DEnv (StateT DState m) -evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a -evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx - -addDocxWarning :: String -> DocxContext () -addDocxWarning msg = do - warnings <- gets docxWarnings - modify $ \s -> s {docxWarnings = msg : warnings} +evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a +evalDocxContext ctx env st = flip evalStateT st $ flip runReaderT env $ ctx -- This is empty, but we put it in for future-proofing. spansToKeep :: [String] @@ -179,7 +176,7 @@ isEmptyPar (Paragraph _ parParts) = isEmptyElem _ = True isEmptyPar _ = False -bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue) +bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue) bodyPartsToMeta' [] = return M.empty bodyPartsToMeta' (bp : bps) | (Paragraph pPr parParts) <- bp @@ -195,7 +192,7 @@ bodyPartsToMeta' (bp : bps) return $ M.insertWith f metaField (MetaInlines (toList inlines)) remaining bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps -bodyPartsToMeta :: [BodyPart] -> DocxContext Meta +bodyPartsToMeta :: PandocMonad m => [BodyPart] -> DocxContext m Meta bodyPartsToMeta bps = do mp <- bodyPartsToMeta' bps let mp' = @@ -297,7 +294,7 @@ runStyleToTransform rPr emph . (runStyleToTransform rPr {rUnderline = Nothing}) | otherwise = id -runToInlines :: Run -> DocxContext Inlines +runToInlines :: PandocMonad m => Run -> DocxContext m Inlines runToInlines (Run rs runElems) | Just (s, _) <- rStyle rs , s `elem` codeStyles = @@ -318,8 +315,7 @@ runToInlines (Endnote bps) = do blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps) return $ note blksList runToInlines (InlineDrawing fp title alt bs ext) = do - mediaBag <- gets docxMediaBag - modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } + (lift . lift) $ P.insertMedia fp Nothing bs return $ imageWith (extentToAttr ext) fp title $ text alt runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" @@ -330,7 +326,7 @@ extentToAttr (Just (w, h)) = showDim d = show (d / 914400) ++ "in" extentToAttr _ = nullAttr -blocksToInlinesWarn :: String -> Blocks -> DocxContext Inlines +blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines blocksToInlinesWarn cmtId blks = do let blkList = toList blks notParaOrPlain :: Block -> Bool @@ -338,10 +334,10 @@ blocksToInlinesWarn cmtId blks = do notParaOrPlain (Plain _) = False notParaOrPlain _ = True when (not $ null $ filter notParaOrPlain blkList) - (addDocxWarning $ "Docx comment " ++ cmtId ++ " will not retain formatting") + ((lift . lift) $ P.warning $ "Docx comment " ++ cmtId ++ " will not retain formatting") return $ fromList $ blocksToInlines blkList -parPartToInlines :: ParPart -> DocxContext Inlines +parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines parPartToInlines (PlainRun r) = runToInlines r parPartToInlines (Insertion _ author date runs) = do opts <- asks docxOptions @@ -403,8 +399,7 @@ parPartToInlines (BookMark _ anchor) = (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) return $ spanWith (newAnchor, ["anchor"], []) mempty parPartToInlines (Drawing fp title alt bs ext) = do - mediaBag <- gets docxMediaBag - modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } + (lift . lift) $ P.insertMedia fp Nothing bs return $ imageWith (extentToAttr ext) fp title $ text alt parPartToInlines Chart = do return $ spanWith ("", ["chart"], []) $ text "[CHART]" @@ -426,10 +421,10 @@ isAnchorSpan _ = False dummyAnchors :: [String] dummyAnchors = ["_GoBack"] -makeHeaderAnchor :: Blocks -> DocxContext Blocks +makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks makeHeaderAnchor bs = traverse makeHeaderAnchor' bs -makeHeaderAnchor' :: Block -> DocxContext Block +makeHeaderAnchor' :: PandocMonad m => Block -> DocxContext m Block -- If there is an anchor already there (an anchor span in the header, -- to be exact), we rename and associate the new id with the old one. makeHeaderAnchor' (Header n (ident, classes, kvs) ils) @@ -463,12 +458,12 @@ singleParaToPlain blks singleton $ Plain ils singleParaToPlain blks = blks -cellToBlocks :: Cell -> DocxContext Blocks +cellToBlocks :: PandocMonad m => Cell -> DocxContext m Blocks cellToBlocks (Cell bps) = do blks <- smushBlocks <$> mapM bodyPartToBlocks bps return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks -rowToBlocksList :: Row -> DocxContext [Blocks] +rowToBlocksList :: PandocMonad m => Row -> DocxContext m [Blocks] rowToBlocksList (Row cells) = do blksList <- mapM cellToBlocks cells return $ map singleParaToPlain blksList @@ -518,7 +513,7 @@ parStyleToTransform pPr False -> parStyleToTransform pPr' parStyleToTransform _ = id -bodyPartToBlocks :: BodyPart -> DocxContext Blocks +bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks bodyPartToBlocks (Paragraph pPr parparts) | not $ null $ codeDivs `intersect` (pStyle pPr) = return @@ -559,7 +554,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do ] blks <- bodyPartToBlocks (Paragraph pPr parparts) return $ divWith ("", ["list-item"], kvs) blks -bodyPartToBlocks (ListItem pPr _ _ _ parparts) = +bodyPartToBlocks (ListItem pPr _ _ _ parparts) = let pPr' = pPr {pStyle = "ListParagraph": (pStyle pPr)} in bodyPartToBlocks $ Paragraph pPr' parparts @@ -597,7 +592,7 @@ bodyPartToBlocks (OMathPara e) = do -- replace targets with generated anchors. -rewriteLink' :: Inline -> DocxContext Inline +rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline rewriteLink' l@(Link attr ils ('#':target, title)) = do anchorMap <- gets docxAnchorMap return $ case M.lookup target anchorMap of @@ -605,23 +600,21 @@ rewriteLink' l@(Link attr ils ('#':target, title)) = do Nothing -> l rewriteLink' il = return il -rewriteLinks :: [Block] -> DocxContext [Block] +rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block] rewriteLinks = mapM (walkM rewriteLink') -bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag, [String]) +bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block]) bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks - mediaBag <- gets docxMediaBag - warnings <- gets docxWarnings - return $ (meta, - blks', - mediaBag, - warnings) - -docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag, [String]) + return $ (meta, blks') + +docxToOutput :: PandocMonad m + => ReaderOptions + -> Docx + -> m (Meta, [Block]) docxToOutput opts (Docx (Document _ body)) = let dEnv = def { docxOptions = opts} in evalDocxContext (bodyToOutput body) dEnv def diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index deb2caccf..6cd3a49b6 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -690,7 +690,7 @@ elemToParPart ns element , Just drawingElem <- findChild (elemName ns "w" "drawing") element , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart" , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem - = return Chart + = return Chart elemToParPart ns element | isElem ns "w" "r" element = elemToRun ns element >>= (\r -> return $ PlainRun r) diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 4c31bf1ae..f24adb5b1 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -11,13 +11,12 @@ module Text.Pandoc.Readers.EPUB import Text.XML.Light import Text.Pandoc.Definition hiding (Attr) import Text.Pandoc.Readers.HTML (readHtml) -import Text.Pandoc.Error import Text.Pandoc.Walk (walk, query) -import Text.Pandoc.Options ( ReaderOptions(..), readerTrace) +import Text.Pandoc.Options ( ReaderOptions(..), Verbosity(..)) import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField) import Network.URI (unEscapeString) import Text.Pandoc.MediaBag (MediaBag, insertMedia) -import Control.Monad.Except (MonadError, throwError, runExcept, Except) +import Control.Monad.Except (throwError) import Text.Pandoc.MIME (MimeType) import qualified Text.Pandoc.Builder as B import Codec.Archive.Zip ( Archive (..), toArchiveOrFail, fromEntry @@ -27,29 +26,30 @@ import System.FilePath ( takeFileName, (</>), dropFileName, normalise , dropFileName , splitFileName ) import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy) -import Control.Monad (guard, liftM, when) +import Control.Monad (guard, liftM) import Data.List (isPrefixOf, isInfixOf) import Data.Maybe (mapMaybe, fromMaybe) import qualified Data.Map as M (Map, lookup, fromList, elems) import Data.Monoid ((<>)) import Control.DeepSeq (deepseq, NFData) - -import Debug.Trace (trace) +import Text.Pandoc.Error +import Text.Pandoc.Class (PandocMonad, report) +import qualified Text.Pandoc.Class as P type Items = M.Map String (FilePath, MimeType) -readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag) +readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc readEPUB opts bytes = case toArchiveOrFail bytes of - Right archive -> runEPUB $ archiveToEPUB opts $ archive - Left _ -> Left $ ParseFailure "Couldn't extract ePub file" + Right archive -> archiveToEPUB opts $ archive + Left _ -> throwError $ PandocParseError "Couldn't extract ePub file" -runEPUB :: Except PandocError a -> Either PandocError a -runEPUB = runExcept +-- runEPUB :: Except PandocError a -> Either PandocError a +-- runEPUB = runExcept -- Note that internal reference are aggresively normalised so that all ids -- are of the form "filename#id" -- -archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) +archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc archiveToEPUB os archive = do -- root is path to folder with manifest file in (root, content) <- getManifest archive @@ -63,24 +63,21 @@ archiveToEPUB os archive = do foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine)) `liftM` parseSpineElem root b) mempty spine let ast = coverDoc <> (Pandoc meta bs) - let mediaBag = fetchImages (M.elems items) root archive ast - return $ (ast, mediaBag) + P.setMediaBag $ fetchImages (M.elems items) root archive ast + return ast where os' = os {readerParseRaw = True} - parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc + parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do - when (readerTrace os) (traceM path) + report DEBUG ("parseSpineElem called with path " ++ show path) doc <- mimeToReader mime r path let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty return $ docSpan <> doc - mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc + mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (unEscapeString -> root) (unEscapeString -> path) = do fname <- findEntryByPathE (root </> path) archive - html <- either throwError return . - readHtml os' . - UTF8.toStringLazy $ - fromEntry fname + html <- readHtml os' . UTF8.toStringLazy $ fromEntry fname return $ fixInternalReferences path html mimeToReader s _ (unEscapeString -> path) | s `elem` imageMimes = return $ imageToPandoc path @@ -121,7 +118,7 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"] type CoverImage = FilePath -parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items) +parseManifest :: (PandocMonad m) => Element -> m (Maybe CoverImage, Items) parseManifest content = do manifest <- findElementE (dfName "manifest") content let items = findChildren (dfName "item") manifest @@ -137,7 +134,7 @@ parseManifest content = do mime <- findAttrE (emptyName "media-type") e return (uid, (href, mime)) -parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)] +parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do spine <- findElementE (dfName "spine") e let itemRefs = findChildren (dfName "itemref") spine @@ -148,7 +145,7 @@ parseSpine is e = do guard linear findAttr (emptyName "idref") ref -parseMeta :: MonadError PandocError m => Element -> m Meta +parseMeta :: PandocMonad m => Element -> m Meta parseMeta content = do meta <- findElementE (dfName "metadata") content let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True @@ -166,7 +163,7 @@ renameMeta :: String -> String renameMeta "creator" = "author" renameMeta s = s -getManifest :: MonadError PandocError m => Archive -> m (String, Element) +getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry @@ -242,9 +239,6 @@ foldM' f z (x:xs) = do uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d uncurry3 f (a, b, c) = f a b c -traceM :: Monad m => String -> m () -traceM = flip trace (return ()) - -- Utility stripNamespace :: QName -> String @@ -268,18 +262,18 @@ emptyName s = QName s Nothing Nothing -- Convert Maybe interface to Either -findAttrE :: MonadError PandocError m => QName -> Element -> m String +findAttrE :: PandocMonad m => QName -> Element -> m String findAttrE q e = mkE "findAttr" $ findAttr q e -findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry +findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry findEntryByPathE (normalise -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a -parseXMLDocE :: MonadError PandocError m => String -> m Element +parseXMLDocE :: PandocMonad m => String -> m Element parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc -findElementE :: MonadError PandocError m => QName -> Element -> m Element +findElementE :: PandocMonad m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x -mkE :: MonadError PandocError m => String -> Maybe a -> m a -mkE s = maybe (throwError . ParseFailure $ s) return +mkE :: PandocMonad m => String -> Maybe a -> m a +mkE s = maybe (throwError . PandocParseError $ s) return diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index abe5f66ce..0bb837ba9 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -44,9 +44,9 @@ import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField - , escapeURI, safeRead, mapLeft ) -import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) - , Extension (Ext_epub_html_exts, + , escapeURI, safeRead ) +import Text.Pandoc.Options (ReaderOptions(readerParseRaw), + Verbosity(..), Extension (Ext_epub_html_exts, Ext_native_divs, Ext_native_spans)) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk @@ -54,46 +54,52 @@ import qualified Data.Map as M import Data.Maybe ( fromMaybe, isJust) import Data.List ( intercalate, isInfixOf, isPrefixOf ) import Data.Char ( isDigit ) -import Control.Monad ( guard, when, mzero, void, unless ) +import Control.Monad ( guard, mzero, void, unless ) import Control.Arrow ((***)) import Control.Applicative ( (<|>) ) import Data.Monoid (First (..)) import Text.Printf (printf) -import Debug.Trace (trace) import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) -import Control.Monad.Reader (Reader,ask, asks, local, runReader) +import Control.Monad.Reader (ask, asks, local, ReaderT, runReaderT, lift) import Network.URI (URI, parseURIReference, nonStrictRelativeTo) -import Text.Pandoc.Error import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Data.Monoid ((<>)) import Text.Parsec.Error import qualified Data.Set as Set +import Text.Pandoc.Error +import Text.Pandoc.Class (PandocMonad, report) +import Control.Monad.Except (throwError) + -- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: ReaderOptions -- ^ Reader options +readHtml :: PandocMonad m + => ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Either PandocError Pandoc -readHtml opts inp = - mapLeft (ParseFailure . getError) . flip runReader def $ - runParserT parseDoc - (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty) - "source" tags - where tags = stripPrefixes . canonicalizeTags $ - parseTagsOptions parseOptions{ optTagPosition = True } inp - parseDoc = do - blocks <- (fixPlains False) . mconcat <$> manyTill block eof - meta <- stateMeta . parserState <$> getState - bs' <- replaceNotes (B.toList blocks) - return $ Pandoc meta bs' - getError (errorMessages -> ms) = case ms of - [] -> "" - (m:_) -> messageString m - -replaceNotes :: [Block] -> TagParser [Block] + -> m Pandoc +readHtml opts inp = do + let tags = stripPrefixes . canonicalizeTags $ + parseTagsOptions parseOptions{ optTagPosition = True } inp + parseDoc = do + blocks <- (fixPlains False) . mconcat <$> manyTill block eof + meta <- stateMeta . parserState <$> getState + bs' <- replaceNotes (B.toList blocks) + return $ Pandoc meta bs' + getError (errorMessages -> ms) = case ms of + [] -> "" + (m:_) -> messageString m + result <- flip runReaderT def $ + runParserT parseDoc + (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty) + "source" tags + case result of + Right doc -> return doc + Left err -> throwError $ PandocParseError $ getError err + +replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] replaceNotes = walkM replaceNotes' -replaceNotes' :: Inline -> TagParser Inline +replaceNotes' :: PandocMonad m => Inline -> TagParser m Inline replaceNotes' (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) . lookup ref <$> getNotes where getNotes = noteTable <$> getState @@ -113,20 +119,20 @@ data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext , inPlain :: Bool -- ^ Set if in pPlain } -setInChapter :: HTMLParser s a -> HTMLParser s a +setInChapter :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a setInChapter = local (\s -> s {inChapter = True}) -setInPlain :: HTMLParser s a -> HTMLParser s a +setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a setInPlain = local (\s -> s {inPlain = True}) -type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal) +type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m) -type TagParser = HTMLParser [Tag String] +type TagParser m = HTMLParser m [Tag String] -pBody :: TagParser Blocks +pBody :: PandocMonad m => TagParser m Blocks pBody = pInTags "body" block -pHead :: TagParser Blocks +pHead :: PandocMonad m => TagParser m Blocks pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag) where pTitle = pInTags "title" inline >>= setTitle . trimInlines setTitle t = mempty <$ (updateState $ B.setMeta "title" t) @@ -149,9 +155,8 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag parseURIReference $ fromAttrib "href" bt } return mempty -block :: TagParser Blocks +block :: PandocMonad m => TagParser m Blocks block = do - tr <- getOption readerTrace pos <- getPosition res <- choice [ eSection @@ -172,17 +177,20 @@ block = do , pPlain , pRawHtmlBlock ] - when tr $ trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList res)) (return ()) + report DEBUG $ printf "line %d: %s" + (sourceLine pos) (take 60 $ show $ B.toList res) return res -namespaces :: [(String, TagParser Inlines)] +namespaces :: PandocMonad m => [(String, TagParser m Inlines)] namespaces = [(mathMLNamespace, pMath True)] mathMLNamespace :: String mathMLNamespace = "http://www.w3.org/1998/Math/MathML" -eSwitch :: Monoid a => (Inlines -> a) -> TagParser a -> TagParser a +eSwitch :: (PandocMonad m, Monoid a) + => (Inlines -> a) + -> TagParser m a + -> TagParser m a eSwitch constructor parser = try $ do guardEnabled Ext_epub_html_exts pSatisfy (~== TagOpen "switch" []) @@ -195,7 +203,7 @@ eSwitch constructor parser = try $ do pSatisfy (~== TagClose "switch") return $ maybe fallback constructor cases -eCase :: TagParser (Maybe Inlines) +eCase :: PandocMonad m => TagParser m (Maybe Inlines) eCase = do skipMany pBlank TagOpen _ attr <- lookAhead $ pSatisfy $ (~== TagOpen "case" []) @@ -203,7 +211,7 @@ eCase = do Just p -> Just <$> (pInTags "case" (skipMany pBlank *> p <* skipMany pBlank)) Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (~== TagClose "case")) -eFootnote :: TagParser () +eFootnote :: PandocMonad m => TagParser m () eFootnote = try $ do let notes = ["footnote", "rearnote"] guardEnabled Ext_epub_html_exts @@ -213,10 +221,10 @@ eFootnote = try $ do content <- pInTags tag block addNote ident content -addNote :: String -> Blocks -> TagParser () +addNote :: PandocMonad m => String -> Blocks -> TagParser m () addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : (noteTable s)}) -eNoteref :: TagParser Inlines +eNoteref :: PandocMonad m => TagParser m Inlines eNoteref = try $ do guardEnabled Ext_epub_html_exts TagOpen tag attr <- lookAhead $ pAnyTag @@ -227,17 +235,17 @@ eNoteref = try $ do return $ B.rawInline "noteref" ident -- Strip TOC if there is one, better to generate again -eTOC :: TagParser () +eTOC :: PandocMonad m => TagParser m () eTOC = try $ do guardEnabled Ext_epub_html_exts (TagOpen tag attr) <- lookAhead $ pAnyTag guard (maybe False (== "toc") (lookup "type" attr)) void (pInTags tag block) -pList :: TagParser Blocks +pList :: PandocMonad m => TagParser m Blocks pList = pBulletList <|> pOrderedList <|> pDefinitionList -pBulletList :: TagParser Blocks +pBulletList :: PandocMonad m => TagParser m Blocks pBulletList = try $ do pSatisfy (~== TagOpen "ul" []) let nonItem = pSatisfy (\t -> @@ -249,7 +257,7 @@ pBulletList = try $ do items <- manyTill (pListItem nonItem) (pCloses "ul") return $ B.bulletList $ map (fixPlains True) items -pListItem :: TagParser a -> TagParser Blocks +pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks pListItem nonItem = do TagOpen _ attr <- lookAhead $ pSatisfy (~== TagOpen "li" []) let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr) @@ -271,7 +279,7 @@ parseTypeAttr "A" = UpperAlpha parseTypeAttr "1" = Decimal parseTypeAttr _ = DefaultStyle -pOrderedList :: TagParser Blocks +pOrderedList :: PandocMonad m => TagParser m Blocks pOrderedList = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) let (start, style) = (sta', sty') @@ -302,13 +310,13 @@ pOrderedList = try $ do items <- manyTill (pListItem nonItem) (pCloses "ol") return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items -pDefinitionList :: TagParser Blocks +pDefinitionList :: PandocMonad m => TagParser m Blocks pDefinitionList = try $ do pSatisfy (~== TagOpen "dl" []) items <- manyTill pDefListItem (pCloses "dl") return $ B.definitionList items -pDefListItem :: TagParser (Inlines, [Blocks]) +pDefListItem :: PandocMonad m => TagParser m (Inlines, [Blocks]) pDefListItem = try $ do let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) && not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl")) @@ -334,7 +342,7 @@ fixPlains inList bs = if any isParaish bs' plainToPara x = x bs' = B.toList bs -pRawTag :: TagParser String +pRawTag :: PandocMonad m => TagParser m String pRawTag = do tag <- pAnyTag let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"] @@ -342,7 +350,7 @@ pRawTag = do then return [] else return $ renderTags' [tag] -pDiv :: TagParser Blocks +pDiv :: PandocMonad m => TagParser m Blocks pDiv = try $ do guardEnabled Ext_native_divs let isDivLike "div" = True @@ -356,7 +364,7 @@ pDiv = try $ do else classes return $ B.divWith (ident, classes', kvs) contents -pRawHtmlBlock :: TagParser Blocks +pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag parseRaw <- getOption readerParseRaw @@ -364,21 +372,21 @@ pRawHtmlBlock = do then return $ B.rawBlock "html" raw else return mempty -pHtmlBlock :: String -> TagParser String +pHtmlBlock :: PandocMonad m => String -> TagParser m String pHtmlBlock t = try $ do open <- pSatisfy (~== TagOpen t []) contents <- manyTill pAnyTag (pSatisfy (~== TagClose t)) return $ renderTags' $ [open] ++ contents ++ [TagClose t] -- Sets chapter context -eSection :: TagParser Blocks +eSection :: PandocMonad m => TagParser m Blocks eSection = try $ do let matchChapter as = maybe False (isInfixOf "chapter") (lookup "type" as) let sectTag = tagOpen (`elem` sectioningContent) matchChapter TagOpen tag _ <- lookAhead $ pSatisfy sectTag setInChapter (pInTags tag block) -headerLevel :: String -> TagParser Int +headerLevel :: PandocMonad m => String -> TagParser m Int headerLevel tagtype = do let level = read (drop 1 tagtype) (try $ do @@ -388,7 +396,7 @@ headerLevel tagtype = do <|> return level -eTitlePage :: TagParser () +eTitlePage :: PandocMonad m => TagParser m () eTitlePage = try $ do let isTitlePage as = maybe False (isInfixOf "titlepage") (lookup "type" as) let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section") @@ -396,7 +404,7 @@ eTitlePage = try $ do TagOpen tag _ <- lookAhead $ pSatisfy groupTag () <$ pInTags tag block -pHeader :: TagParser Blocks +pHeader :: PandocMonad m => TagParser m Blocks pHeader = try $ do TagOpen tagtype attr <- pSatisfy $ tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"]) @@ -412,12 +420,12 @@ pHeader = try $ do then mempty -- skip a representation of the title in the body else B.headerWith attr' level contents -pHrule :: TagParser Blocks +pHrule :: PandocMonad m => TagParser m Blocks pHrule = do pSelfClosing (=="hr") (const True) return B.horizontalRule -pTable :: TagParser Blocks +pTable :: PandocMonad m => TagParser m Blocks pTable = try $ do TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) skipMany pBlank @@ -456,7 +464,7 @@ pTable = try $ do else widths' return $ B.table caption (zip aligns widths) head' rows -pCol :: TagParser Double +pCol :: PandocMonad m => TagParser m Double pCol = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "col" []) skipMany pBlank @@ -472,7 +480,7 @@ pCol = try $ do fromMaybe 0.0 $ safeRead ('0':'.':init x) _ -> 0.0 -pColgroup :: TagParser [Double] +pColgroup :: PandocMonad m => TagParser m [Double] pColgroup = try $ do pSatisfy (~== TagOpen "colgroup" []) skipMany pBlank @@ -485,31 +493,31 @@ noColOrRowSpans t = isNullOrOne "colspan" && isNullOrOne "rowspan" "1" -> True _ -> False -pCell :: String -> TagParser [Blocks] +pCell :: PandocMonad m => String -> TagParser m [Blocks] pCell celltype = try $ do skipMany pBlank res <- pInTags' celltype noColOrRowSpans block skipMany pBlank return [res] -pBlockQuote :: TagParser Blocks +pBlockQuote :: PandocMonad m => TagParser m Blocks pBlockQuote = do contents <- pInTags "blockquote" block return $ B.blockQuote $ fixPlains False contents -pPlain :: TagParser Blocks +pPlain :: PandocMonad m => TagParser m Blocks pPlain = do contents <- setInPlain $ trimInlines . mconcat <$> many1 inline if B.isNull contents then return mempty else return $ B.plain contents -pPara :: TagParser Blocks +pPara :: PandocMonad m => TagParser m Blocks pPara = do contents <- trimInlines <$> pInTags "p" inline return $ B.para contents -pCodeBlock :: TagParser Blocks +pCodeBlock :: PandocMonad m => TagParser m Blocks pCodeBlock = try $ do TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) contents <- manyTill pAnyTag (pCloses "pre" <|> eof) @@ -529,7 +537,7 @@ tagToString (TagText s) = s tagToString (TagOpen "br" _) = "\n" tagToString _ = "" -inline :: TagParser Inlines +inline :: PandocMonad m => TagParser m Inlines inline = choice [ eNoteref , eSwitch id inline @@ -549,30 +557,31 @@ inline = choice , pRawHtmlInline ] -pLocation :: TagParser () +pLocation :: PandocMonad m => TagParser m () pLocation = do (TagPosition r c) <- pSat isTagPosition setPosition $ newPos "input" r c -pSat :: (Tag String -> Bool) -> TagParser (Tag String) +pSat :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String) pSat f = do pos <- getPosition token show (const pos) (\x -> if f x then Just x else Nothing) -pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String) +pSatisfy :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String) pSatisfy f = try $ optional pLocation >> pSat f -pAnyTag :: TagParser (Tag String) +pAnyTag :: PandocMonad m => TagParser m (Tag String) pAnyTag = pSatisfy (const True) -pSelfClosing :: (String -> Bool) -> ([Attribute String] -> Bool) - -> TagParser (Tag String) +pSelfClosing :: PandocMonad m + => (String -> Bool) -> ([Attribute String] -> Bool) + -> TagParser m (Tag String) pSelfClosing f g = do open <- pSatisfy (tagOpen f g) optional $ pSatisfy (tagClose f) return open -pQ :: TagParser Inlines +pQ :: PandocMonad m => TagParser m Inlines pQ = do context <- asks quoteContext let quoteType = case context of @@ -587,19 +596,19 @@ pQ = do withQuoteContext innerQuoteContext $ pInlinesInTags "q" constructor -pEmph :: TagParser Inlines +pEmph :: PandocMonad m => TagParser m Inlines pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph -pStrong :: TagParser Inlines +pStrong :: PandocMonad m => TagParser m Inlines pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong -pSuperscript :: TagParser Inlines +pSuperscript :: PandocMonad m => TagParser m Inlines pSuperscript = pInlinesInTags "sup" B.superscript -pSubscript :: TagParser Inlines +pSubscript :: PandocMonad m => TagParser m Inlines pSubscript = pInlinesInTags "sub" B.subscript -pStrikeout :: TagParser Inlines +pStrikeout :: PandocMonad m => TagParser m Inlines pStrikeout = do pInlinesInTags "s" B.strikeout <|> pInlinesInTags "strike" B.strikeout <|> @@ -608,7 +617,7 @@ pStrikeout = do contents <- mconcat <$> manyTill inline (pCloses "span") return $ B.strikeout contents) -pLineBreak :: TagParser Inlines +pLineBreak :: PandocMonad m => TagParser m Inlines pLineBreak = do pSelfClosing (=="br") (const True) return B.linebreak @@ -619,7 +628,7 @@ maybeFromAttrib :: String -> Tag String -> Maybe String maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs maybeFromAttrib _ _ = Nothing -pLink :: TagParser Inlines +pLink :: PandocMonad m => TagParser m Inlines pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) let title = fromAttrib "title" tag @@ -639,7 +648,7 @@ pLink = try $ do _ -> url' return $ B.linkWith (uid, cls, []) (escapeURI url) title lab -pImage :: TagParser Inlines +pImage :: PandocMonad m => TagParser m Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") mbBaseHref <- baseHref <$> getState @@ -657,13 +666,13 @@ pImage = do let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"] return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) -pCode :: TagParser Inlines +pCode :: PandocMonad m => TagParser m Inlines pCode = try $ do (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) result <- manyTill pAnyTag (pCloses open) return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result -pSpan :: TagParser Inlines +pSpan :: PandocMonad m => TagParser m Inlines pSpan = try $ do guardEnabled Ext_native_spans TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) @@ -674,7 +683,7 @@ pSpan = try $ do let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr) return $ tag contents -pRawHtmlInline :: TagParser Inlines +pRawHtmlInline :: PandocMonad m => TagParser m Inlines pRawHtmlInline = do inplain <- asks inPlain result <- pSatisfy (tagComment (const True)) @@ -689,7 +698,7 @@ pRawHtmlInline = do mathMLToTeXMath :: String -> Either String String mathMLToTeXMath s = writeTeX <$> readMathML s -pMath :: Bool -> TagParser Inlines +pMath :: PandocMonad m => Bool -> TagParser m Inlines pMath inCase = try $ do open@(TagOpen _ attr) <- pSatisfy $ tagOpen (=="math") (const True) -- we'll assume math tags are MathML unless specially marked @@ -705,22 +714,25 @@ pMath inCase = try $ do Just "block" -> B.displayMath x _ -> B.math x -pInlinesInTags :: String -> (Inlines -> Inlines) - -> TagParser Inlines +pInlinesInTags :: PandocMonad m => String -> (Inlines -> Inlines) + -> TagParser m Inlines pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline -pInTags :: (Monoid a) => String -> TagParser a -> TagParser a +pInTags :: (PandocMonad m, Monoid a) => String -> TagParser m a -> TagParser m a pInTags tagtype parser = pInTags' tagtype (const True) parser -pInTags' :: (Monoid a) => String -> (Tag String -> Bool) -> TagParser a - -> TagParser a +pInTags' :: (PandocMonad m, Monoid a) + => String + -> (Tag String -> Bool) + -> TagParser m a + -> TagParser m a pInTags' tagtype tagtest parser = try $ do pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t) mconcat <$> manyTill parser (pCloses tagtype <|> eof) -- parses p, preceeded by an optional opening tag -- and followed by an optional closing tags -pOptInTag :: String -> TagParser a -> TagParser a +pOptInTag :: PandocMonad m => String -> TagParser m a -> TagParser m a pOptInTag tagtype p = try $ do skipMany pBlank optional $ pSatisfy (~== TagOpen tagtype []) @@ -731,7 +743,7 @@ pOptInTag tagtype p = try $ do skipMany pBlank return x -pCloses :: String -> TagParser () +pCloses :: PandocMonad m => String -> TagParser m () pCloses tagtype = try $ do t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag case t of @@ -744,23 +756,25 @@ pCloses tagtype = try $ do (TagClose "table") | tagtype == "tr" -> return () _ -> mzero -pTagText :: TagParser Inlines +pTagText :: PandocMonad m => TagParser m Inlines pTagText = try $ do (TagText str) <- pSatisfy isTagText st <- getState qu <- ask - case flip runReader qu $ runParserT (many pTagContents) st "text" str of - Left _ -> fail $ "Could not parse `" ++ str ++ "'" + parsed <- lift $ lift $ + flip runReaderT qu $ runParserT (many pTagContents) st "text" str + case parsed of + Left _ -> throwError $ PandocParseError $ "Could not parse `" ++ str ++ "'" Right result -> return $ mconcat result -pBlank :: TagParser () +pBlank :: PandocMonad m => TagParser m () pBlank = try $ do (TagText str) <- pSatisfy isTagText guard $ all isSpace str -type InlinesParser = HTMLParser String +type InlinesParser m = HTMLParser m String -pTagContents :: InlinesParser Inlines +pTagContents :: PandocMonad m => InlinesParser m Inlines pTagContents = B.displayMath <$> mathDisplay <|> B.math <$> mathInline @@ -770,7 +784,7 @@ pTagContents = <|> pSymbol <|> pBad -pStr :: InlinesParser Inlines +pStr :: PandocMonad m => InlinesParser m Inlines pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) @@ -789,13 +803,13 @@ isSpecial '\8220' = True isSpecial '\8221' = True isSpecial _ = False -pSymbol :: InlinesParser Inlines +pSymbol :: PandocMonad m => InlinesParser m Inlines pSymbol = satisfy isSpecial >>= return . B.str . (:[]) isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML -pBad :: InlinesParser Inlines +pBad :: PandocMonad m => InlinesParser m Inlines pBad = do c <- satisfy isBad let c' = case c of @@ -829,7 +843,7 @@ pBad = do _ -> '?' return $ B.str [c'] -pSpace :: InlinesParser Inlines +pSpace :: PandocMonad m => InlinesParser m Inlines pSpace = many1 (satisfy isSpace) >>= \xs -> if '\n' `elem` xs then return B.softbreak @@ -1070,7 +1084,7 @@ instance HasHeaderMap HTMLState where -- This signature should be more general -- MonadReader HTMLLocal m => HasQuoteContext st m -instance HasQuoteContext st (Reader HTMLLocal) where +instance PandocMonad m => HasQuoteContext st (ReaderT HTMLLocal m) where getQuoteContext = asks quoteContext withQuoteContext q = local (\s -> s{quoteContext = q}) diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 12953bb72..310a04574 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -24,23 +24,29 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Documentation.Haddock.Parser import Documentation.Haddock.Types -import Debug.Trace (trace) - import Text.Pandoc.Error +import Control.Monad.Except (throwError) +import Text.Pandoc.Class (PandocMonad) + -- | Parse Haddock markup and return a 'Pandoc' document. -readHaddock :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse - -> Either PandocError Pandoc -readHaddock opts = +readHaddock :: PandocMonad m + => ReaderOptions + -> String + -> m Pandoc +readHaddock opts s = case readHaddockEither opts s of + Right result -> return result + Left e -> throwError e + +readHaddockEither :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse + -> Either PandocError Pandoc +readHaddockEither _opts = #if MIN_VERSION_haddock_library(1,2,0) - Right . B.doc . docHToBlocks . trace' . _doc . parseParas + Right . B.doc . docHToBlocks . _doc . parseParas #else - Right . B.doc . docHToBlocks . trace' . parseParas + Right . B.doc . docHToBlocks . parseParas #endif - where trace' x = if readerTrace opts - then trace (show x) x - else x docHToBlocks :: DocH String Identifier -> Blocks docHToBlocks d' = diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index edcf35e51..86ff2b83a 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -32,7 +32,6 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, rawLaTeXInline, rawLaTeXBlock, inlineCommand, - handleIncludes ) where import Text.Pandoc.Definition @@ -48,22 +47,28 @@ import Control.Monad import Text.Pandoc.Builder import Control.Applicative ((<|>), many, optional) import Data.Maybe (fromMaybe, maybeToList) -import System.Environment (getEnv) import System.FilePath (replaceExtension, (</>), takeExtension, addExtension) import Data.List (intercalate) import qualified Data.Map as M -import qualified Control.Exception as E import Text.Pandoc.Highlighting (fromListingsLanguage) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Error +import Control.Monad.Except (throwError, catchError) +import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv, readFileLazy, + warning, warningWithPos) -- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: ReaderOptions -- ^ Reader options +readLaTeX :: PandocMonad m + => ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Either PandocError Pandoc -readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts } - -parseLaTeX :: LP Pandoc + -> m Pandoc +readLaTeX opts ltx = do + parsed <- readWithM parseLaTeX def{ stateOptions = opts } ltx + case parsed of + Right result -> return result + Left e -> throwError e + +parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do bs <- blocks eof @@ -72,9 +77,9 @@ parseLaTeX = do let (Pandoc _ bs') = doc bs return $ Pandoc meta bs' -type LP = Parser String ParserState +type LP m = ParserT String ParserState m -anyControlSeq :: LP String +anyControlSeq :: PandocMonad m => LP m String anyControlSeq = do char '\\' next <- option '\n' anyChar @@ -83,7 +88,7 @@ anyControlSeq = do c | isLetter c -> (c:) <$> (many letter <* optional sp) | otherwise -> return [c] -controlSeq :: String -> LP String +controlSeq :: PandocMonad m => String -> LP m String controlSeq name = try $ do char '\\' case name of @@ -92,26 +97,26 @@ controlSeq name = try $ do cs -> string cs <* notFollowedBy letter <* optional sp return name -dimenarg :: LP String +dimenarg :: PandocMonad m => LP m String dimenarg = try $ do ch <- option "" $ string "=" num <- many1 digit dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"] return $ ch ++ num ++ dim -sp :: LP () +sp :: PandocMonad m => LP m () sp = whitespace <|> endline -whitespace :: LP () +whitespace :: PandocMonad m => LP m () whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') -endline :: LP () +endline :: PandocMonad m => LP m () endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline) isLowerHex :: Char -> Bool isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' -tildeEscape :: LP Char +tildeEscape :: PandocMonad m => LP m Char tildeEscape = try $ do string "^^" c <- satisfy (\x -> x >= '\0' && x <= '\128') @@ -124,29 +129,29 @@ tildeEscape = try $ do | otherwise -> return $ chr (x + 64) else return $ chr $ read ('0':'x':c:d) -comment :: LP () +comment :: PandocMonad m => LP m () comment = do char '%' skipMany (satisfy (/='\n')) optional newline return () -bgroup :: LP () +bgroup :: PandocMonad m => LP m () bgroup = try $ do skipMany (spaceChar <|> try (newline <* notFollowedBy blankline)) () <$ char '{' <|> () <$ controlSeq "bgroup" <|> () <$ controlSeq "begingroup" -egroup :: LP () +egroup :: PandocMonad m => LP m () egroup = () <$ char '}' <|> () <$ controlSeq "egroup" <|> () <$ controlSeq "endgroup" -grouped :: Monoid a => LP a -> LP a +grouped :: PandocMonad m => Monoid a => LP m a -> LP m a grouped parser = try $ bgroup *> (mconcat <$> manyTill parser egroup) -braced :: LP String +braced :: PandocMonad m => LP m String braced = bgroup *> (concat <$> manyTill ( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{')) <|> try (string "\\}") @@ -156,16 +161,16 @@ braced = bgroup *> (concat <$> manyTill <|> count 1 anyChar ) egroup) -bracketed :: Monoid a => LP a -> LP a +bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']')) -mathDisplay :: LP String -> LP Inlines +mathDisplay :: PandocMonad m => LP m String -> LP m Inlines mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim) -mathInline :: LP String -> LP Inlines +mathInline :: PandocMonad m => LP m String -> LP m Inlines mathInline p = math <$> (try p >>= applyMacros') -mathChars :: LP String +mathChars :: PandocMonad m => LP m String mathChars = concat <$> many (escapedChar <|> (snd <$> withRaw braced) @@ -179,10 +184,10 @@ mathChars = isOrdChar '\\' = False isOrdChar _ = True -quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines +quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines quoted' f starter ender = do startchs <- starter - smart <- getOption readerSmart + smart <- extensionEnabled Ext_smart <$> getOption readerExtensions if smart then do ils <- many (notFollowedBy ender >> inline) @@ -194,7 +199,7 @@ quoted' f starter ender = do _ -> startchs) else lit startchs -doubleQuote :: LP Inlines +doubleQuote :: PandocMonad m => LP m Inlines doubleQuote = do quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") <|> quoted' doubleQuoted (string "“") (void $ char '”') @@ -202,15 +207,15 @@ doubleQuote = do <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") <|> quoted' doubleQuoted (string "\"") (void $ char '"') -singleQuote :: LP Inlines +singleQuote :: PandocMonad m => LP m Inlines singleQuote = do - smart <- getOption readerSmart + smart <- extensionEnabled Ext_smart <$> getOption readerExtensions if smart then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) else str <$> many1 (oneOf "`\'‘’") -inline :: LP Inlines +inline :: PandocMonad m => LP m Inlines inline = (mempty <$ comment) <|> (space <$ whitespace) <|> (softbreak <$ endline) @@ -231,14 +236,15 @@ inline = (mempty <$ comment) <|> mathInline (char '$' *> mathChars <* char '$') <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) <|> (str . (:[]) <$> tildeEscape) - <|> (str . (:[]) <$> oneOf "[]") - <|> (str . (:[]) <$> oneOf "#&~^'`\"[]") -- TODO print warning? - -- <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters + <|> (do res <- oneOf "#&~^'`\"[]" + pos <- getPosition + warningWithPos pos ("Parsing unescaped '" ++ [res] ++ "'") + return $ str [res]) -inlines :: LP Inlines +inlines :: PandocMonad m => LP m Inlines inlines = mconcat <$> many (notFollowedBy (char '}') *> inline) -inlineGroup :: LP Inlines +inlineGroup :: PandocMonad m => LP m Inlines inlineGroup = do ils <- grouped inline if isNull ils @@ -247,10 +253,11 @@ inlineGroup = do -- we need the span so we can detitlecase bibtex entries; -- we need to know when something is {C}apitalized -block :: LP Blocks +block :: PandocMonad m => LP m Blocks block = (mempty <$ comment) <|> (mempty <$ ((spaceChar <|> newline) *> spaces)) <|> environment + <|> include <|> macro <|> blockCommand <|> paragraph @@ -258,10 +265,10 @@ block = (mempty <$ comment) <|> (mempty <$ char '&') -- loose & in table environment -blocks :: LP Blocks +blocks :: PandocMonad m => LP m Blocks blocks = mconcat <$> many block -getRawCommand :: String -> LP String +getRawCommand :: PandocMonad m => String -> LP m String getRawCommand name' = do rawargs <- withRaw (many (try (optional sp *> opt)) *> option "" (try (optional sp *> dimenarg)) *> @@ -273,7 +280,7 @@ lookupListDefault d = (fromMaybe d .) . lookupList where lookupList l m = msum $ map (`M.lookup` m) l -blockCommand :: LP Blocks +blockCommand :: PandocMonad m => LP m Blocks blockCommand = try $ do name <- anyControlSeq guard $ name /= "begin" && name /= "end" @@ -291,21 +298,21 @@ inBrackets :: Inlines -> Inlines inBrackets x = str "[" <> x <> str "]" -- eat an optional argument and one or more arguments in braces -ignoreInlines :: String -> (String, LP Inlines) +ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines) ignoreInlines name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawInline "latex" . (contseq ++) . snd) <$> (getOption readerParseRaw >>= guard >> withRaw optargs) -ignoreBlocks :: String -> (String, LP Blocks) +ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks) ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawBlock "latex" . (contseq ++) . snd) <$> (getOption readerParseRaw >>= guard >> withRaw optargs) -blockCommands :: M.Map String (LP Blocks) +blockCommands :: PandocMonad m => M.Map String (LP m Blocks) blockCommands = M.fromList $ [ ("par", mempty <$ skipopts) , ("title", mempty <$ (skipopts *> @@ -346,8 +353,6 @@ blockCommands = M.fromList $ , ("documentclass", skipopts *> braced *> preamble) , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) , ("caption", skipopts *> setCaption) - , ("PandocStartInclude", startInclude) - , ("PandocEndInclude", endInclude) , ("bibliography", mempty <$ (skipopts *> braced >>= addMeta "bibliography" . splitBibs)) , ("addbibresource", mempty <$ (skipopts *> braced >>= @@ -370,14 +375,14 @@ blockCommands = M.fromList $ , "newpage" ] -addMeta :: ToMetaValue a => String -> a -> LP () +addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () addMeta field val = updateState $ \st -> st{ stateMeta = addMetaField field val $ stateMeta st } splitBibs :: String -> [Inlines] splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') -setCaption :: LP Blocks +setCaption :: PandocMonad m => LP m Blocks setCaption = do ils <- tok mblabel <- option Nothing $ @@ -389,10 +394,10 @@ setCaption = do updateState $ \st -> st{ stateCaption = Just ils' } return mempty -resetCaption :: LP () +resetCaption :: PandocMonad m => LP m () resetCaption = updateState $ \st -> st{ stateCaption = Nothing } -authors :: LP () +authors :: PandocMonad m => LP m () authors = try $ do char '{' let oneAuthor = mconcat <$> @@ -403,7 +408,7 @@ authors = try $ do char '}' addMeta "author" (map trimInlines auths) -section :: Attr -> Int -> LP Blocks +section :: PandocMonad m => Attr -> Int -> LP m Blocks section (ident, classes, kvs) lvl = do hasChapters <- stateHasChapters `fmap` getState let lvl' = if hasChapters then lvl + 1 else lvl @@ -413,7 +418,7 @@ section (ident, classes, kvs) lvl = do attr' <- registerHeader (lab, classes, kvs) contents return $ headerWith attr' lvl' contents -inlineCommand :: LP Inlines +inlineCommand :: PandocMonad m => LP m Inlines inlineCommand = try $ do name <- anyControlSeq guard $ name /= "begin" && name /= "end" @@ -435,14 +440,14 @@ inlineCommand = try $ do optional (try (string "{}"))) <|> raw -unlessParseRaw :: LP () +unlessParseRaw :: PandocMonad m => LP m () unlessParseRaw = getOption readerParseRaw >>= guard . not isBlockCommand :: String -> Bool -isBlockCommand s = s `M.member` blockCommands +isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks)) -inlineEnvironments :: M.Map String (LP Inlines) +inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines) inlineEnvironments = M.fromList [ ("displaymath", mathEnv id Nothing "displaymath") , ("math", math <$> verbEnv "math") @@ -460,7 +465,7 @@ inlineEnvironments = M.fromList , ("alignat*", mathEnv id (Just "aligned") "alignat*") ] -inlineCommands :: M.Map String (LP Inlines) +inlineCommands :: PandocMonad m => M.Map String (LP m Inlines) inlineCommands = M.fromList $ [ ("emph", extractSpaces emph <$> tok) , ("textit", extractSpaces emph <$> tok) @@ -621,7 +626,7 @@ inlineCommands = M.fromList $ -- in which case they will appear as raw latex blocks: [ "index" ] -mkImage :: [(String, String)] -> String -> LP Inlines +mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines mkImage options src = do let replaceTextwidth (k,v) = case numUnit v of Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") @@ -645,7 +650,7 @@ unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs unescapeURL (x:xs) = x:unescapeURL xs unescapeURL [] = "" -enquote :: LP Inlines +enquote :: PandocMonad m => LP m Inlines enquote = do skipopts context <- stateQuoteContext <$> getState @@ -653,18 +658,18 @@ enquote = do then singleQuoted <$> withQuoteContext InSingleQuote tok else doubleQuoted <$> withQuoteContext InDoubleQuote tok -doverb :: LP Inlines +doverb :: PandocMonad m => LP m Inlines doverb = do marker <- anyChar code <$> manyTill (satisfy (/='\n')) (char marker) -doLHSverb :: LP Inlines +doLHSverb :: PandocMonad m => LP m Inlines doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|') -lit :: String -> LP Inlines +lit :: String -> LP m Inlines lit = pure . str -accent :: (Char -> String) -> Inlines -> LP Inlines +accent :: (Char -> String) -> Inlines -> LP m Inlines accent f ils = case toList ils of (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys) @@ -870,53 +875,53 @@ breve 'U' = "Ŭ" breve 'u' = "ŭ" breve c = [c] -tok :: LP Inlines +tok :: PandocMonad m => LP m Inlines tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar -opt :: LP Inlines +opt :: PandocMonad m => LP m Inlines opt = bracketed inline -rawopt :: LP String +rawopt :: PandocMonad m => LP m String rawopt = do contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|> try (string "\\[") <|> rawopt) optional sp return $ "[" ++ contents ++ "]" -skipopts :: LP () +skipopts :: PandocMonad m => LP m () skipopts = skipMany rawopt -- opts in angle brackets are used in beamer -rawangle :: LP () +rawangle :: PandocMonad m => LP m () rawangle = try $ do char '<' skipMany (noneOf ">") char '>' return () -skipangles :: LP () +skipangles :: PandocMonad m => LP m () skipangles = skipMany rawangle -inlineText :: LP Inlines +inlineText :: PandocMonad m => LP m Inlines inlineText = str <$> many1 inlineChar -inlineChar :: LP Char +inlineChar :: PandocMonad m => LP m Char inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n" -environment :: LP Blocks +environment :: PandocMonad m => LP m Blocks environment = do controlSeq "begin" name <- braced M.findWithDefault mzero name environments <|> rawEnv name -inlineEnvironment :: LP Inlines +inlineEnvironment :: PandocMonad m => LP m Inlines inlineEnvironment = try $ do controlSeq "begin" name <- braced M.findWithDefault mzero name inlineEnvironments -rawEnv :: String -> LP Blocks +rawEnv :: PandocMonad m => String -> LP m Blocks rawEnv name = do parseRaw <- getOption readerParseRaw rawOptions <- mconcat <$> many rawopt @@ -928,50 +933,7 @@ rawEnv name = do ---- -type IncludeParser = ParserT String [String] IO String - --- | Replace "include" commands with file contents. -handleIncludes :: String -> IO (Either PandocError String) -handleIncludes s = mapLeft (ParsecError s) <$> runParserT includeParser' [] "input" s - -includeParser' :: IncludeParser -includeParser' = - concat <$> many (comment' <|> escaped' <|> blob' <|> include' - <|> startMarker' <|> endMarker' - <|> verbCmd' <|> verbatimEnv' <|> backslash') - -comment' :: IncludeParser -comment' = do - char '%' - xs <- manyTill anyChar newline - return ('%':xs ++ "\n") - -escaped' :: IncludeParser -escaped' = try $ string "\\%" <|> string "\\\\" - -verbCmd' :: IncludeParser -verbCmd' = fmap snd <$> - withRaw $ try $ do - string "\\verb" - c <- anyChar - manyTill anyChar (char c) - -verbatimEnv' :: IncludeParser -verbatimEnv' = fmap snd <$> - withRaw $ try $ do - string "\\begin" - name <- braced' - guard $ name `elem` ["verbatim", "Verbatim", "BVerbatim", - "lstlisting", "minted", "alltt", "comment"] - manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}") - -blob' :: IncludeParser -blob' = try $ many1 (noneOf "\\%") - -backslash' :: IncludeParser -backslash' = string "\\" - -braced' :: IncludeParser +braced' :: PandocMonad m => LP m String braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}') maybeAddExtension :: String -> FilePath -> FilePath @@ -980,8 +942,8 @@ maybeAddExtension ext fp = then addExtension fp ext else fp -include' :: IncludeParser -include' = do +include :: PandocMonad m => LP m Blocks +include = do fs' <- try $ do char '\\' name <- try (string "include") @@ -993,59 +955,45 @@ include' = do return $ if name == "usepackage" then map (maybeAddExtension ".sty") fs else map (maybeAddExtension ".tex") fs - pos <- getPosition - containers <- getState - let fn = case containers of - (f':_) -> f' - [] -> "input" + oldPos <- getPosition + oldInput <- getInput -- now process each include file in order... - rest <- getInput - results' <- forM fs' (\f -> do + mconcat <$> forM fs' (\f -> do + containers <- stateContainers <$> getState when (f `elem` containers) $ - fail "Include file loop!" + throwError $ PandocParseError $ "Include file loop at " ++ show oldPos + updateState $ \s -> s{ stateContainers = f : stateContainers s } contents <- lift $ readTeXFile f - return $ "\\PandocStartInclude{" ++ f ++ "}" ++ - contents ++ "\\PandocEndInclude{" ++ - fn ++ "}{" ++ show (sourceLine pos) ++ "}{" - ++ show (sourceColumn pos) ++ "}") - setInput $ concat results' ++ rest - return "" - -startMarker' :: IncludeParser -startMarker' = try $ do - string "\\PandocStartInclude" - fn <- braced' - updateState (fn:) - setPosition $ newPos fn 1 1 - return $ "\\PandocStartInclude{" ++ fn ++ "}" - -endMarker' :: IncludeParser -endMarker' = try $ do - string "\\PandocEndInclude" - fn <- braced' - ln <- braced' - co <- braced' - updateState tail - setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co) - return $ "\\PandocEndInclude{" ++ fn ++ "}{" ++ ln ++ "}{" ++ - co ++ "}" - -readTeXFile :: FilePath -> IO String + setPosition $ newPos f 1 1 + setInput contents + bs <- blocks + setInput oldInput + setPosition oldPos + updateState $ \s -> s{ stateContainers = tail $ stateContainers s } + return bs) + +readTeXFile :: PandocMonad m => FilePath -> m String readTeXFile f = do - texinputs <- E.catch (getEnv "TEXINPUTS") $ \(_ :: E.SomeException) -> - return "." - let ds = splitBy (==':') texinputs - readFileFromDirs ds f - -readFileFromDirs :: [FilePath] -> FilePath -> IO String -readFileFromDirs [] _ = return "" -readFileFromDirs (d:ds) f = - E.catch (UTF8.readFile $ d </> f) $ \(_ :: E.SomeException) -> - readFileFromDirs ds f + texinputs <- fromMaybe "." <$> lookupEnv "TEXINPUTS" + readFileFromDirs (splitBy (==':') texinputs) f + +readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m String +readFileFromDirs [] f = do + warning $ "Could not load include file " ++ f ++ ", skipping." + return "" +readFileFromDirs (d:ds) f = do + res <- readFileLazy' (d </> f) + case res of + Right s -> return s + Left _ -> readFileFromDirs ds f + +readFileLazy' :: PandocMonad m => FilePath -> m (Either PandocError String) +readFileLazy' f = catchError ((Right . UTF8.toStringLazy) <$> readFileLazy f) $ + \(e :: PandocError) -> return (Left e) ---- -keyval :: LP (String, String) +keyval :: PandocMonad m => LP m (String, String) keyval = try $ do key <- many1 alphaNum val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\') @@ -1055,25 +1003,25 @@ keyval = try $ do return (key, val) -keyvals :: LP [(String, String)] +keyvals :: PandocMonad m => LP m [(String, String)] keyvals = try $ char '[' *> manyTill keyval (char ']') -alltt :: String -> LP Blocks +alltt :: PandocMonad m => String -> LP m Blocks alltt t = walk strToCode <$> parseFromString blocks (substitute " " "\\ " $ substitute "%" "\\%" $ intercalate "\\\\\n" $ lines t) where strToCode (Str s) = Code nullAttr s strToCode x = x -rawLaTeXBlock :: LP String +rawLaTeXBlock :: PandocMonad m => LP m String rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) -rawLaTeXInline :: LP Inline +rawLaTeXInline :: PandocMonad m => LP m Inline rawLaTeXInline = do raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand) RawInline "latex" <$> applyMacros' raw -addImageCaption :: Blocks -> LP Blocks +addImageCaption :: PandocMonad m => Blocks -> LP m Blocks addImageCaption = walkM go where go (Image attr alt (src,tit)) = do mbcapt <- stateCaption <$> getState @@ -1082,7 +1030,7 @@ addImageCaption = walkM go Nothing -> Image attr alt (src,tit) go x = return x -addTableCaption :: Blocks -> LP Blocks +addTableCaption :: PandocMonad m => Blocks -> LP m Blocks addTableCaption = walkM go where go (Table c als ws hs rs) = do mbcapt <- stateCaption <$> getState @@ -1091,7 +1039,7 @@ addTableCaption = walkM go Nothing -> Table c als ws hs rs go x = return x -environments :: M.Map String (LP Blocks) +environments :: PandocMonad m => M.Map String (LP m Blocks) environments = M.fromList [ ("document", env "document" blocks <* skipMany anyChar) , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) @@ -1159,7 +1107,7 @@ environments = M.fromList , ("alignat*", mathEnv para (Just "aligned") "alignat*") ] -letterContents :: LP Blocks +letterContents :: PandocMonad m => LP m Blocks letterContents = do bs <- blocks st <- getState @@ -1170,7 +1118,7 @@ letterContents = do _ -> mempty return $ addr <> bs -- sig added by \closing -closing :: LP Blocks +closing :: PandocMonad m => LP m Blocks closing = do contents <- tok st <- getState @@ -1184,17 +1132,17 @@ closing = do _ -> mempty return $ para (trimInlines contents) <> sigs -item :: LP Blocks +item :: PandocMonad m => LP m Blocks item = blocks *> controlSeq "item" *> skipopts *> blocks -looseItem :: LP Blocks +looseItem :: PandocMonad m => LP m Blocks looseItem = do ctx <- stateParserContext `fmap` getState if ctx == ListItemState then mzero else return mempty -descItem :: LP (Inlines, [Blocks]) +descItem :: PandocMonad m => LP m (Inlines, [Blocks]) descItem = do blocks -- skip blocks before item controlSeq "item" @@ -1203,12 +1151,12 @@ descItem = do bs <- blocks return (ils, [bs]) -env :: String -> LP a -> LP a +env :: PandocMonad m => String -> LP m a -> LP m a env name p = p <* (try (controlSeq "end" *> braced >>= guard . (== name)) <?> ("\\end{" ++ name ++ "}")) -listenv :: String -> LP a -> LP a +listenv :: PandocMonad m => String -> LP m a -> LP m a listenv name p = try $ do oldCtx <- stateParserContext `fmap` getState updateState $ \st -> st{ stateParserContext = ListItemState } @@ -1216,14 +1164,14 @@ listenv name p = try $ do updateState $ \st -> st{ stateParserContext = oldCtx } return res -mathEnv :: (Inlines -> a) -> Maybe String -> String -> LP a +mathEnv :: PandocMonad m => (Inlines -> a) -> Maybe String -> String -> LP m a mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name) where inner x = case innerEnv of Nothing -> x Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ "\\end{" ++ y ++ "}" -verbEnv :: String -> LP String +verbEnv :: PandocMonad m => String -> LP m String verbEnv name = do skipopts optional blankline @@ -1231,7 +1179,7 @@ verbEnv name = do res <- manyTill anyChar endEnv return $ stripTrailingNewlines res -fancyverbEnv :: String -> LP Blocks +fancyverbEnv :: PandocMonad m => String -> LP m Blocks fancyverbEnv name = do options <- option [] keyvals let kvs = [ (if k == "firstnumber" @@ -1242,7 +1190,7 @@ fancyverbEnv name = do let attr = ("",classes,kvs) codeBlockWith attr <$> verbEnv name -orderedList' :: LP Blocks +orderedList' :: PandocMonad m => LP m Blocks orderedList' = do optional sp (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ @@ -1259,19 +1207,20 @@ orderedList' = do bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs -paragraph :: LP Blocks +paragraph :: PandocMonad m => LP m Blocks paragraph = do x <- trimInlines . mconcat <$> many1 inline if x == mempty then return mempty else return $ para x -preamble :: LP Blocks +preamble :: PandocMonad m => LP m Blocks preamble = mempty <$> manyTill preambleBlock beginDoc where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" preambleBlock = void comment <|> void sp <|> void blanklines + <|> void include <|> void macro <|> void blockCommand <|> void anyControlSeq @@ -1292,7 +1241,7 @@ addSuffix s ks@(_:_) = in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] addSuffix _ _ = [] -simpleCiteArgs :: LP [Citation] +simpleCiteArgs :: PandocMonad m => LP m [Citation] simpleCiteArgs = try $ do first <- optionMaybe $ toList <$> opt second <- optionMaybe $ toList <$> opt @@ -1312,7 +1261,7 @@ simpleCiteArgs = try $ do } return $ addPrefix pre $ addSuffix suf $ map conv keys -citationLabel :: LP String +citationLabel :: PandocMonad m => LP m String citationLabel = optional sp *> (many1 (satisfy isBibtexKeyChar) <* optional sp @@ -1320,7 +1269,7 @@ citationLabel = optional sp *> <* optional sp) where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String) -cites :: CitationMode -> Bool -> LP [Citation] +cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] cites mode multi = try $ do cits <- if multi then many1 simpleCiteArgs @@ -1332,12 +1281,12 @@ cites mode multi = try $ do [] -> [] _ -> map (\a -> a {citationMode = mode}) cs -citation :: String -> CitationMode -> Bool -> LP Inlines +citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines citation name mode multi = do (c,raw) <- withRaw $ cites mode multi return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw) -complexNatbibCitation :: CitationMode -> LP Inlines +complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines complexNatbibCitation mode = try $ do let ils = (toList . trimInlines . mconcat) <$> many (notFollowedBy (oneOf "\\};") >> inline) @@ -1359,7 +1308,7 @@ complexNatbibCitation mode = try $ do -- tables -parseAligns :: LP [Alignment] +parseAligns :: PandocMonad m => LP m [Alignment] parseAligns = try $ do char '{' let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) @@ -1375,7 +1324,7 @@ parseAligns = try $ do spaces return aligns' -hline :: LP () +hline :: PandocMonad m => LP m () hline = try $ do spaces' controlSeq "hline" <|> @@ -1389,16 +1338,16 @@ hline = try $ do optional $ bracketed (many1 (satisfy (/=']'))) return () -lbreak :: LP () +lbreak :: PandocMonad m => LP m () lbreak = () <$ try (spaces' *> (controlSeq "\\" <|> controlSeq "tabularnewline") <* spaces') -amp :: LP () +amp :: PandocMonad m => LP m () amp = () <$ try (spaces' *> char '&' <* spaces') -parseTableRow :: Int -- ^ number of columns - -> LP [Blocks] +parseTableRow :: PandocMonad m => Int -- ^ number of columns + -> LP m [Blocks] parseTableRow cols = try $ do let tableCellInline = notFollowedBy (amp <|> lbreak) >> inline let minipage = try $ controlSeq "begin" *> string "{minipage}" *> @@ -1415,10 +1364,10 @@ parseTableRow cols = try $ do spaces' return cells'' -spaces' :: LP () +spaces' :: PandocMonad m => LP m () spaces' = spaces *> skipMany (comment *> spaces) -simpTable :: Bool -> LP Blocks +simpTable :: PandocMonad m => Bool -> LP m Blocks simpTable hasWidthParameter = try $ do when hasWidthParameter $ () <$ (spaces' >> tok) skipopts @@ -1442,20 +1391,6 @@ simpTable hasWidthParameter = try $ do lookAhead $ controlSeq "end" -- make sure we're at end return $ table mempty (zip aligns (repeat 0)) header'' rows -startInclude :: LP Blocks -startInclude = do - fn <- braced - setPosition $ newPos fn 1 1 - return mempty - -endInclude :: LP Blocks -endInclude = do - fn <- braced - ln <- braced - co <- braced - setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co) - return mempty - removeDoubleQuotes :: String -> String removeDoubleQuotes ('"':xs) = case reverse xs of diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index cd35a8738..1d8f7c78e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,5 +1,6 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 {-# LANGUAGE ScopedTypeVariables #-} + {- Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> @@ -29,8 +30,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of markdown-formatted plain text to 'Pandoc' document. -} -module Text.Pandoc.Readers.Markdown ( readMarkdown, - readMarkdownWithWarnings ) where +module Text.Pandoc.Readers.Markdown ( readMarkdown ) where import Data.List ( transpose, sortBy, findIndex, intercalate ) import qualified Data.Map as M @@ -61,28 +61,25 @@ import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockT import Control.Monad import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup -import qualified Data.Set as Set import Text.Printf (printf) -import Debug.Trace (trace) import Data.Monoid ((<>)) -import Text.Pandoc.Error +import Control.Monad.Trans (lift) +import Control.Monad.Except (throwError, catchError) +import Text.Pandoc.Class (PandocMonad, report) +import qualified Text.Pandoc.Class as P -type MarkdownParser = Parser [Char] ParserState +type MarkdownParser m = ParserT [Char] ParserState m -- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: ReaderOptions -- ^ Reader options +readMarkdown :: PandocMonad m + => ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readMarkdown opts s = - (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") - --- | Read markdown from an input string and return a pair of a Pandoc document --- and a list of warnings. -readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError (Pandoc, [String]) -readMarkdownWithWarnings opts s = - (readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") + -> m Pandoc +readMarkdown opts s = do + parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") + case parsed of + Right result -> return result + Left e -> throwError e trimInlinesF :: F Inlines -> F Inlines trimInlinesF = liftM trimInlines @@ -117,25 +114,25 @@ isBlank _ = False -- -- | Succeeds when we're in list context. -inList :: MarkdownParser () +inList :: PandocMonad m => MarkdownParser m () inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -spnl :: Parser [Char] st () +spnl :: PandocMonad m => ParserT [Char] st m () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -indentSpaces :: MarkdownParser String +indentSpaces :: PandocMonad m => MarkdownParser m String indentSpaces = try $ do tabStop <- getOption readerTabStop count tabStop (char ' ') <|> string "\t" <?> "indentation" -nonindentSpaces :: MarkdownParser String +nonindentSpaces :: PandocMonad m => MarkdownParser m String nonindentSpaces = do tabStop <- getOption readerTabStop sps <- many (char ' ') @@ -144,17 +141,17 @@ nonindentSpaces = do else unexpected "indented line" -- returns number of spaces parsed -skipNonindentSpaces :: MarkdownParser Int +skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int skipNonindentSpaces = do tabStop <- getOption readerTabStop atMostSpaces (tabStop - 1) <* notFollowedBy (char ' ') -atMostSpaces :: Int -> MarkdownParser Int +atMostSpaces :: PandocMonad m => Int -> MarkdownParser m Int atMostSpaces n | n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0 | otherwise = return 0 -litChar :: MarkdownParser Char +litChar :: PandocMonad m => MarkdownParser m Char litChar = escapedChar' <|> characterReference <|> noneOf "\n" @@ -162,14 +159,14 @@ litChar = escapedChar' -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: MarkdownParser (F Inlines) +inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines) inlinesInBalancedBrackets = do char '[' (_, raw) <- withRaw $ charsInBalancedBrackets 1 guard $ not $ null raw parseFromString (trimInlinesF . mconcat <$> many inline) (init raw) -charsInBalancedBrackets :: Int -> MarkdownParser () +charsInBalancedBrackets :: PandocMonad m => Int -> MarkdownParser m () charsInBalancedBrackets 0 = return () charsInBalancedBrackets openBrackets = (char '[' >> charsInBalancedBrackets (openBrackets + 1)) @@ -185,7 +182,7 @@ charsInBalancedBrackets openBrackets = -- document structure -- -rawTitleBlockLine :: MarkdownParser String +rawTitleBlockLine :: PandocMonad m => MarkdownParser m String rawTitleBlockLine = do char '%' skipSpaces @@ -196,13 +193,13 @@ rawTitleBlockLine = do anyLine return $ trim $ unlines (first:rest) -titleLine :: MarkdownParser (F Inlines) +titleLine :: PandocMonad m => MarkdownParser m (F Inlines) titleLine = try $ do raw <- rawTitleBlockLine res <- parseFromString (many inline) raw return $ trimInlinesF $ mconcat res -authorsLine :: MarkdownParser (F [Inlines]) +authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines]) authorsLine = try $ do raw <- rawTitleBlockLine let sep = (char ';' <* spaces) <|> newline @@ -212,16 +209,16 @@ authorsLine = try $ do sep sequence <$> parseFromString pAuthors raw -dateLine :: MarkdownParser (F Inlines) +dateLine :: PandocMonad m => MarkdownParser m (F Inlines) dateLine = try $ do raw <- rawTitleBlockLine res <- parseFromString (many inline) raw return $ trimInlinesF $ mconcat res -titleBlock :: MarkdownParser () +titleBlock :: PandocMonad m => MarkdownParser m () titleBlock = pandocTitleBlock <|> mmdTitleBlock -pandocTitleBlock :: MarkdownParser () +pandocTitleBlock :: PandocMonad m => MarkdownParser m () pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block lookAhead (char '%') @@ -239,7 +236,15 @@ pandocTitleBlock = try $ do $ nullMeta updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } -yamlMetaBlock :: MarkdownParser (F Blocks) + +-- Adapted from solution at +-- http://stackoverflow.com/a/29448764/1901888 +foldrWithKeyM :: Monad m => (k -> b -> a -> m a) -> a -> H.HashMap k b -> m a +foldrWithKeyM f acc = H.foldrWithKey f' (return acc) + where + f' k b ma = ma >>= \a -> f k b a + +yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks) yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block pos <- getPosition @@ -252,18 +257,20 @@ yamlMetaBlock = try $ do optional blanklines opts <- stateOptions <$> getState meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> return $ return $ - H.foldrWithKey (\k v m -> - if ignorable k - then m - else case yamlToMeta opts v of - Left _ -> m - Right v' -> B.setMeta (T.unpack k) v' m) - nullMeta hashmap - Right Yaml.Null -> return $ return nullMeta + Right (Yaml.Object hashmap) -> + foldrWithKeyM + (\k v m -> do + if ignorable k + then return m + else (do v' <- lift $ yamlToMeta opts v + return $ B.setMeta (T.unpack k) v' m) + `catchError` + (\_ -> return m) + ) nullMeta hashmap + Right Yaml.Null -> return nullMeta Right _ -> do - addWarning (Just pos) "YAML header is not an object" - return $ return nullMeta + P.warningWithPos pos "YAML header is not an object" + return nullMeta Left err' -> do case err' of InvalidYaml (Just YamlParseException{ @@ -273,24 +280,24 @@ yamlMetaBlock = try $ do yamlLine = yline , yamlColumn = ycol }}) -> - addWarning (Just $ setSourceLine + P.warningWithPos (setSourceLine (setSourceColumn pos (sourceColumn pos + ycol)) (sourceLine pos + 1 + yline)) $ "Could not parse YAML header: " ++ problem - _ -> addWarning (Just pos) + _ -> P.warningWithPos pos $ "Could not parse YAML header: " ++ show err' - return $ return nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + return nullMeta + updateState $ \st -> st{ stateMeta' = stateMeta' st <> (return meta') } return mempty -- ignore fields ending with _ ignorable :: Text -> Bool ignorable t = (T.pack "_") `T.isSuffixOf` t -toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue +toMetaValue :: PandocMonad m => ReaderOptions -> Text -> m MetaValue toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) where toMeta p = @@ -301,13 +308,13 @@ toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) | otherwise -> MetaInlines xs Pandoc _ bs -> MetaBlocks bs endsWithNewline t = T.pack "\n" `T.isSuffixOf` t - opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts} - meta_exts = Set.fromList [ Ext_pandoc_title_block - , Ext_mmd_title_block - , Ext_yaml_metadata_block - ] + opts' = opts{readerExtensions = + disableExtension Ext_pandoc_title_block $ + disableExtension Ext_mmd_title_block $ + disableExtension Ext_yaml_metadata_block $ + readerExtensions opts } -yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue +yamlToMeta :: PandocMonad m => ReaderOptions -> Yaml.Value -> m MetaValue yamlToMeta opts (Yaml.String t) = toMetaValue opts t yamlToMeta _ (Yaml.Number n) -- avoid decimal points for numbers that don't need them: @@ -327,10 +334,10 @@ yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m -> (return M.empty) o yamlToMeta _ _ = return $ MetaString "" -stopLine :: MarkdownParser () +stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () -mmdTitleBlock :: MarkdownParser () +mmdTitleBlock :: PandocMonad m => MarkdownParser m () mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block firstPair <- kvPair False @@ -340,7 +347,7 @@ mmdTitleBlock = try $ do updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: Bool -> MarkdownParser (String, MetaValue) +kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue) kvPair allowEmpty = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') val <- trim <$> manyTill anyChar @@ -350,7 +357,7 @@ kvPair allowEmpty = try $ do let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val return (key',val') -parseMarkdown :: MarkdownParser Pandoc +parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc parseMarkdown = do -- markdown allows raw HTML updateState $ \state -> state { stateOptions = @@ -375,7 +382,7 @@ softBreakFilter (x:SoftBreak:y:zs) = _ -> x:SoftBreak:y:zs softBreakFilter xs = xs -referenceKey :: MarkdownParser (F Blocks) +referenceKey :: PandocMonad m => MarkdownParser m (F Blocks) referenceKey = try $ do pos <- getPosition skipNonindentSpaces @@ -402,18 +409,18 @@ referenceKey = try $ do let oldkeys = stateKeys st let key = toKey raw case M.lookup key oldkeys of - Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'" + Just _ -> P.warningWithPos pos $ "Duplicate link reference `" ++ raw ++ "'" Nothing -> return () updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty -referenceTitle :: MarkdownParser String +referenceTitle :: PandocMonad m => MarkdownParser m String referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar -- A link title in quotes -quotedTitle :: Char -> MarkdownParser String +quotedTitle :: PandocMonad m => Char -> MarkdownParser m String quotedTitle c = try $ do char c notFollowedBy spaces @@ -425,7 +432,7 @@ quotedTitle c = try $ do -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for -- an abbreviation. -abbrevKey :: MarkdownParser (F Blocks) +abbrevKey :: PandocMonad m => MarkdownParser m (F Blocks) abbrevKey = do guardEnabled Ext_abbreviations try $ do @@ -436,23 +443,23 @@ abbrevKey = do blanklines return $ return mempty -noteMarker :: MarkdownParser String +noteMarker :: PandocMonad m => MarkdownParser m String noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') -rawLine :: MarkdownParser String +rawLine :: PandocMonad m => MarkdownParser m String rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: MarkdownParser String +rawLines :: PandocMonad m => MarkdownParser m String rawLines = do first <- anyLine rest <- many rawLine return $ unlines (first:rest) -noteBlock :: MarkdownParser (F Blocks) +noteBlock :: PandocMonad m => MarkdownParser m (F Blocks) noteBlock = try $ do pos <- getPosition skipNonindentSpaces @@ -468,7 +475,7 @@ noteBlock = try $ do let newnote = (ref, parsed) oldnotes <- stateNotes' <$> getState case lookup ref oldnotes of - Just _ -> addWarning (Just pos) $ "Duplicate note reference `" ++ ref ++ "'" + Just _ -> P.warningWithPos pos $ "Duplicate note reference `" ++ ref ++ "'" Nothing -> return () updateState $ \s -> s { stateNotes' = newnote : oldnotes } return mempty @@ -477,12 +484,11 @@ noteBlock = try $ do -- parsing blocks -- -parseBlocks :: MarkdownParser (F Blocks) +parseBlocks :: PandocMonad m => MarkdownParser m (F Blocks) parseBlocks = mconcat <$> manyTill block eof -block :: MarkdownParser (F Blocks) +block :: PandocMonad m => MarkdownParser m (F Blocks) block = do - tr <- getOption readerTrace pos <- getPosition res <- choice [ mempty <$ blanklines , codeBlockFenced @@ -509,26 +515,25 @@ block = do , para , plain ] <?> "block" - when tr $ do - st <- getState - trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList $ runF res st)) (return ()) + report DEBUG $ printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList $ runF res defaultParserState) return res -- -- header blocks -- -header :: MarkdownParser (F Blocks) +header :: PandocMonad m => MarkdownParser m (F Blocks) header = setextHeader <|> atxHeader <?> "header" -atxChar :: MarkdownParser Char +atxChar :: PandocMonad m => MarkdownParser m Char atxChar = do exts <- getOption readerExtensions - return $ if Set.member Ext_literate_haskell exts - then '=' else '#' + return $ if extensionEnabled Ext_literate_haskell exts + then '=' + else '#' -atxHeader :: MarkdownParser (F Blocks) +atxHeader :: PandocMonad m => MarkdownParser m (F Blocks) atxHeader = try $ do level <- atxChar >>= many1 . char >>= return . length notFollowedBy $ guardEnabled Ext_fancy_lists >> @@ -542,7 +547,7 @@ atxHeader = try $ do <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -atxClosing :: MarkdownParser Attr +atxClosing :: PandocMonad m => MarkdownParser m Attr atxClosing = try $ do attr' <- option nullAttr (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier) @@ -553,7 +558,7 @@ atxClosing = try $ do blanklines return attr -setextHeaderEnd :: MarkdownParser Attr +setextHeaderEnd :: PandocMonad m => MarkdownParser m Attr setextHeaderEnd = try $ do attr <- option nullAttr $ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier) @@ -561,13 +566,13 @@ setextHeaderEnd = try $ do blanklines return attr -mmdHeaderIdentifier :: MarkdownParser Attr +mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr mmdHeaderIdentifier = do ident <- stripFirstAndLast . snd <$> reference skipSpaces return (ident,[],[]) -setextHeader :: MarkdownParser (F Blocks) +setextHeader :: PandocMonad m => MarkdownParser m (F Blocks) setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. @@ -585,7 +590,7 @@ setextHeader = try $ do <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -registerImplicitHeader :: String -> Attr -> MarkdownParser () +registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m () registerImplicitHeader raw attr@(ident, _, _) = do let key = toKey $ "[" ++ raw ++ "]" updateState (\s -> s { stateHeaderKeys = @@ -595,7 +600,7 @@ registerImplicitHeader raw attr@(ident, _, _) = do -- hrule block -- -hrule :: Parser [Char] st (F Blocks) +hrule :: PandocMonad m => ParserT [Char] st m (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -609,12 +614,13 @@ hrule = try $ do -- code blocks -- -indentedLine :: MarkdownParser String +indentedLine :: PandocMonad m => MarkdownParser m String indentedLine = indentSpaces >> anyLine >>= return . (++ "\n") -blockDelimiter :: (Char -> Bool) +blockDelimiter :: PandocMonad m + => (Char -> Bool) -> Maybe Int - -> Parser [Char] st Int + -> ParserT [Char] st m Int blockDelimiter f len = try $ do c <- lookAhead (satisfy f) case len of @@ -622,7 +628,7 @@ blockDelimiter f len = try $ do Nothing -> count 3 (char c) >> many (char c) >>= return . (+ 3) . length -attributes :: MarkdownParser Attr +attributes :: PandocMonad m => MarkdownParser m Attr attributes = try $ do char '{' spnl @@ -630,28 +636,28 @@ attributes = try $ do char '}' return $ foldl (\x f -> f x) nullAttr attrs -attribute :: MarkdownParser (Attr -> Attr) +attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr) attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr -identifier :: MarkdownParser String +identifier :: PandocMonad m => MarkdownParser m String identifier = do first <- letter rest <- many $ alphaNum <|> oneOf "-_:." return (first:rest) -identifierAttr :: MarkdownParser (Attr -> Attr) +identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) identifierAttr = try $ do char '#' result <- identifier return $ \(_,cs,kvs) -> (result,cs,kvs) -classAttr :: MarkdownParser (Attr -> Attr) +classAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) classAttr = try $ do char '.' result <- identifier return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs) -keyValAttr :: MarkdownParser (Attr -> Attr) +keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) keyValAttr = try $ do key <- identifier char '=' @@ -664,12 +670,12 @@ keyValAttr = try $ do "class" -> (id',cs ++ words val,kvs) _ -> (id',cs,kvs ++ [(key,val)]) -specialAttr :: MarkdownParser (Attr -> Attr) +specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) -codeBlockFenced :: MarkdownParser (F Blocks) +codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) @@ -690,7 +696,7 @@ toLanguageId = map toLower . go go "objective-c" = "objectivec" go x = x -codeBlockIndented :: MarkdownParser (F Blocks) +codeBlockIndented :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines @@ -701,7 +707,7 @@ codeBlockIndented = do return $ return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: MarkdownParser (F Blocks) +lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks) lhsCodeBlock = do guardEnabled Ext_literate_haskell (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> @@ -709,7 +715,7 @@ lhsCodeBlock = do <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> lhsCodeBlockInverseBird) -lhsCodeBlockLaTeX :: MarkdownParser String +lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline @@ -717,13 +723,13 @@ lhsCodeBlockLaTeX = try $ do blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: MarkdownParser String +lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: MarkdownParser String +lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: Char -> MarkdownParser String +lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" @@ -735,7 +741,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: Char -> Parser [Char] st String +birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -746,10 +752,10 @@ birdTrackLine c = try $ do -- block quotes -- -emailBlockQuoteStart :: MarkdownParser Char +emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ') -emailBlockQuote :: MarkdownParser [String] +emailBlockQuote :: PandocMonad m => MarkdownParser m [String] emailBlockQuote = try $ do emailBlockQuoteStart let emailLine = many $ nonEndline <|> try @@ -763,7 +769,7 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: MarkdownParser (F Blocks) +blockQuote :: PandocMonad m => MarkdownParser m (F Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: @@ -774,7 +780,7 @@ blockQuote = do -- list blocks -- -bulletListStart :: MarkdownParser () +bulletListStart :: PandocMonad m => MarkdownParser m () bulletListStart = try $ do optional newline -- if preceded by a Plain block in a list context startpos <- sourceColumn <$> getPosition @@ -786,7 +792,7 @@ bulletListStart = try $ do lookAhead (newline <|> spaceChar) () <$ atMostSpaces (tabStop - (endpos - startpos)) -anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim) +anyOrderedListStart :: PandocMonad m => MarkdownParser m (Int, ListNumberStyle, ListNumberDelim) anyOrderedListStart = try $ do optional newline -- if preceded by a Plain block in a list context startpos <- sourceColumn <$> getPosition @@ -810,10 +816,10 @@ anyOrderedListStart = try $ do atMostSpaces (tabStop - (endpos - startpos)) return res -listStart :: MarkdownParser () +listStart :: PandocMonad m => MarkdownParser m () listStart = bulletListStart <|> (anyOrderedListStart >> return ()) -listLine :: MarkdownParser String +listLine :: PandocMonad m => MarkdownParser m String listLine = try $ do notFollowedBy' (do indentSpaces many spaceChar @@ -822,7 +828,7 @@ listLine = try $ do optional (() <$ indentSpaces) listLineCommon -listLineCommon :: MarkdownParser String +listLineCommon :: PandocMonad m => MarkdownParser m String listLineCommon = concat <$> manyTill ( many1 (satisfy $ \c -> c /= '\n' && c /= '<') <|> liftM snd (htmlTag isCommentTag) @@ -830,8 +836,9 @@ listLineCommon = concat <$> manyTill ) newline -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: MarkdownParser a - -> MarkdownParser String +rawListItem :: PandocMonad m + => MarkdownParser m a + -> MarkdownParser m String rawListItem start = try $ do start first <- listLineCommon @@ -842,21 +849,21 @@ rawListItem start = try $ do -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: MarkdownParser String +listContinuation :: PandocMonad m => MarkdownParser m String listContinuation = try $ do lookAhead indentSpaces result <- many1 listContinuationLine blanks <- many blankline return $ concat result ++ blanks -notFollowedByHtmlCloser :: MarkdownParser () +notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m () notFollowedByHtmlCloser = do inHtmlBlock <- stateInHtmlBlock <$> getState case inHtmlBlock of Just t -> notFollowedBy' $ htmlTag (~== TagClose t) Nothing -> return () -listContinuationLine :: MarkdownParser String +listContinuationLine :: PandocMonad m => MarkdownParser m String listContinuationLine = try $ do notFollowedBy blankline notFollowedBy' listStart @@ -865,8 +872,9 @@ listContinuationLine = try $ do result <- anyLine return $ result ++ "\n" -listItem :: MarkdownParser a - -> MarkdownParser (F Blocks) +listItem :: PandocMonad m + => MarkdownParser m a + -> MarkdownParser m (F Blocks) listItem start = try $ do first <- rawListItem start continuations <- many listContinuation @@ -882,7 +890,7 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: MarkdownParser (F Blocks) +orderedList :: PandocMonad m => MarkdownParser m (F Blocks) orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart unless (style `elem` [DefaultStyle, Decimal, Example] && @@ -901,16 +909,16 @@ orderedList = try $ do atMostSpaces (tabStop - (endpos - startpos)) return res ) start' <- option 1 $ guardEnabled Ext_startnum >> return start - return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items + return $ B.orderedListWith (start', style, delim) <$> fmap compactify items -bulletList :: MarkdownParser (F Blocks) +bulletList :: PandocMonad m => MarkdownParser m (F Blocks) bulletList = do items <- fmap sequence $ many1 $ listItem bulletListStart - return $ B.bulletList <$> fmap compactify' items + return $ B.bulletList <$> fmap compactify items -- definition lists -defListMarker :: MarkdownParser () +defListMarker :: PandocMonad m => MarkdownParser m () defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' @@ -921,7 +929,7 @@ defListMarker = do else mzero return () -definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks])) +definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Blocks])) definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact @@ -930,7 +938,7 @@ definitionListItem compact = try $ do optional blanklines return $ liftM2 (,) term (sequence contents) -defRawBlock :: Bool -> MarkdownParser String +defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String defRawBlock compact = try $ do hasBlank <- option False $ blankline >> return True defListMarker @@ -952,7 +960,7 @@ defRawBlock compact = try $ do return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++ if hasBlank || not (null cont) then "\n\n" else "" -definitionList :: MarkdownParser (F Blocks) +definitionList :: PandocMonad m => MarkdownParser m (F Blocks) definitionList = try $ do lookAhead (anyLine >> optional (blankline >> notFollowedBy (table >> return ())) >> @@ -960,13 +968,13 @@ definitionList = try $ do defListMarker) compactDefinitionList <|> normalDefinitionList -compactDefinitionList :: MarkdownParser (F Blocks) +compactDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks) compactDefinitionList = do guardEnabled Ext_compact_definition_lists items <- fmap sequence $ many1 $ definitionListItem True - return $ B.definitionList <$> fmap compactify'DL items + return $ B.definitionList <$> fmap compactifyDL items -normalDefinitionList :: MarkdownParser (F Blocks) +normalDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks) normalDefinitionList = do guardEnabled Ext_definition_lists items <- fmap sequence $ many1 $ definitionListItem False @@ -976,7 +984,7 @@ normalDefinitionList = do -- paragraph block -- -para :: MarkdownParser (F Blocks) +para :: PandocMonad m => MarkdownParser m (F Blocks) para = try $ do exts <- getOption readerExtensions result <- trimInlinesF . mconcat <$> many1 inline @@ -1001,25 +1009,25 @@ para = try $ do result' <- result case B.toList result' of [Image attr alt (src,tit)] - | Ext_implicit_figures `Set.member` exts -> + | Ext_implicit_figures `extensionEnabled` exts -> -- the fig: at beginning of title indicates a figure return $ B.para $ B.singleton $ Image attr alt (src,'f':'i':'g':':':tit) _ -> return $ B.para result' -plain :: MarkdownParser (F Blocks) +plain :: PandocMonad m => MarkdownParser m (F Blocks) plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline -- -- raw html -- -htmlElement :: MarkdownParser String +htmlElement :: PandocMonad m => MarkdownParser m String htmlElement = rawVerbatimBlock <|> strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) -htmlBlock :: MarkdownParser (F Blocks) +htmlBlock :: PandocMonad m => MarkdownParser m (F Blocks) htmlBlock = do guardEnabled Ext_raw_html try (do @@ -1044,24 +1052,24 @@ htmlBlock = do <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)) <|> htmlBlock' -htmlBlock' :: MarkdownParser (F Blocks) +htmlBlock' :: PandocMonad m => MarkdownParser m (F Blocks) htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines return $ return $ B.rawBlock "html" first -strictHtmlBlock :: MarkdownParser String +strictHtmlBlock :: PandocMonad m => MarkdownParser m String strictHtmlBlock = htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: MarkdownParser String +rawVerbatimBlock :: PandocMonad m => MarkdownParser m String rawVerbatimBlock = htmlInBalanced isVerbTag where isVerbTag (TagOpen "pre" _) = True isVerbTag (TagOpen "style" _) = True isVerbTag (TagOpen "script" _) = True isVerbTag _ = False -rawTeXBlock :: MarkdownParser (F Blocks) +rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex result <- (B.rawBlock "latex" . concat <$> @@ -1071,7 +1079,7 @@ rawTeXBlock = do spaces return $ return result -rawHtmlBlocks :: MarkdownParser (F Blocks) +rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks) rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag -- try to find closing tag @@ -1101,7 +1109,7 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s -- line block -- -lineBlock :: MarkdownParser (F Blocks) +lineBlock :: PandocMonad m => MarkdownParser m (F Blocks) lineBlock = try $ do guardEnabled Ext_line_blocks lines' <- lineBlockLines >>= @@ -1114,8 +1122,9 @@ lineBlock = try $ do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine :: Char - -> Parser [Char] st (Int, Int) +dashedLine :: PandocMonad m + => Char + -> ParserT [Char] st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1125,8 +1134,9 @@ dashedLine ch = do -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. -simpleTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) +simpleTableHeader :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m (F [Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -1170,16 +1180,17 @@ alignType strLst len = (False, False) -> AlignDefault -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: MarkdownParser String +tableFooter :: PandocMonad m => MarkdownParser m String tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. -tableSep :: MarkdownParser Char +tableSep :: PandocMonad m => MarkdownParser m Char tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. -rawTableLine :: [Int] - -> MarkdownParser [String] +rawTableLine :: PandocMonad m + => [Int] + -> MarkdownParser m [String] rawTableLine indices = do notFollowedBy' (blanklines <|> tableFooter) line <- many1Till anyChar newline @@ -1187,14 +1198,16 @@ rawTableLine indices = do splitStringByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). -tableLine :: [Int] - -> MarkdownParser (F [Blocks]) +tableLine :: PandocMonad m + => [Int] + -> MarkdownParser m (F [Blocks]) tableLine indices = rawTableLine indices >>= fmap sequence . mapM (parseFromString (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). -multilineRow :: [Int] - -> MarkdownParser (F [Blocks]) +multilineRow :: PandocMonad m + => [Int] + -> MarkdownParser m (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines @@ -1202,7 +1215,7 @@ multilineRow indices = do -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: MarkdownParser (F Inlines) +tableCaption :: PandocMonad m => MarkdownParser m (F Inlines) tableCaption = try $ do guardEnabled Ext_table_captions skipNonindentSpaces @@ -1210,8 +1223,9 @@ tableCaption = try $ do trimInlinesF . mconcat <$> many1 inline <* blanklines -- Parse a simple table with '---' header and one line per row. -simpleTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +simpleTable :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) simpleTable headless = do (aligns, _widths, heads', lines') <- tableWith (simpleTableHeader headless) tableLine @@ -1224,13 +1238,15 @@ simpleTable headless = do -- (which may be multiline), then the rows, -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -multilineTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +multilineTable :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) multilineTable headless = tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter -multilineTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) +multilineTableHeader :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m (F [Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do unless headless $ tableSep >> notFollowedBy blankline @@ -1261,8 +1277,8 @@ multilineTableHeader headless = try $ do -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +gridTable :: PandocMonad m => Bool -- ^ Headerless table + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) gridTable headless = tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter @@ -1271,7 +1287,7 @@ gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ trimr line -gridPart :: Char -> Parser [Char] st ((Int, Int), Alignment) +gridPart :: PandocMonad m => Char -> ParserT [Char] st m ((Int, Int), Alignment) gridPart ch = do leftColon <- option False (True <$ char ':') dashes <- many1 (char ch) @@ -1286,7 +1302,7 @@ gridPart ch = do (False, False) -> AlignDefault return ((lengthDashes, lengthDashes + 1), alignment) -gridDashedLines :: Char -> Parser [Char] st [((Int, Int), Alignment)] +gridDashedLines :: PandocMonad m => Char -> ParserT [Char] st m [((Int, Int), Alignment)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: String -> String @@ -1294,12 +1310,12 @@ removeFinalBar = reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse -- | Separator between rows of grid table. -gridTableSep :: Char -> MarkdownParser Char +gridTableSep :: PandocMonad m => Char -> MarkdownParser m Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. -gridTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) +gridTableHeader :: PandocMonad m => Bool -- ^ Headerless table + -> MarkdownParser m (F [Blocks], [Alignment], [Int]) gridTableHeader headless = try $ do optional blanklines dashes <- gridDashedLines '-' @@ -1320,20 +1336,20 @@ gridTableHeader headless = try $ do heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads return (heads, aligns, indices) -gridTableRawLine :: [Int] -> MarkdownParser [String] +gridTableRawLine :: PandocMonad m => [Int] -> MarkdownParser m [String] gridTableRawLine indices = do char '|' line <- anyLine return (gridTableSplitLine indices line) -- | Parse row of grid table. -gridTableRow :: [Int] - -> MarkdownParser (F [Blocks]) +gridTableRow :: PandocMonad m => [Int] + -> MarkdownParser m (F [Blocks]) gridTableRow indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines - fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols) + fmap compactify <$> fmap sequence (mapM (parseFromString parseBlocks) cols) removeOneLeadingSpace :: [String] -> [String] removeOneLeadingSpace xs = @@ -1344,10 +1360,10 @@ removeOneLeadingSpace xs = startsWithSpace (y:_) = y == ' ' -- | Parse footer for a grid table. -gridTableFooter :: MarkdownParser [Char] +gridTableFooter :: PandocMonad m => MarkdownParser m [Char] gridTableFooter = blanklines -pipeBreak :: MarkdownParser ([Alignment], [Int]) +pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int]) pipeBreak = try $ do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1359,7 +1375,7 @@ pipeBreak = try $ do blankline return $ unzip (first:rest) -pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar @@ -1377,13 +1393,13 @@ pipeTable = try $ do else replicate (length aligns) 0.0 return $ (aligns, widths, heads', sequence lines'') -sepPipe :: MarkdownParser () +sepPipe :: PandocMonad m => MarkdownParser m () sepPipe = try $ do char '|' <|> char '+' notFollowedBy blankline -- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: MarkdownParser (F [Blocks]) +pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks]) pipeTableRow = try $ do scanForPipe skipMany spaceChar @@ -1399,14 +1415,14 @@ pipeTableRow = try $ do blankline return $ sequence cells -pipeTableCell :: MarkdownParser (F Blocks) +pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks) pipeTableCell = do result <- many inline if null result then return mempty else return $ B.plain . mconcat <$> sequence result -pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) +pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1422,7 +1438,7 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter), len) -- Succeed only if current line contains a pipe. -scanForPipe :: Parser [Char] st () +scanForPipe :: PandocMonad m => ParserT [Char] st m () scanForPipe = do inp <- getInput case break (\c -> c == '\n' || c == '|') inp of @@ -1432,11 +1448,12 @@ scanForPipe = do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in -- Text.Pandoc.Parsing. -tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int]) - -> ([Int] -> MarkdownParser (F [Blocks])) - -> MarkdownParser sep - -> MarkdownParser end - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +tableWith :: PandocMonad m + => MarkdownParser m (F [Blocks], [Alignment], [Int]) + -> ([Int] -> MarkdownParser m (F [Blocks])) + -> MarkdownParser m sep + -> MarkdownParser m end + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser @@ -1447,7 +1464,7 @@ tableWith headerParser rowParser lineParser footerParser = try $ do else widthsFromIndices numColumns indices return $ (aligns, widths, heads, lines') -table :: MarkdownParser (F Blocks) +table :: PandocMonad m => MarkdownParser m (F Blocks) table = try $ do frontCaption <- option Nothing (Just <$> tableCaption) (aligns, widths, heads, lns) <- @@ -1479,7 +1496,7 @@ table = try $ do -- inline -- -inline :: MarkdownParser (F Inlines) +inline :: PandocMonad m => MarkdownParser m (F Inlines) inline = choice [ whitespace , bareURL , str @@ -1509,7 +1526,7 @@ inline = choice [ whitespace , ltSign ] <?> "inline" -escapedChar' :: MarkdownParser Char +escapedChar' :: PandocMonad m => MarkdownParser m Char escapedChar' = try $ do char '\\' (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) @@ -1518,7 +1535,7 @@ escapedChar' = try $ do <|> (guardEnabled Ext_escaped_line_breaks >> char '\n') <|> oneOf "\\`*_{}[]()>#+-.!~\"" -escapedChar :: MarkdownParser (F Inlines) +escapedChar :: PandocMonad m => MarkdownParser m (F Inlines) escapedChar = do result <- escapedChar' case result of @@ -1527,14 +1544,14 @@ escapedChar = do return (return B.linebreak) -- "\[newline]" is a linebreak _ -> return $ return $ B.str [result] -ltSign :: MarkdownParser (F Inlines) +ltSign :: PandocMonad m => MarkdownParser m (F Inlines) ltSign = do guardDisabled Ext_raw_html <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag)) char '<' return $ return $ B.str "<" -exampleRef :: MarkdownParser (F Inlines) +exampleRef :: PandocMonad m => MarkdownParser m (F Inlines) exampleRef = try $ do guardEnabled Ext_example_lists char '@' @@ -1545,7 +1562,7 @@ exampleRef = try $ do Just n -> B.str (show n) Nothing -> B.str ('@':lab) -symbol :: MarkdownParser (F Inlines) +symbol :: PandocMonad m => MarkdownParser m (F Inlines) symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' @@ -1554,7 +1571,7 @@ symbol = do return $ return $ B.str [result] -- parses inline code, between n `s and n `s -code :: MarkdownParser (F Inlines) +code :: PandocMonad m => MarkdownParser m (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces @@ -1566,16 +1583,17 @@ code = try $ do >> attributes) return $ return $ B.codeWith attr $ trim $ concat result -math :: MarkdownParser (F Inlines) +math :: PandocMonad m => MarkdownParser m (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?> - ((getOption readerSmart >>= guard) *> (return <$> apostrophe) + (guardEnabled Ext_smart *> (return <$> apostrophe) <* notFollowedBy (space <|> satisfy isPunctuation)) -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. -enclosure :: Char - -> MarkdownParser (F Inlines) +enclosure :: PandocMonad m + => Char + -> MarkdownParser m (F Inlines) enclosure c = do -- we can't start an enclosure with _ if after a string and -- the intraword_underscores extension is enabled: @@ -1591,7 +1609,7 @@ enclosure c = do 1 -> one c mempty _ -> return (return $ B.str cs) -ender :: Char -> Int -> MarkdownParser () +ender :: PandocMonad m => Char -> Int -> MarkdownParser m () ender c n = try $ do count n (char c) guard (c == '*') @@ -1602,7 +1620,7 @@ ender c n = try $ do -- If one c, emit emph and then parse two. -- If two cs, emit strong and then parse one. -- Otherwise, emit ccc then the results. -three :: Char -> MarkdownParser (F Inlines) +three :: PandocMonad m => Char -> MarkdownParser m (F Inlines) three c = do contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline) (ender c 3 >> return ((B.strong . B.emph) <$> contents)) @@ -1612,7 +1630,7 @@ three c = do -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. -two :: Char -> F Inlines -> MarkdownParser (F Inlines) +two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines) two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) (ender c 2 >> return (B.strong <$> (prefix' <> contents))) @@ -1620,7 +1638,7 @@ two c prefix' = do -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. -one :: Char -> F Inlines -> MarkdownParser (F Inlines) +one :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines) one c prefix' = do contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline) <|> try (string [c,c] >> @@ -1629,52 +1647,53 @@ one c prefix' = do (ender c 1 >> return (B.emph <$> (prefix' <> contents))) <|> return (return (B.str [c]) <> (prefix' <> contents)) -strongOrEmph :: MarkdownParser (F Inlines) +strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines) strongOrEmph = enclosure '*' <|> enclosure '_' -- | Parses a list of inlines between start and end delimiters. -inlinesBetween :: (Show b) - => MarkdownParser a - -> MarkdownParser b - -> MarkdownParser (F Inlines) +inlinesBetween :: PandocMonad m + => (Show b) + => MarkdownParser m a + -> MarkdownParser m b + -> MarkdownParser m (F Inlines) inlinesBetween start end = (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace <* notFollowedBy' end -strikeout :: MarkdownParser (F Inlines) +strikeout :: PandocMonad m => MarkdownParser m (F Inlines) strikeout = fmap B.strikeout <$> (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: MarkdownParser (F Inlines) +superscript :: PandocMonad m => MarkdownParser m (F Inlines) superscript = fmap B.superscript <$> try (do guardEnabled Ext_superscript char '^' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) -subscript :: MarkdownParser (F Inlines) +subscript :: PandocMonad m => MarkdownParser m (F Inlines) subscript = fmap B.subscript <$> try (do guardEnabled Ext_subscript char '~' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) -whitespace :: MarkdownParser (F Inlines) +whitespace :: PandocMonad m => MarkdownParser m (F Inlines) whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: Parser [Char] st Char +nonEndline :: PandocMonad m => ParserT [Char] st m Char nonEndline = satisfy (/='\n') -str :: MarkdownParser (F Inlines) +str :: PandocMonad m => MarkdownParser m (F Inlines) str = do result <- many1 alphaNum updateLastStrPos let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) - isSmart <- getOption readerSmart + isSmart <- extensionEnabled Ext_smart <$> getOption readerExtensions if isSmart then case likelyAbbrev result of [] -> return $ return $ B.str result @@ -1699,7 +1718,7 @@ likelyAbbrev x = in map snd $ filter (\(y,_) -> y == x) abbrPairs -- an endline character that can be treated as a space, not a structural break -endline :: MarkdownParser (F Inlines) +endline :: PandocMonad m => MarkdownParser m (F Inlines) endline = try $ do newline notFollowedBy blankline @@ -1721,17 +1740,17 @@ endline = try $ do -- -- a reference label for a link -reference :: MarkdownParser (F Inlines, String) +reference :: PandocMonad m => MarkdownParser m (F Inlines, String) reference = do notFollowedBy' (string "[^") -- footnote reference withRaw $ trimInlinesF <$> inlinesInBalancedBrackets -parenthesizedChars :: MarkdownParser [Char] +parenthesizedChars :: PandocMonad m => MarkdownParser m [Char] parenthesizedChars = do result <- charsInBalanced '(' ')' litChar return $ '(' : result ++ ")" -- source for a link, with optional title -source :: MarkdownParser (String, String) +source :: PandocMonad m => MarkdownParser m (String, String) source = do char '(' skipSpaces @@ -1748,10 +1767,10 @@ source = do char ')' return (escapeURI $ trimr src, tit) -linkTitle :: MarkdownParser String +linkTitle :: PandocMonad m => MarkdownParser m String linkTitle = quotedTitle '"' <|> quotedTitle '\'' -link :: MarkdownParser (F Inlines) +link :: PandocMonad m => MarkdownParser m (F Inlines) link = try $ do st <- getState guard $ stateAllowLinks st @@ -1760,7 +1779,7 @@ link = try $ do setState $ st{ stateAllowLinks = True } regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw) -bracketedSpan :: MarkdownParser (F Inlines) +bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines) bracketedSpan = try $ do guardEnabled Ext_bracketed_spans (lab,_) <- reference @@ -1773,8 +1792,10 @@ bracketedSpan = try $ do -> return $ B.smallcaps <$> lab _ -> return $ B.spanWith attr <$> lab -regLink :: (Attr -> String -> String -> Inlines -> Inlines) - -> F Inlines -> MarkdownParser (F Inlines) +regLink :: PandocMonad m + => (Attr -> String -> String -> Inlines -> Inlines) + -> F Inlines + -> MarkdownParser m (F Inlines) regLink constructor lab = try $ do (src, tit) <- source attr <- option nullAttr $ @@ -1782,8 +1803,10 @@ regLink constructor lab = try $ do return $ constructor attr src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: (Attr -> String -> String -> Inlines -> Inlines) - -> (F Inlines, String) -> MarkdownParser (F Inlines) +referenceLink :: PandocMonad m + => (Attr -> String -> String -> Inlines -> Inlines) + -> (F Inlines, String) + -> MarkdownParser m (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False (_,raw') <- option (mempty, "") $ @@ -1824,7 +1847,7 @@ dropBrackets = reverse . dropRB . reverse . dropLB dropLB ('[':xs) = xs dropLB xs = xs -bareURL :: MarkdownParser (F Inlines) +bareURL :: PandocMonad m => MarkdownParser m (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris getState >>= guard . stateAllowLinks @@ -1832,7 +1855,7 @@ bareURL = try $ do notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") return $ return $ B.link src "" (B.str orig) -autoLink :: MarkdownParser (F Inlines) +autoLink :: PandocMonad m => MarkdownParser m (F Inlines) autoLink = try $ do getState >>= guard . stateAllowLinks char '<' @@ -1846,7 +1869,7 @@ autoLink = try $ do guardEnabled Ext_link_attributes >> attributes return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra) -image :: MarkdownParser (F Inlines) +image :: PandocMonad m => MarkdownParser m (F Inlines) image = try $ do char '!' (lab,raw) <- reference @@ -1856,7 +1879,7 @@ image = try $ do _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) -note :: MarkdownParser (F Inlines) +note :: PandocMonad m => MarkdownParser m (F Inlines) note = try $ do guardEnabled Ext_footnotes ref <- noteMarker @@ -1872,14 +1895,14 @@ note = try $ do let contents' = runF contents st{ stateNotes' = [] } return $ B.note contents' -inlineNote :: MarkdownParser (F Inlines) +inlineNote :: PandocMonad m => MarkdownParser m (F Inlines) inlineNote = try $ do guardEnabled Ext_inline_notes char '^' contents <- inlinesInBalancedBrackets return $ B.note . B.para <$> contents -rawLaTeXInline' :: MarkdownParser (F Inlines) +rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines) rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env @@ -1887,7 +1910,7 @@ rawLaTeXInline' = try $ do return $ return $ B.rawInline "tex" s -- "tex" because it might be context or latex -rawConTeXtEnvironment :: Parser [Char] st String +rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1896,14 +1919,14 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion -inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String +inBrackets :: PandocMonad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String inBrackets parser = do char '[' contents <- many parser char ']' return $ "[" ++ contents ++ "]" -spanHtml :: MarkdownParser (F Inlines) +spanHtml :: PandocMonad m => MarkdownParser m (F Inlines) spanHtml = try $ do guardEnabled Ext_native_spans (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) @@ -1918,7 +1941,7 @@ spanHtml = try $ do -> return $ B.smallcaps <$> contents _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents -divHtml :: MarkdownParser (F Blocks) +divHtml :: PandocMonad m => MarkdownParser m (F Blocks) divHtml = try $ do guardEnabled Ext_native_divs (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) @@ -1940,7 +1963,7 @@ divHtml = try $ do else -- avoid backtracing return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents -rawHtmlInline :: MarkdownParser (F Inlines) +rawHtmlInline :: PandocMonad m => MarkdownParser m (F Inlines) rawHtmlInline = do guardEnabled Ext_raw_html inHtmlBlock <- stateInHtmlBlock <$> getState @@ -1962,7 +1985,7 @@ rawHtmlInline = do emojiChars :: [Char] emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-'] -emoji :: MarkdownParser (F Inlines) +emoji :: PandocMonad m => MarkdownParser m (F Inlines) emoji = try $ do guardEnabled Ext_emoji char ':' @@ -1974,7 +1997,7 @@ emoji = try $ do -- Citations -cite :: MarkdownParser (F Inlines) +cite :: PandocMonad m => MarkdownParser m (F Inlines) cite = do guardEnabled Ext_citations citations <- textualCite @@ -1982,7 +2005,7 @@ cite = do return $ (flip B.cite (B.text raw)) <$> cs return citations -textualCite :: MarkdownParser (F Inlines) +textualCite :: PandocMonad m => MarkdownParser m (F Inlines) textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -2017,7 +2040,7 @@ textualCite = try $ do Just n -> B.str (show n) _ -> B.cite [first] $ B.str $ '@':key) -bareloc :: Citation -> MarkdownParser (F [Citation]) +bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation]) bareloc c = try $ do spnl char '[' @@ -2032,7 +2055,7 @@ bareloc c = try $ do rest' <- rest return $ c{ citationSuffix = B.toList suff' } : rest' -normalCite :: MarkdownParser (F [Citation]) +normalCite :: PandocMonad m => MarkdownParser m (F [Citation]) normalCite = try $ do char '[' spnl @@ -2041,7 +2064,7 @@ normalCite = try $ do char ']' return citations -suffix :: MarkdownParser (F Inlines) +suffix :: PandocMonad m => MarkdownParser m (F Inlines) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl @@ -2050,14 +2073,14 @@ suffix = try $ do then (B.space <>) <$> rest else rest -prefix :: MarkdownParser (F Inlines) +prefix :: PandocMonad m => MarkdownParser m (F Inlines) prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) -citeList :: MarkdownParser (F [Citation]) +citeList :: PandocMonad m => MarkdownParser m (F [Citation]) citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) -citation :: MarkdownParser (F Citation) +citation :: PandocMonad m => MarkdownParser m (F Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey @@ -2075,13 +2098,13 @@ citation = try $ do , citationHash = 0 } -smart :: MarkdownParser (F Inlines) +smart :: PandocMonad m => MarkdownParser m (F Inlines) smart = do - getOption readerSmart >>= guard + guardEnabled Ext_smart doubleQuoted <|> singleQuoted <|> choice (map (return <$>) [apostrophe, dash, ellipses]) -singleQuoted :: MarkdownParser (F Inlines) +singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines) singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ @@ -2091,7 +2114,7 @@ singleQuoted = try $ do -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote -- in the same paragraph. -doubleQuoted :: MarkdownParser (F Inlines) +doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines) doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 0dea22c53..b81d0f3e4 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -56,23 +56,26 @@ import qualified Data.Set as Set import Data.Char (isDigit, isSpace) import Data.Maybe (fromMaybe) import Text.Printf (printf) -import Debug.Trace (trace) - -import Text.Pandoc.Error +import Control.Monad.Except (throwError) +import Text.Pandoc.Class (PandocMonad, report) -- | Read mediawiki from an input string and return a Pandoc document. -readMediaWiki :: ReaderOptions -- ^ Reader options +readMediaWiki :: PandocMonad m + => ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readMediaWiki opts s = - readWith parseMediaWiki MWState{ mwOptions = opts - , mwMaxNestingLevel = 4 - , mwNextLinkNumber = 1 - , mwCategoryLinks = [] - , mwHeaderMap = M.empty - , mwIdentifierList = Set.empty - } - (s ++ "\n") + -> m Pandoc +readMediaWiki opts s = do + parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts + , mwMaxNestingLevel = 4 + , mwNextLinkNumber = 1 + , mwCategoryLinks = [] + , mwHeaderMap = M.empty + , mwIdentifierList = Set.empty + } + (s ++ "\n") + case parsed of + Right result -> return result + Left e -> throwError e data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int @@ -82,7 +85,7 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwIdentifierList :: Set.Set String } -type MWParser = Parser [Char] MWState +type MWParser m = ParserT [Char] MWState m instance HasReaderOptions MWState where extractReaderOptions = mwOptions @@ -101,7 +104,7 @@ instance HasIdentifierList MWState where -- This is used to prevent exponential blowups for things like: -- ''a'''a''a'''a''a'''a''a'''a -nested :: MWParser a -> MWParser a +nested :: PandocMonad m => MWParser m a -> MWParser m a nested p = do nestlevel <- mwMaxNestingLevel `fmap` getState guard $ nestlevel > 0 @@ -116,7 +119,7 @@ specialChars = "'[]<=&*{}|\":\\" spaceChars :: [Char] spaceChars = " \n\t" -sym :: String -> MWParser () +sym :: PandocMonad m => String -> MWParser m () sym s = () <$ try (string s) newBlockTags :: [String] @@ -137,10 +140,10 @@ eitherBlockOrInline :: [String] eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", "map", "area", "object"] -htmlComment :: MWParser () +htmlComment :: PandocMonad m => MWParser m () htmlComment = () <$ htmlTag isCommentTag -inlinesInTags :: String -> MWParser Inlines +inlinesInTags :: PandocMonad m => String -> MWParser m Inlines inlinesInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) if '/' `elem` raw -- self-closing tag @@ -148,7 +151,7 @@ inlinesInTags tag = try $ do else trimInlines . mconcat <$> manyTill inline (htmlTag (~== TagClose tag)) -blocksInTags :: String -> MWParser Blocks +blocksInTags :: PandocMonad m => String -> MWParser m Blocks blocksInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) let closer = if tag == "li" @@ -162,7 +165,7 @@ blocksInTags tag = try $ do then return mempty else mconcat <$> manyTill block closer -charsInTags :: String -> MWParser [Char] +charsInTags :: PandocMonad m => String -> MWParser m [Char] charsInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) if '/' `elem` raw -- self-closing tag @@ -173,7 +176,7 @@ charsInTags tag = try $ do -- main parser -- -parseMediaWiki :: MWParser Pandoc +parseMediaWiki :: PandocMonad m => MWParser m Pandoc parseMediaWiki = do bs <- mconcat <$> many block spaces @@ -188,9 +191,8 @@ parseMediaWiki = do -- block parsers -- -block :: MWParser Blocks +block :: PandocMonad m => MWParser m Blocks block = do - tr <- getOption readerTrace pos <- getPosition res <- mempty <$ skipMany1 blankline <|> table @@ -204,19 +206,18 @@ block = do <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para - when tr $ - trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList res)) (return ()) + report DEBUG $ printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res) return res -para :: MWParser Blocks +para :: PandocMonad m => MWParser m Blocks para = do contents <- trimInlines . mconcat <$> many1 inline if F.all (==Space) contents then return mempty else return $ B.para contents -table :: MWParser Blocks +table :: PandocMonad m => MWParser m Blocks table = do tableStart styles <- option [] parseAttrs <* blankline @@ -244,10 +245,10 @@ table = do else (replicate cols mempty, hdr:rows') return $ B.table caption cellspecs headers rows -parseAttrs :: MWParser [(String,String)] +parseAttrs :: PandocMonad m => MWParser m [(String,String)] parseAttrs = many1 parseAttr -parseAttr :: MWParser (String, String) +parseAttr :: PandocMonad m => MWParser m (String, String) parseAttr = try $ do skipMany spaceChar k <- many1 letter @@ -256,17 +257,17 @@ parseAttr = try $ do <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|') return (k,v) -tableStart :: MWParser () +tableStart :: PandocMonad m => MWParser m () tableStart = try $ guardColumnOne *> skipSpaces *> sym "{|" -tableEnd :: MWParser () +tableEnd :: PandocMonad m => MWParser m () tableEnd = try $ guardColumnOne *> skipSpaces *> sym "|}" -rowsep :: MWParser () +rowsep :: PandocMonad m => MWParser m () rowsep = try $ guardColumnOne *> skipSpaces *> sym "|-" <* optional parseAttr <* blanklines -cellsep :: MWParser () +cellsep :: PandocMonad m => MWParser m () cellsep = try $ (guardColumnOne *> skipSpaces <* ( (char '|' <* notFollowedBy (oneOf "-}+")) @@ -276,7 +277,7 @@ cellsep = try $ <|> (() <$ try (string "||")) <|> (() <$ try (string "!!")) -tableCaption :: MWParser Inlines +tableCaption :: PandocMonad m => MWParser m Inlines tableCaption = try $ do guardColumnOne skipSpaces @@ -284,10 +285,10 @@ tableCaption = try $ do optional (try $ parseAttr *> skipSpaces *> char '|' *> skipSpaces) (trimInlines . mconcat) <$> many (notFollowedBy (cellsep <|> rowsep) *> inline) -tableRow :: MWParser [((Alignment, Double), Blocks)] +tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)] tableRow = try $ skipMany htmlComment *> many tableCell -tableCell :: MWParser ((Alignment, Double), Blocks) +tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks) tableCell = try $ do cellsep skipMany spaceChar @@ -313,7 +314,7 @@ parseWidth s = ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds) _ -> Nothing -template :: MWParser String +template :: PandocMonad m => MWParser m String template = try $ do string "{{" notFollowedBy (char '{') @@ -322,7 +323,7 @@ template = try $ do contents <- manyTill chunk (try $ string "}}") return $ "{{" ++ concat contents ++ "}}" -blockTag :: MWParser Blocks +blockTag :: PandocMonad m => MWParser m Blocks blockTag = do (tag, _) <- lookAhead $ htmlTag isBlockTag' case tag of @@ -341,7 +342,7 @@ trimCode :: String -> String trimCode ('\n':xs) = stripTrailingNewlines xs trimCode xs = stripTrailingNewlines xs -syntaxhighlight :: String -> [Attribute String] -> MWParser Blocks +syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks syntaxhighlight tag attrs = try $ do let mblang = lookup "lang" attrs let mbstart = lookup "start" attrs @@ -351,13 +352,13 @@ syntaxhighlight tag attrs = try $ do contents <- charsInTags tag return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents -hrule :: MWParser Blocks +hrule :: PandocMonad m => MWParser m Blocks hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline) -guardColumnOne :: MWParser () +guardColumnOne :: PandocMonad m => MWParser m () guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1) -preformatted :: MWParser Blocks +preformatted :: PandocMonad m => MWParser m Blocks preformatted = try $ do guardColumnOne char ' ' @@ -388,7 +389,7 @@ encode = B.fromList . normalizeCode . B.toList . walk strToCode normalizeCode $ (Code a1 (x ++ y)) : zs normalizeCode (x:xs) = x : normalizeCode xs -header :: MWParser Blocks +header :: PandocMonad m => MWParser m Blocks header = try $ do guardColumnOne eqs <- many1 (char '=') @@ -398,13 +399,13 @@ header = try $ do attr <- registerHeader nullAttr contents return $ B.headerWith attr lev contents -bulletList :: MWParser Blocks +bulletList :: PandocMonad m => MWParser m Blocks bulletList = B.bulletList <$> ( many1 (listItem '*') <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <* optional (htmlTag (~== TagClose "ul"))) ) -orderedList :: MWParser Blocks +orderedList :: PandocMonad m => MWParser m Blocks orderedList = (B.orderedList <$> many1 (listItem '#')) <|> try @@ -415,10 +416,10 @@ orderedList = let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) -definitionList :: MWParser Blocks +definitionList :: PandocMonad m => MWParser m Blocks definitionList = B.definitionList <$> many1 defListItem -defListItem :: MWParser (Inlines, [Blocks]) +defListItem :: PandocMonad m => MWParser m (Inlines, [Blocks]) defListItem = try $ do terms <- mconcat . intersperse B.linebreak <$> many defListTerm -- we allow dd with no dt, or dt with no dd @@ -429,27 +430,27 @@ defListItem = try $ do else many (listItem ':') return (terms, defs) -defListTerm :: MWParser Inlines +defListTerm :: PandocMonad m => MWParser m Inlines defListTerm = char ';' >> skipMany spaceChar >> anyLine >>= parseFromString (trimInlines . mconcat <$> many inline) -listStart :: Char -> MWParser () +listStart :: PandocMonad m => Char -> MWParser m () listStart c = char c *> notFollowedBy listStartChar -listStartChar :: MWParser Char +listStartChar :: PandocMonad m => MWParser m Char listStartChar = oneOf "*#;:" -anyListStart :: MWParser Char +anyListStart :: PandocMonad m => MWParser m Char anyListStart = char '*' <|> char '#' <|> char ':' <|> char ';' -li :: MWParser Blocks +li :: PandocMonad m => MWParser m Blocks li = lookAhead (htmlTag (~== TagOpen "li" [])) *> (firstParaToPlain <$> blocksInTags "li") <* spaces -listItem :: Char -> MWParser Blocks +listItem :: PandocMonad m => Char -> MWParser m Blocks listItem c = try $ do extras <- many (try $ char c <* lookAhead listStartChar) if null extras @@ -475,10 +476,10 @@ listItem c = try $ do -- }} -- * next list item -- which seems to be valid mediawiki. -listChunk :: MWParser String +listChunk :: PandocMonad m => MWParser m String listChunk = template <|> count 1 anyChar -listItem' :: Char -> MWParser Blocks +listItem' :: PandocMonad m => Char -> MWParser m Blocks listItem' c = try $ do listStart c skipMany spaceChar @@ -498,7 +499,7 @@ firstParaToPlain contents = -- inline parsers -- -inline :: MWParser Inlines +inline :: PandocMonad m => MWParser m Inlines inline = whitespace <|> url <|> str @@ -516,10 +517,10 @@ inline = whitespace <|> (B.rawInline "mediawiki" <$> template) <|> special -str :: MWParser Inlines +str :: PandocMonad m => MWParser m Inlines str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) -math :: MWParser Inlines +math :: PandocMonad m => MWParser m Inlines math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math")) <|> (B.math . trim <$> charsInTags "math") <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd)) @@ -529,13 +530,13 @@ math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math")) mStart = string "\\(" mEnd = try (string "\\)") -variable :: MWParser String +variable :: PandocMonad m => MWParser m String variable = try $ do string "{{{" contents <- manyTill anyChar (try $ string "}}}") return $ "{{{" ++ contents ++ "}}}" -inlineTag :: MWParser Inlines +inlineTag :: PandocMonad m => MWParser m Inlines inlineTag = do (tag, _) <- lookAhead $ htmlTag isInlineTag' case tag of @@ -557,18 +558,18 @@ inlineTag = do TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) -special :: MWParser Inlines +special :: PandocMonad m => MWParser m Inlines special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *> oneOf specialChars) -inlineHtml :: MWParser Inlines +inlineHtml :: PandocMonad m => MWParser m Inlines inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag' -whitespace :: MWParser Inlines +whitespace :: PandocMonad m => MWParser m Inlines whitespace = B.space <$ (skipMany1 spaceChar <|> htmlComment) <|> B.softbreak <$ endline -endline :: MWParser () +endline :: PandocMonad m => MWParser m () endline = () <$ try (newline <* notFollowedBy spaceChar <* notFollowedBy newline <* @@ -577,12 +578,12 @@ endline = () <$ try (newline <* notFollowedBy' header <* notFollowedBy anyListStart) -imageIdentifiers :: [MWParser ()] +imageIdentifiers :: PandocMonad m => [MWParser m ()] imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers] where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier", "Bild"] -image :: MWParser Inlines +image :: PandocMonad m => MWParser m Inlines image = try $ do sym "[[" choice imageIdentifiers @@ -600,7 +601,7 @@ image = try $ do <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption -imageOption :: MWParser String +imageOption :: PandocMonad m => MWParser m String imageOption = try $ char '|' *> opt where opt = try (oneOfStrings [ "border", "thumbnail", "frameless" @@ -619,7 +620,7 @@ collapseUnderscores (x:xs) = x : collapseUnderscores xs addUnderscores :: String -> String addUnderscores = collapseUnderscores . intercalate "_" . words -internalLink :: MWParser Inlines +internalLink :: PandocMonad m => MWParser m Inlines internalLink = try $ do sym "[[" pagename <- unwords . words <$> many (noneOf "|]") @@ -637,7 +638,7 @@ internalLink = try $ do return mempty else return link -externalLink :: MWParser Inlines +externalLink :: PandocMonad m => MWParser m Inlines externalLink = try $ do char '[' (_, src) <- uri @@ -649,29 +650,29 @@ externalLink = try $ do return $ B.str $ show num return $ B.link src "" lab -url :: MWParser Inlines +url :: PandocMonad m => MWParser m Inlines url = do (orig, src) <- uri return $ B.link src "" (B.str orig) -- | Parses a list of inlines between start and end delimiters. -inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines +inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines inlinesBetween start end = (trimInlines . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace <* notFollowedBy' end -emph :: MWParser Inlines +emph :: PandocMonad m => MWParser m Inlines emph = B.emph <$> nested (inlinesBetween start end) where start = sym "''" >> lookAhead nonspaceChar end = try $ notFollowedBy' (() <$ strong) >> sym "''" -strong :: MWParser Inlines +strong :: PandocMonad m => MWParser m Inlines strong = B.strong <$> nested (inlinesBetween start end) where start = sym "'''" >> lookAhead nonspaceChar end = try $ sym "'''" -doubleQuotes :: MWParser Inlines +doubleQuotes :: PandocMonad m => MWParser m Inlines doubleQuotes = B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote) where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar closeDoubleQuote = try $ sym "\"" diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 4ec164e19..1953c0c83 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -32,8 +32,11 @@ module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Options (ReaderOptions) +import Control.Monad.Except (throwError) import Text.Pandoc.Error +import Text.Pandoc.Class -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, @@ -45,9 +48,14 @@ import Text.Pandoc.Error -- -- > Pandoc nullMeta [Plain [Str "hi"]] -- -readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readNative s = maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) +readNative :: PandocMonad m + => ReaderOptions + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> m Pandoc +readNative _ s = + case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of + Right doc -> return doc + Left _ -> throwError $ PandocParseError "couldn't read native" readBlocks :: String -> Either PandocError [Block] readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) @@ -59,5 +67,5 @@ readInlines :: String -> Either PandocError [Inline] readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) readInline :: String -> Either PandocError Inline -readInline s = maybe (Left . ParseFailure $ "Could not read: " ++ s) Right (safeRead s) +readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 4dcf5e5a0..cec64895c 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -11,10 +11,9 @@ import Text.HTML.TagSoup.Entity (lookupEntity) import Data.Generics import Control.Monad.State import Data.Default -import Control.Monad.Except -import Text.Pandoc.Error +import Text.Pandoc.Class (PandocMonad) -type OPML = ExceptT PandocError (State OPMLState) +type OPML m = StateT OPMLState m data OPMLState = OPMLState{ opmlSectionLevel :: Int @@ -30,12 +29,14 @@ instance Default OPMLState where , opmlDocDate = mempty } -readOPML :: ReaderOptions -> String -> Either PandocError Pandoc -readOPML _ inp = setTitle (opmlDocTitle st') - . setAuthors (opmlDocAuthors st') - . setDate (opmlDocDate st') - . doc . mconcat <$> bs - where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp) +readOPML :: PandocMonad m => ReaderOptions -> String -> m Pandoc +readOPML _ inp = do + (bs, st') <- flip runStateT def (mapM parseBlock $ normalizeTree $ parseXML inp) + return $ + setTitle (opmlDocTitle st') $ + setAuthors (opmlDocAuthors st') $ + setDate (opmlDocDate st') $ + doc $ mconcat bs -- normalize input, consolidating adjacent Text and CRef elements normalizeTree :: [Content] -> [Content] @@ -62,21 +63,22 @@ attrValue attr elt = Just z -> z Nothing -> "" -exceptT :: Either PandocError a -> OPML a -exceptT = either throwError return +-- exceptT :: PandocMonad m => Either PandocError a -> OPML m a +-- exceptT = either throwError return -asHtml :: String -> OPML Inlines -asHtml s = (\(Pandoc _ bs) -> case bs of +asHtml :: PandocMonad m => String -> OPML m Inlines +asHtml s = + (\(Pandoc _ bs) -> case bs of [Plain ils] -> fromList ils - _ -> mempty) <$> exceptT (readHtml def s) + _ -> mempty) <$> (lift $ readHtml def s) -asMarkdown :: String -> OPML Blocks -asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s) +asMarkdown :: PandocMonad m => String -> OPML m Blocks +asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def s) -getBlocks :: Element -> OPML Blocks +getBlocks :: PandocMonad m => Element -> OPML m Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) -parseBlock :: Content -> OPML Blocks +parseBlock :: PandocMonad m => Content -> OPML m Blocks parseBlock (Elem e) = case qName (elName e) of "ownerName" -> mempty <$ modify (\st -> diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 046fb4d6d..ac22f2c09 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -39,6 +39,10 @@ import qualified Data.ByteString.Lazy as B import System.FilePath +import Control.Monad.Except (throwError) + +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options @@ -52,11 +56,21 @@ import Text.Pandoc.Readers.Odt.Generic.XMLConverter import Text.Pandoc.Readers.Odt.Generic.Fallible import Text.Pandoc.Shared (filteredFilesFromArchive) --- -readOdt :: ReaderOptions +readOdt :: PandocMonad m + => ReaderOptions -> B.ByteString - -> Either PandocError (Pandoc, MediaBag) -readOdt _ bytes = bytesToOdt bytes-- of + -> m Pandoc +readOdt opts bytes = case readOdt' opts bytes of + Right (doc, mb) -> do + P.setMediaBag mb + return doc + Left e -> throwError e + +-- +readOdt' :: ReaderOptions + -> B.ByteString + -> Either PandocError (Pandoc, MediaBag) +readOdt' _ bytes = bytesToOdt bytes-- of -- Right (pandoc, mediaBag) -> Right (pandoc , mediaBag) -- Left err -> Left err @@ -64,7 +78,7 @@ readOdt _ bytes = bytesToOdt bytes-- of bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag) bytesToOdt bytes = case toArchiveOrFail bytes of Right archive -> archiveToOdt archive - Left _ -> Left $ ParseFailure "Couldn't parse odt file." + Left _ -> Left $ PandocParseError "Couldn't parse odt file." -- archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag) @@ -85,7 +99,7 @@ archiveToOdt archive | otherwise -- Not very detailed, but I don't think more information would be helpful - = Left $ ParseFailure "Couldn't parse odt file." + = Left $ PandocParseError "Couldn't parse odt file." where filePathIsOdtMedia :: FilePath -> Bool filePathIsOdtMedia fp = diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 2672b01ef..a1bd8cb59 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -663,7 +663,7 @@ read_list = matchingElement NsText "list" -- read_list_item :: ElementMatcher [Blocks] read_list_item = matchingElement NsText "list-item" - $ liftA (compactify'.(:[])) + $ liftA (compactify.(:[])) ( matchChildContent' [ read_paragraph , read_header , read_list @@ -749,7 +749,7 @@ read_table_row = matchingElement NsTable "table-row" -- read_table_cell :: ElementMatcher [Blocks] read_table_cell = matchingElement NsTable "table-cell" - $ liftA (compactify'.(:[])) + $ liftA (compactify.(:[])) $ matchChildContent' [ read_paragraph ] diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 4e1c926da..c8dbbf45a 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -31,24 +31,31 @@ import Text.Pandoc.Readers.Org.Blocks ( blockList, meta ) import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM ) import Text.Pandoc.Readers.Org.ParserState ( optionsToParserState ) +import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options -import Control.Monad.Reader ( runReader ) +import Control.Monad.Except ( throwError ) +import Control.Monad.Reader ( runReaderT ) -- | Parse org-mode string and return a Pandoc document. -readOrg :: ReaderOptions -- ^ Reader options +readOrg :: PandocMonad m + => ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readOrg opts s = flip runReader def $ - readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n") + -> m Pandoc +readOrg opts s = do + parsed <- flip runReaderT def $ + readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n") + case parsed of + Right result -> return result + Left _ -> throwError $ PandocParseError "problem parsing org" -- -- Parser -- -parseOrg :: OrgParser Pandoc +parseOrg :: PandocMonad m => OrgParser m Pandoc parseOrg = do blocks' <- blockList meta' <- meta diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs index b1004dda6..5588c4552 100644 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs @@ -44,7 +44,7 @@ import Control.Monad ( void ) import Text.Pandoc.Readers.Org.Parsing -- | Horizontal Line (five -- dashes or more) -hline :: OrgParser () +hline :: Monad m => OrgParser m () hline = try $ do skipSpaces string "-----" @@ -54,58 +54,59 @@ hline = try $ do return () -- | Read the start of a header line, return the header level -headerStart :: OrgParser Int +headerStart :: Monad m => OrgParser m Int headerStart = try $ (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos -tableStart :: OrgParser Char +tableStart :: Monad m => OrgParser m Char tableStart = try $ skipSpaces *> char '|' -latexEnvStart :: OrgParser String +latexEnvStart :: Monad m => OrgParser m String latexEnvStart = try $ do skipSpaces *> string "\\begin{" *> latexEnvName <* string "}" <* blankline where - latexEnvName :: OrgParser String + latexEnvName :: Monad m => OrgParser m String latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*") -- | Parses bullet list marker. -bulletListStart :: OrgParser () +bulletListStart :: Monad m => OrgParser m () bulletListStart = try $ choice [ () <$ skipSpaces <* oneOf "+-" <* skipSpaces1 , () <$ skipSpaces1 <* char '*' <* skipSpaces1 ] -genericListStart :: OrgParser String - -> OrgParser Int +genericListStart :: Monad m + => OrgParser m String + -> OrgParser m Int genericListStart listMarker = try $ (+) <$> (length <$> many spaceChar) <*> (length <$> listMarker <* many1 spaceChar) -orderedListStart :: OrgParser Int +orderedListStart :: Monad m => OrgParser m Int orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") -drawerStart :: OrgParser String +drawerStart :: Monad m => OrgParser m String drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline where drawerName = char ':' *> manyTill nonspaceChar (char ':') -metaLineStart :: OrgParser () +metaLineStart :: Monad m => OrgParser m () metaLineStart = try $ skipSpaces <* string "#+" -commentLineStart :: OrgParser () +commentLineStart :: Monad m => OrgParser m () commentLineStart = try $ skipSpaces <* string "# " -exampleLineStart :: OrgParser () +exampleLineStart :: Monad m => OrgParser m () exampleLineStart = () <$ try (skipSpaces *> string ": ") -noteMarker :: OrgParser String +noteMarker :: Monad m => OrgParser m String noteMarker = try $ do char '[' choice [ many1Till digit (char ']') @@ -114,12 +115,12 @@ noteMarker = try $ do ] -- | Succeeds if the parser is at the end of a block. -endOfBlock :: OrgParser () +endOfBlock :: Monad m => OrgParser m () endOfBlock = lookAhead . try $ do void blankline <|> anyBlockStart where -- Succeeds if there is a new block starting at this position. - anyBlockStart :: OrgParser () + anyBlockStart :: Monad m => OrgParser m () anyBlockStart = try . choice $ [ exampleLineStart , hline diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 484d97482..78ac8d0d1 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -44,9 +44,10 @@ import Text.Pandoc.Readers.Org.Shared import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines, Blocks ) +import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Shared ( compactify', compactify'DL, safeRead ) +import Text.Pandoc.Shared ( compactify, compactifyDL, safeRead ) import Control.Monad ( foldM, guard, mzero, void ) import Data.Char ( isSpace, toLower, toUpper) @@ -105,7 +106,7 @@ data Headline = Headline -- | Read an Org mode headline and its contents (i.e. a document subtree). -- @lvl@ gives the minimum acceptable level of the tree. -headline :: Int -> OrgParser (F Headline) +headline :: PandocMonad m => Int -> OrgParser m (F Headline) headline lvl = try $ do level <- headerStart guard (lvl <= level) @@ -130,16 +131,16 @@ headline lvl = try $ do , headlineChildren = children' } where - endOfTitle :: OrgParser () + endOfTitle :: Monad m => OrgParser m () endOfTitle = void . lookAhead $ optional headerTags *> newline - headerTags :: OrgParser [Tag] + headerTags :: Monad m => OrgParser m [Tag] headerTags = try $ let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) -- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks -headlineToBlocks :: Headline -> OrgParser Blocks +headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks headlineToBlocks hdln@(Headline {..}) = do maxHeadlineLevels <- getExportSetting exportHeadlineLevels case () of @@ -162,7 +163,7 @@ isCommentTitle :: Inlines -> Bool isCommentTitle (B.toList -> (Str "COMMENT":_)) = True isCommentTitle _ = False -archivedHeadlineToBlocks :: Headline -> OrgParser Blocks +archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks archivedHeadlineToBlocks hdln = do archivedTreesOption <- getExportSetting exportArchivedTrees case archivedTreesOption of @@ -170,7 +171,7 @@ archivedHeadlineToBlocks hdln = do ArchivedTreesExport -> headlineToHeaderWithContents hdln ArchivedTreesHeadlineOnly -> headlineToHeader hdln -headlineToHeaderWithList :: Headline -> OrgParser Blocks +headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks headlineToHeaderWithList hdln@(Headline {..}) = do maxHeadlineLevels <- getExportSetting exportHeadlineLevels header <- headlineToHeader hdln @@ -189,13 +190,13 @@ headlineToHeaderWithList hdln@(Headline {..}) = do (Header _ _ inlns:_) -> B.para (B.fromList inlns) _ -> mempty -headlineToHeaderWithContents :: Headline -> OrgParser Blocks +headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks headlineToHeaderWithContents hdln@(Headline {..}) = do header <- headlineToHeader hdln childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren) return $ header <> headlineContents <> childrenBlocks -headlineToHeader :: Headline -> OrgParser Blocks +headlineToHeader :: Monad m => Headline -> OrgParser m Blocks headlineToHeader (Headline {..}) = do exportTodoKeyword <- getExportSetting exportWithTodoKeywords let todoText = if exportTodoKeyword @@ -208,7 +209,7 @@ headlineToHeader (Headline {..}) = do attr <- registerHeader propAttr headlineText return $ B.headerWith attr headlineLevel text -todoKeyword :: OrgParser TodoMarker +todoKeyword :: Monad m => OrgParser m TodoMarker todoKeyword = try $ do taskStates <- activeTodoMarkers <$> getState let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar) @@ -250,7 +251,7 @@ tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty -- -- | Get a list of blocks. -blockList :: OrgParser [Block] +blockList :: PandocMonad m => OrgParser m [Block] blockList = do initialBlocks <- blocks headlines <- sequence <$> manyTill (headline 1) eof @@ -259,15 +260,15 @@ blockList = do return . B.toList $ (runF initialBlocks st) <> headlineBlocks -- | Get the meta information safed in the state. -meta :: OrgParser Meta +meta :: Monad m => OrgParser m Meta meta = do meta' <- metaExport runF meta' <$> getState -blocks :: OrgParser (F Blocks) +blocks :: PandocMonad m => OrgParser m (F Blocks) blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof) -block :: OrgParser (F Blocks) +block :: PandocMonad m => OrgParser m (F Blocks) block = choice [ mempty <$ blanklines , table , orgBlock @@ -306,7 +307,7 @@ attrFromBlockAttributes (BlockAttributes{..}) = kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) -stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String) +stringyMetaAttribute :: Monad m => (String -> Bool) -> OrgParser m (String, String) stringyMetaAttribute attrCheck = try $ do metaLineStart attrName <- map toUpper <$> many1Till nonspaceChar (char ':') @@ -315,7 +316,7 @@ stringyMetaAttribute attrCheck = try $ do attrValue <- anyLine return (attrName, attrValue) -blockAttributes :: OrgParser BlockAttributes +blockAttributes :: PandocMonad m => OrgParser m BlockAttributes blockAttributes = try $ do kv <- many (stringyMetaAttribute attrCheck) let caption = foldl' (appendValues "CAPTION") Nothing kv @@ -350,17 +351,17 @@ blockAttributes = try $ do Just acc -> Just $ acc ++ ' ':value Nothing -> Just value -keyValues :: OrgParser [(String, String)] +keyValues :: Monad m => OrgParser m [(String, String)] keyValues = try $ manyTill ((,) <$> key <*> value) newline where - key :: OrgParser String + key :: Monad m => OrgParser m String key = try $ skipSpaces *> char ':' *> many1 nonspaceChar - value :: OrgParser String + value :: Monad m => OrgParser m String value = skipSpaces *> manyTill anyChar endOfValue - endOfValue :: OrgParser () + endOfValue :: Monad m => OrgParser m () endOfValue = lookAhead $ (() <$ try (many1 spaceChar <* key)) <|> () <$ newline @@ -371,7 +372,7 @@ keyValues = try $ -- -- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE. -orgBlock :: OrgParser (F Blocks) +orgBlock :: PandocMonad m => OrgParser m (F Blocks) orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart @@ -390,25 +391,25 @@ orgBlock = try $ do let (ident, classes, kv) = attrFromBlockAttributes blockAttrs in fmap $ B.divWith (ident, classes ++ [blkType], kv) where - blockHeaderStart :: OrgParser String + blockHeaderStart :: Monad m => OrgParser m String blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord lowercase :: String -> String lowercase = map toLower -rawBlockLines :: (String -> F Blocks) -> String -> OrgParser (F Blocks) +rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType)) -parseBlockLines :: (F Blocks -> F Blocks) -> String -> OrgParser (F Blocks) +parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent)) where - parsedBlockContent :: OrgParser (F Blocks) + parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do raw <- rawBlockContent blockType parseFromString blocks (raw ++ "\n") -- | Read the raw string content of a block -rawBlockContent :: String -> OrgParser String +rawBlockContent :: Monad m => String -> OrgParser m String rawBlockContent blockType = try $ do blkLines <- manyTill rawLine blockEnder tabLen <- getOption readerTabStop @@ -418,10 +419,10 @@ rawBlockContent blockType = try $ do . map (tabsToSpaces tabLen . commaEscaped) $ blkLines where - rawLine :: OrgParser String + rawLine :: Monad m => OrgParser m String rawLine = try $ ("" <$ blankline) <|> anyLine - blockEnder :: OrgParser () + blockEnder :: Monad m => OrgParser m () blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) stripIndent :: [String] -> [String] @@ -448,18 +449,18 @@ rawBlockContent blockType = try $ do commaEscaped cs = cs -- | Read but ignore all remaining block headers. -ignHeaders :: OrgParser () +ignHeaders :: Monad m => OrgParser m () ignHeaders = (() <$ newline) <|> (() <$ anyLine) -- | Read a block containing code intended for export in specific backends -- only. -exportBlock :: String -> OrgParser (F Blocks) +exportBlock :: Monad m => String -> OrgParser m (F Blocks) exportBlock blockType = try $ do exportType <- skipSpaces *> orgArgWord <* ignHeaders contents <- rawBlockContent blockType returnF (B.rawBlock (map toLower exportType) contents) -verseBlock :: String -> OrgParser (F Blocks) +verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks) verseBlock blockType = try $ do ignHeaders content <- rawBlockContent blockType @@ -468,7 +469,7 @@ verseBlock blockType = try $ do where -- replace initial spaces with nonbreaking spaces to preserve -- indentation, parse the rest as normal inline - parseVerseLine :: String -> OrgParser (F Inlines) + parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines) parseVerseLine cs = do let (initialSpaces, indentedLine) = span isSpace cs let nbspIndent = if null initialSpaces @@ -480,7 +481,7 @@ verseBlock blockType = try $ do -- | Read a code block and the associated results block if present. Which of -- boths blocks is included in the output is determined using the "exports" -- argument in the block header. -codeBlock :: BlockAttributes -> String -> OrgParser (F Blocks) +codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) @@ -513,7 +514,7 @@ exportsResults :: [(String, String)] -> Bool exportsResults attrs = ("rundoc-exports", "results") `elem` attrs || ("rundoc-exports", "both") `elem` attrs -trailingResultsBlock :: OrgParser (Maybe (F Blocks)) +trailingResultsBlock :: PandocMonad m => OrgParser m (Maybe (F Blocks)) trailingResultsBlock = optionMaybe . try $ do blanklines stringAnyCase "#+RESULTS:" @@ -522,7 +523,7 @@ trailingResultsBlock = optionMaybe . try $ do -- | Parse code block arguments -- TODO: We currently don't handle switches. -codeHeaderArgs :: OrgParser ([String], [(String, String)]) +codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) @@ -537,27 +538,27 @@ codeHeaderArgs = try $ do where hasRundocParameters = not . null -switch :: OrgParser (Char, Maybe String) +switch :: Monad m => OrgParser m (Char, Maybe String) switch = try $ simpleSwitch <|> lineNumbersSwitch where simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> (string "-l \"" *> many1Till nonspaceChar (char '"')) -blockOption :: OrgParser (String, String) +blockOption :: Monad m => OrgParser m (String, String) blockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgParamValue return (argKey, paramValue) -orgParamValue :: OrgParser String +orgParamValue :: Monad m => OrgParser m String orgParamValue = try $ skipSpaces *> notFollowedBy (char ':' ) *> many1 nonspaceChar <* skipSpaces -horizontalRule :: OrgParser (F Blocks) +horizontalRule :: Monad m => OrgParser m (F Blocks) horizontalRule = return B.horizontalRule <$ try hline @@ -568,7 +569,7 @@ horizontalRule = return B.horizontalRule <$ try hline -- | A generic drawer which has no special meaning for org-mode. -- Whether or not this drawer is included in the output depends on the drawers -- export setting. -genericDrawer :: OrgParser (F Blocks) +genericDrawer :: PandocMonad m => OrgParser m (F Blocks) genericDrawer = try $ do name <- map toUpper <$> drawerStart content <- manyTill drawerLine (try drawerEnd) @@ -582,35 +583,35 @@ genericDrawer = try $ do Right names | name `notElem` names -> return mempty _ -> drawerDiv name <$> parseLines content where - parseLines :: [String] -> OrgParser (F Blocks) + parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks) parseLines = parseFromString blocks . (++ "\n") . unlines drawerDiv :: String -> F Blocks -> F Blocks drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) -drawerLine :: OrgParser String +drawerLine :: Monad m => OrgParser m String drawerLine = anyLine -drawerEnd :: OrgParser String +drawerEnd :: Monad m => OrgParser m String drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline -- | Read a :PROPERTIES: drawer and return the key/value pairs contained -- within. -propertiesDrawer :: OrgParser Properties +propertiesDrawer :: Monad m => OrgParser m Properties propertiesDrawer = try $ do drawerType <- drawerStart guard $ map toUpper drawerType == "PROPERTIES" manyTill property (try drawerEnd) where - property :: OrgParser (PropertyKey, PropertyValue) + property :: Monad m => OrgParser m (PropertyKey, PropertyValue) property = try $ (,) <$> key <*> value - key :: OrgParser PropertyKey + key :: Monad m => OrgParser m PropertyKey key = fmap toPropertyKey . try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') - value :: OrgParser PropertyValue + value :: Monad m => OrgParser m PropertyValue value = fmap toPropertyValue . try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) @@ -621,7 +622,7 @@ propertiesDrawer = try $ do -- | Figures or an image paragraph (i.e. an image on a line by itself). Only -- images with a caption attribute are interpreted as figures. -figure :: OrgParser (F Blocks) +figure :: PandocMonad m => OrgParser m (F Blocks) figure = try $ do figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph @@ -632,7 +633,7 @@ figure = try $ do let isFigure = not . isNothing $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where - selfTarget :: OrgParser String + selfTarget :: PandocMonad m => OrgParser m String selfTarget = try $ char '[' *> linkTarget <* char ']' imageBlock :: Bool -> BlockAttributes -> String -> F Blocks @@ -654,7 +655,7 @@ figure = try $ do else "fig:" ++ cs -- | Succeeds if looking at the end of the current paragraph -endOfParagraph :: OrgParser () +endOfParagraph :: Monad m => OrgParser m () endOfParagraph = try $ skipSpaces *> newline *> endOfBlock @@ -663,11 +664,11 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- -- | Example code marked up by a leading colon. -example :: OrgParser (F Blocks) +example :: Monad m => OrgParser m (F Blocks) example = try $ do return . return . exampleCode =<< unlines <$> many1 exampleLine where - exampleLine :: OrgParser String + exampleLine :: Monad m => OrgParser m String exampleLine = try $ exampleLineStart *> anyLine exampleCode :: String -> Blocks @@ -678,10 +679,10 @@ exampleCode = B.codeBlockWith ("", ["example"], []) -- Comments, Options and Metadata -- -specialLine :: OrgParser (F Blocks) +specialLine :: PandocMonad m => OrgParser m (F Blocks) specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine -rawExportLine :: OrgParser Blocks +rawExportLine :: PandocMonad m => OrgParser m Blocks rawExportLine = try $ do metaLineStart key <- metaKey @@ -689,7 +690,7 @@ rawExportLine = try $ do then B.rawBlock key <$> anyLine else mzero -commentLine :: OrgParser Blocks +commentLine :: Monad m => OrgParser m Blocks commentLine = commentLineStart *> anyLine *> pure mempty @@ -718,7 +719,7 @@ data OrgTable = OrgTable , orgTableRows :: [[Blocks]] } -table :: OrgParser (F Blocks) +table :: PandocMonad m => OrgParser m (F Blocks) table = try $ do blockAttrs <- blockAttributes lookAhead tableStart @@ -745,18 +746,18 @@ orgToPandocTable (OrgTable colProps heads lns) caption = <*> totalWidth in (align', width') -tableRows :: OrgParser [OrgTableRow] +tableRows :: PandocMonad m => OrgParser m [OrgTableRow] tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) -tableContentRow :: OrgParser OrgTableRow +tableContentRow :: PandocMonad m => OrgParser m OrgTableRow tableContentRow = try $ OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) -tableContentCell :: OrgParser (F Blocks) +tableContentCell :: PandocMonad m => OrgParser m (F Blocks) tableContentCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell -tableAlignRow :: OrgParser OrgTableRow +tableAlignRow :: Monad m => OrgParser m OrgTableRow tableAlignRow = try $ do tableStart colProps <- many1Till columnPropertyCell newline @@ -764,7 +765,7 @@ tableAlignRow = try $ do guard $ any (/= def) colProps return $ OrgAlignRow colProps -columnPropertyCell :: OrgParser ColumnProperty +columnPropertyCell :: Monad m => OrgParser m ColumnProperty columnPropertyCell = emptyCell <|> propCell <?> "alignment info" where emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell) @@ -776,18 +777,18 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info" <* char '>' <* emptyCell) -tableAlignFromChar :: OrgParser Alignment +tableAlignFromChar :: Monad m => OrgParser m Alignment tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft , char 'c' *> return AlignCenter , char 'r' *> return AlignRight ] -tableHline :: OrgParser OrgTableRow +tableHline :: Monad m => OrgParser m OrgTableRow tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) -endOfCell :: OrgParser Char +endOfCell :: Monad m => OrgParser m Char endOfCell = try $ char '|' <|> lookAhead newline rowsToTable :: [OrgTableRow] @@ -840,7 +841,7 @@ rowToContent orgTable row = -- -- LaTeX fragments -- -latexFragment :: OrgParser (F Blocks) +latexFragment :: Monad m => OrgParser m (F Blocks) latexFragment = try $ do envName <- latexEnvStart content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) @@ -851,7 +852,7 @@ latexFragment = try $ do , "\\end{", e, "}\n" ] -latexEnd :: String -> OrgParser () +latexEnd :: Monad m => String -> OrgParser m () latexEnd envName = try $ () <$ skipSpaces <* string ("\\end{" ++ envName ++ "}") @@ -861,7 +862,7 @@ latexEnd envName = try $ -- -- Footnote defintions -- -noteBlock :: OrgParser (F Blocks) +noteBlock :: PandocMonad m => OrgParser m (F Blocks) noteBlock = try $ do ref <- noteMarker <* skipSpaces content <- mconcat <$> blocksTillHeaderOrNote @@ -873,7 +874,7 @@ noteBlock = try $ do <|> () <$ lookAhead headerStart) -- Paragraphs or Plain text -paraOrPlain :: OrgParser (F Blocks) +paraOrPlain :: PandocMonad m => OrgParser m (F Blocks) paraOrPlain = try $ do -- Make sure we are not looking at a headline notFollowedBy' (char '*' *> (oneOf " *")) @@ -892,24 +893,24 @@ paraOrPlain = try $ do -- list blocks -- -list :: OrgParser (F Blocks) +list :: PandocMonad m => OrgParser m (F Blocks) list = choice [ definitionList, bulletList, orderedList ] <?> "list" -definitionList :: OrgParser (F Blocks) +definitionList :: PandocMonad m => OrgParser m (F Blocks) definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.definitionList . fmap compactify'DL . sequence + fmap B.definitionList . fmap compactifyDL . sequence <$> many1 (definitionListItem $ bulletListStart' (Just n)) -bulletList :: OrgParser (F Blocks) +bulletList :: PandocMonad m => OrgParser m (F Blocks) bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.bulletList . fmap compactify' . sequence + fmap B.bulletList . fmap compactify . sequence <$> many1 (listItem (bulletListStart' $ Just n)) -orderedList :: OrgParser (F Blocks) -orderedList = fmap B.orderedList . fmap compactify' . sequence +orderedList :: PandocMonad m => OrgParser m (F Blocks) +orderedList = fmap B.orderedList . fmap compactify . sequence <$> many1 (listItem orderedListStart) -bulletListStart' :: Maybe Int -> OrgParser Int +bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int -- returns length of bulletList prefix, inclusive of marker bulletListStart' Nothing = do ind <- length <$> many spaceChar oneOf (bullets $ ind == 0) @@ -925,8 +926,9 @@ bulletListStart' (Just n) = do count (n-1) spaceChar bullets :: Bool -> String bullets unindented = if unindented then "+-" else "*+-" -definitionListItem :: OrgParser Int - -> OrgParser (F (Inlines, [Blocks])) +definitionListItem :: PandocMonad m + => OrgParser m Int + -> OrgParser m (F (Inlines, [Blocks])) definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try definitionMarker) @@ -942,8 +944,9 @@ definitionListItem parseMarkerGetLength = try $ do -- parse raw text for one list item, excluding start marker and continuations -listItem :: OrgParser Int - -> OrgParser (F Blocks) +listItem :: PandocMonad m + => OrgParser m Int + -> OrgParser m (F Blocks) listItem start = try . withContext ListItemState $ do markerLength <- try start firstLine <- anyLineNewline @@ -953,8 +956,8 @@ listItem start = try . withContext ListItemState $ do -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Int - -> OrgParser String +listContinuation :: Monad m => Int + -> OrgParser m String listContinuation markerLength = try $ notFollowedBy' blankline *> (mappend <$> (concat <$> many1 listLine) @@ -963,7 +966,7 @@ listContinuation markerLength = try $ listLine = try $ indentWith markerLength *> anyLineNewline -- indent by specified number of spaces (or equiv. tabs) - indentWith :: Int -> OrgParser String + indentWith :: Monad m => Int -> OrgParser m String indentWith num = do tabStop <- getOption readerTabStop if num < tabStop @@ -972,5 +975,5 @@ listContinuation markerLength = try $ , try (char '\t' >> count (num - tabStop) (char ' ')) ] -- | Parse any line, include the final newline in the output. -anyLineNewline :: OrgParser String +anyLineNewline :: Monad m => OrgParser m String anyLineNewline = (++ "\n") <$> anyLine diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index 764e5b0d5..391877c03 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -37,14 +37,14 @@ import Data.Char ( toLower ) import Data.Maybe ( listToMaybe ) -- | Read and handle space separated org-mode export settings. -exportSettings :: OrgParser () +exportSettings :: Monad m => OrgParser m () exportSettings = void $ sepBy spaces exportSetting -- | Setter function for export settings. type ExportSettingSetter a = a -> ExportSettings -> ExportSettings -- | Read and process a single org-mode export option. -exportSetting :: OrgParser () +exportSetting :: Monad m => OrgParser m () exportSetting = choice [ booleanSetting "^" (\val es -> es { exportSubSuperscripts = val }) , booleanSetting "'" (\val es -> es { exportSmartQuotes = val }) @@ -81,10 +81,11 @@ exportSetting = choice , ignoredSetting "|" ] <?> "export setting" -genericExportSetting :: OrgParser a +genericExportSetting :: Monad m + => OrgParser m a -> String -> ExportSettingSetter a - -> OrgParser () + -> OrgParser m () genericExportSetting optionParser settingIdentifier setter = try $ do _ <- string settingIdentifier *> char ':' value <- optionParser @@ -94,11 +95,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do st { orgStateExportSettings = setter val . orgStateExportSettings $ st } -- | A boolean option, either nil (False) or non-nil (True). -booleanSetting :: String -> ExportSettingSetter Bool -> OrgParser () +booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m () booleanSetting = genericExportSetting elispBoolean -- | An integer-valued option. -integerSetting :: String -> ExportSettingSetter Int -> OrgParser () +integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m () integerSetting = genericExportSetting parseInt where parseInt = try $ @@ -106,9 +107,10 @@ integerSetting = genericExportSetting parseInt -- | Either the string "headline" or an elisp boolean and treated as an -- @ArchivedTreesOption@. -archivedTreeSetting :: String +archivedTreeSetting :: Monad m + => String -> ExportSettingSetter ArchivedTreesOption - -> OrgParser () + -> OrgParser m () archivedTreeSetting = genericExportSetting $ archivedTreesHeadlineSetting <|> archivedTreesBoolean where @@ -125,9 +127,10 @@ archivedTreeSetting = else ArchivedTreesNoExport -- | A list or a complement list (i.e. a list starting with `not`). -complementableListSetting :: String +complementableListSetting :: Monad m + => String -> ExportSettingSetter (Either [String] [String]) - -> OrgParser () + -> OrgParser m () complementableListSetting = genericExportSetting $ choice [ Left <$> complementStringList , Right <$> stringList @@ -135,31 +138,31 @@ complementableListSetting = genericExportSetting $ choice ] where -- Read a plain list of strings. - stringList :: OrgParser [String] + stringList :: Monad m => OrgParser m [String] stringList = try $ char '(' *> sepBy elispString spaces <* char ')' -- Read an emacs lisp list specifying a complement set. - complementStringList :: OrgParser [String] + complementStringList :: Monad m => OrgParser m [String] complementStringList = try $ string "(not " *> sepBy elispString spaces <* char ')' - elispString :: OrgParser String + elispString :: Monad m => OrgParser m String elispString = try $ char '"' *> manyTill alphaNum (char '"') -- | Read but ignore the export setting. -ignoredSetting :: String -> OrgParser () +ignoredSetting :: Monad m => String -> OrgParser m () ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar) -- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are -- interpreted as true. -elispBoolean :: OrgParser Bool +elispBoolean :: Monad m => OrgParser m Bool elispBoolean = try $ do value <- many1 nonspaceChar return $ case map toLower value of diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 7e1bb61c2..bcf8f6df9 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -47,9 +47,11 @@ import Text.Pandoc.Options import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline ) import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) ) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap +import Text.Pandoc.Class (PandocMonad) import Prelude hiding (sequence) import Control.Monad ( guard, mplus, mzero, when, void ) +import Control.Monad.Trans ( lift ) import Data.Char ( isAlphaNum, isSpace ) import Data.List ( intersperse ) import Data.Maybe ( fromMaybe ) @@ -60,46 +62,46 @@ import Data.Traversable (sequence) -- -- Functions acting on the parser state -- -recordAnchorId :: String -> OrgParser () +recordAnchorId :: PandocMonad m => String -> OrgParser m () recordAnchorId i = updateState $ \s -> s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } -pushToInlineCharStack :: Char -> OrgParser () +pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m () pushToInlineCharStack c = updateState $ \s -> s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s } -popInlineCharStack :: OrgParser () +popInlineCharStack :: PandocMonad m => OrgParser m () popInlineCharStack = updateState $ \s -> s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } -surroundingEmphasisChar :: OrgParser [Char] +surroundingEmphasisChar :: PandocMonad m => OrgParser m [Char] surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState -startEmphasisNewlinesCounting :: Int -> OrgParser () +startEmphasisNewlinesCounting :: PandocMonad m => Int -> OrgParser m () startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Just maxNewlines } -decEmphasisNewlinesCount :: OrgParser () +decEmphasisNewlinesCount :: PandocMonad m => OrgParser m () decEmphasisNewlinesCount = updateState $ \s -> s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } -newlinesCountWithinLimits :: OrgParser Bool +newlinesCountWithinLimits :: PandocMonad m => OrgParser m Bool newlinesCountWithinLimits = do st <- getState return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True -resetEmphasisNewlines :: OrgParser () +resetEmphasisNewlines :: PandocMonad m => OrgParser m () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } -addToNotesTable :: OrgNoteRecord -> OrgParser () +addToNotesTable :: PandocMonad m => OrgNoteRecord -> OrgParser m () addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState updateState $ \s -> s{ orgStateNotes' = note:oldnotes } -- | Parse a single Org-mode inline element -inline :: OrgParser (F Inlines) +inline :: PandocMonad m => OrgParser m (F Inlines) inline = choice [ whitespace , linebreak @@ -125,7 +127,7 @@ inline = <?> "inline" -- | Read the rest of the input as inlines. -inlines :: OrgParser (F Inlines) +inlines :: PandocMonad m => OrgParser m (F Inlines) inlines = trimInlinesF . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: @@ -133,23 +135,23 @@ specialChars :: [Char] specialChars = "\"$'()*+-,./:;<=>@[\\]^_{|}~" -whitespace :: OrgParser (F Inlines) +whitespace :: PandocMonad m => OrgParser m (F Inlines) whitespace = pure B.space <$ skipMany1 spaceChar <* updateLastPreCharPos <* updateLastForbiddenCharPos <?> "whitespace" -linebreak :: OrgParser (F Inlines) +linebreak :: PandocMonad m => OrgParser m (F Inlines) linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline -str :: OrgParser (F Inlines) +str :: PandocMonad m => OrgParser m (F Inlines) str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural -- break. This should reflect the values of the Emacs variable -- @org-element-pagaraph-separate@. -endline :: OrgParser (F Inlines) +endline :: PandocMonad m => OrgParser m (F Inlines) endline = try $ do newline notFollowedBy' endOfBlock @@ -174,7 +176,7 @@ endline = try $ do -- contributors. All this should be consolidated once an official Org-mode -- citation syntax has emerged. -cite :: OrgParser (F Inlines) +cite :: PandocMonad m => OrgParser m (F Inlines) cite = try $ berkeleyCite <|> do guardEnabled Ext_citations (cs, raw) <- withRaw $ choice @@ -185,40 +187,41 @@ cite = try $ berkeleyCite <|> do return $ (flip B.cite (B.text raw)) <$> cs -- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). -pandocOrgCite :: OrgParser (F [Citation]) +pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) pandocOrgCite = try $ char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' -orgRefCite :: OrgParser (F [Citation]) +orgRefCite :: PandocMonad m => OrgParser m (F [Citation]) orgRefCite = try $ choice [ normalOrgRefCite , fmap (:[]) <$> linkLikeOrgRefCite ] -normalOrgRefCite :: OrgParser (F [Citation]) +normalOrgRefCite :: PandocMonad m => OrgParser m (F [Citation]) normalOrgRefCite = try $ do mode <- orgRefCiteMode - -- org-ref style citation key, parsed into a citation of the given mode - let orgRefCiteItem :: OrgParser (F Citation) - orgRefCiteItem = try $ do - key <- orgRefCiteKey - returnF $ Citation - { citationId = key - , citationPrefix = mempty - , citationSuffix = mempty - , citationMode = mode - , citationNoteNum = 0 - , citationHash = 0 - } - firstCitation <- orgRefCiteItem - moreCitations <- many (try $ char ',' *> orgRefCiteItem) + firstCitation <- orgRefCiteList mode + moreCitations <- many (try $ char ',' *> orgRefCiteList mode) return . sequence $ firstCitation : moreCitations - where + where + -- | A list of org-ref style citation keys, parsed as citation of the given + -- citation mode. + orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation) + orgRefCiteList citeMode = try $ do + key <- orgRefCiteKey + returnF $ Citation + { citationId = key + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = citeMode + , citationNoteNum = 0 + , citationHash = 0 + } -- | Read an Berkeley-style Org-mode citation. Berkeley citation style was -- develop and adjusted to Org-mode style by John MacFarlane and Richard -- Lawrence, respectively, both philosophers at UC Berkeley. -berkeleyCite :: OrgParser (F Inlines) +berkeleyCite :: PandocMonad m => OrgParser m (F Inlines) berkeleyCite = try $ do bcl <- berkeleyCitationList return $ do @@ -260,7 +263,7 @@ data BerkeleyCitationList = BerkeleyCitationList , berkeleyCiteCommonSuffix :: Maybe Inlines , berkeleyCiteCitations :: [Citation] } -berkeleyCitationList :: OrgParser (F BerkeleyCitationList) +berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList) berkeleyCitationList = try $ do char '[' parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] @@ -275,22 +278,22 @@ berkeleyCitationList = try $ do <*> sequence commonSuffix <*> citations) where - citationListPart :: OrgParser (F Inlines) + citationListPart :: PandocMonad m => OrgParser m (F Inlines) citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do notFollowedBy' citeKey notFollowedBy (oneOf ";]") inline -berkeleyBareTag :: OrgParser () +berkeleyBareTag :: PandocMonad m => OrgParser m () berkeleyBareTag = try $ void berkeleyBareTag' -berkeleyParensTag :: OrgParser () +berkeleyParensTag :: PandocMonad m => OrgParser m () berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag' -berkeleyBareTag' :: OrgParser () +berkeleyBareTag' :: PandocMonad m => OrgParser m () berkeleyBareTag' = try $ void (string "cite") -berkeleyTextualCite :: OrgParser (F [Citation]) +berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation]) berkeleyTextualCite = try $ do (suppressAuthor, key) <- citeKey returnF . return $ Citation @@ -305,14 +308,14 @@ berkeleyTextualCite = try $ do -- The following is what a Berkeley-style bracketed textual citation parser -- would look like. However, as these citations are a subset of Pandoc's Org -- citation style, this isn't used. --- berkeleyBracketedTextualCite :: OrgParser (F [Citation]) +-- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation]) -- berkeleyBracketedTextualCite = try . (fmap head) $ -- enclosedByPair '[' ']' berkeleyTextualCite -- | Read a link-like org-ref style citation. The citation includes pre and -- post text. However, multiple citations are not possible due to limitations -- in the syntax. -linkLikeOrgRefCite :: OrgParser (F Citation) +linkLikeOrgRefCite :: PandocMonad m => OrgParser m (F Citation) linkLikeOrgRefCite = try $ do _ <- string "[[" mode <- orgRefCiteMode @@ -335,13 +338,13 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. -orgRefCiteKey :: OrgParser String +orgRefCiteKey :: PandocMonad m => OrgParser m String orgRefCiteKey = try . many1 . satisfy $ \c -> isAlphaNum c || c `elem` ("-_:\\./"::String) -- | Supported citation types. Only a small subset of org-ref types is -- supported for now. TODO: rewrite this, use LaTeX reader as template. -orgRefCiteMode :: OrgParser CitationMode +orgRefCiteMode :: PandocMonad m => OrgParser m CitationMode orgRefCiteMode = choice $ map (\(s, mode) -> mode <$ try (string s <* char ':')) [ ("cite", AuthorInText) @@ -352,10 +355,10 @@ orgRefCiteMode = , ("citeyear", SuppressAuthor) ] -citeList :: OrgParser (F [Citation]) +citeList :: PandocMonad m => OrgParser m (F [Citation]) citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) -citation :: OrgParser (F Citation) +citation :: PandocMonad m => OrgParser m (F Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey @@ -384,10 +387,10 @@ citation = try $ do then (B.space <>) <$> rest else rest -footnote :: OrgParser (F Inlines) +footnote :: PandocMonad m => OrgParser m (F Inlines) footnote = try $ inlineNote <|> referencedNote -inlineNote :: OrgParser (F Inlines) +inlineNote :: PandocMonad m => OrgParser m (F Inlines) inlineNote = try $ do string "[fn:" ref <- many alphaNum @@ -397,7 +400,7 @@ inlineNote = try $ do addToNotesTable ("fn:" ++ ref, note) return $ B.note <$> note -referencedNote :: OrgParser (F Inlines) +referencedNote :: PandocMonad m => OrgParser m (F Inlines) referencedNote = try $ do ref <- noteMarker return $ do @@ -409,14 +412,14 @@ referencedNote = try $ do let contents' = runF contents st{ orgStateNotes' = [] } return $ B.note contents' -linkOrImage :: OrgParser (F Inlines) +linkOrImage :: PandocMonad m => OrgParser m (F Inlines) linkOrImage = explicitOrImageLink <|> selflinkOrImage <|> angleLink <|> plainLink <?> "link or image" -explicitOrImageLink :: OrgParser (F Inlines) +explicitOrImageLink :: PandocMonad m => OrgParser m (F Inlines) explicitOrImageLink = try $ do char '[' srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget @@ -431,30 +434,30 @@ explicitOrImageLink = try $ do _ -> linkToInlinesF src =<< title' -selflinkOrImage :: OrgParser (F Inlines) +selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' return $ linkToInlinesF src (B.str src) -plainLink :: OrgParser (F Inlines) +plainLink :: PandocMonad m => OrgParser m (F Inlines) plainLink = try $ do (orig, src) <- uri returnF $ B.link src "" (B.str orig) -angleLink :: OrgParser (F Inlines) +angleLink :: PandocMonad m => OrgParser m (F Inlines) angleLink = try $ do char '<' link <- plainLink char '>' return link -linkTarget :: OrgParser String +linkTarget :: PandocMonad m => OrgParser m String linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") -possiblyEmptyLinkTarget :: OrgParser String +possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") -applyCustomLinkFormat :: String -> OrgParser (F String) +applyCustomLinkFormat :: String -> OrgParser m (F String) applyCustomLinkFormat link = do let (linkType, rest) = break (== ':') link return $ do @@ -487,7 +490,7 @@ internalLink link title = do -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as -- an anchor. -anchor :: OrgParser (F Inlines) +anchor :: PandocMonad m => OrgParser m (F Inlines) anchor = try $ do anchorId <- parseAnchor recordAnchorId anchorId @@ -509,7 +512,7 @@ solidify = map replaceSpecialChar | otherwise = '-' -- | Parses an inline code block and marks it as an babel block. -inlineCodeBlock :: OrgParser (F Inlines) +inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) inlineCodeBlock = try $ do string "src_" lang <- many1 orgArgWordChar @@ -519,13 +522,13 @@ inlineCodeBlock = try $ do let attrKeyVal = map toRundocAttrib (("language", lang) : opts) returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where - inlineBlockOption :: OrgParser (String, String) + inlineBlockOption :: PandocMonad m => OrgParser m (String, String) inlineBlockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgInlineParamValue return (argKey, paramValue) - orgInlineParamValue :: OrgParser String + orgInlineParamValue :: PandocMonad m => OrgParser m String orgInlineParamValue = try $ skipSpaces *> notFollowedBy (char ':') @@ -533,7 +536,7 @@ inlineCodeBlock = try $ do <* skipSpaces -emphasizedText :: OrgParser (F Inlines) +emphasizedText :: PandocMonad m => OrgParser m (F Inlines) emphasizedText = do state <- getState guard . exportEmphasizedText . orgStateExportSettings $ state @@ -544,60 +547,63 @@ emphasizedText = do , underline ] -enclosedByPair :: Char -- ^ opening char +enclosedByPair :: PandocMonad m + => Char -- ^ opening char -> Char -- ^ closing char - -> OrgParser a -- ^ parser - -> OrgParser [a] + -> OrgParser m a -- ^ parser + -> OrgParser m [a] enclosedByPair s e p = char s *> many1Till p (char e) -emph :: OrgParser (F Inlines) +emph :: PandocMonad m => OrgParser m (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' -strong :: OrgParser (F Inlines) +strong :: PandocMonad m => OrgParser m (F Inlines) strong = fmap B.strong <$> emphasisBetween '*' -strikeout :: OrgParser (F Inlines) +strikeout :: PandocMonad m => OrgParser m (F Inlines) strikeout = fmap B.strikeout <$> emphasisBetween '+' -- There is no underline, so we use strong instead. -underline :: OrgParser (F Inlines) +underline :: PandocMonad m => OrgParser m (F Inlines) underline = fmap B.strong <$> emphasisBetween '_' -verbatim :: OrgParser (F Inlines) +verbatim :: PandocMonad m => OrgParser m (F Inlines) verbatim = return . B.code <$> verbatimBetween '=' -code :: OrgParser (F Inlines) +code :: PandocMonad m => OrgParser m (F Inlines) code = return . B.code <$> verbatimBetween '~' -subscript :: OrgParser (F Inlines) +subscript :: PandocMonad m => OrgParser m (F Inlines) subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) -superscript :: OrgParser (F Inlines) +superscript :: PandocMonad m => OrgParser m (F Inlines) superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) -math :: OrgParser (F Inlines) +math :: PandocMonad m => OrgParser m (F Inlines) math = return . B.math <$> choice [ math1CharBetween '$' , mathStringBetween '$' , rawMathBetween "\\(" "\\)" ] -displayMath :: OrgParser (F Inlines) +displayMath :: PandocMonad m => OrgParser m (F Inlines) displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" , rawMathBetween "$$" "$$" ] -updatePositions :: Char - -> OrgParser Char +updatePositions :: PandocMonad m + => Char + -> OrgParser m Char updatePositions c = do when (c `elem` emphasisPreChars) updateLastPreCharPos when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos return c -symbol :: OrgParser (F Inlines) +symbol :: PandocMonad m => OrgParser m (F Inlines) symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) -emphasisBetween :: Char - -> OrgParser (F Inlines) +emphasisBetween :: PandocMonad m + => Char + -> OrgParser m (F Inlines) emphasisBetween c = try $ do startEmphasisNewlinesCounting emphasisAllowedNewlines res <- enclosedInlines (emphasisStart c) (emphasisEnd c) @@ -606,8 +612,9 @@ emphasisBetween c = try $ do resetEmphasisNewlines return res -verbatimBetween :: Char - -> OrgParser String +verbatimBetween :: PandocMonad m + => Char + -> OrgParser m String verbatimBetween c = try $ emphasisStart c *> many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) @@ -615,8 +622,9 @@ verbatimBetween c = try $ verbatimChar = noneOf "\n\r" >>= updatePositions -- | Parses a raw string delimited by @c@ using Org's math rules -mathStringBetween :: Char - -> OrgParser String +mathStringBetween :: PandocMonad m + => Char + -> OrgParser m String mathStringBetween c = try $ do mathStart c body <- many1TillNOrLessNewlines mathAllowedNewlines @@ -626,8 +634,9 @@ mathStringBetween c = try $ do return $ body ++ [final] -- | Parse a single character between @c@ using math rules -math1CharBetween :: Char - -> OrgParser String +math1CharBetween :: PandocMonad m + => Char + -> OrgParser m String math1CharBetween c = try $ do char c res <- noneOf $ c:mathForbiddenBorderChars @@ -635,13 +644,14 @@ math1CharBetween c = try $ do eof <|> () <$ lookAhead (oneOf mathPostChars) return [res] -rawMathBetween :: String +rawMathBetween :: PandocMonad m + => String -> String - -> OrgParser String + -> OrgParser m String rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) -- | Parses the start (opening character) of emphasis -emphasisStart :: Char -> OrgParser Char +emphasisStart :: PandocMonad m => Char -> OrgParser m Char emphasisStart c = try $ do guard =<< afterEmphasisPreChar guard =<< notAfterString @@ -654,7 +664,7 @@ emphasisStart c = try $ do return c -- | Parses the closing character of emphasis -emphasisEnd :: Char -> OrgParser Char +emphasisEnd :: PandocMonad m => Char -> OrgParser m Char emphasisEnd c = try $ do guard =<< notAfterForbiddenBorderChar char c @@ -665,11 +675,11 @@ emphasisEnd c = try $ do where acceptablePostChars = surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars) -mathStart :: Char -> OrgParser Char +mathStart :: PandocMonad m => Char -> OrgParser m Char mathStart c = try $ char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars)) -mathEnd :: Char -> OrgParser Char +mathEnd :: PandocMonad m => Char -> OrgParser m Char mathEnd c = try $ do res <- noneOf (c:mathForbiddenBorderChars) char c @@ -677,15 +687,15 @@ mathEnd c = try $ do return res -enclosedInlines :: OrgParser a - -> OrgParser b - -> OrgParser (F Inlines) +enclosedInlines :: PandocMonad m => OrgParser m a + -> OrgParser m b + -> OrgParser m (F Inlines) enclosedInlines start end = try $ trimInlinesF . mconcat <$> enclosed start end inline -enclosedRaw :: OrgParser a - -> OrgParser b - -> OrgParser String +enclosedRaw :: PandocMonad m => OrgParser m a + -> OrgParser m b + -> OrgParser m String enclosedRaw start end = try $ start *> (onSingleLine <|> spanningTwoLines) where onSingleLine = try $ many1Till (noneOf "\n\r") end @@ -694,10 +704,10 @@ enclosedRaw start end = try $ -- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume -- newlines. -many1TillNOrLessNewlines :: Int - -> OrgParser Char - -> OrgParser a - -> OrgParser String +many1TillNOrLessNewlines :: PandocMonad m => Int + -> OrgParser m Char + -> OrgParser m a + -> OrgParser m String many1TillNOrLessNewlines n p end = try $ nMoreLines (Just n) mempty >>= oneOrMore where @@ -746,21 +756,21 @@ mathAllowedNewlines :: Int mathAllowedNewlines = 2 -- | Whether we are right behind a char allowed before emphasis -afterEmphasisPreChar :: OrgParser Bool +afterEmphasisPreChar :: PandocMonad m => OrgParser m Bool afterEmphasisPreChar = do pos <- getPosition lastPrePos <- orgStateLastPreCharPos <$> getState return . fromMaybe True $ (== pos) <$> lastPrePos -- | Whether the parser is right after a forbidden border char -notAfterForbiddenBorderChar :: OrgParser Bool +notAfterForbiddenBorderChar :: PandocMonad m => OrgParser m Bool notAfterForbiddenBorderChar = do pos <- getPosition lastFBCPos <- orgStateLastForbiddenCharPos <$> getState return $ lastFBCPos /= Just pos -- | Read a sub- or superscript expression -subOrSuperExpr :: OrgParser (F Inlines) +subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") @@ -768,7 +778,7 @@ subOrSuperExpr = try $ ] >>= parseFromString (mconcat <$> many inline) where enclosing (left, right) s = left : s ++ [right] -simpleSubOrSuperString :: OrgParser String +simpleSubOrSuperString :: PandocMonad m => OrgParser m String simpleSubOrSuperString = try $ do state <- getState guard . exportSubSuperscripts . orgStateExportSettings $ state @@ -777,17 +787,18 @@ simpleSubOrSuperString = try $ do <*> many1 alphaNum ] -inlineLaTeX :: OrgParser (F Inlines) +inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) inlineLaTeX = try $ do cmd <- inlineLaTeXCommand + ils <- (lift . lift) $ parseAsInlineLaTeX cmd maybe mzero returnF $ - parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd + parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` ils where parseAsMath :: String -> Maybe Inlines parseAsMath cs = B.fromList <$> texMathToPandoc cs - parseAsInlineLaTeX :: String -> Maybe Inlines - parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs + parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines) + parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs parseAsMathMLSym :: String -> Maybe Inlines parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) @@ -803,10 +814,11 @@ inlineLaTeX = try $ do maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just -inlineLaTeXCommand :: OrgParser String +inlineLaTeXCommand :: PandocMonad m => OrgParser m String inlineLaTeXCommand = try $ do rest <- getInput - case runParser rawLaTeXInline def "source" rest of + parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest + case parsed of Right (RawInline _ cs) -> do -- drop any trailing whitespace, those are not be part of the command as -- far as org mode is concerned. @@ -820,16 +832,16 @@ inlineLaTeXCommand = try $ do dropWhileEnd :: (a -> Bool) -> [a] -> [a] dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] -exportSnippet :: OrgParser (F Inlines) +exportSnippet :: PandocMonad m => OrgParser m (F Inlines) exportSnippet = try $ do string "@@" format <- many1Till (alphaNum <|> char '-') (char ':') snippet <- manyTill anyChar (try $ string "@@") returnF $ B.rawInline format snippet -smart :: OrgParser (F Inlines) +smart :: PandocMonad m => OrgParser m (F Inlines) smart = do - getOption readerSmart >>= guard + guardEnabled Ext_smart doubleQuoted <|> singleQuoted <|> choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) where @@ -844,7 +856,7 @@ smart = do <* updateLastForbiddenCharPos *> return (B.str "\x2019") -singleQuoted :: OrgParser (F Inlines) +singleQuoted :: PandocMonad m => OrgParser m (F Inlines) singleQuoted = try $ do guard =<< getExportSetting exportSmartQuotes singleQuoteStart @@ -856,7 +868,7 @@ singleQuoted = try $ do -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote -- in the same paragraph. -doubleQuoted :: OrgParser (F Inlines) +doubleQuoted :: PandocMonad m => OrgParser m (F Inlines) doubleQuoted = try $ do guard =<< getExportSetting exportSmartQuotes doubleQuoteStart diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 1fea3e890..2f4e21248 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -41,6 +41,7 @@ import Text.Pandoc.Readers.Org.Parsing import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Blocks, Inlines ) +import Text.Pandoc.Class ( PandocMonad ) import Text.Pandoc.Definition import Control.Monad ( mzero, void ) @@ -51,7 +52,7 @@ import Data.Monoid ( (<>) ) import Network.HTTP ( urlEncode ) -- | Returns the current meta, respecting export options. -metaExport :: OrgParser (F Meta) +metaExport :: Monad m => OrgParser m (F Meta) metaExport = do st <- getState let settings = orgStateExportSettings st @@ -68,10 +69,10 @@ removeMeta key meta' = -- | Parse and handle a single line containing meta information -- The order, in which blocks are tried, makes sure that we're not looking at -- the beginning of a block, so we don't need to check for it -metaLine :: OrgParser Blocks +metaLine :: PandocMonad m => OrgParser m Blocks metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) -declarationLine :: OrgParser () +declarationLine :: PandocMonad m => OrgParser m () declarationLine = try $ do key <- map toLower <$> metaKey (key', value) <- metaValue key @@ -79,12 +80,12 @@ declarationLine = try $ do let meta' = B.setMeta key' <$> value <*> pure nullMeta in st { orgStateMeta = meta' <> orgStateMeta st } -metaKey :: OrgParser String +metaKey :: Monad m => OrgParser m String metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces -metaValue :: String -> OrgParser (String, (F MetaValue)) +metaValue :: PandocMonad m => String -> OrgParser m (String, (F MetaValue)) metaValue key = let inclKey = "header-includes" in case key of @@ -103,10 +104,10 @@ metaValue key = accumulatingList inclKey (metaExportSnippet "html") _ -> (key,) <$> metaString -metaInlines :: OrgParser (F MetaValue) +metaInlines :: PandocMonad m => OrgParser m (F MetaValue) metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline -metaInlinesCommaSeparated :: OrgParser (F MetaValue) +metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) metaInlinesCommaSeparated = do authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',') newline @@ -114,21 +115,21 @@ metaInlinesCommaSeparated = do let toMetaInlines = MetaInlines . B.toList return $ MetaList . map toMetaInlines <$> sequence authors -metaString :: OrgParser (F MetaValue) +metaString :: Monad m => OrgParser m (F MetaValue) metaString = metaModifiedString id -metaModifiedString :: (String -> String) -> OrgParser (F MetaValue) +metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue) metaModifiedString f = return . MetaString . f <$> anyLine -- | Read an format specific meta definition -metaExportSnippet :: String -> OrgParser (F MetaValue) +metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue) metaExportSnippet format = return . MetaInlines . B.toList . B.rawInline format <$> anyLine -- | Accumulate the result of the @parser@ in a list under @key@. -accumulatingList :: String - -> OrgParser (F MetaValue) - -> OrgParser (F MetaValue) +accumulatingList :: Monad m => String + -> OrgParser m (F MetaValue) + -> OrgParser m (F MetaValue) accumulatingList key p = do value <- p meta' <- orgStateMeta <$> getState @@ -141,7 +142,7 @@ accumulatingList key p = do -- -- export options -- -optionLine :: OrgParser () +optionLine :: Monad m => OrgParser m () optionLine = try $ do key <- metaKey case key of @@ -152,14 +153,14 @@ optionLine = try $ do "typ_todo" -> todoSequence >>= updateState . registerTodoSequence _ -> mzero -addLinkFormat :: String +addLinkFormat :: Monad m => String -> (String -> String) - -> OrgParser () + -> OrgParser m () addLinkFormat key formatter = updateState $ \s -> let fs = orgStateLinkFormatters s in s{ orgStateLinkFormatters = M.insert key formatter fs } -parseLinkFormat :: OrgParser ((String, String -> String)) +parseLinkFormat :: Monad m => OrgParser m ((String, String -> String)) parseLinkFormat = try $ do linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces linkSubst <- parseFormat @@ -167,7 +168,7 @@ parseLinkFormat = try $ do -- | An ad-hoc, single-argument-only implementation of a printf-style format -- parser. -parseFormat :: OrgParser (String -> String) +parseFormat :: Monad m => OrgParser m (String -> String) parseFormat = try $ do replacePlain <|> replaceUrl <|> justAppend where @@ -181,13 +182,13 @@ parseFormat = try $ do rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) -inlinesTillNewline :: OrgParser (F Inlines) +inlinesTillNewline :: PandocMonad m => OrgParser m (F Inlines) inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline -- -- ToDo Sequences and Keywords -- -todoSequence :: OrgParser TodoSequence +todoSequence :: Monad m => OrgParser m TodoSequence todoSequence = try $ do todoKws <- todoKeywords doneKws <- optionMaybe $ todoDoneSep *> todoKeywords @@ -201,13 +202,13 @@ todoSequence = try $ do (x:xs) -> return $ keywordsToSequence (reverse xs) [x] where - todoKeywords :: OrgParser [String] + todoKeywords :: Monad m => OrgParser m [String] todoKeywords = try $ let keyword = many1 nonspaceChar <* skipSpaces endOfKeywords = todoDoneSep <|> void newline in manyTill keyword (lookAhead endOfKeywords) - todoDoneSep :: OrgParser () + todoDoneSep :: Monad m => OrgParser m () todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1 keywordsToSequence :: [String] -> [String] -> TodoSequence diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 38f95ca95..181dd1d5c 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -51,7 +51,7 @@ module Text.Pandoc.Readers.Org.ParserState ) where import Control.Monad (liftM, liftM2) -import Control.Monad.Reader (Reader, runReader, ask, asks, local) +import Control.Monad.Reader (Reader, runReader, ReaderT, ask, asks, local) import Data.Default (Default(..)) import qualified Data.Map as M @@ -122,7 +122,7 @@ instance HasLastStrPosition OrgParserState where getLastStrPos = orgStateLastStrPos setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } -instance HasQuoteContext st (Reader OrgParserLocal) where +instance Monad m => HasQuoteContext st (ReaderT OrgParserLocal m) where getQuoteContext = asks orgLocalQuoteContext withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q}) diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 95415f823..1eb8a3b00 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -72,6 +72,7 @@ module Text.Pandoc.Readers.Org.Parsing , citeKey -- * Re-exports from Text.Pandoc.Parsec , runParser + , runParserT , getInput , char , letter @@ -114,17 +115,17 @@ import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline , parseFromString ) import Control.Monad ( guard ) -import Control.Monad.Reader ( Reader ) +import Control.Monad.Reader ( ReaderT ) -- | The parser used to read org files. -type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) +type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m) -- -- Adaptions and specializations of parsing utilities -- -- | Parse any line of text -anyLine :: OrgParser String +anyLine :: Monad m => OrgParser m String anyLine = P.anyLine <* updateLastPreCharPos @@ -132,7 +133,7 @@ anyLine = -- The version Text.Pandoc.Parsing cannot be used, as we need additional parts -- of the state saved and restored. -parseFromString :: OrgParser a -> String -> OrgParser a +parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a parseFromString parser str' = do oldLastPreCharPos <- orgStateLastPreCharPos <$> getState updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } @@ -141,33 +142,34 @@ parseFromString parser str' = do return result -- | Skip one or more tab or space characters. -skipSpaces1 :: OrgParser () +skipSpaces1 :: Monad m => OrgParser m () skipSpaces1 = skipMany1 spaceChar -- | Like @Text.Parsec.Char.newline@, but causes additional state changes. -newline :: OrgParser Char +newline :: Monad m => OrgParser m Char newline = P.newline <* updateLastPreCharPos <* updateLastForbiddenCharPos -- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. -blanklines :: OrgParser [Char] +blanklines :: Monad m => OrgParser m [Char] blanklines = P.blanklines <* updateLastPreCharPos <* updateLastForbiddenCharPos -- | Succeeds when we're in list context. -inList :: OrgParser () +inList :: Monad m => OrgParser m () inList = do ctx <- orgStateParserContext <$> getState guard (ctx == ListItemState) -- | Parse in different context -withContext :: ParserContext -- ^ New parser context - -> OrgParser a -- ^ Parser to run in that context - -> OrgParser a +withContext :: Monad m + => ParserContext -- ^ New parser context + -> OrgParser m a -- ^ Parser to run in that context + -> OrgParser m a withContext context parser = do oldContext <- orgStateParserContext <$> getState updateState $ \s -> s{ orgStateParserContext = context } @@ -180,19 +182,19 @@ withContext context parser = do -- -- | Get an export setting. -getExportSetting :: (ExportSettings -> a) -> OrgParser a +getExportSetting :: Monad m => (ExportSettings -> a) -> OrgParser m a getExportSetting s = s . orgStateExportSettings <$> getState -- | Set the current position as the last position at which a forbidden char -- was found (i.e. a character which is not allowed at the inner border of -- markup). -updateLastForbiddenCharPos :: OrgParser () +updateLastForbiddenCharPos :: Monad m => OrgParser m () updateLastForbiddenCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} -- | Set the current parser position as the position at which a character was -- seen which allows inline markup to follow. -updateLastPreCharPos :: OrgParser () +updateLastPreCharPos :: Monad m => OrgParser m () updateLastPreCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastPreCharPos = Just p} @@ -201,15 +203,15 @@ updateLastPreCharPos = getPosition >>= \p -> -- -- | Read the key of a plist style key-value list. -orgArgKey :: OrgParser String +orgArgKey :: Monad m => OrgParser m String orgArgKey = try $ skipSpaces *> char ':' *> many1 orgArgWordChar -- | Read the value of a plist style key-value list. -orgArgWord :: OrgParser String +orgArgWord :: Monad m => OrgParser m String orgArgWord = many1 orgArgWordChar -- | Chars treated as part of a word in plists. -orgArgWordChar :: OrgParser Char +orgArgWordChar :: Monad m => OrgParser m Char orgArgWordChar = alphaNum <|> oneOf "-_" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index e05b6cba2..c9868c11f 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} {- @@ -29,19 +30,18 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} -module Text.Pandoc.Readers.RST ( - readRST, - readRSTWithWarnings - ) where +module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition import Text.Pandoc.Builder (setMeta, fromList) import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Options +import Text.Pandoc.Error +import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad ( when, liftM, guard, mzero ) -import Data.List ( findIndex, intercalate, +import Data.List ( findIndex, intercalate, isInfixOf, transpose, sort, deleteFirstsBy, isSuffixOf , nub, union) -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, isJust) import qualified Data.Map as M import Text.Printf ( printf ) import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) @@ -49,18 +49,21 @@ import qualified Text.Pandoc.Builder as B import Data.Sequence (viewr, ViewR(..)) import Data.Char (toLower, isHexDigit, isSpace) import Data.Monoid ((<>)) -import Text.Pandoc.Error +import Control.Monad.Except (throwError, catchError) +import Text.Pandoc.Class (PandocMonad, warning, readFileLazy, warningWithPos) -- | Parse reStructuredText string and return Pandoc document. -readRST :: ReaderOptions -- ^ Reader options +readRST :: PandocMonad m + => ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") - -readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String]) -readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n") + -> m Pandoc +readRST opts s = do + parsed <- (readWithM parseRST) def{ stateOptions = opts } (s ++ "\n\n") + case parsed of + Right result -> return result + Left e -> throwError e -type RSTParser = Parser [Char] ParserState +type RSTParser m = ParserT [Char] ParserState m -- -- Constants and data structure definitions @@ -141,7 +144,7 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds factorSemi (Str ys) factorSemi x = [x] -parseRST :: RSTParser Pandoc +parseRST :: PandocMonad m => RSTParser m Pandoc parseRST = do optional blanklines -- skip blank lines at beginning of file startPos <- getPosition @@ -168,13 +171,14 @@ parseRST = do -- parsing blocks -- -parseBlocks :: RSTParser Blocks +parseBlocks :: PandocMonad m => RSTParser m Blocks parseBlocks = mconcat <$> manyTill block eof -block :: RSTParser Blocks +block :: PandocMonad m => RSTParser m Blocks block = choice [ codeBlock , blockQuote , fieldList + , include , directive , comment , header @@ -191,7 +195,7 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: Int -> RSTParser (String, String) +rawFieldListItem :: Monad m => Int -> RSTParser m (String, String) rawFieldListItem minIndent = try $ do indent <- length <$> many (char ' ') guard $ indent >= minIndent @@ -204,7 +208,7 @@ rawFieldListItem minIndent = try $ do let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" return (name, raw) -fieldListItem :: Int -> RSTParser (Inlines, [Blocks]) +fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent term <- parseInlineFromString name @@ -212,7 +216,7 @@ fieldListItem minIndent = try $ do optional blanklines return (term, [contents]) -fieldList :: RSTParser Blocks +fieldList :: PandocMonad m => RSTParser m Blocks fieldList = try $ do indent <- length <$> lookAhead (many spaceChar) items <- many1 $ fieldListItem indent @@ -224,7 +228,7 @@ fieldList = try $ do -- line block -- -lineBlock :: RSTParser Blocks +lineBlock :: PandocMonad m => RSTParser m Blocks lineBlock = try $ do lines' <- lineBlockLines lines'' <- mapM parseInlineFromString lines' @@ -235,7 +239,7 @@ lineBlock = try $ do -- -- note: paragraph can end in a :: starting a code block -para :: RSTParser Blocks +para :: PandocMonad m => RSTParser m Blocks para = try $ do result <- trimInlines . mconcat <$> many1 inline option (B.plain result) $ try $ do @@ -248,18 +252,18 @@ para = try $ do <> raw _ -> return (B.para result) -plain :: RSTParser Blocks +plain :: PandocMonad m => RSTParser m Blocks plain = B.plain . trimInlines . mconcat <$> many1 inline -- -- header blocks -- -header :: RSTParser Blocks +header :: PandocMonad m => RSTParser m Blocks header = doubleHeader <|> singleHeader <?> "header" -- a header with lines on top and bottom -doubleHeader :: RSTParser Blocks +doubleHeader :: PandocMonad m => RSTParser m Blocks doubleHeader = try $ do c <- oneOf underlineChars rest <- many (char c) -- the top line @@ -285,7 +289,7 @@ doubleHeader = try $ do return $ B.headerWith attr level txt -- a header with line on the bottom only -singleHeader :: RSTParser Blocks +singleHeader :: PandocMonad m => RSTParser m Blocks singleHeader = try $ do notFollowedBy' whitespace txt <- trimInlines . mconcat <$> many1 (do {notFollowedBy blankline; inline}) @@ -309,7 +313,7 @@ singleHeader = try $ do -- hrule block -- -hrule :: Parser [Char] st Blocks +hrule :: Monad m => ParserT [Char] st m Blocks hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -323,14 +327,14 @@ hrule = try $ do -- -- read a line indented by a given string -indentedLine :: String -> Parser [Char] st [Char] +indentedLine :: Monad m => String -> ParserT [Char] st m [Char] indentedLine indents = try $ do string indents anyLine -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. -indentedBlock :: Parser [Char] st [Char] +indentedBlock :: Monad m => ParserT [Char] st m [Char] indentedBlock = try $ do indents <- lookAhead $ many1 spaceChar lns <- many1 $ try $ do b <- option "" blanklines @@ -339,24 +343,24 @@ indentedBlock = try $ do optional blanklines return $ unlines lns -quotedBlock :: Parser [Char] st [Char] +quotedBlock :: Monad m => ParserT [Char] st m [Char] quotedBlock = try $ do quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" lns <- many1 $ lookAhead (char quote) >> anyLine optional blanklines return $ unlines lns -codeBlockStart :: Parser [Char] st Char +codeBlockStart :: Monad m => ParserT [Char] st m Char codeBlockStart = string "::" >> blankline >> blankline -codeBlock :: Parser [Char] st Blocks +codeBlock :: Monad m => ParserT [Char] st m Blocks codeBlock = try $ codeBlockStart >> codeBlockBody -codeBlockBody :: Parser [Char] st Blocks +codeBlockBody :: Monad m => ParserT [Char] st m Blocks codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> (indentedBlock <|> quotedBlock) -lhsCodeBlock :: RSTParser Blocks +lhsCodeBlock :: Monad m => RSTParser m Blocks lhsCodeBlock = try $ do getPosition >>= guard . (==1) . sourceColumn guardEnabled Ext_literate_haskell @@ -366,14 +370,14 @@ lhsCodeBlock = try $ do return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns -latexCodeBlock :: Parser [Char] st [[Char]] +latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]] latexCodeBlock = try $ do try (latexBlockLine "\\begin{code}") many1Till anyLine (try $ latexBlockLine "\\end{code}") where latexBlockLine s = skipMany spaceChar >> string s >> blankline -birdCodeBlock :: Parser [Char] st [[Char]] +birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]] birdCodeBlock = filterSpace <$> many1 birdTrackLine where filterSpace lns = -- if (as is normal) there is always a space after >, drop it @@ -381,28 +385,103 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine then map (drop 1) lns else lns -birdTrackLine :: Parser [Char] st [Char] +birdTrackLine :: Monad m => ParserT [Char] st m [Char] birdTrackLine = char '>' >> anyLine -- -- block quotes -- -blockQuote :: RSTParser Blocks +blockQuote :: PandocMonad m => RSTParser m Blocks blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: contents <- parseFromString parseBlocks $ raw ++ "\n\n" return $ B.blockQuote contents +{- +Unsupported options for include: +tab-width +encoding +-} + +include :: PandocMonad m => RSTParser m Blocks +include = try $ do + string ".. include::" + skipMany spaceChar + f <- trim <$> anyLine + fields <- many $ rawFieldListItem 3 + -- options + let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead + let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead + guard $ not (null f) + oldPos <- getPosition + oldInput <- getInput + containers <- stateContainers <$> getState + when (f `elem` containers) $ + throwError $ PandocParseError $ "Include file loop at " ++ show oldPos + updateState $ \s -> s{ stateContainers = f : stateContainers s } + res <- readFileLazy' f + contents <- case res of + Right x -> return x + Left _e -> do + warning $ "Could not read include file " ++ f ++ "." + return "" + let contentLines = lines contents + let numLines = length contentLines + let startLine' = case startLine of + Nothing -> 1 + Just x | x >= 0 -> x + | otherwise -> numLines + x -- negative from end + let endLine' = case endLine of + Nothing -> numLines + 1 + Just x | x >= 0 -> x + | otherwise -> numLines + x -- negative from end + let contentLines' = drop (startLine' - 1) + $ take (endLine' - 1) + $ contentLines + let contentLines'' = (case trim <$> lookup "end-before" fields of + Just patt -> takeWhile (not . (patt `isInfixOf`)) + Nothing -> id) . + (case trim <$> lookup "start-after" fields of + Just patt -> drop 1 . + dropWhile (not . (patt `isInfixOf`)) + Nothing -> id) $ contentLines' + let contents' = unlines contentLines'' + case lookup "code" fields of + Just lang -> do + let numberLines = lookup "number-lines" fields + let classes = trimr lang : ["numberLines" | isJust numberLines] ++ + maybe [] words (lookup "class" fields) + let kvs = maybe [] (\n -> [("startFrom", trimr n)]) numberLines + let ident = maybe "" trimr $ lookup "name" fields + let attribs = (ident, classes, kvs) + return $ B.codeBlockWith attribs contents' + Nothing -> case lookup "literal" fields of + Just _ -> return $ B.rawBlock "rst" contents' + Nothing -> do + setPosition $ newPos f 1 1 + setInput contents' + bs <- optional blanklines >> + (mconcat <$> many block) + setInput oldInput + setPosition oldPos + updateState $ \s -> s{ stateContainers = + tail $ stateContainers s } + return bs + +readFileLazy' :: PandocMonad m => FilePath -> m (Either PandocError String) +readFileLazy' f = catchError ((Right . UTF8.toStringLazy) <$> readFileLazy f) $ + \(e :: PandocError) -> return (Left e) + -- -- list blocks -- -list :: RSTParser Blocks +list :: PandocMonad m => RSTParser m Blocks list = choice [ bulletList, orderedList, definitionList ] <?> "list" -definitionListItem :: RSTParser (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => RSTParser m (Inlines, [Blocks]) definitionListItem = try $ do -- avoid capturing a directive or comment notFollowedBy (try $ char '.' >> char '.') @@ -412,11 +491,11 @@ definitionListItem = try $ do contents <- parseFromString parseBlocks $ raw ++ "\n" return (term, [contents]) -definitionList :: RSTParser Blocks +definitionList :: PandocMonad m => RSTParser m Blocks definitionList = B.definitionList <$> many1 definitionListItem -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Parser [Char] st Int +bulletListStart :: Monad m => ParserT [Char] st m Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -424,16 +503,16 @@ bulletListStart = try $ do return $ length (marker:white) -- parses ordered list start and returns its length (inc following whitespace) -orderedListStart :: ListNumberStyle +orderedListStart :: Monad m => ListNumberStyle -> ListNumberDelim - -> RSTParser Int + -> RSTParser m Int orderedListStart style delim = try $ do (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) white <- many1 spaceChar return $ markerLen + length white -- parse a line of a list item -listLine :: Int -> RSTParser [Char] +listLine :: Monad m => Int -> RSTParser m [Char] listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength @@ -441,7 +520,7 @@ listLine markerLength = try $ do return $ line ++ "\n" -- indent by specified number of spaces (or equiv. tabs) -indentWith :: Int -> RSTParser [Char] +indentWith :: Monad m => Int -> RSTParser m [Char] indentWith num = do tabStop <- getOption readerTabStop if (num < tabStop) @@ -450,8 +529,8 @@ indentWith num = do (try (char '\t' >> count (num - tabStop) (char ' '))) ] -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: RSTParser Int - -> RSTParser (Int, [Char]) +rawListItem :: Monad m => RSTParser m Int + -> RSTParser m (Int, [Char]) rawListItem start = try $ do markerLength <- start firstLine <- anyLine @@ -461,14 +540,15 @@ rawListItem start = try $ do -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Int -> RSTParser [Char] +listContinuation :: Monad m => Int -> RSTParser m [Char] listContinuation markerLength = try $ do blanks <- many1 blankline result <- many1 (listLine markerLength) return $ blanks ++ concat result -listItem :: RSTParser Int - -> RSTParser Blocks +listItem :: PandocMonad m + => RSTParser m Int + -> RSTParser m Blocks listItem start = try $ do (markerLength, first) <- rawListItem start rest <- many (listContinuation markerLength) @@ -490,21 +570,21 @@ listItem start = try $ do [Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys] _ -> parsed -orderedList :: RSTParser Blocks +orderedList :: PandocMonad m => RSTParser m Blocks orderedList = try $ do (start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar) items <- many1 (listItem (orderedListStart style delim)) - let items' = compactify' items + let items' = compactify items return $ B.orderedListWith (start, style, delim) items' -bulletList :: RSTParser Blocks -bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) +bulletList :: PandocMonad m => RSTParser m Blocks +bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart) -- -- directive (e.g. comment, container, compound-paragraph) -- -comment :: RSTParser Blocks +comment :: Monad m => RSTParser m Blocks comment = try $ do string ".." skipMany1 spaceChar <|> (() <$ lookAhead newline) @@ -513,11 +593,11 @@ comment = try $ do optional indentedBlock return mempty -directiveLabel :: RSTParser String +directiveLabel :: Monad m => RSTParser m String directiveLabel = map toLower <$> many1Till (letter <|> char '-') (try $ string "::") -directive :: RSTParser Blocks +directive :: PandocMonad m => RSTParser m Blocks directive = try $ do string ".." directive' @@ -526,7 +606,7 @@ directive = try $ do -- date -- include -- title -directive' :: RSTParser Blocks +directive' :: PandocMonad m => RSTParser m Blocks directive' = do skipMany1 spaceChar label <- directiveLabel @@ -614,13 +694,13 @@ directive' = do return $ B.divWith attrs children other -> do pos <- getPosition - addWarning (Just pos) $ "ignoring unknown directive: " ++ other + warningWithPos pos $ "ignoring unknown directive: " ++ other return mempty -- TODO: -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix -addNewRole :: String -> [(String, String)] -> RSTParser Blocks +addNewRole :: PandocMonad m => String -> [(String, String)] -> RSTParser m Blocks addNewRole roleString fields = do (role, parentRole) <- parseFromString inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState @@ -642,20 +722,20 @@ addNewRole roleString fields = do -- warn about syntax we ignore flip mapM_ fields $ \(key, _) -> case key of - "language" -> when (baseRole /= "code") $ addWarning Nothing $ + "language" -> when (baseRole /= "code") $ warning $ "ignoring :language: field because the parent of role :" ++ role ++ ": is :" ++ baseRole ++ ": not :code:" - "format" -> when (baseRole /= "raw") $ addWarning Nothing $ + "format" -> when (baseRole /= "raw") $ warning $ "ignoring :format: field because the parent of role :" ++ role ++ ": is :" ++ baseRole ++ ": not :raw:" - _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++ + _ -> warning $ "ignoring unknown field :" ++ key ++ ": in definition of role :" ++ role ++ ": in" when (parentRole == "raw" && countKeys "format" > 1) $ - addWarning Nothing $ + warning $ "ignoring :format: fields after the first in the definition of role :" ++ role ++": in" when (parentRole == "code" && countKeys "language" > 1) $ - addWarning Nothing $ + warning $ "ignoring :language: fields after the first in the definition of role :" ++ role ++": in" @@ -664,7 +744,7 @@ addNewRole roleString fields = do M.insert role (baseRole, fmt, attr) customRoles } - return $ B.singleton Null + return mempty where countKeys k = length . filter (== k) . map fst $ fields inheritedRole = @@ -700,7 +780,7 @@ extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc where (ds,rest) = span isHexDigit s mbc = safeRead ('\'':'\\':'x':ds ++ "'") -extractCaption :: RSTParser (Inlines, Blocks) +extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks) extractCaption = do capt <- trimInlines . mconcat <$> many inline legend <- optional blanklines >> (mconcat <$> many block) @@ -712,7 +792,7 @@ toChunks = dropWhile null . map (trim . unlines) . splitBy (all (`elem` (" \t" :: String))) . lines -codeblock :: [String] -> Maybe String -> String -> String -> RSTParser Blocks +codeblock :: [String] -> Maybe String -> String -> String -> RSTParser m Blocks codeblock classes numberLines lang body = return $ B.codeBlockWith attribs $ stripTrailingNewlines body where attribs = ("", classes', kvs) @@ -728,7 +808,7 @@ codeblock classes numberLines lang body = --- note block --- -noteBlock :: RSTParser [Char] +noteBlock :: Monad m => RSTParser m [Char] noteBlock = try $ do startPos <- getPosition string ".." @@ -747,7 +827,7 @@ noteBlock = try $ do -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -noteMarker :: RSTParser [Char] +noteMarker :: Monad m => RSTParser m [Char] noteMarker = do char '[' res <- many1 digit @@ -760,13 +840,13 @@ noteMarker = do -- reference key -- -quotedReferenceName :: RSTParser Inlines +quotedReferenceName :: PandocMonad m => RSTParser m Inlines quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! label' <- trimInlines . mconcat <$> many1Till inline (char '`') return label' -unquotedReferenceName :: RSTParser Inlines +unquotedReferenceName :: PandocMonad m => RSTParser m Inlines unquotedReferenceName = try $ do label' <- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':') return label' @@ -775,24 +855,24 @@ unquotedReferenceName = try $ do -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName' :: Parser [Char] st String +simpleReferenceName' :: Monad m => ParserT [Char] st m String simpleReferenceName' = do x <- alphaNum xs <- many $ alphaNum <|> (try $ oneOf "-_:+." <* lookAhead alphaNum) return (x:xs) -simpleReferenceName :: Parser [Char] st Inlines +simpleReferenceName :: Monad m => ParserT [Char] st m Inlines simpleReferenceName = do raw <- simpleReferenceName' return $ B.str raw -referenceName :: RSTParser Inlines +referenceName :: PandocMonad m => RSTParser m Inlines referenceName = quotedReferenceName <|> (try $ simpleReferenceName <* lookAhead (char ':')) <|> unquotedReferenceName -referenceKey :: RSTParser [Char] +referenceKey :: PandocMonad m => RSTParser m [Char] referenceKey = do startPos <- getPosition choice [substKey, anonymousKey, regularKey] @@ -801,7 +881,7 @@ referenceKey = do -- return enough blanks to replace key return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -targetURI :: Parser [Char] st [Char] +targetURI :: Monad m => ParserT [Char] st m [Char] targetURI = do skipSpaces optional newline @@ -810,7 +890,7 @@ targetURI = do blanklines return $ escapeURI $ trim $ contents -substKey :: RSTParser () +substKey :: PandocMonad m => RSTParser m () substKey = try $ do string ".." skipMany1 spaceChar @@ -828,7 +908,7 @@ substKey = try $ do let key = toKey $ stripFirstAndLast ref updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s } -anonymousKey :: RSTParser () +anonymousKey :: Monad m => RSTParser m () anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI @@ -842,7 +922,7 @@ stripTicks = reverse . stripTick . reverse . stripTick where stripTick ('`':xs) = xs stripTick xs = xs -regularKey :: RSTParser () +regularKey :: PandocMonad m => RSTParser m () regularKey = try $ do string ".. _" (_,ref) <- withRaw referenceName @@ -869,45 +949,46 @@ regularKey = try $ do -- Grid tables TODO: -- - column spans -dashedLine :: Char -> Parser [Char] st (Int, Int) +dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Char -> Parser [Char] st [(Int,Int)] +simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator -simpleTableSep :: Char -> RSTParser Char +simpleTableSep :: Monad m => Char -> RSTParser m Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -- Parse a table footer -simpleTableFooter :: RSTParser [Char] +simpleTableFooter :: Monad m => RSTParser m [Char] simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: [Int] -> RSTParser [String] +simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String] simpleTableRawLine indices = do line <- many1Till anyChar newline return (simpleTableSplitLine indices line) -- Parse a table row and return a list of blocks (columns). -simpleTableRow :: [Int] -> RSTParser [[Block]] +simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [Blocks] simpleTableRow indices = do notFollowedBy' simpleTableFooter firstLine <- simpleTableRawLine indices colLines <- return [] -- TODO let cols = map unlines . transpose $ firstLine : colLines - mapM (parseFromString (B.toList . mconcat <$> many plain)) cols + mapM (parseFromString (mconcat <$> many plain)) cols simpleTableSplitLine :: [Int] -> String -> [String] simpleTableSplitLine indices line = map trim $ tail $ splitByIndices (init indices) line -simpleTableHeader :: Bool -- ^ Headerless table - -> RSTParser ([[Block]], [Alignment], [Int]) +simpleTableHeader :: PandocMonad m + => Bool -- ^ Headerless table + -> RSTParser m ([Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines rawContent <- if headless @@ -921,26 +1002,33 @@ simpleTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else simpleTableSplitLine indices rawContent - heads <- mapM (parseFromString (B.toList . mconcat <$> many plain)) $ + heads <- mapM (parseFromString (mconcat <$> many plain)) $ map trim rawHeads return (heads, aligns, indices) -- Parse a simple table. -simpleTable :: Bool -- ^ Headerless table - -> RSTParser Blocks +simpleTable :: PandocMonad m + => Bool -- ^ Headerless table + -> RSTParser m Blocks simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter + tbl <- tableWith (simpleTableHeader headless) simpleTableRow + sep simpleTableFooter -- Simple tables get 0s for relative column widths (i.e., use default) - return $ B.singleton $ Table c a (replicate (length a) 0) h l + case B.toList tbl of + [Table c a _w h l] -> return $ B.singleton $ + Table c a (replicate (length a) 0) h l + _ -> do + warning "tableWith returned something unexpected" + return tbl -- TODO error? where sep = return () -- optional (simpleTableSep '-') -gridTable :: Bool -- ^ Headerless table - -> RSTParser Blocks -gridTable headerless = B.singleton - <$> gridTableWith (B.toList <$> parseBlocks) headerless +gridTable :: PandocMonad m + => Bool -- ^ Headerless table + -> RSTParser m Blocks +gridTable headerless = gridTableWith parseBlocks headerless -table :: RSTParser Blocks +table :: PandocMonad m => RSTParser m Blocks table = gridTable False <|> simpleTable False <|> gridTable True <|> simpleTable True <?> "table" @@ -948,7 +1036,7 @@ table = gridTable False <|> simpleTable False <|> -- inline -- -inline :: RSTParser Inlines +inline :: PandocMonad m => RSTParser m Inlines inline = choice [ note -- can start with whitespace, so try before ws , whitespace , link @@ -964,29 +1052,29 @@ inline = choice [ note -- can start with whitespace, so try before ws , escapedChar , symbol ] <?> "inline" -parseInlineFromString :: String -> RSTParser Inlines +parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline) -hyphens :: RSTParser Inlines +hyphens :: Monad m => RSTParser m Inlines hyphens = do result <- many1 (char '-') optional endline -- don't want to treat endline after hyphen or dash as a space return $ B.str result -escapedChar :: Parser [Char] st Inlines +escapedChar :: Monad m => ParserT [Char] st m Inlines escapedChar = do c <- escaped anyChar return $ if c == ' ' -- '\ ' is null in RST then mempty else B.str [c] -symbol :: RSTParser Inlines +symbol :: Monad m => RSTParser m Inlines symbol = do result <- oneOf specialChars return $ B.str [result] -- parses inline code, between codeStart and codeEnd -code :: RSTParser Inlines +code :: Monad m => RSTParser m Inlines code = try $ do string "``" result <- manyTill anyChar (try (string "``")) @@ -994,7 +1082,7 @@ code = try $ do $ trim $ unwords $ lines result -- succeeds only if we're not right after a str (ie. in middle of word) -atStart :: RSTParser a -> RSTParser a +atStart :: Monad m => RSTParser m a -> RSTParser m a atStart p = do pos <- getPosition st <- getState @@ -1002,11 +1090,11 @@ atStart p = do guard $ stateLastStrPos st /= Just pos p -emph :: RSTParser Inlines +emph :: PandocMonad m => RSTParser m Inlines emph = B.emph . trimInlines . mconcat <$> enclosed (atStart $ char '*') (char '*') inline -strong :: RSTParser Inlines +strong :: PandocMonad m => RSTParser m Inlines strong = B.strong . trimInlines . mconcat <$> enclosed (atStart $ string "**") (try $ string "**") inline @@ -1018,12 +1106,12 @@ strong = B.strong . trimInlines . mconcat <$> -- - Classes are silently discarded in addNewRole -- - Lacks sensible implementation for title-reference (which is the default) -- - Allows direct use of the :raw: role, rST only allows inherited use. -interpretedRole :: RSTParser Inlines +interpretedRole :: PandocMonad m => RSTParser m Inlines interpretedRole = try $ do (role, contents) <- roleBefore <|> roleAfter renderRole contents Nothing role nullAttr -renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines +renderRole :: PandocMonad m => String -> Maybe String -> String -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of "sup" -> return $ B.superscript $ B.str contents "superscript" -> return $ B.superscript $ B.str contents @@ -1050,7 +1138,7 @@ renderRole contents fmt role attr = case role of renderRole contents newFmt newRole newAttr Nothing -> do pos <- getPosition - addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in" + warningWithPos pos $ "ignoring unknown role :" ++ custom ++ ": in" return $ B.str contents -- Undefined role where titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour @@ -1063,31 +1151,31 @@ renderRole contents fmt role attr = case role of addClass :: String -> Attr -> Attr addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues) -roleName :: RSTParser String +roleName :: PandocMonad m => RSTParser m String roleName = many1 (letter <|> char '-') -roleMarker :: RSTParser String +roleMarker :: PandocMonad m => RSTParser m String roleMarker = char ':' *> roleName <* char ':' -roleBefore :: RSTParser (String,String) +roleBefore :: PandocMonad m => RSTParser m (String,String) roleBefore = try $ do role <- roleMarker contents <- unmarkedInterpretedText return (role,contents) -roleAfter :: RSTParser (String,String) +roleAfter :: PandocMonad m => RSTParser m (String,String) roleAfter = try $ do contents <- unmarkedInterpretedText role <- roleMarker <|> (stateRstDefaultRole <$> getState) return (role,contents) -unmarkedInterpretedText :: RSTParser [Char] +unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char] unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar -whitespace :: RSTParser Inlines +whitespace :: PandocMonad m => RSTParser m Inlines whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" -str :: RSTParser Inlines +str :: Monad m => RSTParser m Inlines str = do let strChar = noneOf ("\t\n " ++ specialChars) result <- many1 strChar @@ -1095,7 +1183,7 @@ str = do return $ B.str result -- an endline character that can be treated as a space, not a structural break -endline :: RSTParser Inlines +endline :: Monad m => RSTParser m Inlines endline = try $ do newline notFollowedBy blankline @@ -1111,10 +1199,10 @@ endline = try $ do -- links -- -link :: RSTParser Inlines +link :: PandocMonad m => RSTParser m Inlines link = choice [explicitLink, referenceLink, autoLink] <?> "link" -explicitLink :: RSTParser Inlines +explicitLink :: PandocMonad m => RSTParser m Inlines explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` marks start of inline code @@ -1135,7 +1223,7 @@ explicitLink = try $ do case M.lookup key keyTable of Nothing -> do pos <- getPosition - addWarning (Just pos) $ + warningWithPos pos $ "Could not find reference for " ++ show key return ("","",nullAttr) @@ -1143,7 +1231,7 @@ explicitLink = try $ do _ -> return (src, "", nullAttr) return $ B.linkWith attr (escapeURI src') tit label'' -referenceLink :: RSTParser Inlines +referenceLink :: PandocMonad m => RSTParser m Inlines referenceLink = try $ do (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <* char '_' @@ -1160,7 +1248,7 @@ referenceLink = try $ do ((src,tit), attr) <- case M.lookup key keyTable of Nothing -> do pos <- getPosition - addWarning (Just pos) $ + warningWithPos pos $ "Could not find reference for " ++ show key return (("",""),nullAttr) @@ -1169,20 +1257,20 @@ referenceLink = try $ do when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } return $ B.linkWith attr src tit label' -autoURI :: RSTParser Inlines +autoURI :: Monad m => RSTParser m Inlines autoURI = do (orig, src) <- uri return $ B.link src "" $ B.str orig -autoEmail :: RSTParser Inlines +autoEmail :: Monad m => RSTParser m Inlines autoEmail = do (orig, src) <- emailAddress return $ B.link src "" $ B.str orig -autoLink :: RSTParser Inlines +autoLink :: PandocMonad m => RSTParser m Inlines autoLink = autoURI <|> autoEmail -subst :: RSTParser Inlines +subst :: PandocMonad m => RSTParser m Inlines subst = try $ do (_,ref) <- withRaw $ enclosed (char '|') (char '|') inline state <- getState @@ -1191,12 +1279,12 @@ subst = try $ do case M.lookup key substTable of Nothing -> do pos <- getPosition - addWarning (Just pos) $ + warningWithPos pos $ "Could not find reference for " ++ show key return mempty Just target -> return target -note :: RSTParser Inlines +note :: PandocMonad m => RSTParser m Inlines note = try $ do optional whitespace ref <- noteMarker @@ -1206,7 +1294,7 @@ note = try $ do case lookup ref notes of Nothing -> do pos <- getPosition - addWarning (Just pos) $ + warningWithPos pos $ "Could not find note for " ++ show ref return mempty Just raw -> do @@ -1224,20 +1312,20 @@ note = try $ do updateState $ \st -> st{ stateNotes = newnotes } return $ B.note contents -smart :: RSTParser Inlines +smart :: PandocMonad m => RSTParser m Inlines smart = do - getOption readerSmart >>= guard + guardEnabled Ext_smart doubleQuoted <|> singleQuoted <|> choice [apostrophe, dash, ellipses] -singleQuoted :: RSTParser Inlines +singleQuoted :: PandocMonad m => RSTParser m Inlines singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ B.singleQuoted . trimInlines . mconcat <$> many1Till inline singleQuoteEnd -doubleQuoted :: RSTParser Inlines +doubleQuoted :: PandocMonad m => RSTParser m Inlines doubleQuoted = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 76a25ad82..1a827bcd9 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -30,7 +30,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of twiki text to 'Pandoc' document. -} module Text.Pandoc.Readers.TWiki ( readTWiki - , readTWikiWithWarnings ) where import Text.Pandoc.Definition @@ -40,44 +39,38 @@ import Text.Pandoc.Parsing hiding (enclosed, macro, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) import Control.Monad import Text.Printf (printf) -import Debug.Trace (trace) import Text.Pandoc.XML (fromEntities) import Data.Maybe (fromMaybe) import Text.HTML.TagSoup import Data.Char (isAlphaNum) import qualified Data.Foldable as F -import Text.Pandoc.Error +import Control.Monad.Except (throwError) +import Text.Pandoc.Class (PandocMonad, report) -- | Read twiki from an input string and return a Pandoc document. -readTWiki :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readTWiki opts s = - (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n") - -readTWikiWithWarnings :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError (Pandoc, [String]) -readTWikiWithWarnings opts s = - (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n") - where parseTWikiWithWarnings = do - doc <- parseTWiki - warnings <- stateWarnings <$> getState - return (doc, warnings) - -type TWParser = Parser [Char] ParserState +readTWiki :: PandocMonad m + => ReaderOptions + -> String + -> m Pandoc +readTWiki opts s = do + res <- readWithM parseTWiki def{ stateOptions = opts } (s ++ "\n\n") + case res of + Left e -> throwError e + Right d -> return d + +type TWParser = ParserT [Char] ParserState -- -- utility functions -- -tryMsg :: String -> TWParser a -> TWParser a +tryMsg :: String -> TWParser m a -> TWParser m a tryMsg msg p = try p <?> msg -skip :: TWParser a -> TWParser () +skip :: TWParser m a -> TWParser m () skip parser = parser >> return () -nested :: TWParser a -> TWParser a +nested :: PandocMonad m => TWParser m a -> TWParser m a nested p = do nestlevel <- stateMaxNestingLevel <$> getState guard $ nestlevel > 0 @@ -86,7 +79,7 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -htmlElement :: String -> TWParser (Attr, String) +htmlElement :: PandocMonad m => String -> TWParser m (Attr, String) htmlElement tag = tryMsg tag $ do (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) content <- manyTill anyChar (endtag <|> endofinput) @@ -103,7 +96,8 @@ htmlAttrToPandoc attrs = (ident, classes, keyvals) classes = maybe [] words $ lookup "class" attrs keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] -parseHtmlContentWithAttrs :: String -> TWParser a -> TWParser (Attr, [a]) +parseHtmlContentWithAttrs :: PandocMonad m + => String -> TWParser m a -> TWParser m (Attr, [a]) parseHtmlContentWithAttrs tag parser = do (attr, content) <- htmlElement tag parsedContent <- try $ parseContent content @@ -112,14 +106,14 @@ parseHtmlContentWithAttrs tag parser = do parseContent = parseFromString $ nested $ manyTill parser endOfContent endOfContent = try $ skipMany blankline >> skipSpaces >> eof -parseHtmlContent :: String -> TWParser a -> TWParser [a] +parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a] parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd -- -- main parser -- -parseTWiki :: TWParser Pandoc +parseTWiki :: PandocMonad m => TWParser m Pandoc parseTWiki = do bs <- mconcat <$> many block spaces @@ -131,20 +125,18 @@ parseTWiki = do -- block parsers -- -block :: TWParser B.Blocks +block :: PandocMonad m => TWParser m B.Blocks block = do - tr <- getOption readerTrace pos <- getPosition res <- mempty <$ skipMany1 blankline <|> blockElements <|> para skipMany blankline - when tr $ - trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList res)) (return ()) + report DEBUG $ printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res) return res -blockElements :: TWParser B.Blocks +blockElements :: PandocMonad m => TWParser m B.Blocks blockElements = choice [ separator , header , verbatim @@ -155,10 +147,10 @@ blockElements = choice [ separator , noautolink ] -separator :: TWParser B.Blocks +separator :: PandocMonad m => TWParser m B.Blocks separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule -header :: TWParser B.Blocks +header :: PandocMonad m => TWParser m B.Blocks header = tryMsg "header" $ do string "---" level <- many1 (char '+') >>= return . length @@ -169,43 +161,45 @@ header = tryMsg "header" $ do attr <- registerHeader ("", classes, []) content return $ B.headerWith attr level $ content -verbatim :: TWParser B.Blocks +verbatim :: PandocMonad m => TWParser m B.Blocks verbatim = (htmlElement "verbatim" <|> htmlElement "pre") >>= return . (uncurry B.codeBlockWith) -literal :: TWParser B.Blocks +literal :: PandocMonad m => TWParser m B.Blocks literal = htmlElement "literal" >>= return . rawBlock where format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs rawBlock (attrs, content) = B.rawBlock (format attrs) content -list :: String -> TWParser B.Blocks +list :: PandocMonad m => String -> TWParser m B.Blocks list prefix = choice [ bulletList prefix , orderedList prefix , definitionList prefix] -definitionList :: String -> TWParser B.Blocks +definitionList :: PandocMonad m => String -> TWParser m B.Blocks definitionList prefix = tryMsg "definitionList" $ do indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ " elements <- many $ parseDefinitionListItem (prefix ++ concat indent) return $ B.definitionList elements where - parseDefinitionListItem :: String -> TWParser (B.Inlines, [B.Blocks]) + parseDefinitionListItem :: PandocMonad m + => String -> TWParser m (B.Inlines, [B.Blocks]) parseDefinitionListItem indent = do string (indent ++ "$ ") >> skipSpaces term <- many1Till inline $ string ": " line <- listItemLine indent $ string "$ " return $ (mconcat term, [line]) -bulletList :: String -> TWParser B.Blocks +bulletList :: PandocMonad m => String -> TWParser m B.Blocks bulletList prefix = tryMsg "bulletList" $ parseList prefix (char '*') (char ' ') -orderedList :: String -> TWParser B.Blocks +orderedList :: PandocMonad m => String -> TWParser m B.Blocks orderedList prefix = tryMsg "orderedList" $ parseList prefix (oneOf "1iIaA") (string ". ") -parseList :: String -> TWParser Char -> TWParser a -> TWParser B.Blocks +parseList :: PandocMonad m + => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks parseList prefix marker delim = do (indent, style) <- lookAhead $ string prefix *> listStyle <* delim blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim) @@ -222,10 +216,12 @@ parseList prefix marker delim = do style <- marker return (concat indent, style) -parseListItem :: Show a => String -> TWParser a -> TWParser B.Blocks +parseListItem :: (PandocMonad m, Show a) + => String -> TWParser m a -> TWParser m B.Blocks parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker -listItemLine :: Show a => String -> TWParser a -> TWParser B.Blocks +listItemLine :: (PandocMonad m, Show a) + => String -> TWParser m a -> TWParser m B.Blocks listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat where lineContent = do @@ -242,7 +238,7 @@ listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat lastNewline = try $ char '\n' <* eof newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList -table :: TWParser B.Blocks +table :: PandocMonad m => TWParser m B.Blocks table = try $ do tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip rows <- many1 tableParseRow @@ -254,7 +250,7 @@ table = try $ do columns rows = replicate (columCount rows) mempty columCount rows = length $ head rows -tableParseHeader :: TWParser ((Alignment, Double), B.Blocks) +tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks) tableParseHeader = try $ do char '|' leftSpaces <- many spaceChar >>= return . length @@ -270,27 +266,27 @@ tableParseHeader = try $ do | left > right = (AlignRight, 0) | otherwise = (AlignLeft, 0) -tableParseRow :: TWParser [B.Blocks] +tableParseRow :: PandocMonad m => TWParser m [B.Blocks] tableParseRow = many1Till tableParseColumn newline -tableParseColumn :: TWParser B.Blocks +tableParseColumn :: PandocMonad m => TWParser m B.Blocks tableParseColumn = char '|' *> skipSpaces *> tableColumnContent (skipSpaces >> char '|') <* skipSpaces <* optional tableEndOfRow -tableEndOfRow :: TWParser Char +tableEndOfRow :: PandocMonad m => TWParser m Char tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|' -tableColumnContent :: TWParser a -> TWParser B.Blocks +tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat where content = continuation <|> inline continuation = try $ char '\\' >> newline >> return mempty -blockQuote :: TWParser B.Blocks +blockQuote :: PandocMonad m => TWParser m B.Blocks blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat -noautolink :: TWParser B.Blocks +noautolink :: PandocMonad m => TWParser m B.Blocks noautolink = do (_, content) <- htmlElement "noautolink" st <- getState @@ -301,7 +297,7 @@ noautolink = do where parseContent = parseFromString $ many $ block -para :: TWParser B.Blocks +para :: PandocMonad m => TWParser m B.Blocks para = many1Till inline endOfParaElement >>= return . result . mconcat where endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement @@ -317,7 +313,7 @@ para = many1Till inline endOfParaElement >>= return . result . mconcat -- inline parsers -- -inline :: TWParser B.Inlines +inline :: PandocMonad m => TWParser m B.Inlines inline = choice [ whitespace , br , macro @@ -338,36 +334,39 @@ inline = choice [ whitespace , symbol ] <?> "inline" -whitespace :: TWParser B.Inlines +whitespace :: PandocMonad m => TWParser m B.Inlines whitespace = (lb <|> regsp) >>= return where lb = try $ skipMany spaceChar >> linebreak >> return B.space regsp = try $ skipMany1 spaceChar >> return B.space -br :: TWParser B.Inlines +br :: PandocMonad m => TWParser m B.Inlines br = try $ string "%BR%" >> return B.linebreak -linebreak :: TWParser B.Inlines +linebreak :: PandocMonad m => TWParser m B.Inlines linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) where lastNewline = eof >> return mempty innerNewline = return B.space -between :: (Monoid c) => TWParser a -> TWParser b -> (TWParser b -> TWParser c) -> TWParser c +between :: (Monoid c, PandocMonad m) + => TWParser m a -> TWParser m b -> (TWParser m b -> TWParser m c) + -> TWParser m c between start end p = mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) -enclosed :: (Monoid b) => TWParser a -> (TWParser a -> TWParser b) -> TWParser b +enclosed :: (Monoid b, PandocMonad m) + => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b enclosed sep p = between sep (try $ sep <* endMarker) p where endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof endSpace = (spaceChar <|> newline) >> return B.space -macro :: TWParser B.Inlines +macro :: PandocMonad m => TWParser m B.Inlines macro = macroWithParameters <|> withoutParameters where withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan emptySpan name = buildSpan name [] mempty -macroWithParameters :: TWParser B.Inlines +macroWithParameters :: PandocMonad m => TWParser m B.Inlines macroWithParameters = try $ do char '%' name <- macroName @@ -382,13 +381,13 @@ buildSpan className kvs = B.spanWith attrs additionalClasses = maybe [] words $ lookup "class" kvs kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"] -macroName :: TWParser String +macroName :: PandocMonad m => TWParser m String macroName = do first <- letter rest <- many $ alphaNum <|> char '_' return (first:rest) -attributes :: TWParser (String, [(String, String)]) +attributes :: PandocMonad m => TWParser m (String, [(String, String)]) attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>= return . foldr (either mkContent mkKvs) ([], []) where @@ -397,7 +396,7 @@ attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>= mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) mkKvs kv (cont, rest) = (cont, (kv : rest)) -attribute :: TWParser (Either String (String, String)) +attribute :: PandocMonad m => TWParser m (Either String (String, String)) attribute = withKey <|> withoutKey where withKey = try $ do @@ -411,49 +410,51 @@ attribute = withKey <|> withoutKey | allowSpaces == True = many1 $ noneOf "}" | otherwise = many1 $ noneOf " }" -nestedInlines :: Show a => TWParser a -> TWParser B.Inlines +nestedInlines :: (Show a, PandocMonad m) + => TWParser m a -> TWParser m B.Inlines nestedInlines end = innerSpace <|> nestedInline where innerSpace = try $ whitespace <* (notFollowedBy end) nestedInline = notFollowedBy whitespace >> nested inline -strong :: TWParser B.Inlines +strong :: PandocMonad m => TWParser m B.Inlines strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong -strongHtml :: TWParser B.Inlines +strongHtml :: PandocMonad m => TWParser m B.Inlines strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline) >>= return . B.strong . mconcat -strongAndEmph :: TWParser B.Inlines +strongAndEmph :: PandocMonad m => TWParser m B.Inlines strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong -emph :: TWParser B.Inlines +emph :: PandocMonad m => TWParser m B.Inlines emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph -emphHtml :: TWParser B.Inlines +emphHtml :: PandocMonad m => TWParser m B.Inlines emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) >>= return . B.emph . mconcat -nestedString :: Show a => TWParser a -> TWParser String +nestedString :: (Show a, PandocMonad m) + => TWParser m a -> TWParser m String nestedString end = innerSpace <|> (count 1 nonspaceChar) where innerSpace = try $ many1 spaceChar <* notFollowedBy end -boldCode :: TWParser B.Inlines +boldCode :: PandocMonad m => TWParser m B.Inlines boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities -htmlComment :: TWParser B.Inlines +htmlComment :: PandocMonad m => TWParser m B.Inlines htmlComment = htmlTag isCommentTag >> return mempty -code :: TWParser B.Inlines +code :: PandocMonad m => TWParser m B.Inlines code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities -codeHtml :: TWParser B.Inlines +codeHtml :: PandocMonad m => TWParser m B.Inlines codeHtml = do (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar return $ B.codeWith attrs $ fromEntities content -autoLink :: TWParser B.Inlines +autoLink :: PandocMonad m => TWParser m B.Inlines autoLink = try $ do state <- getState guard $ stateAllowLinks state @@ -467,36 +468,36 @@ autoLink = try $ do | c == '/' = True | otherwise = isAlphaNum c -str :: TWParser B.Inlines +str :: PandocMonad m => TWParser m B.Inlines str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str -nop :: TWParser B.Inlines +nop :: PandocMonad m => TWParser m B.Inlines nop = try $ (skip exclamation <|> skip nopTag) >> followContent where exclamation = char '!' nopTag = stringAnyCase "<nop>" followContent = many1 nonspaceChar >>= return . B.str . fromEntities -symbol :: TWParser B.Inlines +symbol :: PandocMonad m => TWParser m B.Inlines symbol = count 1 nonspaceChar >>= return . B.str -smart :: TWParser B.Inlines +smart :: PandocMonad m => TWParser m B.Inlines smart = do - getOption readerSmart >>= guard + guardEnabled Ext_smart doubleQuoted <|> singleQuoted <|> choice [ apostrophe , dash , ellipses ] -singleQuoted :: TWParser B.Inlines +singleQuoted :: PandocMonad m => TWParser m B.Inlines singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= (return . B.singleQuoted . B.trimInlines . mconcat) -doubleQuoted :: TWParser B.Inlines +doubleQuoted :: PandocMonad m => TWParser m B.Inlines doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) @@ -504,7 +505,7 @@ doubleQuoted = try $ do return (B.doubleQuoted $ B.trimInlines contents)) <|> (return $ (B.str "\8220") B.<> contents) -link :: TWParser B.Inlines +link :: PandocMonad m => TWParser m B.Inlines link = try $ do st <- getState guard $ stateAllowLinks st @@ -513,7 +514,7 @@ link = try $ do setState $ st{ stateAllowLinks = True } return $ B.link url title content -linkText :: TWParser (String, String, B.Inlines) +linkText :: PandocMonad m => TWParser m (String, String, B.Inlines) linkText = do string "[[" url <- many1Till anyChar (char ']') diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs deleted file mode 100644 index e5778b123..000000000 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ /dev/null @@ -1,48 +0,0 @@ -{- -Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.TeXMath - Copyright : Copyright (C) 2007-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of TeX math to a list of 'Pandoc' inline elements. --} -module Text.Pandoc.Readers.TeXMath ( texMathToInlines ) where - -import Text.Pandoc.Definition -import Text.TeXMath - --- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. --- Defaults to raw formula between @$@ or @$$@ characters if entire formula --- can't be converted. -texMathToInlines :: MathType - -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> [Inline] -texMathToInlines mt inp = - case writePandoc dt `fmap` readTeX inp of - Right (Just ils) -> ils - _ -> [Str (delim ++ inp ++ delim)] - where (dt, delim) = case mt of - DisplayMath -> (DisplayBlock, "$$") - InlineMath -> (DisplayInline, "$") - diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 8dbbf7be2..804ee39aa 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -64,31 +64,33 @@ import Text.HTML.TagSoup (fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match import Data.List ( intercalate, transpose, intersperse ) import Data.Char ( digitToInt, isUpper ) -import Control.Monad ( guard, liftM, when ) +import Control.Monad ( guard, liftM ) import Data.Monoid ((<>)) import Text.Printf -import Debug.Trace (trace) -import Text.Pandoc.Error +import Text.Pandoc.Class (PandocMonad, report) +import Control.Monad.Except (throwError) -- | Parse a Textile text and return a Pandoc document. -readTextile :: ReaderOptions -- ^ Reader options +readTextile :: PandocMonad m + => ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readTextile opts s = - (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n") + -> m Pandoc +readTextile opts s = do + parsed <- readWithM parseTextile def{ stateOptions = opts } (s ++ "\n\n") + case parsed of + Right result -> return result + Left e -> throwError e -- | Generate a Pandoc ADT from a textile document -parseTextile :: Parser [Char] ParserState Pandoc +parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc parseTextile = do -- textile allows raw HTML and does smart punctuation by default, -- but we do not enable smart punctuation unless it is explicitly -- asked for, for better conversion to other light markup formats oldOpts <- stateOptions `fmap` getState updateState $ \state -> state{ stateOptions = - oldOpts{ readerParseRaw = True - , readerOldDashes = True - } } + oldOpts{ readerParseRaw = True } } many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes @@ -103,10 +105,10 @@ parseTextile = do blocks <- parseBlocks return $ Pandoc nullMeta (B.toList blocks) -- FIXME -noteMarker :: Parser [Char] ParserState [Char] +noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char] noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') -noteBlock :: Parser [Char] ParserState [Char] +noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char] noteBlock = try $ do startPos <- getPosition ref <- noteMarker @@ -121,11 +123,11 @@ noteBlock = try $ do return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -- | Parse document blocks -parseBlocks :: Parser [Char] ParserState Blocks +parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks parseBlocks = mconcat <$> manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: [Parser [Char] ParserState Blocks] +blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks] blockParsers = [ codeBlock , header , blockQuote @@ -140,26 +142,24 @@ blockParsers = [ codeBlock ] -- | Any block in the order of definition of blockParsers -block :: Parser [Char] ParserState Blocks +block :: PandocMonad m => ParserT [Char] ParserState m Blocks block = do res <- choice blockParsers <?> "block" pos <- getPosition - tr <- getOption readerTrace - when tr $ - trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList res)) (return ()) + report DEBUG $ printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res) return res -commentBlock :: Parser [Char] ParserState Blocks +commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks commentBlock = try $ do string "###." manyTill anyLine blanklines return mempty -codeBlock :: Parser [Char] ParserState Blocks +codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: Parser [Char] ParserState Blocks +codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks codeBlockBc = try $ do string "bc." extended <- option False (True <$ char '.') @@ -179,7 +179,7 @@ trimTrailingNewlines :: String -> String trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: Parser [Char] ParserState Blocks +codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks codeBlockPre = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) result' <- manyTill anyChar (htmlTag (tagClose (=="pre"))) @@ -198,7 +198,7 @@ codeBlockPre = try $ do return $ B.codeBlockWith (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: Parser [Char] ParserState Blocks +header :: PandocMonad m => ParserT [Char] ParserState m Blocks header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -210,14 +210,14 @@ header = try $ do return $ B.headerWith attr' level name -- | Blockquote of the form "bq. content" -blockQuote :: Parser [Char] ParserState Blocks +blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks blockQuote = try $ do string "bq" >> attributes >> char '.' >> whitespace B.blockQuote <$> para -- Horizontal rule -hrule :: Parser [Char] st Blocks +hrule :: PandocMonad m => ParserT [Char] st m Blocks hrule = try $ do skipSpaces start <- oneOf "-*" @@ -232,39 +232,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: Parser [Char] ParserState Blocks +anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks anyList = try $ anyListAtDepth 1 <* blanklines -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: Int -> Parser [Char] ParserState Blocks +anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: Int -> Parser [Char] ParserState Blocks +bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks +bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: Int -> Parser [Char] ParserState Blocks +orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return $ B.orderedList items -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks +orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState Blocks +genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks genericListItemAtDepth c depth = try $ do count depth (char c) >> attributes >> whitespace p <- mconcat <$> many listInline @@ -273,25 +273,25 @@ genericListItemAtDepth c depth = try $ do return $ (B.plain p) <> sublist -- | A definition list is a set of consecutive definition items -definitionList :: Parser [Char] ParserState Blocks +definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks definitionList = try $ B.definitionList <$> many1 definitionListItem -- | List start character. -listStart :: Parser [Char] ParserState () +listStart :: PandocMonad m => ParserT [Char] ParserState m () listStart = genericListStart '*' <|> () <$ genericListStart '#' <|> () <$ definitionListStart -genericListStart :: Char -> Parser [Char] st () +genericListStart :: PandocMonad m => Char -> ParserT [Char] st m () genericListStart c = () <$ try (many1 (char c) >> whitespace) -basicDLStart :: Parser [Char] ParserState () +basicDLStart :: PandocMonad m => ParserT [Char] ParserState m () basicDLStart = do char '-' whitespace notFollowedBy newline -definitionListStart :: Parser [Char] ParserState Inlines +definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines definitionListStart = try $ do basicDLStart trimInlines . mconcat <$> @@ -300,7 +300,7 @@ definitionListStart = try $ do <|> try (lookAhead (() <$ string ":=")) ) -listInline :: Parser [Char] ParserState Inlines +listInline :: PandocMonad m => ParserT [Char] ParserState m Inlines listInline = try (notFollowedBy newline >> inline) <|> try (endline <* notFollowedBy listStart) @@ -308,15 +308,15 @@ listInline = try (notFollowedBy newline >> inline) -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks]) definitionListItem = try $ do term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef) return (term, def') - where inlineDef :: Parser [Char] ParserState [Blocks] + where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] inlineDef = liftM (\d -> [B.plain d]) $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline - multilineDef :: Parser [Char] ParserState [Blocks] + multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] multilineDef = try $ do optional whitespace >> newline s <- many1Till anyChar (try (string "=:" >> newline)) @@ -327,7 +327,7 @@ definitionListItem = try $ do -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: Parser [Char] ParserState Blocks +rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks rawHtmlBlock = try $ do skipMany spaceChar (_,b) <- htmlTag isBlockTag @@ -335,14 +335,14 @@ rawHtmlBlock = try $ do return $ B.rawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: Parser [Char] ParserState Blocks +rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks rawLaTeXBlock' = do guardEnabled Ext_raw_tex B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: Parser [Char] ParserState Blocks +para :: PandocMonad m => ParserT [Char] ParserState m Blocks para = B.para . trimInlines . mconcat <$> many1 inline -- Tables @@ -353,7 +353,7 @@ toAlignment '>' = AlignRight toAlignment '=' = AlignCenter toAlignment _ = AlignDefault -cellAttributes :: Parser [Char] ParserState (Bool, Alignment) +cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment) cellAttributes = try $ do isHeader <- option False (True <$ char '_') -- we just ignore colspan and rowspan markers: @@ -366,7 +366,7 @@ cellAttributes = try $ do return (isHeader, alignment) -- | A table cell spans until a pipe | -tableCell :: Parser [Char] ParserState ((Bool, Alignment), Blocks) +tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks) tableCell = try $ do char '|' (isHeader, alignment) <- option (False, AlignDefault) $ cellAttributes @@ -377,7 +377,7 @@ tableCell = try $ do return ((isHeader, alignment), B.plain content) -- | A table row is made of many table cells -tableRow :: Parser [Char] ParserState [((Bool, Alignment), Blocks)] +tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)] tableRow = try $ do -- skip optional row attributes optional $ try $ do @@ -387,7 +387,7 @@ tableRow = try $ do many1 tableCell <* char '|' <* blankline -- | A table with an optional header. -table :: Parser [Char] ParserState Blocks +table :: PandocMonad m => ParserT [Char] ParserState m Blocks table = try $ do -- ignore table attributes caption <- option mempty $ try $ do @@ -411,7 +411,7 @@ table = try $ do (map (map snd) rows) -- | Ignore markers for cols, thead, tfoot. -ignorableRow :: Parser [Char] ParserState () +ignorableRow :: PandocMonad m => ParserT [Char] ParserState m () ignorableRow = try $ do char '|' oneOf ":^-~" @@ -420,7 +420,7 @@ ignorableRow = try $ do _ <- anyLine return () -explicitBlockStart :: String -> Parser [Char] ParserState () +explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m () explicitBlockStart name = try $ do string name attributes @@ -430,9 +430,10 @@ explicitBlockStart name = try $ do -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. -maybeExplicitBlock :: String -- ^ block tag name - -> Parser [Char] ParserState Blocks -- ^ implicit block - -> Parser [Char] ParserState Blocks +maybeExplicitBlock :: PandocMonad m + => String -- ^ block tag name + -> ParserT [Char] ParserState m Blocks -- ^ implicit block + -> ParserT [Char] ParserState m Blocks maybeExplicitBlock name blk = try $ do optional $ explicitBlockStart name blk @@ -445,12 +446,12 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: Parser [Char] ParserState Inlines +inline :: PandocMonad m => ParserT [Char] ParserState m Inlines inline = do choice inlineParsers <?> "inline" -- | Inline parsers tried in order -inlineParsers :: [Parser [Char] ParserState Inlines] +inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines] inlineParsers = [ str , whitespace , endline @@ -470,7 +471,7 @@ inlineParsers = [ str ] -- | Inline markups -inlineMarkup :: Parser [Char] ParserState Inlines +inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines inlineMarkup = choice [ simpleInline (string "??") (B.cite []) , simpleInline (string "**") B.strong , simpleInline (string "__") B.emph @@ -484,29 +485,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite []) ] -- | Trademark, registered, copyright -mark :: Parser [Char] st Inlines +mark :: PandocMonad m => ParserT [Char] st m Inlines mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: Parser [Char] st Inlines +reg :: PandocMonad m => ParserT [Char] st m Inlines reg = do oneOf "Rr" char ')' return $ B.str "\174" -tm :: Parser [Char] st Inlines +tm :: PandocMonad m => ParserT [Char] st m Inlines tm = do oneOf "Tt" oneOf "Mm" char ')' return $ B.str "\8482" -copy :: Parser [Char] st Inlines +copy :: PandocMonad m => ParserT [Char] st m Inlines copy = do oneOf "Cc" char ')' return $ B.str "\169" -note :: Parser [Char] ParserState Inlines +note :: PandocMonad m => ParserT [Char] ParserState m Inlines note = try $ do ref <- (char '[' *> many1 digit <* char ']') notes <- stateNotes <$> getState @@ -530,13 +531,13 @@ wordBoundaries :: [Char] wordBoundaries = markupChars ++ stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: Parser [Char] ParserState String +hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String hyphenedWords = do x <- wordChunk xs <- many (try $ char '-' >> wordChunk) return $ intercalate "-" (x:xs) -wordChunk :: Parser [Char] ParserState String +wordChunk :: PandocMonad m => ParserT [Char] ParserState m String wordChunk = try $ do hd <- noneOf wordBoundaries tl <- many ( (noneOf wordBoundaries) <|> @@ -545,7 +546,7 @@ wordChunk = try $ do return $ hd:tl -- | Any string -str :: Parser [Char] ParserState Inlines +str :: PandocMonad m => ParserT [Char] ParserState m Inlines str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediatly @@ -558,11 +559,11 @@ str = do return $ B.str fullStr -- | Some number of space chars -whitespace :: Parser [Char] st Inlines +whitespace :: PandocMonad m => ParserT [Char] st m Inlines whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: Parser [Char] ParserState Inlines +endline :: PandocMonad m => ParserT [Char] ParserState m Inlines endline = try $ do newline notFollowedBy blankline @@ -570,18 +571,18 @@ endline = try $ do notFollowedBy rawHtmlBlock return B.linebreak -rawHtmlInline :: Parser [Char] ParserState Inlines +rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: Parser [Char] ParserState Inlines +rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex B.singleton <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: Parser [Char] ParserState Inlines +link :: PandocMonad m => ParserT [Char] ParserState m Inlines link = try $ do bracketed <- (True <$ char '[') <|> return False char '"' *> notFollowedBy (oneOf " \t\n\r") @@ -600,7 +601,7 @@ link = try $ do else B.spanWith attr $ B.link url "" name' -- | image embedding -image :: Parser [Char] ParserState Inlines +image :: PandocMonad m => ParserT [Char] ParserState m Inlines image = try $ do char '!' >> notFollowedBy space (ident, cls, kvs) <- attributes @@ -612,50 +613,50 @@ image = try $ do char '!' return $ B.imageWith attr src alt (B.str alt) -escapedInline :: Parser [Char] ParserState Inlines +escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines escapedInline = escapedEqs <|> escapedTag -escapedEqs :: Parser [Char] ParserState Inlines +escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines escapedEqs = B.str <$> (try $ string "==" *> manyTill anyChar' (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: Parser [Char] ParserState Inlines +escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines escapedTag = B.str <$> (try $ string "<notextile>" *> manyTill anyChar' (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: Parser [Char] ParserState Inlines +symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines symbol = B.str . singleton <$> (notFollowedBy newline *> notFollowedBy rawHtmlBlock *> oneOf wordBoundaries) -- | Inline code -code :: Parser [Char] ParserState Inlines +code :: PandocMonad m => ParserT [Char] ParserState m Inlines code = code1 <|> code2 -- any character except a newline before a blank line -anyChar' :: Parser [Char] ParserState Char +anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char anyChar' = satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline) -code1 :: Parser [Char] ParserState Inlines +code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines code1 = B.code <$> surrounded (char '@') anyChar' -code2 :: Parser [Char] ParserState Inlines +code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines code2 = do htmlTag (tagOpen (=="tt") null) B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: Parser [Char] ParserState Attr +attributes :: PandocMonad m => ParserT [Char] ParserState m Attr attributes = (foldl (flip ($)) ("",[],[])) <$> try (do special <- option id specialAttribute attrs <- many attribute return (special : attrs)) -specialAttribute :: Parser [Char] ParserState (Attr -> Attr) +specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) specialAttribute = do alignStr <- ("center" <$ char '=') <|> ("justify" <$ try (string "<>")) <|> @@ -664,11 +665,11 @@ specialAttribute = do notFollowedBy spaceChar return $ addStyle ("text-align:" ++ alignStr) -attribute :: Parser [Char] ParserState (Attr -> Attr) +attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) attribute = try $ (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar -classIdAttr :: Parser [Char] ParserState (Attr -> Attr) +classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) classIdAttr = try $ do -- (class class #id) char '(' ws <- words `fmap` manyTill anyChar' (char ')') @@ -679,7 +680,7 @@ classIdAttr = try $ do -- (class class #id) classes' -> return $ \(_,_,keyvals) -> ("",classes',keyvals) -styleAttr :: Parser [Char] ParserState (Attr -> Attr) +styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) styleAttr = do style <- try $ enclosed (char '{') (char '}') anyChar' return $ addStyle style @@ -690,21 +691,23 @@ addStyle style (id',classes,keyvals) = where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"] style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals] -langAttr :: Parser [Char] ParserState (Attr -> Attr) +langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) langAttr = do lang <- try $ enclosed (char '[') (char ']') alphaNum return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) -- | Parses material surrounded by a parser. -surrounded :: Parser [Char] st t -- ^ surrounding parser - -> Parser [Char] st a -- ^ content parser (to be used repeatedly) - -> Parser [Char] st [a] +surrounded :: PandocMonad m + => ParserT [Char] st m t -- ^ surrounding parser + -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly) + -> ParserT [Char] st m [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) -simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser - -> (Inlines -> Inlines) -- ^ Inline constructor - -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) +simpleInline :: PandocMonad m + => ParserT [Char] ParserState m t -- ^ surrounding parser + -> (Inlines -> Inlines) -- ^ Inline constructor + -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly) simpleInline border construct = try $ do notAfterString border *> notFollowedBy (oneOf " \t\n\r") @@ -718,7 +721,7 @@ simpleInline border construct = try $ do then body else B.spanWith attr body -groupedInlineMarkup :: Parser [Char] ParserState Inlines +groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines groupedInlineMarkup = try $ do char '[' sp1 <- option mempty $ B.space <$ whitespace diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 0aafc83c7..9e2b6963d 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -29,7 +29,7 @@ Conversion of txt2tags formatted plain text to 'Pandoc' document. module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags , getT2TMeta , T2TMeta (..) - , readTxt2TagsNoMacros) + ) where import qualified Text.Pandoc.Builder as B @@ -37,7 +37,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) import Data.Monoid ((<>)) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL) +import Text.Pandoc.Shared (escapeURI,compactify, compactifyDL) import Text.Pandoc.Parsing hiding (space, spaces, uri, macro) import Data.Char (toLower) import Data.List (transpose, intersperse, intercalate) @@ -46,13 +46,12 @@ import Data.Maybe (fromMaybe) import Control.Monad (void, guard, when) import Data.Default import Control.Monad.Reader (Reader, runReader, asks) -import Text.Pandoc.Error -import Data.Time.LocalTime (getZonedTime) -import System.Directory(getModificationTime) import Data.Time.Format (formatTime) import Text.Pandoc.Compat.Time (defaultTimeLocale) -import System.IO.Error (catchIOError) +import Control.Monad.Except (throwError, catchError) +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P type T2T = ParserT String ParserState (Reader T2TMeta) @@ -69,26 +68,42 @@ instance Default T2TMeta where def = T2TMeta "" "" "" "" -- | Get the meta information required by Txt2Tags macros -getT2TMeta :: [FilePath] -> FilePath -> IO T2TMeta -getT2TMeta inps out = do - curDate <- formatTime defaultTimeLocale "%F" <$> getZonedTime +getT2TMeta :: PandocMonad m => m T2TMeta +getT2TMeta = do + mbInps <- P.getInputFiles + let inps = case mbInps of + Just x -> x + Nothing -> [] + mbOutp <- P.getOutputFile + let outp = case mbOutp of + Just x -> x + Nothing -> "" + curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime let getModTime = fmap (formatTime defaultTimeLocale "%T") . - getModificationTime + P.getModificationTime curMtime <- case inps of - [] -> formatTime defaultTimeLocale "%T" <$> getZonedTime - _ -> catchIOError + [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime + _ -> catchError (maximum <$> mapM getModTime inps) (const (return "")) - return $ T2TMeta curDate curMtime (intercalate ", " inps) out + return $ T2TMeta curDate curMtime (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc -readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") +readTxt2Tags :: PandocMonad m + => ReaderOptions + -> String + -> m Pandoc +readTxt2Tags opts s = do + meta <- getT2TMeta + let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") + case parsed of + Right result -> return $ result + Left e -> throwError e -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document -readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc -readTxt2TagsNoMacros = readTxt2Tags def +-- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc +-- readTxt2TagsNoMacros = readTxt2Tags parseT2T :: T2T Pandoc parseT2T = do @@ -210,16 +225,16 @@ list :: T2T Blocks list = choice [bulletList, orderedList, definitionList] bulletList :: T2T Blocks -bulletList = B.bulletList . compactify' +bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart parseBlocks) orderedList :: T2T Blocks -orderedList = B.orderedList . compactify' +orderedList = B.orderedList . compactify <$> many1 (listItem orderedListStart parseBlocks) definitionList :: T2T Blocks definitionList = try $ do - B.definitionList . compactify'DL <$> + B.definitionList . compactifyDL <$> many1 (listItem definitionListStart definitionListEnd) definitionListEnd :: T2T (Inlines, [Blocks]) @@ -432,9 +447,13 @@ inlineMarkup p f c special = try $ do lastChar <- anyChar end <- many1 (char c) let parser inp = parseFromString (mconcat <$> many p) inp - let start' = special (drop 2 start) + let start' = case drop 2 start of + "" -> mempty + xs -> special xs body' <- parser (middle ++ [lastChar]) - let end' = special (drop 2 end) + let end' = case drop 2 end of + "" -> mempty + xs -> special xs return $ f (start' <> body' <> end') Nothing -> do -- Either bad or case such as ***** guard (l >= 5) diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index d08d636df..85b298a85 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -40,7 +40,8 @@ import System.FilePath (takeExtension, takeDirectory, (</>)) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', err, fetchItem', warn, trim) +import Control.Monad.Trans (MonadIO(..)) +import Text.Pandoc.Shared (renderTags', err, warn, trim) import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.MIME (MimeType) import Text.Pandoc.UTF8 (toString) @@ -50,6 +51,7 @@ import Control.Applicative ((<|>)) import Text.Parsec (runParserT, ParsecT) import qualified Text.Parsec as P import Control.Monad.Trans (lift) +import Text.Pandoc.Class (fetchItem, runIO, setMediaBag) isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c @@ -143,7 +145,8 @@ getDataURI :: MediaBag -> Maybe String -> MimeType -> String getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri getDataURI media sourceURL mimetype src = do let ext = map toLower $ takeExtension src - fetchResult <- fetchItem' media sourceURL src + fetchResult <- runIO $ do setMediaBag media + fetchItem sourceURL src (raw, respMime) <- case fetchResult of Left msg -> err 67 $ "Could not fetch " ++ src ++ "\n" ++ show msg @@ -171,8 +174,8 @@ getDataURI media sourceURL mimetype src = do -- | Convert HTML into self-contained HTML, incorporating images, -- scripts, and CSS using data: URIs. -makeSelfContained :: WriterOptions -> String -> IO String -makeSelfContained opts inp = do +makeSelfContained :: MonadIO m => WriterOptions -> MediaBag -> String -> m String +makeSelfContained opts mediabag inp = liftIO $ do let tags = parseTags inp - out' <- mapM (convertTag (writerMediaBag opts) (writerSourceURL opts)) tags + out' <- mapM (convertTag mediabag (writerSourceURL opts)) tags return $ renderTags' out' diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index bd2da945e..22847931f 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -55,15 +55,12 @@ module Text.Pandoc.Shared ( orderedListMarkers, normalizeSpaces, extractSpaces, - normalize, - normalizeInlines, - normalizeBlocks, removeFormatting, + deNote, stringify, capitalize, compactify, - compactify', - compactify'DL, + compactifyDL, linesToPara, Element (..), hierarchicalize, @@ -82,8 +79,6 @@ module Text.Pandoc.Shared ( getDefaultReferenceODT, readDataFile, readDataFileUTF8, - fetchItem, - fetchItem', openURL, collapseFilePath, filteredFilesFromArchive, @@ -91,7 +86,6 @@ module Text.Pandoc.Shared ( err, warn, mapLeft, - hush, -- * for squashing blocks blocksToInlines, -- * Safe read @@ -104,11 +98,9 @@ module Text.Pandoc.Shared ( import Text.Pandoc.Definition import Text.Pandoc.Walk -import Text.Pandoc.MediaBag (MediaBag, lookupMedia) import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..)) import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.UTF8 as UTF8 -import System.Environment (getProgName) import System.Exit (exitWith, ExitCode(..)) import Data.Char ( toLower, isLower, isUpper, isAlpha, isLetter, isDigit, isSpace ) @@ -116,15 +108,13 @@ import Data.List ( find, stripPrefix, intercalate ) import Data.Maybe (mapMaybe) import Data.Version ( showVersion ) import qualified Data.Map as M -import Network.URI ( escapeURIString, nonStrictRelativeTo, - unEscapeString, parseURIReference, isAllowedInURI, - parseURI, URI(..) ) +import Network.URI ( escapeURIString, unEscapeString ) import qualified Data.Set as Set import System.Directory import System.FilePath (splitDirectories, isPathSeparator) import qualified System.FilePath.Posix as Posix -import Text.Pandoc.MIME (MimeType, getMimeType) -import System.FilePath ( (</>), takeExtension, dropExtension) +import Text.Pandoc.MIME (MimeType) +import System.FilePath ( (</>) ) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import Control.Monad.Trans (MonadIO (..)) @@ -399,153 +389,6 @@ extractSpaces f is = _ -> mempty in (left <> f (B.trimInlines . B.Many $ contents) <> right) --- | Normalize @Pandoc@ document, consolidating doubled 'Space's, --- combining adjacent 'Str's and 'Emph's, remove 'Null's and --- empty elements, etc. -normalize :: Pandoc -> Pandoc -normalize (Pandoc (Meta meta) blocks) = - Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks) - where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs - go (MetaBlocks xs) = MetaBlocks $ normalizeBlocks xs - go (MetaList ms) = MetaList $ map go ms - go (MetaMap m) = MetaMap $ M.map go m - go x = x - -normalizeBlocks :: [Block] -> [Block] -normalizeBlocks (Null : xs) = normalizeBlocks xs -normalizeBlocks (Div attr bs : xs) = - Div attr (normalizeBlocks bs) : normalizeBlocks xs -normalizeBlocks (BlockQuote bs : xs) = - case normalizeBlocks bs of - [] -> normalizeBlocks xs - bs' -> BlockQuote bs' : normalizeBlocks xs -normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs -normalizeBlocks (BulletList items : xs) = - BulletList (map normalizeBlocks items) : normalizeBlocks xs -normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs -normalizeBlocks (OrderedList attr items : xs) = - OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs -normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs -normalizeBlocks (DefinitionList items : xs) = - DefinitionList (map go items) : normalizeBlocks xs - where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs) -normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs -normalizeBlocks (RawBlock f x : xs) = - case normalizeBlocks xs of - (RawBlock f' x' : rest) | f' == f -> - RawBlock f (x ++ ('\n':x')) : rest - rest -> RawBlock f x : rest -normalizeBlocks (Para ils : xs) = - case normalizeInlines ils of - [] -> normalizeBlocks xs - ils' -> Para ils' : normalizeBlocks xs -normalizeBlocks (Plain ils : xs) = - case normalizeInlines ils of - [] -> normalizeBlocks xs - ils' -> Plain ils' : normalizeBlocks xs -normalizeBlocks (Header lev attr ils : xs) = - Header lev attr (normalizeInlines ils) : normalizeBlocks xs -normalizeBlocks (Table capt aligns widths hdrs rows : xs) = - Table (normalizeInlines capt) aligns widths - (map normalizeBlocks hdrs) (map (map normalizeBlocks) rows) - : normalizeBlocks xs -normalizeBlocks (x:xs) = x : normalizeBlocks xs -normalizeBlocks [] = [] - -normalizeInlines :: [Inline] -> [Inline] -normalizeInlines (Str x : ys) = - case concat (x : map fromStr strs) of - "" -> rest - n -> Str n : rest - where - (strs, rest) = span isStr $ normalizeInlines ys - isStr (Str _) = True - isStr _ = False - fromStr (Str z) = z - fromStr _ = error "normalizeInlines - fromStr - not a Str" -normalizeInlines (Space : SoftBreak : ys) = - SoftBreak : normalizeInlines ys -normalizeInlines (Space : ys) = - if null rest - then [] - else Space : rest - where isSp Space = True - isSp _ = False - rest = dropWhile isSp $ normalizeInlines ys -normalizeInlines (Emph xs : zs) = - case normalizeInlines zs of - (Emph ys : rest) -> normalizeInlines $ - Emph (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> Emph xs' : rest -normalizeInlines (Strong xs : zs) = - case normalizeInlines zs of - (Strong ys : rest) -> normalizeInlines $ - Strong (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> Strong xs' : rest -normalizeInlines (Subscript xs : zs) = - case normalizeInlines zs of - (Subscript ys : rest) -> normalizeInlines $ - Subscript (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> Subscript xs' : rest -normalizeInlines (Superscript xs : zs) = - case normalizeInlines zs of - (Superscript ys : rest) -> normalizeInlines $ - Superscript (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> Superscript xs' : rest -normalizeInlines (SmallCaps xs : zs) = - case normalizeInlines zs of - (SmallCaps ys : rest) -> normalizeInlines $ - SmallCaps (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> SmallCaps xs' : rest -normalizeInlines (Strikeout xs : zs) = - case normalizeInlines zs of - (Strikeout ys : rest) -> normalizeInlines $ - Strikeout (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> Strikeout xs' : rest -normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys -normalizeInlines (RawInline f xs : zs) = - case normalizeInlines zs of - (RawInline f' ys : rest) | f == f' -> normalizeInlines $ - RawInline f (xs ++ ys) : rest - rest -> RawInline f xs : rest -normalizeInlines (Code _ "" : ys) = normalizeInlines ys -normalizeInlines (Code attr xs : zs) = - case normalizeInlines zs of - (Code attr' ys : rest) | attr == attr' -> normalizeInlines $ - Code attr (xs ++ ys) : rest - rest -> Code attr xs : rest --- allow empty spans, they may carry identifiers etc. --- normalizeInlines (Span _ [] : ys) = normalizeInlines ys -normalizeInlines (Span attr xs : zs) = - case normalizeInlines zs of - (Span attr' ys : rest) | attr == attr' -> normalizeInlines $ - Span attr (normalizeInlines $ xs ++ ys) : rest - rest -> Span attr (normalizeInlines xs) : rest -normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) : - normalizeInlines ys -normalizeInlines (Quoted qt ils : ys) = - Quoted qt (normalizeInlines ils) : normalizeInlines ys -normalizeInlines (Link attr ils t : ys) = - Link attr (normalizeInlines ils) t : normalizeInlines ys -normalizeInlines (Image attr ils t : ys) = - Image attr (normalizeInlines ils) t : normalizeInlines ys -normalizeInlines (Cite cs ils : ys) = - Cite cs (normalizeInlines ils) : normalizeInlines ys -normalizeInlines (x : xs) = x : normalizeInlines xs -normalizeInlines [] = [] - -- | Extract inlines, removing formatting. removeFormatting :: Walkable Inline a => a -> [Inline] removeFormatting = query go . walk deNote @@ -557,8 +400,10 @@ removeFormatting = query go . walk deNote go (Math _ x) = [Str x] go LineBreak = [Space] go _ = [] - deNote (Note _) = Str "" - deNote x = x + +deNote :: Inline -> Inline +deNote (Note _) = Str "" +deNote x = x -- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link @@ -574,8 +419,6 @@ stringify = query go . walk deNote go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105 go LineBreak = " " go _ = "" - deNote (Note _) = Str "" - deNote x = x -- | Bring all regular text in a pandoc structure to uppercase. -- @@ -589,28 +432,12 @@ capitalize = walk go go x = x -- | Change final list item from @Para@ to @Plain@ if the list contains --- no other @Para@ blocks. -compactify :: [[Block]] -- ^ List of list items (each a list of blocks) - -> [[Block]] -compactify [] = [] -compactify items = - case (init items, last items) of - (_,[]) -> items - (others, final) -> - case last final of - Para a -> case (filter isPara $ concat items) of - -- if this is only Para, change to Plain - [_] -> others ++ [init final ++ [Plain a]] - _ -> items - _ -> items - --- | Change final list item from @Para@ to @Plain@ if the list contains -- no other @Para@ blocks. Like compactify, but operates on @Blocks@ rather -- than @[Block]@. -compactify' :: [Blocks] -- ^ List of list items (each a list of blocks) +compactify :: [Blocks] -- ^ List of list items (each a list of blocks) -> [Blocks] -compactify' [] = [] -compactify' items = +compactify [] = [] +compactify items = let (others, final) = (init items, last items) in case reverse (B.toList final) of (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of @@ -619,9 +446,9 @@ compactify' items = _ -> items _ -> items --- | Like @compactify'@, but acts on items of definition lists. -compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] -compactify'DL items = +-- | Like @compactify@, but acts on items of definition lists. +compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] +compactifyDL items = let defs = concatMap snd items in case reverse (concatMap B.toList defs) of (Para x:xs) @@ -904,64 +731,6 @@ readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String readDataFileUTF8 userDir fname = UTF8.toString `fmap` readDataFile userDir fname --- | Specialized version of parseURIReference that disallows --- single-letter schemes. Reason: these are usually windows absolute --- paths. -parseURIReference' :: String -> Maybe URI -parseURIReference' s = - case parseURIReference s of - Just u - | length (uriScheme u) > 2 -> Just u - | null (uriScheme u) -> Just u -- protocol-relative - _ -> Nothing - --- | Fetch an image or other item from the local filesystem or the net. --- Returns raw content and maybe mime type. -fetchItem :: Maybe String -> String - -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) -fetchItem sourceURL s = - case (sourceURL >>= parseURIReference' . ensureEscaped, ensureEscaped s) of - (Just u, s') -> -- try fetching from relative path at source - case parseURIReference' s' of - Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u - Nothing -> openURL s' -- will throw error - (Nothing, s'@('/':'/':_)) -> -- protocol-relative URI - case parseURIReference' s' of - Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon - Nothing -> openURL s' -- will throw error - (Nothing, s') -> - case parseURI s' of -- requires absolute URI - -- We don't want to treat C:/ as a scheme: - Just u' | length (uriScheme u') > 2 -> openURL (show u') - Just u' | uriScheme u' == "file:" -> - E.try $ readLocalFile $ dropWhile (=='/') (uriPath u') - _ -> E.try $ readLocalFile fp -- get from local file system - where readLocalFile f = do - cont <- BS.readFile f - return (cont, mime) - httpcolon = URI{ uriScheme = "http:", - uriAuthority = Nothing, - uriPath = "", - uriQuery = "", - uriFragment = "" } - dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#') - fp = unEscapeString $ dropFragmentAndQuery s - mime = case takeExtension fp of - ".gz" -> getMimeType $ dropExtension fp - ".svgz" -> getMimeType $ dropExtension fp ++ ".svg" - x -> getMimeType x - ensureEscaped = escapeURIString isAllowedInURI . map convertSlash - convertSlash '\\' = '/' - convertSlash x = x - --- | Like 'fetchItem', but also looks for items in a 'MediaBag'. -fetchItem' :: MediaBag -> Maybe String -> String - -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) -fetchItem' media sourceURL s = do - case lookupMedia s media of - Nothing -> fetchItem sourceURL s - Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime) - -- | Read from a URL and return raw data and maybe mime type. openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) openURL u @@ -1000,26 +769,20 @@ openURL u -- Error reporting -- -err :: Int -> String -> IO a -err exitCode msg = do - name <- getProgName - UTF8.hPutStrLn stderr $ name ++ ": " ++ msg +err :: MonadIO m => Int -> String -> m a +err exitCode msg = liftIO $ do + UTF8.hPutStrLn stderr msg exitWith $ ExitFailure exitCode return undefined warn :: MonadIO m => String -> m () warn msg = liftIO $ do - name <- getProgName - UTF8.hPutStrLn stderr $ "[" ++ name ++ " warning] " ++ msg + UTF8.hPutStrLn stderr $ "[warning] " ++ msg mapLeft :: (a -> b) -> Either a c -> Either b c mapLeft f (Left x) = Left (f x) mapLeft _ (Right x) = Right x -hush :: Either a b -> Maybe b -hush (Left _) = Nothing -hush (Right x) = Just x - -- | Remove intermediate "." and ".." directories from a path. -- -- > collapseFilePath "./foo" == "foo" diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index d15d27438..ddb073409 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -55,11 +55,14 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first getDefaultTemplate user writer = do let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions case format of - "native" -> return $ Right "" - "json" -> return $ Right "" - "docx" -> return $ Right "" - "fb2" -> return $ Right "" - "odt" -> getDefaultTemplate user "opendocument" + "native" -> return $ Right "" + "json" -> return $ Right "" + "docx" -> return $ Right "" + "fb2" -> return $ Right "" + "odt" -> getDefaultTemplate user "opendocument" + "html" -> getDefaultTemplate user "html5" + "docbook" -> getDefaultTemplate user "docbook5" + "epub" -> getDefaultTemplate user "epub2" "markdown_strict" -> getDefaultTemplate user "markdown" "multimarkdown" -> getDefaultTemplate user "markdown" "markdown_github" -> getDefaultTemplate user "markdown" diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs index 5d05fa303..8de102742 100644 --- a/src/Text/Pandoc/UUID.hs +++ b/src/Text/Pandoc/UUID.hs @@ -29,13 +29,12 @@ UUID generation using Version 4 (random method) described in RFC4122. See http://tools.ietf.org/html/rfc4122 -} -module Text.Pandoc.UUID ( UUID, getRandomUUID ) where +module Text.Pandoc.UUID ( UUID(..), getRandomUUID, getUUID ) where import Text.Printf ( printf ) -import System.Random ( randomIO ) +import System.Random ( RandomGen, randoms, getStdGen ) import Data.Word import Data.Bits ( setBit, clearBit ) -import Control.Monad ( liftM ) data UUID = UUID Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 @@ -64,14 +63,16 @@ instance Show UUID where printf "%02x" o ++ printf "%02x" p -getRandomUUID :: IO UUID -getRandomUUID = do - let getRN :: a -> IO Word8 - getRN _ = liftM fromIntegral (randomIO :: IO Int) - [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] <- mapM getRN ([1..16] :: [Int]) +getUUID :: RandomGen g => g -> UUID +getUUID gen = + let [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] = take 16 $ randoms gen :: [Word8] -- set variant - let i' = i `setBit` 7 `clearBit` 6 + i' = i `setBit` 7 `clearBit` 6 -- set version (0100 for random) - let g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4 - return $ UUID a b c d e f g' h i' j k l m n o p + g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4 + in + UUID a b c d e f g' h i' j k l m n o p + +getRandomUUID :: IO UUID +getRandomUUID = getUUID <$> getStdGen diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e9d3dccf1..356b29504 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -52,6 +52,7 @@ import qualified Data.Map as M import Data.Aeson (Value(String), fromJSON, toJSON, Result(..)) import qualified Data.Text as T import Data.Char (isSpace, isPunctuation) +import Text.Pandoc.Class (PandocMonad) data WriterState = WriterState { defListMarker :: String , orderedListLevel :: Int @@ -60,8 +61,8 @@ data WriterState = WriterState { defListMarker :: String } -- | Convert Pandoc to AsciiDoc. -writeAsciiDoc :: WriterOptions -> Pandoc -> String -writeAsciiDoc opts document = +writeAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeAsciiDoc opts document = return $ evalState (pandocToAsciiDoc opts document) WriterState{ defListMarker = "::" , orderedListLevel = 1 @@ -411,7 +412,7 @@ inlineToAsciiDoc _ (Math DisplayMath str) = inlineToAsciiDoc _ (RawInline f s) | f == "asciidoc" = return $ text s | otherwise = return empty -inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr +inlineToAsciiDoc _ LineBreak = return $ " +" <> cr inlineToAsciiDoc _ Space = return space inlineToAsciiDoc opts SoftBreak = case writerWrapText opts of diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 88a92eb47..b83f6785d 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -31,7 +31,7 @@ CommonMark: <http://commonmark.org> -} module Text.Pandoc.Writers.CommonMark (writeCommonMark) where -import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Writers.HTML (writeHtml5String) import Text.Pandoc.Definition import Text.Pandoc.Shared (isTightList, linesToPara) import Text.Pandoc.Templates (renderTemplate') @@ -39,26 +39,27 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import CMark import qualified Data.Text as T -import Control.Monad.Identity (runIdentity, Identity) import Control.Monad.State (runState, State, modify, get) import Text.Pandoc.Walk (walkM) +import Text.Pandoc.Class (PandocMonad) +import Data.Foldable (foldrM) -- | Convert Pandoc to CommonMark. -writeCommonMark :: WriterOptions -> Pandoc -> String -writeCommonMark opts (Pandoc meta blocks) = rendered - where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes') - (blocks', notes) = runState (walkM processNotes blocks) [] - notes' = if null notes - then [] - else [OrderedList (1, Decimal, Period) $ reverse notes] - metadata = runIdentity $ metaToJSON opts - (blocksToCommonMark opts) - (inlinesToCommonMark opts) - meta - context = defField "body" main $ metadata - rendered = case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context +writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeCommonMark opts (Pandoc meta blocks) = do + let (blocks', notes) = runState (walkM processNotes blocks) [] + notes' = if null notes + then [] + else [OrderedList (1, Decimal, Period) $ reverse notes] + main <- blocksToCommonMark opts (blocks' ++ notes') + metadata <- metaToJSON opts + (blocksToCommonMark opts) + (inlinesToCommonMark opts) + meta + let context = defField "body" main $ metadata + return $ case writerTemplate opts of + Nothing -> main + Just tpl -> renderTemplate' tpl context processNotes :: Inline -> State [[Block]] Inline processNotes (Note bs) = do @@ -70,16 +71,19 @@ processNotes x = return x node :: NodeType -> [Node] -> Node node = Node Nothing -blocksToCommonMark :: WriterOptions -> [Block] -> Identity String -blocksToCommonMark opts bs = return $ - T.unpack $ nodeToCommonmark cmarkOpts colwidth - $ node DOCUMENT (blocksToNodes bs) - where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] - colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - -inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String +blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m String +blocksToCommonMark opts bs = do + let cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + nodes <- blocksToNodes bs + return $ + T.unpack $ + nodeToCommonmark cmarkOpts colwidth $ + node DOCUMENT nodes + +inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m String inlinesToCommonMark opts ils = return $ T.unpack $ nodeToCommonmark cmarkOpts colwidth $ node PARAGRAPH (inlinesToNodes ils) @@ -88,39 +92,44 @@ inlinesToCommonMark opts ils = return $ then Just $ writerColumns opts else Nothing -blocksToNodes :: [Block] -> [Node] -blocksToNodes = foldr blockToNodes [] - -blockToNodes :: Block -> [Node] -> [Node] -blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :) -blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :) -blockToNodes (LineBlock lns) = blockToNodes $ linesToPara lns -blockToNodes (CodeBlock (_,classes,_) xs) = - (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :) -blockToNodes (RawBlock fmt xs) - | fmt == Format "html" = (node (HTML_BLOCK (T.pack xs)) [] :) - | otherwise = (node (CUSTOM_BLOCK (T.pack xs) (T.empty)) [] :) -blockToNodes (BlockQuote bs) = - (node BLOCK_QUOTE (blocksToNodes bs) :) -blockToNodes (BulletList items) = - (node (LIST ListAttributes{ - listType = BULLET_LIST, - listDelim = PERIOD_DELIM, - listTight = isTightList items, - listStart = 1 }) (map (node ITEM . blocksToNodes) items) :) -blockToNodes (OrderedList (start, _sty, delim) items) = - (node (LIST ListAttributes{ - listType = ORDERED_LIST, - listDelim = case delim of - OneParen -> PAREN_DELIM - TwoParens -> PAREN_DELIM - _ -> PERIOD_DELIM, - listTight = isTightList items, - listStart = start }) (map (node ITEM . blocksToNodes) items) :) -blockToNodes HorizontalRule = (node THEMATIC_BREAK [] :) -blockToNodes (Header lev _ ils) = (node (HEADING lev) (inlinesToNodes ils) :) -blockToNodes (Div _ bs) = (blocksToNodes bs ++) -blockToNodes (DefinitionList items) = blockToNodes (BulletList items') +blocksToNodes :: PandocMonad m => [Block] -> m [Node] +blocksToNodes = foldrM blockToNodes [] + +blockToNodes :: PandocMonad m => Block -> [Node] -> m [Node] +blockToNodes (Plain xs) ns = return (node PARAGRAPH (inlinesToNodes xs) : ns) +blockToNodes (Para xs) ns = return (node PARAGRAPH (inlinesToNodes xs) : ns) +blockToNodes (LineBlock lns) ns = blockToNodes (linesToPara lns) ns +blockToNodes (CodeBlock (_,classes,_) xs) ns = return $ + (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] : ns) +blockToNodes (RawBlock fmt xs) ns + | fmt == Format "html" = return (node (HTML_BLOCK (T.pack xs)) [] : ns) + | otherwise = return (node (CUSTOM_BLOCK (T.pack xs) (T.empty)) [] : ns) +blockToNodes (BlockQuote bs) ns = do + nodes <- blocksToNodes bs + return (node BLOCK_QUOTE nodes : ns) +blockToNodes (BulletList items) ns = do + nodes <- mapM blocksToNodes items + return (node (LIST ListAttributes{ + listType = BULLET_LIST, + listDelim = PERIOD_DELIM, + listTight = isTightList items, + listStart = 1 }) (map (node ITEM) nodes) : ns) +blockToNodes (OrderedList (start, _sty, delim) items) ns = do + nodes <- mapM blocksToNodes items + return (node (LIST ListAttributes{ + listType = ORDERED_LIST, + listDelim = case delim of + OneParen -> PAREN_DELIM + TwoParens -> PAREN_DELIM + _ -> PERIOD_DELIM, + listTight = isTightList items, + listStart = start }) (map (node ITEM) nodes) : ns) +blockToNodes HorizontalRule ns = return (node THEMATIC_BREAK [] : ns) +blockToNodes (Header lev _ ils) ns = return (node (HEADING lev) (inlinesToNodes ils) : ns) +blockToNodes (Div _ bs) ns = do + nodes <- blocksToNodes bs + return (nodes ++ ns) +blockToNodes (DefinitionList items) ns = blockToNodes (BulletList items') ns where items' = map dlToBullet items dlToBullet (term, ((Para xs : ys) : zs)) = Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs @@ -128,9 +137,10 @@ blockToNodes (DefinitionList items) = blockToNodes (BulletList items') Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs dlToBullet (term, xs) = Para term : concat xs -blockToNodes t@(Table _ _ _ _ _) = - (node (HTML_BLOCK (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :) -blockToNodes Null = id +blockToNodes t@(Table _ _ _ _ _) ns = do + s <- writeHtml5String def $! Pandoc nullMeta [t] + return (node (HTML_BLOCK (T.pack $! s)) [] : ns) +blockToNodes Null ns = return ns inlinesToNodes :: [Inline] -> [Node] inlinesToNodes = foldr inlineToNodes [] diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index c663c75ce..ea8b90db3 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -43,6 +43,7 @@ import Text.Pandoc.Pretty import Text.Pandoc.ImageSize import Text.Pandoc.Templates ( renderTemplate' ) import Network.URI ( isURI, unEscapeString ) +import Text.Pandoc.Class (PandocMonad) data WriterState = WriterState { stNextRef :: Int -- number of next URL reference @@ -54,8 +55,8 @@ orderedListStyles :: [Char] orderedListStyles = cycle "narg" -- | Convert Pandoc to ConTeXt. -writeConTeXt :: WriterOptions -> Pandoc -> String -writeConTeXt options document = +writeConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeConTeXt options document = return $ let defaultWriterState = WriterState { stNextRef = 1 , stOrderedListLevel = 0 , stOptions = options @@ -110,7 +111,7 @@ toContextDir _ = "" -- | escape things as needed for ConTeXt escapeCharForConTeXt :: WriterOptions -> Char -> String escapeCharForConTeXt opts ch = - let ligatures = writerTeXLigatures opts in + let ligatures = isEnabled Ext_smart opts in case ch of '{' -> "\\{" '}' -> "\\}" diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 44f96d700..53618d173 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -28,7 +28,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to Docbook XML. -} -module Text.Pandoc.Writers.Docbook ( writeDocbook) where +module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where import Text.Pandoc.Definition import Text.Pandoc.XML import Text.Pandoc.Shared @@ -36,7 +36,7 @@ import Text.Pandoc.Walk import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Writers.Math import Data.List ( stripPrefix, isPrefixOf, intercalate, isSuffixOf ) import Data.Char ( toLower ) import Data.Monoid ( Any(..) ) @@ -47,15 +47,22 @@ import qualified Text.Pandoc.Builder as B import Text.TeXMath import qualified Text.XML.Light as Xml import Data.Generics (everywhere, mkT) +import Text.Pandoc.Class (PandocMonad) +import Control.Monad.Reader + +data DocBookVersion = DocBook4 | DocBook5 + deriving (Eq, Show) + +type DB = ReaderT DocBookVersion -- | Convert list of authors to a docbook <author> section -authorToDocbook :: WriterOptions -> [Inline] -> B.Inlines -authorToDocbook opts name' = - let name = render Nothing $ inlinesToDocbook opts name' - colwidth = if writerWrapText opts == WrapAuto +authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines +authorToDocbook opts name' = do + name <- render Nothing <$> inlinesToDocbook opts name' + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - in B.rawInline "docbook" $ render colwidth $ + return $ B.rawInline "docbook" $ render colwidth $ if ',' `elem` name then -- last name first let (lastname, rest) = break (==',') name @@ -72,46 +79,56 @@ authorToDocbook opts name' = in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ inTagsSimple "surname" (text $ escapeStringForXML lastname) +writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeDocbook4 opts d = + runReaderT (writeDocbook opts d) DocBook4 + +writeDocbook5 :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeDocbook5 opts d = + runReaderT (writeDocbook opts d) DocBook5 + -- | Convert Pandoc document to string in Docbook format. -writeDocbook :: WriterOptions -> Pandoc -> String -writeDocbook opts (Pandoc meta blocks) = +writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m String +writeDocbook opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks - colwidth = if writerWrapText opts == WrapAuto + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - render' = render colwidth - opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr) + let render' = render colwidth + let opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr) (writerTemplate opts) && TopLevelDefault == writerTopLevelDivision opts) then opts{ writerTopLevelDivision = TopLevelChapter } else opts - -- The numbering here follows LaTeX's internal numbering - startLvl = case writerTopLevelDivision opts' of + -- The numbering here follows LaTeX's internal numbering + let startLvl = case writerTopLevelDivision opts' of TopLevelPart -> -1 TopLevelChapter -> 0 TopLevelSection -> 1 TopLevelDefault -> 1 - auths' = map (authorToDocbook opts) $ docAuthors meta - meta' = B.setMeta "author" auths' meta - Just metadata = metaToJSON opts - (Just . render colwidth . (vcat . - (map (elementToDocbook opts' startLvl)) . hierarchicalize)) - (Just . render colwidth . inlinesToDocbook opts') + auths' <- mapM (authorToDocbook opts) $ docAuthors meta + let meta' = B.setMeta "author" auths' meta + metadata <- metaToJSON opts + (fmap (render colwidth . vcat) . + (mapM (elementToDocbook opts' startLvl) . + hierarchicalize)) + (fmap (render colwidth) . inlinesToDocbook opts') meta' - main = render' $ vcat (map (elementToDocbook opts' startLvl) elements) - context = defField "body" main + main <- (render' . vcat) <$> (mapM (elementToDocbook opts' startLvl) elements) + let context = defField "body" main $ defField "mathml" (case writerHTMLMathMethod opts of MathML _ -> True _ -> False) $ metadata - in case writerTemplate opts of + return $ case writerTemplate opts of Nothing -> main Just tpl -> renderTemplate' tpl context -- | Convert an Element to Docbook. -elementToDocbook :: WriterOptions -> Int -> Element -> Doc +elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc elementToDocbook opts _ (Blk block) = blockToDocbook opts block -elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = +elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = do + version <- ask -- Docbook doesn't allow sections with no content, so insert some if needed let elements' = if null elements then [Blk (Para [])] @@ -119,24 +136,25 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = tag = case lvl of -1 -> "part" 0 -> "chapter" - n | n >= 1 && n <= 5 -> if writerDocbook5 opts + n | n >= 1 && n <= 5 -> if version == DocBook5 then "section" else "sect" ++ show n _ -> "simplesect" - idName = if writerDocbook5 opts + idName = if version == DocBook5 then "xml:id" else "id" idAttr = [(idName, writerIdentifierPrefix opts ++ id') | not (null id')] - nsAttr = if writerDocbook5 opts && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] + nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] else [] attribs = nsAttr ++ idAttr - in inTags True tag attribs $ - inTagsSimple "title" (inlinesToDocbook opts title) $$ - vcat (map (elementToDocbook opts (lvl + 1)) elements') + contents <- mapM (elementToDocbook opts (lvl + 1)) elements' + title' <- inlinesToDocbook opts title + return $ inTags True tag attribs $ + inTagsSimple "title" title' $$ vcat contents -- | Convert a list of Pandoc blocks to Docbook. -blocksToDocbook :: WriterOptions -> [Block] -> Doc -blocksToDocbook opts = vcat . map (blockToDocbook opts) +blocksToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc +blocksToDocbook opts = fmap vcat . mapM (blockToDocbook opts) -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block @@ -145,26 +163,29 @@ plainToPara x = x -- | Convert a list of pairs of terms and definitions into a list of -- Docbook varlistentrys. -deflistItemsToDocbook :: WriterOptions -> [([Inline],[[Block]])] -> Doc +deflistItemsToDocbook :: PandocMonad m + => WriterOptions -> [([Inline],[[Block]])] -> DB m Doc deflistItemsToDocbook opts items = - vcat $ map (\(term, defs) -> deflistItemToDocbook opts term defs) items + vcat <$> mapM (\(term, defs) -> deflistItemToDocbook opts term defs) items -- | Convert a term and a list of blocks into a Docbook varlistentry. -deflistItemToDocbook :: WriterOptions -> [Inline] -> [[Block]] -> Doc -deflistItemToDocbook opts term defs = - let def' = concatMap (map plainToPara) defs - in inTagsIndented "varlistentry" $ - inTagsIndented "term" (inlinesToDocbook opts term) $$ - inTagsIndented "listitem" (blocksToDocbook opts def') +deflistItemToDocbook :: PandocMonad m + => WriterOptions -> [Inline] -> [[Block]] -> DB m Doc +deflistItemToDocbook opts term defs = do + term' <- inlinesToDocbook opts term + def' <- blocksToDocbook opts $ concatMap (map plainToPara) defs + return $ inTagsIndented "varlistentry" $ + inTagsIndented "term" term' $$ + inTagsIndented "listitem" def' -- | Convert a list of lists of blocks to a list of Docbook list items. -listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc -listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items +listItemsToDocbook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m Doc +listItemsToDocbook opts items = vcat <$> mapM (listItemToDocbook opts) items -- | Convert a list of blocks into a Docbook list item. -listItemToDocbook :: WriterOptions -> [Block] -> Doc +listItemToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc listItemToDocbook opts item = - inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item + inTagsIndented "listitem" <$> blocksToDocbook opts (map plainToPara item) imageToDocbook :: WriterOptions -> Attr -> String -> Doc imageToDocbook _ attr src = selfClosingTag "imagedata" $ @@ -176,43 +197,46 @@ imageToDocbook _ attr src = selfClosingTag "imagedata" $ Nothing -> [] -- | Convert a Pandoc block element to Docbook. -blockToDocbook :: WriterOptions -> Block -> Doc -blockToDocbook _ Null = empty +blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m Doc +blockToDocbook _ Null = return empty -- Add ids to paragraphs in divs with ids - this is needed for -- pandoc-citeproc to get link anchors in bibliographies: blockToDocbook opts (Div (ident,_,_) [Para lst]) = let attribs = [("id", ident) | not (null ident)] in if hasLineBreaks lst - then flush $ nowrap $ inTags False "literallayout" attribs - $ inlinesToDocbook opts lst - else inTags True "para" attribs $ inlinesToDocbook opts lst -blockToDocbook opts (Div (ident,_,_) bs) = - (if null ident - then mempty - else selfClosingTag "anchor" [("id", ident)]) $$ - blocksToDocbook opts (map plainToPara bs) -blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize + then (flush . nowrap . inTags False "literallayout" attribs) + <$> inlinesToDocbook opts lst + else inTags True "para" attribs <$> inlinesToDocbook opts lst +blockToDocbook opts (Div (ident,_,_) bs) = do + contents <- blocksToDocbook opts (map plainToPara bs) + return $ + (if null ident + then mempty + else selfClosingTag "anchor" [("id", ident)]) $$ contents +blockToDocbook _ (Header _ _ _) = + return empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure -blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = - let alt = inlinesToDocbook opts txt - capt = if null txt +blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do + alt <- inlinesToDocbook opts txt + let capt = if null txt then empty else inTagsSimple "title" alt - in inTagsIndented "figure" $ + return $ inTagsIndented "figure" $ capt $$ (inTagsIndented "mediaobject" $ (inTagsIndented "imageobject" (imageToDocbook opts attr src)) $$ inTagsSimple "textobject" (inTagsSimple "phrase" alt)) blockToDocbook opts (Para lst) - | hasLineBreaks lst = flush $ nowrap $ inTagsSimple "literallayout" $ inlinesToDocbook opts lst - | otherwise = inTagsIndented "para" $ inlinesToDocbook opts lst + | hasLineBreaks lst = (flush . nowrap . inTagsSimple "literallayout") + <$> inlinesToDocbook opts lst + | otherwise = inTagsIndented "para" <$> inlinesToDocbook opts lst blockToDocbook opts (LineBlock lns) = blockToDocbook opts $ linesToPara lns blockToDocbook opts (BlockQuote blocks) = - inTagsIndented "blockquote" $ blocksToDocbook opts blocks -blockToDocbook _ (CodeBlock (_,classes,_) str) = + inTagsIndented "blockquote" <$> blocksToDocbook opts blocks +blockToDocbook _ (CodeBlock (_,classes,_) str) = return $ text ("<programlisting" ++ lang ++ ">") <> cr <> flush (text (escapeStringForXML str) <> cr <> text "</programlisting>") where lang = if null langs @@ -224,11 +248,11 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = then [s] else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes -blockToDocbook opts (BulletList lst) = +blockToDocbook opts (BulletList lst) = do let attribs = [("spacing", "compact") | isTightList lst] - in inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst -blockToDocbook _ (OrderedList _ []) = empty -blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = + inTags True "itemizedlist" attribs <$> listItemsToDocbook opts lst +blockToDocbook _ (OrderedList _ []) = return empty +blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do let numeration = case numstyle of DefaultStyle -> [] Decimal -> [("numeration", "arabic")] @@ -239,39 +263,43 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = LowerRoman -> [("numeration", "lowerroman")] spacing = [("spacing", "compact") | isTightList (first:rest)] attribs = numeration ++ spacing - items = if start == 1 - then listItemsToDocbook opts (first:rest) - else (inTags True "listitem" [("override",show start)] - (blocksToDocbook opts $ map plainToPara first)) $$ - listItemsToDocbook opts rest - in inTags True "orderedlist" attribs items -blockToDocbook opts (DefinitionList lst) = + items <- if start == 1 + then listItemsToDocbook opts (first:rest) + else do + first' <- blocksToDocbook opts (map plainToPara first) + rest' <- listItemsToDocbook opts rest + return $ + (inTags True "listitem" [("override",show start)] first') $$ + rest' + return $ inTags True "orderedlist" attribs items +blockToDocbook opts (DefinitionList lst) = do let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] - in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst -blockToDocbook opts (RawBlock f str) - | f == "docbook" = text str -- raw XML block - | f == "html" = if writerDocbook5 opts - then empty -- No html in Docbook5 - else text str -- allow html for backwards compatibility - | otherwise = empty -blockToDocbook _ HorizontalRule = empty -- not semantic -blockToDocbook opts (Table caption aligns widths headers rows) = - let captionDoc = if null caption - then empty - else inTagsIndented "title" - (inlinesToDocbook opts caption) - tableType = if isEmpty captionDoc then "informaltable" else "table" + inTags True "variablelist" attribs <$> deflistItemsToDocbook opts lst +blockToDocbook _ (RawBlock f str) + | f == "docbook" = return $ text str -- raw XML block + | f == "html" = do + version <- ask + if version == DocBook5 + then return empty -- No html in Docbook5 + else return $ text str -- allow html for backwards compatibility + | otherwise = return empty +blockToDocbook _ HorizontalRule = return empty -- not semantic +blockToDocbook opts (Table caption aligns widths headers rows) = do + captionDoc <- if null caption + then return empty + else inTagsIndented "title" <$> + inlinesToDocbook opts caption + let tableType = if isEmpty captionDoc then "informaltable" else "table" percent w = show (truncate (100*w) :: Integer) ++ "*" coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec" ([("colwidth", percent w) | w > 0] ++ [("align", alignmentToString al)])) widths aligns - head' = if all null headers - then empty - else inTagsIndented "thead" $ - tableRowToDocbook opts headers - body' = inTagsIndented "tbody" $ - vcat $ map (tableRowToDocbook opts) rows - in inTagsIndented tableType $ captionDoc $$ + head' <- if all null headers + then return empty + else inTagsIndented "thead" <$> tableRowToDocbook opts headers + body' <- (inTagsIndented "tbody" . vcat) <$> + mapM (tableRowToDocbook opts) rows + return $ inTagsIndented tableType $ captionDoc $$ (inTags True "tgroup" [("cols", show (length headers))] $ coltags $$ head' $$ body') @@ -292,89 +320,97 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableRowToDocbook :: WriterOptions +tableRowToDocbook :: PandocMonad m + => WriterOptions -> [[Block]] - -> Doc + -> DB m Doc tableRowToDocbook opts cols = - inTagsIndented "row" $ vcat $ map (tableItemToDocbook opts) cols + (inTagsIndented "row" . vcat) <$> mapM (tableItemToDocbook opts) cols -tableItemToDocbook :: WriterOptions +tableItemToDocbook :: PandocMonad m + => WriterOptions -> [Block] - -> Doc + -> DB m Doc tableItemToDocbook opts item = - inTags True "entry" [] $ vcat $ map (blockToDocbook opts) item + (inTags True "entry" [] . vcat) <$> mapM (blockToDocbook opts) item -- | Convert a list of inline elements to Docbook. -inlinesToDocbook :: WriterOptions -> [Inline] -> Doc -inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst +inlinesToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m Doc +inlinesToDocbook opts lst = hcat <$> mapM (inlineToDocbook opts) lst -- | Convert an inline element to Docbook. -inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook _ (Str str) = text $ escapeStringForXML str +inlineToDocbook :: PandocMonad m => WriterOptions -> Inline -> DB m Doc +inlineToDocbook _ (Str str) = return $ text $ escapeStringForXML str inlineToDocbook opts (Emph lst) = - inTagsSimple "emphasis" $ inlinesToDocbook opts lst + inTagsSimple "emphasis" <$> inlinesToDocbook opts lst inlineToDocbook opts (Strong lst) = - inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst + inTags False "emphasis" [("role", "strong")] <$> inlinesToDocbook opts lst inlineToDocbook opts (Strikeout lst) = - inTags False "emphasis" [("role", "strikethrough")] $ + inTags False "emphasis" [("role", "strikethrough")] <$> inlinesToDocbook opts lst inlineToDocbook opts (Superscript lst) = - inTagsSimple "superscript" $ inlinesToDocbook opts lst + inTagsSimple "superscript" <$> inlinesToDocbook opts lst inlineToDocbook opts (Subscript lst) = - inTagsSimple "subscript" $ inlinesToDocbook opts lst + inTagsSimple "subscript" <$> inlinesToDocbook opts lst inlineToDocbook opts (SmallCaps lst) = - inTags False "emphasis" [("role", "smallcaps")] $ + inTags False "emphasis" [("role", "smallcaps")] <$> inlinesToDocbook opts lst inlineToDocbook opts (Quoted _ lst) = - inTagsSimple "quote" $ inlinesToDocbook opts lst + inTagsSimple "quote" <$> inlinesToDocbook opts lst inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst inlineToDocbook opts (Span (ident,_,_) ils) = - (if null ident - then mempty - else selfClosingTag "anchor" [("id", ident)]) <> + ((if null ident + then mempty + else selfClosingTag "anchor" [("id", ident)]) <>) <$> inlinesToDocbook opts ils inlineToDocbook _ (Code _ str) = - inTagsSimple "literal" $ text (escapeStringForXML str) + return $ inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (Math t str) - | isMathML (writerHTMLMathMethod opts) = - case writeMathML dt <$> readTeX str of - Right r -> inTagsSimple tagtype - $ text $ Xml.ppcElement conf - $ fixNS - $ removeAttr r - Left _ -> inlinesToDocbook opts - $ texMathToInlines t str - | otherwise = inlinesToDocbook opts $ texMathToInlines t str - where (dt, tagtype) = case t of - InlineMath -> (DisplayInline,"inlineequation") - DisplayMath -> (DisplayBlock,"informalequation") + | isMathML (writerHTMLMathMethod opts) = do + res <- convertMath writeMathML t str + case res of + Right r -> return $ inTagsSimple tagtype + $ text $ Xml.ppcElement conf + $ fixNS + $ removeAttr r + Left il -> inlineToDocbook opts il + | otherwise = + texMathToInlines t str >>= inlinesToDocbook opts + where tagtype = case t of + InlineMath -> "inlineequation" + DisplayMath -> "informalequation" conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP removeAttr e = e{ Xml.elAttribs = [] } fixNS' qname = qname{ Xml.qPrefix = Just "mml" } fixNS = everywhere (mkT fixNS') -inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x - | otherwise = empty -inlineToDocbook _ LineBreak = text "\n" -inlineToDocbook _ Space = space +inlineToDocbook _ (RawInline f x) + | f == "html" || f == "docbook" = return $ text x + | otherwise = return empty +inlineToDocbook _ LineBreak = return $ text "\n" +-- currently ignore, would require the option to add custom +-- styles to the document +inlineToDocbook _ Space = return space -- because we use \n for LineBreak, we can't do soft breaks: -inlineToDocbook _ SoftBreak = space +inlineToDocbook _ SoftBreak = return space inlineToDocbook opts (Link attr txt (src, _)) | Just email <- stripPrefix "mailto:" src = let emailLink = inTagsSimple "email" $ text $ escapeStringForXML $ email in case txt of - [Str s] | escapeURI s == email -> emailLink - _ -> inlinesToDocbook opts txt <+> - char '(' <> emailLink <> char ')' - | otherwise = + [Str s] | escapeURI s == email -> return emailLink + _ -> do contents <- inlinesToDocbook opts txt + return $ contents <+> + char '(' <> emailLink <> char ')' + | otherwise = do + version <- ask (if isPrefixOf "#" src then inTags False "link" $ ("linkend", drop 1 src) : idAndRole attr - else if writerDocbook5 opts + else if version == DocBook5 then inTags False "link" $ ("xlink:href", src) : idAndRole attr - else inTags False "ulink" $ ("url", src) : idAndRole attr ) $ - inlinesToDocbook opts txt -inlineToDocbook opts (Image attr _ (src, tit)) = + else inTags False "ulink" $ ("url", src) : idAndRole attr ) + <$> inlinesToDocbook opts txt +inlineToDocbook opts (Image attr _ (src, tit)) = return $ let titleDoc = if null tit then empty else inTagsIndented "objectinfo" $ @@ -382,7 +418,7 @@ inlineToDocbook opts (Image attr _ (src, tit)) = in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ titleDoc $$ imageToDocbook opts attr src inlineToDocbook opts (Note contents) = - inTagsIndented "footnote" $ blocksToDocbook opts contents + inTagsIndented "footnote" <$> blocksToDocbook opts contents isMathML :: HTMLMathMethod -> Bool isMathML (MathML _) = True diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 3fc5d22a2..6a53485c4 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-} +{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns, DeriveFunctor #-} {- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> @@ -38,7 +38,6 @@ import qualified Data.Set as Set import qualified Text.Pandoc.UTF8 as UTF8 import Codec.Archive.Zip import Data.Time.Clock.POSIX -import System.Environment import Text.Pandoc.Compat.Time import Text.Pandoc.Definition import Text.Pandoc.Generic @@ -46,20 +45,19 @@ import Text.Pandoc.ImageSize import Text.Pandoc.Shared hiding (Element) import Text.Pandoc.Writers.Shared (fixDisplayMath) import Text.Pandoc.Options -import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Writers.Math import Text.Pandoc.Highlighting ( highlight ) import Text.Pandoc.Walk +import Text.Pandoc.Error (PandocError) import Text.XML.Light as XML import Text.TeXMath import Text.Pandoc.Readers.Docx.StyleMap -import Text.Pandoc.Readers.Docx.Util (elemName) import Control.Monad.Reader import Control.Monad.State import Skylighting -import Data.Unique (hashUnique, newUnique) -import System.Random (randomRIO) +import Control.Monad.Except (runExceptT) +import System.Random (randomR) import Text.Printf (printf) -import qualified Control.Exception as E import Data.Monoid ((<>)) import qualified Data.Text as T import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, @@ -67,6 +65,8 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, import Control.Applicative ((<|>)) import Data.Maybe (fromMaybe, mapMaybe, maybeToList, isNothing) import Data.Char (ord, isSpace, toLower) +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P data ListMarker = NoMarker | BulletMarker @@ -141,12 +141,12 @@ defaultWriterState = WriterState{ , stDelId = 1 , stStyleMaps = defaultStyleMaps , stFirstPara = False - , stTocTitle = normalizeInlines [Str "Table of Contents"] + , stTocTitle = [Str "Table of Contents"] , stDynamicParaProps = [] , stDynamicTextProps = [] } -type WS = ReaderT WriterEnv (StateT WriterState IO) +type WS m = ReaderT WriterEnv (StateT WriterState m) mknode :: Node t => String -> [(String,String)] -> t -> Element mknode s attrs = @@ -207,25 +207,28 @@ isValidChar (ord -> c) | otherwise = False metaValueToInlines :: MetaValue -> [Inline] -metaValueToInlines (MetaString s) = normalizeInlines [Str s] +metaValueToInlines (MetaString s) = [Str s] metaValueToInlines (MetaInlines ils) = ils metaValueToInlines (MetaBlocks bs) = query return bs metaValueToInlines (MetaBool b) = [Str $ show b] metaValueToInlines _ = [] --- | Produce an Docx file from a Pandoc document. -writeDocx :: WriterOptions -- ^ Writer options + + +writeDocx :: (PandocMonad m) + => WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert - -> IO BL.ByteString + -> m BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts let doc' = walk fixDisplayMath $ doc - username <- lookup "USERNAME" <$> getEnvironment - utctime <- getCurrentTime - distArchive <- getDefaultReferenceDocx datadir - refArchive <- case writerReferenceDocx opts of - Just f -> liftM (toArchive . toLazy) $ B.readFile f - Nothing -> getDefaultReferenceDocx datadir + username <- P.lookupEnv "USERNAME" + utctime <- P.getCurrentTime + distArchive <- (toArchive . BL.fromStrict) <$> + P.readDataFile datadir "reference.docx" + refArchive <- case writerReferenceDoc opts of + Just f -> toArchive <$> P.readFileLazy f + Nothing -> return distArchive parsedDoc <- parseXml refArchive distArchive "word/document.xml" let wname f qn = qPrefix qn == Just "w" && f (qName qn) @@ -446,18 +449,11 @@ writeDocx opts doc@(Pandoc meta _) = do let newstyles = map newParaPropToOpenXml newDynamicParaProps ++ map newTextPropToOpenXml newDynamicTextProps ++ - (styleToOpenXml styleMaps $ writerHighlightStyle opts) - let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } - where - modifyContent - | writerHighlight opts = (++ map Elem newstyles) - | otherwise = filter notTokStyle - notTokStyle (Elem el) = notStyle el || notTokId el - notTokStyle _ = True - notStyle = (/= elemName' "style") . elName - notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId") - tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) - elemName' = elemName (sNameSpaces styleMaps) "w" + (case writerHighlightStyle opts of + Nothing -> [] + Just sty -> (styleToOpenXml styleMaps sty)) + let styledoc' = styledoc{ elContent = elContent styledoc ++ + map Elem newstyles } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -603,7 +599,7 @@ styleToOpenXml sm style = $ backgroundColor style ) ] -copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry +copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry copyChildren refArchive distArchive path timestamp elNames = do ref <- parseXml refArchive distArchive path dist <- parseXml distArchive distArchive path @@ -622,7 +618,7 @@ copyChildren refArchive distArchive path timestamp elNames = do baseListId :: Int baseListId = 1000 -mkNumbering :: [ListMarker] -> IO [Element] +mkNumbering :: (PandocMonad m) => [ListMarker] -> m [Element] mkNumbering lists = do elts <- mapM mkAbstractNum (ordNub lists) return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] @@ -638,9 +634,10 @@ mkNum marker numid = map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] -mkAbstractNum :: ListMarker -> IO Element +mkAbstractNum :: (PandocMonad m) => ListMarker -> m Element mkAbstractNum marker = do - nsid <- randomRIO (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) + gen <- P.newStdGen + let (nsid, _) = randomR (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) gen return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () @@ -692,10 +689,11 @@ mkLvl marker lvl = patternFor TwoParens s = "(" ++ s ++ ")" patternFor _ s = s ++ "." -getNumId :: WS Int +getNumId :: (PandocMonad m) => WS m Int getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists -makeTOC :: WriterOptions -> WS [Element] + +makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element] makeTOC opts | writerTableOfContents opts = do let depth = "1-"++(show (writerTOCDepth opts)) let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" @@ -725,7 +723,7 @@ makeTOC _ = return [] -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). -writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) +writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element]) writeOpenXML opts (Pandoc meta blocks) = do let tit = docTitle meta ++ case lookupMeta "subtitle" meta of Just (MetaBlocks [Plain xs]) -> LineBreak : xs @@ -760,13 +758,13 @@ writeOpenXML opts (Pandoc meta blocks) = do return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. -blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] +blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls pCustomStyle :: String -> Element pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () -pStyleM :: String -> WS XML.Element +pStyleM :: (PandocMonad m) => String -> WS m XML.Element pStyleM styleName = do styleMaps <- gets stStyleMaps let sty' = getStyleId styleName $ sParaStyleMap styleMaps @@ -775,26 +773,26 @@ pStyleM styleName = do rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () -rStyleM :: String -> WS XML.Element +rStyleM :: (PandocMonad m) => String -> WS m XML.Element rStyleM styleName = do styleMaps <- gets stStyleMaps let sty' = getStyleId styleName $ sCharStyleMap styleMaps return $ mknode "w:rStyle" [("w:val",sty')] () -getUniqueId :: MonadIO m => m String +getUniqueId :: (PandocMonad m) => m String -- the + 20 is to ensure that there are no clashes with the rIds -- already in word/document.xml.rel -getUniqueId = liftIO $ (show . (+ 20) . hashUnique) `fmap` newUnique +getUniqueId = (show . (+ 20)) <$> P.newUniqueHash -- | Key for specifying user-defined docx styles. dynamicStyleKey :: String dynamicStyleKey = "custom-style" -- | Convert a Pandoc block element to OpenXML. -blockToOpenXML :: WriterOptions -> Block -> WS [Element] +blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk -blockToOpenXML' :: WriterOptions -> Block -> WS [Element] +blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] blockToOpenXML' _ Null = return [] blockToOpenXML' opts (Div (ident,classes,kvs) bs) | Just sty <- lookup dynamicStyleKey kvs = do @@ -825,7 +823,7 @@ blockToOpenXML' opts (Header lev (ident,_,_) lst) = do then uniqueIdent lst usedIdents else ident modify $ \s -> s{ stSectionIds = Set.insert bookmarkName $ stSectionIds s } - id' <- getUniqueId + id' <- (lift . lift) getUniqueId let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () @@ -945,7 +943,7 @@ blockToOpenXML' opts (DefinitionList items) = do setFirstPara return l -definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] +definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element] definitionListItemToOpenXML opts (term,defs) = do term' <- withParaProp (pCustomStyle "DefinitionTerm") $ blockToOpenXML opts (Para term) @@ -953,12 +951,12 @@ definitionListItemToOpenXML opts (term,defs) = do $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' -addList :: ListMarker -> WS () +addList :: (PandocMonad m) => ListMarker -> WS m () addList marker = do lists <- gets stLists modify $ \st -> st{ stLists = lists ++ [marker] } -listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element] +listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element] listItemToOpenXML _ _ [] = return [] listItemToOpenXML opts numid (first:rest) = do first' <- withNumId numid $ blockToOpenXML opts first @@ -974,30 +972,30 @@ alignmentToString alignment = case alignment of AlignDefault -> "left" -- | Convert a list of inline elements to OpenXML. -inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element] +inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element] inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst -withNumId :: Int -> WS a -> WS a +withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a withNumId numid = local $ \env -> env{ envListNumId = numid } -asList :: WS a -> WS a +asList :: (PandocMonad m) => WS m a -> WS m a asList = local $ \env -> env{ envListLevel = envListLevel env + 1 } -getTextProps :: WS [Element] +getTextProps :: (PandocMonad m) => WS m [Element] getTextProps = do props <- asks envTextProperties return $ if null props then [] else [mknode "w:rPr" [] props] -withTextProp :: Element -> WS a -> WS a +withTextProp :: PandocMonad m => Element -> WS m a -> WS m a withTextProp d p = local (\env -> env {envTextProperties = d : envTextProperties env}) p -withTextPropM :: WS Element -> WS a -> WS a +withTextPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a withTextPropM = (. flip withTextProp) . (>>=) -getParaProps :: Bool -> WS [Element] +getParaProps :: PandocMonad m => Bool -> WS m [Element] getParaProps displayMathPara = do props <- asks envParaProperties listLevel <- asks envListLevel @@ -1012,14 +1010,14 @@ getParaProps displayMathPara = do [] -> [] ps -> [mknode "w:pPr" [] ps] -withParaProp :: Element -> WS a -> WS a +withParaProp :: PandocMonad m => Element -> WS m a -> WS m a withParaProp d p = local (\env -> env {envParaProperties = d : envParaProperties env}) p -withParaPropM :: WS Element -> WS a -> WS a +withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a withParaPropM = (. flip withParaProp) . (>>=) -formattedString :: String -> WS [Element] +formattedString :: PandocMonad m => String -> WS m [Element] formattedString str = do props <- getTextProps inDel <- asks envInDel @@ -1028,14 +1026,14 @@ formattedString str = do [ mknode (if inDel then "w:delText" else "w:t") [("xml:space","preserve")] (stripInvalidChars str) ] ] -setFirstPara :: WS () +setFirstPara :: PandocMonad m => WS m () setFirstPara = modify $ \s -> s { stFirstPara = True } -- | Convert an inline element to OpenXML. -inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] +inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il -inlineToOpenXML' :: WriterOptions -> Inline -> WS [Element] +inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] inlineToOpenXML' _ (Str str) = formattedString str inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ") inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ") @@ -1109,16 +1107,11 @@ inlineToOpenXML' opts (Quoted quoteType lst) = SingleQuote -> ("\x2018", "\x2019") DoubleQuote -> ("\x201C", "\x201D") inlineToOpenXML' opts (Math mathType str) = do - let displayType = if mathType == DisplayMath - then DisplayBlock - else DisplayInline - when (displayType == DisplayBlock) setFirstPara - case writeOMML displayType <$> readTeX str of - Right r -> return [r] - Left e -> do - warn $ "Cannot convert the following TeX math, skipping:\n" ++ str ++ - "\n" ++ e - inlinesToOpenXML opts (texMathToInlines mathType str) + when (mathType == DisplayMath) setFirstPara + res <- (lift . lift) (convertMath writeOMML mathType str) + case res of + Right r -> return [r] + Left il -> inlineToOpenXML' opts il inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML' opts (Code attrs str) = do let unhighlighted = intercalate [br] `fmap` @@ -1129,14 +1122,12 @@ inlineToOpenXML' opts (Code attrs str) = do [ rCustomStyle (show toktype) ] , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ] withTextProp (rCustomStyle "VerbatimChar") - $ if writerHighlight opts - then case highlight formatOpenXML attrs str of - Nothing -> unhighlighted - Just h -> return h - else unhighlighted + $ case writerHighlightStyle opts >> highlight formatOpenXML attrs str of + Just h -> return h + Nothing -> unhighlighted inlineToOpenXML' opts (Note bs) = do notes <- gets stFootnotes - notenum <- getUniqueId + notenum <- (lift . lift) getUniqueId footnoteStyle <- rStyleM "Footnote Reference" let notemarker = mknode "w:r" [] [ mknode "w:rPr" [] footnoteStyle @@ -1167,7 +1158,7 @@ inlineToOpenXML' opts (Link _ txt (src,_)) = do id' <- case M.lookup src extlinks of Just i -> return i Nothing -> do - i <- ("rId"++) `fmap` getUniqueId + i <- ("rId"++) `fmap` ((lift . lift) getUniqueId) modify $ \st -> st{ stExternalLinks = M.insert src i extlinks } return i @@ -1179,15 +1170,14 @@ inlineToOpenXML' opts (Image attr alt (src, title)) = do case M.lookup src imgs of Just (_,_,_,elt,_) -> return [elt] Nothing -> do - res <- liftIO $ - fetchItem' (writerMediaBag opts) (writerSourceURL opts) src + res <- runExceptT $ lift (P.fetchItem (writerSourceURL opts) src) case res of - Left (_ :: E.SomeException) -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." + Left (_ :: PandocError) -> do + P.warning ("Could not find image `" ++ src ++ "', skipping...") -- emit alt text inlinesToOpenXML opts alt Right (img, mt) -> do - ident <- ("rId"++) `fmap` getUniqueId + ident <- ("rId"++) `fmap` ((lift . lift) getUniqueId) let (xpt,ypt) = desiredSizeInPoints opts attr (either (const def) id (imageSize img)) -- 12700 emu = 1 pt @@ -1247,7 +1237,10 @@ inlineToOpenXML' opts (Image attr alt (src, title)) = do return [imgElt] br :: Element -br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] +br = breakElement "textWrapping" + +breakElement :: String -> Element +breakElement kind = mknode "w:r" [] [mknode "w:br" [("w:type",kind)] () ] -- Word will insert these footnotes into the settings.xml file -- (whether or not they're visible in the document). If they're in the @@ -1265,7 +1258,7 @@ defaultFootnotes = [ mknode "w:footnote" [ mknode "w:r" [] $ [ mknode "w:continuationSeparator" [] ()]]]] -parseXml :: Archive -> Archive -> String -> IO Element +parseXml :: (PandocMonad m) => Archive -> Archive -> String -> m Element parseXml refArchive distArchive relpath = case findEntryByPath relpath refArchive `mplus` findEntryByPath relpath distArchive of @@ -1283,7 +1276,7 @@ fitToPage (x, y) pageWidth (pageWidth, floor $ ((fromIntegral pageWidth) / x) * y) | otherwise = (floor x, floor y) -withDirection :: WS a -> WS a +withDirection :: PandocMonad m => WS m a -> WS m a withDirection x = do isRTL <- asks envRTL paraProps <- asks envParaProperties diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 7459f1b42..79a371d4d 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -45,7 +45,7 @@ import Text.Pandoc.Options ( WriterOptions( , writerTemplate , writerWrapText), WrapOption(..) ) import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting - , camelCaseToHyphenated, trimr, normalize, substitute ) + , camelCaseToHyphenated, trimr, substitute ) import Text.Pandoc.Writers.Shared ( defField, metaToJSON ) import Text.Pandoc.ImageSize import Text.Pandoc.Templates ( renderTemplate' ) @@ -55,6 +55,7 @@ import Network.URI ( isURI ) import Control.Monad ( zipWithM ) import Control.Monad.State ( modify, State, get, evalState ) import Control.Monad.Reader ( ReaderT, runReaderT, ask, local ) +import Text.Pandoc.Class (PandocMonad) data WriterState = WriterState { stNotes :: Bool -- True if there are notes @@ -77,9 +78,9 @@ instance Default WriterEnvironment where type DokuWiki = ReaderT WriterEnvironment (State WriterState) -- | Convert Pandoc to DokuWiki. -writeDokuWiki :: WriterOptions -> Pandoc -> String -writeDokuWiki opts document = - runDokuWiki (pandocToDokuWiki opts $ normalize document) +writeDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeDokuWiki opts document = return $ + runDokuWiki (pandocToDokuWiki opts document) runDokuWiki :: DokuWiki a -> a runDokuWiki = flip evalState def . flip runReaderT def @@ -393,9 +394,16 @@ blockListToDokuWiki :: WriterOptions -- ^ Options -> DokuWiki String blockListToDokuWiki opts blocks = do backSlash <- stBackSlashLB <$> ask + let blocks' = consolidateRawBlocks blocks if backSlash - then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks - else vcat <$> mapM (blockToDokuWiki opts) blocks + then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks' + else vcat <$> mapM (blockToDokuWiki opts) blocks' + +consolidateRawBlocks :: [Block] -> [Block] +consolidateRawBlocks [] = [] +consolidateRawBlocks (RawBlock f1 b1 : RawBlock f2 b2 : xs) + | f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 ++ "\n" ++ b2) : xs) +consolidateRawBlocks (x:xs) = x : consolidateRawBlocks xs -- | Convert list of Pandoc inline elements to DokuWiki. inlineListToDokuWiki :: WriterOptions -> [Inline] -> DokuWiki String @@ -465,7 +473,7 @@ inlineToDokuWiki _ (RawInline f str) | f == Format "html" = return $ "<html>" ++ str ++ "</html>" | otherwise = return "" -inlineToDokuWiki _ (LineBreak) = return "\\\\\n" +inlineToDokuWiki _ LineBreak = return "\\\\\n" inlineToDokuWiki opts SoftBreak = case writerWrapText opts of diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 00bf4a81c..ae77c10a2 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -28,26 +28,22 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to EPUB. -} -module Text.Pandoc.Writers.EPUB ( writeEPUB ) where -import Data.IORef ( IORef, newIORef, readIORef, modifyIORef ) +module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where import qualified Data.Map as M import qualified Data.Set as Set import Data.Maybe ( fromMaybe, catMaybes ) import Data.List ( isPrefixOf, isInfixOf, intercalate ) -import System.Environment ( getEnv ) import Text.Printf (printf) import System.FilePath ( takeExtension, takeFileName ) -import System.FilePath.Glob ( namesMatching ) import Network.HTTP ( urlEncode ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Text.Pandoc.UTF8 as UTF8 import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive) -import Data.Time.Clock.POSIX ( getPOSIXTime ) import Text.Pandoc.Compat.Time import Text.Pandoc.Shared ( renderTags', safeRead, uniqueIdent, trim - , normalizeDate, readDataFile, stringify, warn - , hierarchicalize, fetchItem' ) + , normalizeDate, stringify + , hierarchicalize ) import qualified Text.Pandoc.Shared as S (Element(..)) import Text.Pandoc.Builder (fromList, setMeta) import Text.Pandoc.Options ( WriterOptions(..) @@ -57,18 +53,20 @@ import Text.Pandoc.Options ( WriterOptions(..) , ObfuscationMethod(NoObfuscation) ) import Text.Pandoc.Definition import Text.Pandoc.Walk (walk, walkM, query) -import Control.Monad.State (modify, get, State, put, evalState) -import Control.Monad (mplus, liftM, when) +import Text.Pandoc.UUID (getUUID) +import Control.Monad.State (modify, get, gets, State, StateT, put, evalState, evalStateT, lift) +import Control.Monad (mplus, when, zipWithM) import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs , strContent, lookupAttr, Node(..), QName(..), parseXML , onlyElems, node, ppElement) -import Text.Pandoc.UUID (getRandomUUID) -import Text.Pandoc.Writers.HTML ( writeHtml ) +import Text.Pandoc.Writers.HTML ( writeHtmlStringForEPUB ) import Data.Char ( toLower, isDigit, isAlphaNum ) import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) -import qualified Control.Exception as E -import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) +import Control.Monad.Except (throwError, catchError) +import Text.Pandoc.Error +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -76,6 +74,12 @@ import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) -- in filenames, chapter0003.xhtml. data Chapter = Chapter (Maybe [Int]) [Block] +data EPUBState = EPUBState { + stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] + } + +type E m = StateT EPUBState m + data EPUBMetadata = EPUBMetadata{ epubIdentifier :: [Identifier] , epubTitle :: [Title] @@ -143,7 +147,7 @@ removeNote :: Inline -> Inline removeNote (Note _) = Str "" removeNote x = x -getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata +getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata getEPUBMetadata opts meta = do let md = metadataFromMeta opts meta let elts = onlyElems $ parseXML $ writerEpubMetadata opts @@ -151,7 +155,7 @@ getEPUBMetadata opts meta = do let addIdentifier m = if null (epubIdentifier m) then do - randomId <- fmap show getRandomUUID + randomId <- (show . getUUID) <$> lift P.newStdGen return $ m{ epubIdentifier = [Identifier randomId Nothing] } else return m let addLanguage m = @@ -159,16 +163,19 @@ getEPUBMetadata opts meta = do then case lookup "lang" (writerVariables opts) of Just x -> return m{ epubLanguage = x } Nothing -> do - localeLang <- E.catch (liftM - (map (\c -> if c == '_' then '-' else c) . - takeWhile (/='.')) $ getEnv "LANG") - (\e -> let _ = (e :: E.SomeException) in return "en-US") + mLang <- lift $ P.lookupEnv "LANG" + let localeLang = + case mLang of + Just lang -> + map (\c -> if c == '_' then '-' else c) $ + takeWhile (/='.') lang + Nothing -> "en-US" return m{ epubLanguage = localeLang } else return m let fixDate m = if null (epubDate m) then do - currentTime <- getCurrentTime + currentTime <- lift P.getCurrentTime return $ m{ epubDate = [ Date{ dateText = showDateTimeISO8601 currentTime , dateEvent = Nothing } ] } @@ -329,21 +336,49 @@ metadataFromMeta opts meta = EPUBMetadata{ Just "rtl" -> Just RTL _ -> Nothing +-- | Produce an EPUB2 file from a Pandoc document. +writeEPUB2 :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m B.ByteString +writeEPUB2 = writeEPUB EPUB2 + +-- | Produce an EPUB3 file from a Pandoc document. +writeEPUB3 :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m B.ByteString +writeEPUB3 = writeEPUB EPUB3 + -- | Produce an EPUB file from a Pandoc document. -writeEPUB :: WriterOptions -- ^ Writer options +writeEPUB :: PandocMonad m + => EPUBVersion + -> WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert - -> IO B.ByteString -writeEPUB opts doc@(Pandoc meta _) = do - let version = fromMaybe EPUB2 (writerEpubVersion opts) + -> m B.ByteString +writeEPUB epubVersion opts doc = + let initState = EPUBState { stMediaPaths = [] + } + in + evalStateT (pandocToEPUB epubVersion opts doc) + initState + +pandocToEPUB :: PandocMonad m + => EPUBVersion + -> WriterOptions + -> Pandoc + -> E m B.ByteString +pandocToEPUB version opts doc@(Pandoc meta _) = do let epub3 = version == EPUB3 - epochtime <- floor `fmap` getPOSIXTime + let writeHtml o = fmap UTF8.fromStringLazy . + writeHtmlStringForEPUB version o + epochtime <- floor <$> lift P.getPOSIXTime let mkEntry path content = toEntry path epochtime content let vars = ("epub3", if epub3 then "true" else "false") : ("css", "stylesheet.css") : writerVariables opts let opts' = opts{ writerEmailObfuscation = NoObfuscation , writerSectionDivs = True - , writerHtml5 = epub3 , writerVariables = vars , writerHTMLMathMethod = if epub3 @@ -358,32 +393,31 @@ writeEPUB opts doc@(Pandoc meta _) = do Nothing -> return ([],[]) Just img -> do let coverImage = "media/" ++ takeFileName img - let cpContent = renderHtml $ writeHtml + cpContent <- lift $ writeHtml opts'{ writerVariables = ("coverpage","true"):vars } (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) - imgContent <- B.readFile img + imgContent <- lift $ P.readFileLazy img return ( [mkEntry "cover.xhtml" cpContent] , [mkEntry coverImage imgContent] ) -- title page - let tpContent = renderHtml $ writeHtml opts'{ - writerVariables = ("titlepage","true"):vars } - (Pandoc meta []) + tpContent <- lift $ writeHtml opts'{ + writerVariables = ("titlepage","true"):vars } + (Pandoc meta []) let tpEntry = mkEntry "title_page.xhtml" tpContent -- handle pictures - mediaRef <- newIORef [] - Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>= - walkM (transformBlock opts' mediaRef) - picEntries <- (catMaybes . map (snd . snd)) <$> readIORef mediaRef - + -- mediaRef <- P.newIORef [] + Pandoc _ blocks <- walkM (transformInline opts') doc >>= + walkM (transformBlock opts') + picEntries <- (catMaybes . map (snd . snd)) <$> (gets stMediaPaths) -- handle fonts let matchingGlob f = do - xs <- namesMatching f + xs <- lift $ P.glob f when (null xs) $ - warn $ f ++ " did not match any font files." + lift $ P.warning $ f ++ " did not match any font files." return xs - let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f + let mkFontEntry f = mkEntry (takeFileName f) `fmap` (lift $ P.readFileLazy f) fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts') fontEntries <- mapM mkFontEntry fontFiles @@ -467,20 +501,18 @@ writeEPUB opts doc@(Pandoc meta _) = do Chapter mbnum $ walk fixInternalReferences bs) chapters' - let chapToEntry :: Int -> Chapter -> Entry - chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) - $ renderHtml - $ writeHtml opts'{ writerNumberOffset = - fromMaybe [] mbnum } - $ case bs of - (Header _ _ xs : _) -> - -- remove notes or we get doubled footnotes - Pandoc (setMeta "title" (walk removeNote $ fromList xs) - nullMeta) bs - _ -> - Pandoc nullMeta bs - - let chapterEntries = zipWith chapToEntry [1..] chapters + let chapToEntry num (Chapter mbnum bs) = + mkEntry (showChapter num) <$> + (writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum } + $ case bs of + (Header _ _ xs : _) -> + -- remove notes or we get doubled footnotes + Pandoc (setMeta "title" (walk removeNote $ fromList xs) + nullMeta) bs + _ -> + Pandoc nullMeta bs) + + chapterEntries <- lift $ zipWithM chapToEntry [1..] chapters -- incredibly inefficient (TODO): let containsMathML ent = epub3 && @@ -517,10 +549,10 @@ writeEPUB opts doc@(Pandoc meta _) = do let tocTitle = fromMaybe plainTitle $ metaValueToString <$> lookupMeta "toc-title" meta - let uuid = case epubIdentifier metadata of - (x:_) -> identifierText x -- use first identifier as UUID - [] -> error "epubIdentifier is null" -- shouldn't happen - currentTime <- getCurrentTime + uuid <- case epubIdentifier metadata of + (x:_) -> return $ identifierText x -- use first identifier as UUID + [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen + currentTime <- lift $ P.getCurrentTime let contentsData = UTF8.fromStringLazy $ ppTopElement $ unode "package" ! [("version", case version of EPUB2 -> "2.0" @@ -575,8 +607,9 @@ writeEPUB opts doc@(Pandoc meta _) = do let tocLevel = writerTOCDepth opts - let navPointNode :: (Int -> String -> String -> [Element] -> Element) - -> S.Element -> State Int Element + let navPointNode :: PandocMonad m + => (Int -> String -> String -> [Element] -> Element) + -> S.Element -> StateT Int m Element navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do n <- get modify (+1) @@ -586,15 +619,15 @@ writeEPUB opts doc@(Pandoc meta _) = do let tit = if writerNumberSections opts && not (null nums) then showNums nums ++ " " ++ tit' else tit' - let src = case lookup ident reftable of - Just x -> x - Nothing -> error (ident ++ " not found in reftable") + src <- case lookup ident reftable of + Just x -> return x + Nothing -> throwError $ PandocSomeError $ ident ++ " not found in reftable" let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel isSec _ = False let subsecs = filter isSec children subs <- mapM (navPointNode formatter) subsecs return $ formatter n tit src subs - navPointNode _ (S.Blk _) = error "navPointNode encountered Blk" + navPointNode _ (S.Blk _) = throwError $ PandocSomeError "navPointNode encountered Blk" let navMapFormatter :: Int -> String -> String -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! @@ -607,6 +640,7 @@ writeEPUB opts doc@(Pandoc meta _) = do [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) , unode "content" ! [("src","title_page.xhtml")] $ () ] + navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1 let tocData = UTF8.fromStringLazy $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ @@ -625,7 +659,7 @@ writeEPUB opts doc@(Pandoc meta _) = do ("content", toId img)] $ ()] , unode "docTitle" $ unode "text" $ plainTitle , unode "navMap" $ - tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1 + tpNode : navMap ] let tocEntry = mkEntry "toc.ncx" tocData @@ -639,11 +673,12 @@ writeEPUB opts doc@(Pandoc meta _) = do (_:_) -> [unode "ol" ! [("class","toc")] $ subs] let navtag = if epub3 then "nav" else "div" + tocBlocks <- lift $ evalStateT (mapM (navPointNode navXhtmlFormatter) secs) 1 let navBlocks = [RawBlock (Format "html") $ ppElement $ unode navtag ! ([("epub:type","toc") | epub3] ++ [("id","toc")]) $ [ unode "h1" ! [("id","toc-title")] $ tocTitle - , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]] + , unode "ol" ! [("class","toc")] $ tocBlocks ]] let landmarks = if epub3 then [RawBlock (Format "html") $ ppElement $ unode "nav" ! [("epub:type","landmarks") @@ -664,8 +699,7 @@ writeEPUB opts doc@(Pandoc meta _) = do ] ] else [] - let navData = renderHtml $ writeHtml - opts'{ writerVariables = ("navpage","true"):vars } + navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"):vars } (Pandoc (setMeta "title" (walk removeNote $ fromList $ docTitle' meta) nullMeta) (navBlocks ++ landmarks)) @@ -692,10 +726,10 @@ writeEPUB opts doc@(Pandoc meta _) = do -- stylesheet stylesheet <- case epubStylesheet metadata of - Just (StylesheetPath fp) -> UTF8.readFile fp + Just (StylesheetPath fp) -> UTF8.toStringLazy <$> (lift $ P.readFileLazy fp) Just (StylesheetContents s) -> return s Nothing -> UTF8.toString `fmap` - readDataFile (writerUserDataDir opts) "epub.css" + (lift $ P.readDataFile (writerUserDataDir opts) "epub.css") let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet -- construct archive @@ -811,79 +845,79 @@ metadataElement version md currentTime = showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" -transformTag :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media +transformTag :: PandocMonad m + => WriterOptions + -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media -> Tag String - -> IO (Tag String) -transformTag opts mediaRef tag@(TagOpen name attr) + -> E m (Tag String) +transformTag opts tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] && lookup "data-external" attr == Nothing = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef opts mediaRef src - newposter <- modifyMediaRef opts mediaRef poster + newsrc <- modifyMediaRef opts src + newposter <- modifyMediaRef opts poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ [("src", newsrc) | not (null newsrc)] ++ [("poster", newposter) | not (null newposter)] return $ TagOpen name attr' -transformTag _ _ tag = return tag +transformTag _ tag = return tag -modifyMediaRef :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] +modifyMediaRef :: PandocMonad m + => WriterOptions -> FilePath - -> IO FilePath -modifyMediaRef _ _ "" = return "" -modifyMediaRef opts mediaRef oldsrc = do - media <- readIORef mediaRef + -> E m FilePath +modifyMediaRef _ "" = return "" +modifyMediaRef opts oldsrc = do + media <- gets stMediaPaths case lookup oldsrc media of Just (n,_) -> return n - Nothing -> do - res <- fetchItem' (writerMediaBag opts) - (writerSourceURL opts) oldsrc - (new, mbEntry) <- - case res of - Left _ -> do - warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." - return (oldsrc, Nothing) - Right (img,mbMime) -> do - let new = "media/file" ++ show (length media) ++ - fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) - (('.':) <$> (mbMime >>= extensionFromMimeType)) - epochtime <- floor `fmap` getPOSIXTime - let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img - return (new, Just entry) - modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): ) - return new - -transformBlock :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media + Nothing -> catchError + (do (img, mbMime) <- P.fetchItem (writerSourceURL opts) oldsrc + let new = "media/file" ++ show (length media) ++ + fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) + (('.':) <$> (mbMime >>= extensionFromMimeType)) + epochtime <- floor `fmap` lift P.getPOSIXTime + let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img + modify $ \st -> st{ stMediaPaths = + (oldsrc, (new, Just entry)):media} + return new) + (\e -> do + P.warning $ "Could not find media `" ++ oldsrc ++ + "', skipping...\n" ++ show e + return oldsrc) + +transformBlock :: PandocMonad m + => WriterOptions + -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media -> Block - -> IO Block -transformBlock opts mediaRef (RawBlock fmt raw) + -> E m Block +transformBlock opts (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM (transformTag opts) tags return $ RawBlock fmt (renderTags' tags') -transformBlock _ _ b = return b +transformBlock _ b = return b -transformInline :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media +transformInline :: PandocMonad m + => WriterOptions + -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media -> Inline - -> IO Inline -transformInline opts mediaRef (Image attr lab (src,tit)) = do - newsrc <- modifyMediaRef opts mediaRef src + -> E m Inline +transformInline opts (Image attr lab (src,tit)) = do + newsrc <- modifyMediaRef opts src return $ Image attr lab (newsrc, tit) -transformInline opts mediaRef (x@(Math t m)) +transformInline opts (x@(Math t m)) | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef opts mediaRef (url ++ urlEncode m) + newsrc <- modifyMediaRef opts (url ++ urlEncode m) let mathclass = if t == DisplayMath then "display" else "inline" return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")] -transformInline opts mediaRef (RawInline fmt raw) +transformInline opts (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM (transformTag opts) tags return $ RawInline fmt (renderTags' tags') -transformInline _ _ x = return x +transformInline _ x = return x (!) :: (t -> Element) -> [(String, String)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 5538ca061..600d34499 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -27,27 +27,28 @@ FictionBook is an XML-based e-book format. For more information see: -} module Text.Pandoc.Writers.FB2 (writeFB2) where -import Control.Monad.State (StateT, evalStateT, get, modify) -import Control.Monad.State (liftM, liftM2, liftIO) +import Control.Monad.State (StateT, evalStateT, get, modify, lift) +import Control.Monad.State (liftM) import Data.ByteString.Base64 (encode) import Data.Char (toLower, isSpace, isAscii, isControl) import Data.List (intersperse, intercalate, isPrefixOf, stripPrefix) import Data.Either (lefts, rights) -import Network.Browser (browse, request, setAllowRedirects, setOutHandler) -import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody) -import Network.HTTP (lookupHeader, HeaderName(..), urlEncode) -import Network.URI (isURI, unEscapeString) -import System.FilePath (takeExtension) +import Network.HTTP (urlEncode) +import Network.URI (isURI) import Text.XML.Light -import qualified Control.Exception as E -import qualified Data.ByteString as B import qualified Text.XML.Light as X import qualified Text.XML.Light.Cursor as XC +import qualified Data.ByteString.Char8 as B8 +import Control.Monad.Except (throwError, catchError) + import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize, linesToPara) +import Text.Pandoc.Error +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. @@ -60,7 +61,7 @@ data FbRenderState = FbRenderState } deriving (Show) -- | FictionBook building monad. -type FBM = StateT FbRenderState IO +type FBM m = StateT FbRenderState m newFB :: FbRenderState newFB = FbRenderState { footnotes = [], imagesToFetch = [] @@ -73,17 +74,24 @@ instance Show ImageMode where show InlineImage = "inlineImageType" -- | Produce an FB2 document from a 'Pandoc' document. -writeFB2 :: WriterOptions -- ^ conversion options +writeFB2 :: PandocMonad m + => WriterOptions -- ^ conversion options -> Pandoc -- ^ document to convert - -> IO String -- ^ FictionBook2 document (not encoded yet) -writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do + -> m String -- ^ FictionBook2 document (not encoded yet) +writeFB2 opts doc = flip evalStateT newFB $ pandocToFB2 opts doc + +pandocToFB2 :: PandocMonad m + => WriterOptions + -> Pandoc + -> FBM m String +pandocToFB2 opts (Pandoc meta blocks) = do modify (\s -> s { writerOptions = opts }) desc <- description meta fp <- frontpage meta secs <- renderSections 1 blocks let body = el "body" $ fp ++ secs notes <- renderFootnotes - (imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s) + (imgs,missing) <- liftM imagesToFetch get >>= \s -> lift (fetchImages s) let body' = replaceImagesWithAlt missing body let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) return $ xml_head ++ (showContent fb2_xml) ++ "\n" @@ -94,62 +102,67 @@ writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do xlink = "http://www.w3.org/1999/xlink" in [ uattr "xmlns" xmlns , attr ("xmlns", "l") xlink ] - -- - frontpage :: Meta -> FBM [Content] - frontpage meta' = do - t <- cMapM toXml . docTitle $ meta' - return $ - [ el "title" (el "p" t) - , el "annotation" (map (el "p" . cMap plain) - (docAuthors meta' ++ [docDate meta'])) - ] - description :: Meta -> FBM Content - description meta' = do - bt <- booktitle meta' - let as = authors meta' - dd <- docdate meta' - return $ el "description" - [ el "title-info" (bt ++ as ++ dd) - , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version - ] - booktitle :: Meta -> FBM [Content] - booktitle meta' = do - t <- cMapM toXml . docTitle $ meta' - return $ if null t - then [] - else [ el "book-title" t ] - authors :: Meta -> [Content] - authors meta' = cMap author (docAuthors meta') - author :: [Inline] -> [Content] - author ss = - let ws = words . cMap plain $ ss - email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws) - ws' = filter ('@' `notElem`) ws - names = case ws' of - (nickname:[]) -> [ el "nickname" nickname ] - (fname:lname:[]) -> [ el "first-name" fname - , el "last-name" lname ] - (fname:rest) -> [ el "first-name" fname - , el "middle-name" (concat . init $ rest) - , el "last-name" (last rest) ] - ([]) -> [] - in list $ el "author" (names ++ email) - docdate :: Meta -> FBM [Content] - docdate meta' = do - let ss = docDate meta' - d <- cMapM toXml ss - return $ if null d - then [] - else [el "date" d] + +frontpage :: PandocMonad m => Meta -> FBM m [Content] +frontpage meta' = do + t <- cMapM toXml . docTitle $ meta' + return $ + [ el "title" (el "p" t) + , el "annotation" (map (el "p" . cMap plain) + (docAuthors meta' ++ [docDate meta'])) + ] + +description :: PandocMonad m => Meta -> FBM m Content +description meta' = do + bt <- booktitle meta' + let as = authors meta' + dd <- docdate meta' + return $ el "description" + [ el "title-info" (bt ++ as ++ dd) + , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version + ] + +booktitle :: PandocMonad m => Meta -> FBM m [Content] +booktitle meta' = do + t <- cMapM toXml . docTitle $ meta' + return $ if null t + then [] + else [ el "book-title" t ] + +authors :: Meta -> [Content] +authors meta' = cMap author (docAuthors meta') + +author :: [Inline] -> [Content] +author ss = + let ws = words . cMap plain $ ss + email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws) + ws' = filter ('@' `notElem`) ws + names = case ws' of + (nickname:[]) -> [ el "nickname" nickname ] + (fname:lname:[]) -> [ el "first-name" fname + , el "last-name" lname ] + (fname:rest) -> [ el "first-name" fname + , el "middle-name" (concat . init $ rest) + , el "last-name" (last rest) ] + ([]) -> [] + in list $ el "author" (names ++ email) + +docdate :: PandocMonad m => Meta -> FBM m [Content] +docdate meta' = do + let ss = docDate meta' + d <- cMapM toXml ss + return $ if null d + then [] + else [el "date" d] -- | Divide the stream of blocks into sections and convert to XML -- representation. -renderSections :: Int -> [Block] -> FBM [Content] +renderSections :: PandocMonad m => Int -> [Block] -> FBM m [Content] renderSections level blocks = do let secs = splitSections level blocks mapM (renderSection level) secs -renderSection :: Int -> ([Inline], [Block]) -> FBM Content +renderSection :: PandocMonad m => Int -> ([Inline], [Block]) -> FBM m Content renderSection level (ttl, body) = do title <- if null ttl then return [] @@ -196,7 +209,7 @@ splitSections level blocks = reverse $ revSplit (reverse blocks) sameLevel _ = False -- | Make another FictionBook body with footnotes. -renderFootnotes :: FBM [Content] +renderFootnotes :: PandocMonad m => FBM m [Content] renderFootnotes = do fns <- footnotes `liftM` get if null fns @@ -210,14 +223,14 @@ renderFootnotes = do -- | Fetch images and encode them for the FictionBook XML. -- Return image data and a list of hrefs of the missing images. -fetchImages :: [(String,String)] -> IO ([Content],[String]) +fetchImages :: PandocMonad m => [(String,String)] -> m ([Content],[String]) fetchImages links = do imgs <- mapM (uncurry fetchImage) links return $ (rights imgs, lefts imgs) -- | Fetch image data from disk or from network and make a <binary> XML section. -- Return either (Left hrefOfMissingImage) or (Right xmlContent). -fetchImage :: String -> String -> IO (Either String Content) +fetchImage :: PandocMonad m => String -> String -> m (Either String Content) fetchImage href link = do mbimg <- case (isURI link, readDataURI link) of @@ -227,16 +240,19 @@ fetchImage href link = do then return (Just (mime',base64)) else return Nothing (True, Just _) -> return Nothing -- not base64-encoded - (True, Nothing) -> fetchURL link - (False, _) -> do - d <- nothingOnError $ B.readFile (unEscapeString link) - let t = case map toLower (takeExtension link) of - ".png" -> Just "image/png" - ".jpg" -> Just "image/jpeg" - ".jpeg" -> Just "image/jpeg" - ".jpe" -> Just "image/jpeg" - _ -> Nothing -- only PNG and JPEG are supported in FB2 - return $ liftM2 (,) t (liftM (toStr . encode) d) + _ -> do + catchError (do (bs, mbmime) <- P.fetchItem Nothing link + case mbmime of + Nothing -> do + P.warning ("Could not determine mime type for " + ++ link) + return Nothing + Just mime -> return $ Just (mime, + B8.unpack $ encode bs)) + (\e -> + do P.warning ("Could not fetch " ++ link ++ + ":\n" ++ show e) + return Nothing) case mbimg of Just (imgtype, imgdata) -> do return . Right $ el "binary" @@ -244,11 +260,7 @@ fetchImage href link = do , uattr "content-type" imgtype] , txt imgdata ) _ -> return (Left ('#':href)) - where - nothingOnError :: (IO B.ByteString) -> (IO (Maybe B.ByteString)) - nothingOnError action = liftM Just action `E.catch` omnihandler - omnihandler :: E.SomeException -> IO (Maybe B.ByteString) - omnihandler _ = return Nothing + -- | Extract mime type and encoded data from the Data URI. readDataURI :: String -- ^ URI @@ -286,24 +298,6 @@ isMimeType s = valid c = isAscii c && not (isControl c) && not (isSpace c) && c `notElem` "()<>@,;:\\\"/[]?=" --- | Fetch URL, return its Content-Type and binary data on success. -fetchURL :: String -> IO (Maybe (String, String)) -fetchURL url = do - flip catchIO_ (return Nothing) $ do - r <- browse $ do - setOutHandler (const (return ())) - setAllowRedirects True - liftM snd . request . getRequest $ url - let content_type = lookupHeader HdrContentType (getHeaders r) - content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r - return $ liftM2 (,) content_type content - -toBS :: String -> B.ByteString -toBS = B.pack . map (toEnum . fromEnum) - -toStr :: B.ByteString -> String -toStr = map (toEnum . fromEnum) . B.unpack - footnoteID :: Int -> String footnoteID i = "n" ++ (show i) @@ -311,7 +305,7 @@ linkID :: Int -> String linkID i = "l" ++ (show i) -- | Convert a block-level Pandoc's element to FictionBook XML representation. -blockToXml :: Block -> FBM [Content] +blockToXml :: PandocMonad m => Block -> FBM m [Content] blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2 blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula -- title beginning with fig: indicates that the image is a figure @@ -364,7 +358,7 @@ blockToXml (DefinitionList defs) = needsBreak (Plain ins) = LineBreak `notElem` ins needsBreak _ = True blockToXml (Header _ _ _) = -- should never happen, see renderSections - error "unexpected header in section text" + throwError $ PandocShouldNeverHappenError "unexpected header in section text" blockToXml HorizontalRule = return [ el "empty-line" () , el "p" (txt (replicate 10 '—')) @@ -375,11 +369,11 @@ blockToXml (Table caption aligns _ headers rows) = do c <- return . el "emphasis" =<< cMapM toXml caption return [el "table" (hd : bd), el "p" c] where - mkrow :: String -> [TableCell] -> [Alignment] -> FBM Content + mkrow :: PandocMonad m => String -> [TableCell] -> [Alignment] -> FBM m Content mkrow tag cells aligns' = (el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns')) -- - mkcell :: String -> (TableCell, Alignment) -> FBM Content + mkcell :: PandocMonad m => String -> (TableCell, Alignment) -> FBM m Content mkcell tag (cell, align) = do cblocks <- cMapM blockToXml cell return $ el tag ([align_attr align], cblocks) @@ -423,7 +417,7 @@ indent = indentBlock in intercalate [LineBreak] $ map ((Str spacer):) lns -- | Convert a Pandoc's Inline element to FictionBook XML representation. -toXml :: Inline -> FBM [Content] +toXml :: PandocMonad m => Inline -> FBM m [Content] toXml (Str s) = return [txt s] toXml (Span _ ils) = cMapM toXml ils toXml (Emph ss) = list `liftM` wrap "emphasis" ss @@ -474,7 +468,7 @@ toXml (Note bs) = do , uattr "type" "note" ] , fn_ref ) -insertMath :: ImageMode -> String -> FBM [Content] +insertMath :: PandocMonad m => ImageMode -> String -> FBM m [Content] insertMath immode formula = do htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get case htmlMath of @@ -485,7 +479,7 @@ insertMath immode formula = do insertImage immode img _ -> return [el "code" formula] -insertImage :: ImageMode -> Inline -> FBM [Content] +insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content] insertImage immode (Image _ alt (url,ttl)) = do images <- imagesToFetch `liftM` get let n = 1 + length images @@ -551,7 +545,7 @@ replaceImagesWithAlt missingHrefs body = -- | Wrap all inlines with an XML tag (given its unqualified name). -wrap :: String -> [Inline] -> FBM Content +wrap :: PandocMonad m => String -> [Inline] -> FBM m Content wrap tagname inlines = el tagname `liftM` cMapM toXml inlines -- " Create a singleton list. diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 3c8c264d2..9037bfbec 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -28,15 +28,27 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to HTML. -} -module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where +module Text.Pandoc.Writers.HTML ( + writeHtml4, + writeHtml4String, + writeHtml5, + writeHtml5String, + writeHtmlStringForEPUB, + writeS5, + writeSlidy, + writeSlideous, + writeDZSlides, + writeRevealJs + ) where import Text.Pandoc.Definition +import Text.Pandoc.Walk import Data.Monoid ((<>)) import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.ImageSize import Text.Pandoc.Templates -import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Writers.Math import Text.Pandoc.Slides import Text.Pandoc.Highlighting ( highlight, styleToCss, formatHtmlInline, formatHtmlBlock ) @@ -68,6 +80,9 @@ import Text.XML.Light (unode, elChildren, unqual) import qualified Text.XML.Light as XML import System.FilePath (takeExtension) import Data.Aeson (Value) +import Control.Monad.Except (throwError) +import Text.Pandoc.Error +import Text.Pandoc.Class (PandocMonad) data WriterState = WriterState { stNotes :: [Html] -- ^ List of notes @@ -76,12 +91,17 @@ data WriterState = WriterState , stHighlighting :: Bool -- ^ Syntax highlighting is used , stSecNum :: [Int] -- ^ Number of current section , stElement :: Bool -- ^ Processing an Element + , stHtml5 :: Bool -- ^ Use HTML5 + , stEPUBVersion :: Maybe EPUBVersion -- ^ EPUB version if for epub + , stSlideVariant :: HTMLSlideVariant } defaultWriterState :: WriterState defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, stHighlighting = False, stSecNum = [], - stElement = False} + stElement = False, stHtml5 = False, + stEPUBVersion = Nothing, + stSlideVariant = NoSlides} -- Helpers to render HTML with the appropriate function. @@ -98,28 +118,91 @@ nl opts = if writerWrapText opts == WrapNone then mempty else preEscapedString "\n" --- | Convert Pandoc document to Html string. -writeHtmlString :: WriterOptions -> Pandoc -> String -writeHtmlString opts d = - let (body, context) = evalState (pandocToHtml opts d) defaultWriterState - in case writerTemplate opts of - Nothing -> renderHtml body - Just tpl -> renderTemplate' tpl $ - defField "body" (renderHtml body) context - --- | Convert Pandoc document to Html structure. -writeHtml :: WriterOptions -> Pandoc -> Html -writeHtml opts d = - let (body, context) = evalState (pandocToHtml opts d) defaultWriterState - in case writerTemplate opts of - Nothing -> body - Just tpl -> renderTemplate' tpl $ - defField "body" (renderHtml body) context +-- | Convert Pandoc document to Html 5 string. +writeHtml5String :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeHtml5String = writeHtmlString' + defaultWriterState{ stHtml5 = True } + +-- | Convert Pandoc document to Html 5 structure. +writeHtml5 :: PandocMonad m => WriterOptions -> Pandoc -> m Html +writeHtml5 = writeHtml' defaultWriterState{ stHtml5 = True } + +-- | Convert Pandoc document to Html 4 string. +writeHtml4String :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeHtml4String = writeHtmlString' + defaultWriterState{ stHtml5 = False } + +-- | Convert Pandoc document to Html 4 structure. +writeHtml4 :: PandocMonad m => WriterOptions -> Pandoc -> m Html +writeHtml4 = writeHtml' defaultWriterState{ stHtml5 = False } + +-- | Convert Pandoc document to Html appropriate for an epub version. +writeHtmlStringForEPUB :: PandocMonad m + => EPUBVersion -> WriterOptions -> Pandoc -> m String +writeHtmlStringForEPUB version = writeHtmlString' + defaultWriterState{ stHtml5 = version == EPUB3, + stEPUBVersion = Just version } + +-- | Convert Pandoc document to Reveal JS HTML slide show. +writeRevealJs :: PandocMonad m + => WriterOptions -> Pandoc -> m String +writeRevealJs = writeHtmlSlideShow' RevealJsSlides + +-- | Convert Pandoc document to S5 HTML slide show. +writeS5 :: PandocMonad m + => WriterOptions -> Pandoc -> m String +writeS5 = writeHtmlSlideShow' S5Slides + +-- | Convert Pandoc document to Slidy HTML slide show. +writeSlidy :: PandocMonad m + => WriterOptions -> Pandoc -> m String +writeSlidy = writeHtmlSlideShow' SlidySlides + +-- | Convert Pandoc document to Slideous HTML slide show. +writeSlideous :: PandocMonad m + => WriterOptions -> Pandoc -> m String +writeSlideous = writeHtmlSlideShow' SlideousSlides + +-- | Convert Pandoc document to DZSlides HTML slide show. +writeDZSlides :: PandocMonad m + => WriterOptions -> Pandoc -> m String +writeDZSlides = writeHtmlSlideShow' DZSlides + +writeHtmlSlideShow' :: PandocMonad m + => HTMLSlideVariant -> WriterOptions -> Pandoc -> m String +writeHtmlSlideShow' variant = writeHtmlString' + defaultWriterState{ stSlideVariant = variant + , stHtml5 = case variant of + RevealJsSlides -> True + S5Slides -> False + SlidySlides -> False + DZSlides -> True + SlideousSlides -> False + NoSlides -> False + } + +writeHtmlString' :: PandocMonad m + => WriterState -> WriterOptions -> Pandoc -> m String +writeHtmlString' st opts d = do + (body, context) <- evalStateT (pandocToHtml opts d) st + return $ case writerTemplate opts of + Nothing -> renderHtml body + Just tpl -> renderTemplate' tpl $ + defField "body" (renderHtml body) context + +writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html +writeHtml' st opts d = do + (body, context) <- evalStateT (pandocToHtml opts d) st + return $ case writerTemplate opts of + Nothing -> body + Just tpl -> renderTemplate' tpl $ + defField "body" (renderHtml body) context -- result is (title, authors, date, toc, body, new variables) -pandocToHtml :: WriterOptions +pandocToHtml :: PandocMonad m + => WriterOptions -> Pandoc - -> State WriterState (Html, Value) + -> StateT WriterState m (Html, Value) pandocToHtml opts (Pandoc meta blocks) = do metadata <- metaToJSON opts (fmap renderHtml . blockListToHtml opts) @@ -129,18 +212,19 @@ pandocToHtml opts (Pandoc meta blocks) = do let authsMeta = map stringifyHTML $ docAuthors meta let dateMeta = stringifyHTML $ docDate meta let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts + slideVariant <- gets stSlideVariant let sects = hierarchicalize $ - if writerSlideVariant opts == NoSlides + if slideVariant == NoSlides then blocks else prepSlides slideLevel blocks - toc <- if writerTableOfContents opts + toc <- if writerTableOfContents opts && slideVariant /= S5Slides then tableOfContents opts sects else return Nothing blocks' <- liftM (mconcat . intersperse (nl opts)) $ mapM (elementToHtml slideLevel opts) sects st <- get - let notes = reverse (stNotes st) - let thebody = blocks' >> footnoteSection opts notes + notes <- footnoteSection opts (reverse (stNotes st)) + let thebody = blocks' >> notes let math = case writerHTMLMathMethod opts of LaTeXMathML (Just url) -> H.script ! A.src (toValue url) @@ -153,7 +237,7 @@ pandocToHtml opts (Pandoc meta blocks) = do MathJax url -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" - $ case writerSlideVariant opts of + $ case slideVariant of SlideousSlides -> preEscapedString "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" @@ -167,15 +251,17 @@ pandocToHtml opts (Pandoc meta blocks) = do (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <> (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX) _ -> case lookup "mathml-script" (writerVariables opts) of - Just s | not (writerHtml5 opts) -> + Just s | not (stHtml5 st) -> H.script ! A.type_ "text/javascript" $ preEscapedString ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n") | otherwise -> mempty Nothing -> mempty let context = (if stHighlighting st - then defField "highlighting-css" - (styleToCss $ writerHighlightStyle opts) + then case writerHighlightStyle opts of + Just sty -> defField "highlighting-css" + (styleToCss sty) + Nothing -> id else id) $ (if stMath st then defField "math" (renderHtml math) @@ -192,7 +278,7 @@ pandocToHtml opts (Pandoc meta blocks) = do defField "slideous-url" ("slideous" :: String) $ defField "revealjs-url" ("reveal.js" :: String) $ defField "s5-url" ("s5/default" :: String) $ - defField "html5" (writerHtml5 opts) $ + defField "html5" (stHtml5 st) $ metadata return (thebody, context) @@ -203,33 +289,41 @@ prefixedId opts s = "" -> mempty _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s -toList :: (Html -> Html) -> WriterOptions -> ([Html] -> Html) +toList :: PandocMonad m + => (Html -> Html) + -> WriterOptions + -> [Html] + -> StateT WriterState m Html toList listop opts items = do - if (writerIncremental opts) - then if (writerSlideVariant opts /= RevealJsSlides) - then (listop $ mconcat items) ! A.class_ "incremental" - else listop $ mconcat $ map (! A.class_ "fragment") items - else listop $ mconcat items + slideVariant <- gets stSlideVariant + return $ + if (writerIncremental opts) + then if (slideVariant /= RevealJsSlides) + then (listop $ mconcat items) ! A.class_ "incremental" + else listop $ mconcat $ map (! A.class_ "fragment") items + else listop $ mconcat items -unordList :: WriterOptions -> [Html] -> Html +unordList :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html unordList opts = toList H.ul opts . toListItems opts -ordList :: WriterOptions -> [Html] -> Html +ordList :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html ordList opts = toList H.ol opts . toListItems opts -defList :: WriterOptions -> [Html] -> Html +defList :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html defList opts items = toList H.dl opts (items ++ [nl opts]) -- | Construct table of contents from list of elements. -tableOfContents :: WriterOptions -> [Element] -> State WriterState (Maybe Html) +tableOfContents :: PandocMonad m => WriterOptions -> [Element] -> StateT WriterState m (Maybe Html) tableOfContents _ [] = return Nothing tableOfContents opts sects = do - let opts' = opts { writerIgnoreNotes = True } - contents <- mapM (elementToListItem opts') sects + contents <- mapM (elementToListItem opts) sects let tocList = catMaybes contents - return $ if null tocList - then Nothing - else Just $ unordList opts tocList + if null tocList + then return Nothing + else Just <$> unordList opts tocList -- | Convert section number to string showSecNum :: [Int] -> String @@ -237,7 +331,7 @@ showSecNum = concat . intersperse "." . map show -- | Converts an Element to a list item for a table of contents, -- retrieving the appropriate identifier from state. -elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html) +elementToListItem :: PandocMonad m => WriterOptions -> Element -> StateT WriterState m (Maybe Html) -- Don't include the empty headers created in slide shows -- shows when an hrule is used to separate slides without a new title: elementToListItem _ (Sec _ _ _ [Str "\0"] _) = return Nothing @@ -249,13 +343,14 @@ elementToListItem opts (Sec lev num (id',classes,_) headerText subsecs) then (H.span ! A.class_ "toc-section-number" $ toHtml $ showSecNum num') >> preEscapedString " " else mempty - txt <- liftM (sectnum >>) $ inlineListToHtml opts headerText + txt <- liftM (sectnum >>) $ inlineListToHtml opts $ walk deNote headerText subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes - let subList = if null subHeads - then mempty - else unordList opts subHeads + subList <- if null subHeads + then return mempty + else unordList opts subHeads -- in reveal.js, we need #/apples, not #apples: - let revealSlash = ['/' | writerSlideVariant opts == RevealJsSlides] + slideVariant <- gets stSlideVariant + let revealSlash = ['/' | slideVariant== RevealJsSlides] return $ Just $ if null id' then (H.a $ toHtml txt) >> subList @@ -265,12 +360,14 @@ elementToListItem opts (Sec lev num (id',classes,_) headerText subsecs) elementToListItem _ _ = return Nothing -- | Convert an Element to Html. -elementToHtml :: Int -> WriterOptions -> Element -> State WriterState Html +elementToHtml :: PandocMonad m => Int -> WriterOptions -> Element -> StateT WriterState m Html elementToHtml _slideLevel opts (Blk block) = blockToHtml opts block elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elements) = do - let slide = writerSlideVariant opts /= NoSlides && level <= slideLevel + slideVariant <- gets stSlideVariant + let slide = slideVariant /= NoSlides && level <= slideLevel let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0) modify $ \st -> st{stSecNum = num'} -- update section number + html5 <- gets stHtml5 let titleSlide = slide && level < slideLevel header' <- if title' == [Str "\0"] -- marker for hrule then return mempty @@ -285,7 +382,7 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen isSec (Blk _) = False let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."] isPause _ = False - let fragmentClass = case writerSlideVariant opts of + let fragmentClass = case slideVariant of RevealJsSlides -> "fragment" _ -> "incremental" let inDiv xs = Blk (RawBlock (Format "html") ("<div class=\"" @@ -301,15 +398,15 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && - not (writerHtml5 opts) ] ++ + not html5 ] ++ ["level" ++ show level | slide || writerSectionDivs opts ] ++ classes - let secttag = if writerHtml5 opts + let secttag = if html5 then H5.section else H.div let attr = (id',classes',keyvals) return $ if titleSlide - then (if writerSlideVariant opts == RevealJsSlides + then (if slideVariant == RevealJsSlides then H5.section else id) $ mconcat $ (addAttrs opts attr $ secttag $ header') : innerContents @@ -321,19 +418,23 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen -- | Convert list of Note blocks to a footnote <div>. -- Assumes notes are sorted. -footnoteSection :: WriterOptions -> [Html] -> Html -footnoteSection opts notes = - if null notes - then mempty - else nl opts >> (container - $ nl opts >> hrtag >> nl opts >> - H.ol (mconcat notes >> nl opts) >> nl opts) - where container x = if writerHtml5 opts - then H5.section ! A.class_ "footnotes" $ x - else if writerSlideVariant opts /= NoSlides - then H.div ! A.class_ "footnotes slide" $ x - else H.div ! A.class_ "footnotes" $ x - hrtag = if writerHtml5 opts then H5.hr else H.hr +footnoteSection :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html +footnoteSection opts notes = do + html5 <- gets stHtml5 + slideVariant <- gets stSlideVariant + let hrtag = if html5 then H5.hr else H.hr + let container x = if html5 + then H5.section ! A.class_ "footnotes" $ x + else if slideVariant /= NoSlides + then H.div ! A.class_ "footnotes slide" $ x + else H.div ! A.class_ "footnotes" $ x + return $ + if null notes + then mempty + else nl opts >> (container + $ nl opts >> hrtag >> nl opts >> + H.ol (mconcat notes >> nl opts) >> nl opts) -- | Parse a mailto link; return Just (name, domain) or Nothing. parseMailto :: String -> Maybe (String, String) @@ -346,9 +447,9 @@ parseMailto s = do _ -> fail "not a mailto: URL" -- | Obfuscate a "mailto:" link. -obfuscateLink :: WriterOptions -> Attr -> Html -> String -> Html +obfuscateLink :: PandocMonad m => WriterOptions -> Attr -> Html -> String -> m Html obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation = - addAttrs opts attr $ H.a ! A.href (toValue s) $ txt + return $ addAttrs opts attr $ H.a ! A.href (toValue s) $ txt obfuscateLink opts attr (renderHtml -> txt) s = let meth = writerEmailObfuscation opts s' = map toLower (take 7 s) ++ drop 7 s @@ -364,9 +465,11 @@ obfuscateLink opts attr (renderHtml -> txt) s = in case meth of ReferenceObfuscation -> -- need to use preEscapedString or &'s are escaped to & in URL + return $ preEscapedString $ "<a href=\"" ++ (obfuscateString s') ++ "\" class=\"email\">" ++ (obfuscateString txt) ++ "</a>" JavascriptObfuscation -> + return $ (H.script ! A.type_ "text/javascript" $ preEscapedString ("\n<!--\nh='" ++ obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ @@ -374,8 +477,8 @@ obfuscateLink opts attr (renderHtml -> txt) s = "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail\">'+" ++ linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >> H.noscript (preEscapedString $ obfuscateString altText) - _ -> error $ "Unknown obfuscation method: " ++ show meth - _ -> addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email + _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " ++ show meth + _ -> return $ addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String @@ -434,19 +537,20 @@ treatAsImage fp = in null ext || ext `elem` imageExts -- | Convert Pandoc block element to HTML. -blockToHtml :: WriterOptions -> Block -> State WriterState Html +blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html blockToHtml _ Null = return mempty blockToHtml opts (Plain lst) = inlineListToHtml opts lst -- title beginning with fig: indicates that the image is a figure blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = do img <- inlineToHtml opts (Image attr txt (s,tit)) - let tocapt = if writerHtml5 opts + html5 <- gets stHtml5 + let tocapt = if html5 then H5.figcaption else H.p ! A.class_ "caption" capt <- if null txt then return mempty else tocapt `fmap` inlineListToHtml opts txt - return $ if writerHtml5 opts + return $ if html5 then H5.figure $ mconcat [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat @@ -467,17 +571,19 @@ blockToHtml opts (LineBlock lns) = htmlLines <- mconcat . intersperse lf <$> mapM (inlineListToHtml opts) lns return $ H.div ! A.style "white-space: pre-line;" $ htmlLines blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do + html5 <- gets stHtml5 let speakerNotes = "notes" `elem` classes -- we don't want incremental output inside speaker notes, see #1394 let opts' = if speakerNotes then opts{ writerIncremental = False } else opts contents <- blockListToHtml opts' bs let contents' = nl opts >> contents >> nl opts - let (divtag, classes') = if writerHtml5 opts && "section" `elem` classes + let (divtag, classes') = if html5 && "section" `elem` classes then (H5.section, filter (/= "section") classes) else (H.div, classes) + slideVariant <- gets stSlideVariant return $ if speakerNotes - then case writerSlideVariant opts of + then case slideVariant of RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' DZSlides -> (addAttrs opts' attr $ H5.div $ contents') ! (H5.customAttribute "role" "note") @@ -490,7 +596,9 @@ blockToHtml opts (RawBlock f str) allowsMathEnvironments (writerHTMLMathMethod opts) && isMathEnvironment str = blockToHtml opts $ Plain [Math DisplayMath str] | otherwise = return mempty -blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr +blockToHtml _ (HorizontalRule) = do + html5 <- gets stHtml5 + return $ if html5 then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do let tolhs = isEnabled Ext_literate_haskell opts && any (\c -> map toLower c == "haskell") classes && @@ -503,19 +611,21 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do adjCode = if tolhs then unlines . map ("> " ++) . lines $ rawCode else rawCode - hlCode = if writerHighlight opts -- check highlighting options - then highlight formatHtmlBlock (id',classes',keyvals) adjCode + hlCode = if isJust (writerHighlightStyle opts) + then highlight formatHtmlBlock + (id',classes',keyvals) adjCode else Nothing case hlCode of Nothing -> return $ addAttrs opts (id',classes,keyvals) $ H.pre $ H.code $ toHtml adjCode Just h -> modify (\st -> st{ stHighlighting = True }) >> return (addAttrs opts (id',[],keyvals) h) -blockToHtml opts (BlockQuote blocks) = +blockToHtml opts (BlockQuote blocks) = do -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; -- otherwise incremental - if writerSlideVariant opts /= NoSlides + slideVariant <- gets stSlideVariant + if slideVariant /= NoSlides then let inc = not (writerIncremental opts) in case blocks of [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) @@ -552,9 +662,10 @@ blockToHtml opts (Header level attr@(_,classes,_) lst) = do _ -> H.p contents' blockToHtml opts (BulletList lst) = do contents <- mapM (blockListToHtml opts) lst - return $ unordList opts contents + unordList opts contents blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (blockListToHtml opts) lst + html5 <- gets stHtml5 let numstyle' = case numstyle of Example -> "decimal" _ -> camelCaseToHyphenated $ show numstyle @@ -565,7 +676,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do then [A.class_ "example"] else []) ++ (if numstyle /= DefaultStyle - then if writerHtml5 opts + then if html5 then [A.type_ $ case numstyle of Decimal -> "1" @@ -577,7 +688,8 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do else [A.style $ toValue $ "list-style-type: " ++ numstyle'] else []) - return $ foldl (!) (ordList opts contents) attribs + l <- ordList opts contents + return $ foldl (!) l attribs blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- if null term @@ -587,13 +699,14 @@ blockToHtml opts (DefinitionList lst) = do blockListToHtml opts) defs return $ mconcat $ nl opts : term' : nl opts : intersperse (nl opts) defs') lst - return $ defList opts contents + defList opts contents blockToHtml opts (Table capt aligns widths headers rows') = do captionDoc <- if null capt then return mempty else do cs <- inlineListToHtml opts capt return $ H.caption cs >> nl opts + html5 <- gets stHtml5 let percent w = show (truncate (100*w) :: Integer) ++ "%" let coltags = if all (== 0.0) widths then mempty @@ -601,7 +714,7 @@ blockToHtml opts (Table capt aligns widths headers rows') = do H.colgroup $ do nl opts mapM_ (\w -> do - if writerHtml5 opts + if html5 then H.col ! A.style (toValue $ "width: " ++ percent w) else H.col ! A.width (toValue $ percent w) @@ -624,11 +737,12 @@ blockToHtml opts (Table capt aligns widths headers rows') = do else tbl ! A.style (toValue $ "width:" ++ show (round (totalWidth * 100) :: Int) ++ "%;") -tableRowToHtml :: WriterOptions +tableRowToHtml :: PandocMonad m + => WriterOptions -> [Alignment] -> Int -> [[Block]] - -> State WriterState Html + -> StateT WriterState m Html tableRowToHtml opts aligns rownum cols' = do let mkcell = if rownum == 0 then H.th else H.td let rowclass = case rownum of @@ -648,15 +762,17 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "" -tableItemToHtml :: WriterOptions +tableItemToHtml :: PandocMonad m + => WriterOptions -> (Html -> Html) -> Alignment -> [Block] - -> State WriterState Html + -> StateT WriterState m Html tableItemToHtml opts tag' align' item = do contents <- blockListToHtml opts item + html5 <- gets stHtml5 let alignStr = alignmentToString align' - let attribs = if writerHtml5 opts + let attribs = if html5 then A.style (toValue $ "text-align: " ++ alignStr ++ ";") else A.align (toValue alignStr) let tag'' = if null alignStr @@ -670,12 +786,12 @@ toListItems opts items = map (toListItem opts) items ++ [nl opts] toListItem :: WriterOptions -> Html -> Html toListItem opts item = nl opts >> H.li item -blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html +blockListToHtml :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Html blockListToHtml opts lst = fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst -- | Convert list of Pandoc inline elements to HTML. -inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html +inlineListToHtml :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Html inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= return . mconcat @@ -694,8 +810,10 @@ annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, -- | Convert Pandoc inline element to HTML. -inlineToHtml :: WriterOptions -> Inline -> State WriterState Html -inlineToHtml opts inline = +inlineToHtml :: PandocMonad m + => WriterOptions -> Inline -> StateT WriterState m Html +inlineToHtml opts inline = do + html5 <- gets stHtml5 case inline of (Str str) -> return $ strToHtml str (Space) -> return $ strToHtml " " @@ -703,7 +821,7 @@ inlineToHtml opts inline = WrapNone -> preEscapedString " " WrapAuto -> preEscapedString " " WrapPreserve -> preEscapedString "\n" - (LineBreak) -> return $ (if writerHtml5 opts then H5.br else H.br) + (LineBreak) -> return $ (if html5 then H5.br else H.br) <> strToHtml "\n" (Span (id',classes,kvs) ils) -> inlineListToHtml opts ils >>= @@ -731,8 +849,9 @@ inlineToHtml opts inline = modify $ \st -> st{ stHighlighting = True } return $ addAttrs opts (id',[],keyvals) h where (id',_,keyvals) = attr - hlCode = if writerHighlight opts - then highlight formatHtmlInline attr str + hlCode = if isJust (writerHighlightStyle opts) + then highlight formatHtmlInline + attr str else Nothing (Strikeout lst) -> inlineListToHtml opts lst >>= return . H.del @@ -771,12 +890,12 @@ inlineToHtml opts inline = InlineMath -> H.span ! A.class_ mathClass $ m DisplayMath -> H.div ! A.class_ mathClass $ m WebTeX url -> do - let imtag = if writerHtml5 opts then H5.img else H.img + let imtag = if html5 then H5.img else H.img let m = imtag ! A.style "vertical-align:middle" ! A.src (toValue $ url ++ urlEncode str) ! A.alt (toValue str) ! A.title (toValue str) - let brtag = if writerHtml5 opts then H5.br else H.br + let brtag = if html5 then H5.br else H.br return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag @@ -785,17 +904,14 @@ inlineToHtml opts inline = InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" MathML _ -> do - let dt = if t == InlineMath - then DisplayInline - else DisplayBlock let conf = useShortEmptyTags (const False) defaultConfigPP - case writeMathML dt <$> readTeX str of + res <- lift $ convertMath writeMathML t str + case res of Right r -> return $ preEscapedString $ ppcElement conf (annotateMML r str) - Left _ -> inlineListToHtml opts - (texMathToInlines t str) >>= - return . (H.span ! A.class_ mathClass) + Left il -> (H.span ! A.class_ mathClass) <$> + inlineToHtml opts il MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $ case t of InlineMath -> "\\(" ++ str ++ "\\)" @@ -805,9 +921,9 @@ inlineToHtml opts inline = InlineMath -> str DisplayMath -> "\\displaystyle " ++ str) PlainMath -> do - x <- inlineListToHtml opts (texMathToInlines t str) + x <- lift (texMathToInlines t str) >>= inlineListToHtml opts let m = H.span ! A.class_ mathClass $ x - let brtag = if writerHtml5 opts then H5.br else H.br + let brtag = if html5 then H5.br else H.br return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag @@ -816,12 +932,13 @@ inlineToHtml opts inline = | otherwise -> return mempty (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt - return $ obfuscateLink opts attr linkText s + lift $ obfuscateLink opts attr linkText s (Link attr txt (s,tit)) -> do linkText <- inlineListToHtml opts txt + slideVariant <- gets stSlideVariant let s' = case s of - '#':xs | writerSlideVariant opts == - RevealJsSlides -> '#':'/':xs + '#':xs | slideVariant == RevealJsSlides + -> '#':'/':xs _ -> s let link = H.a ! A.href (toValue s') $ linkText let link' = if txt == [Str (unEscapeString s)] @@ -837,7 +954,7 @@ inlineToHtml opts inline = [A.title $ toValue tit | not (null tit)] ++ [A.alt $ toValue alternate' | not (null txt)] ++ imgAttrsToHtml opts attr - let tag = if writerHtml5 opts then H5.img else H.img + let tag = if html5 then H5.img else H.img return $ foldl (!) tag attributes -- note: null title included, as in Markdown.pl (Image attr _ (s,tit)) -> do @@ -846,37 +963,36 @@ inlineToHtml opts inline = imgAttrsToHtml opts attr return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl - (Note contents) - | writerIgnoreNotes opts -> return mempty - | otherwise -> do + (Note contents) -> do notes <- gets stNotes let number = (length notes) + 1 let ref = show number htmlContents <- blockListToNote opts ref contents + epubVersion <- gets stEPUBVersion -- push contents onto front of notes modify $ \st -> st {stNotes = (htmlContents:notes)} - let revealSlash = ['/' | writerSlideVariant opts - == RevealJsSlides] + slideVariant <- gets stSlideVariant + let revealSlash = ['/' | slideVariant == RevealJsSlides] let link = H.a ! A.href (toValue $ "#" ++ revealSlash ++ writerIdentifierPrefix opts ++ "fn" ++ ref) ! A.class_ "footnoteRef" ! prefixedId opts ("fnref" ++ ref) - $ (if isJust (writerEpubVersion opts) + $ (if isJust epubVersion then id else H.sup) $ toHtml ref - return $ case writerEpubVersion opts of + return $ case epubVersion of Just EPUB3 -> link ! customAttribute "epub:type" "noteref" _ -> link (Cite cits il)-> do contents <- inlineListToHtml opts il let citationIds = unwords $ map citationId cits let result = H.span ! A.class_ "citation" $ contents - return $ if writerHtml5 opts + return $ if html5 then result ! customAttribute "data-cites" (toValue citationIds) else result -blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html +blockListToNote :: PandocMonad m => WriterOptions -> String -> [Block] -> StateT WriterState m Html blockListToNote opts ref blocks = -- If last block is Para or Plain, include the backlink at the end of -- that block. Otherwise, insert a new Plain block with the backlink. @@ -894,7 +1010,8 @@ blockListToNote opts ref blocks = Plain backlink] in do contents <- blockListToHtml opts blocks' let noteItem = H.li ! (prefixedId opts ("fn" ++ ref)) $ contents - let noteItem' = case writerEpubVersion opts of + epubVersion <- gets stEPUBVersion + let noteItem' = case epubVersion of Just EPUB3 -> noteItem ! customAttribute "epub:type" "footnote" _ -> noteItem return $ nl opts >> noteItem' diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 29fdafe15..1c160ea1c 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -39,9 +39,10 @@ import Text.Pandoc.Options import Data.List ( intersperse, transpose ) import Text.Pandoc.Pretty import Control.Monad.State -import Text.Pandoc.Readers.TeXMath (texMathToInlines) +import Text.Pandoc.Writers.Math (texMathToInlines) import Network.URI (isURI) import Data.Default +import Text.Pandoc.Class (PandocMonad) type Notes = [[Block]] data WriterState = WriterState { stNotes :: Notes } @@ -49,13 +50,14 @@ instance Default WriterState where def = WriterState{ stNotes = [] } -- | Convert Pandoc to Haddock. -writeHaddock :: WriterOptions -> Pandoc -> String +writeHaddock :: PandocMonad m => WriterOptions -> Pandoc -> m String writeHaddock opts document = - evalState (pandocToHaddock opts{ + evalStateT (pandocToHaddock opts{ writerWrapText = writerWrapText opts } document) def -- | Return haddock representation of document. -pandocToHaddock :: WriterOptions -> Pandoc -> State WriterState String +pandocToHaddock :: PandocMonad m + => WriterOptions -> Pandoc -> StateT WriterState m String pandocToHaddock opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts @@ -78,7 +80,8 @@ pandocToHaddock opts (Pandoc meta blocks) = do Just tpl -> return $ renderTemplate' tpl context -- | Return haddock representation of notes. -notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToHaddock :: PandocMonad m + => WriterOptions -> [[Block]] -> StateT WriterState m Doc notesToHaddock opts notes = if null notes then return empty @@ -92,9 +95,10 @@ escapeString = escapeStringUsing haddockEscapes where haddockEscapes = backslashEscapes "\\/'`\"@<" -- | Convert Pandoc block element to haddock. -blockToHaddock :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc +blockToHaddock :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> StateT WriterState m Doc blockToHaddock _ Null = return empty blockToHaddock opts (Div _ ils) = do contents <- blockListToHaddock opts ils @@ -167,8 +171,9 @@ blockToHaddock opts (DefinitionList items) = do contents <- mapM (definitionListItemToHaddock opts) items return $ cat contents <> blankline -pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> State WriterState Doc +pandocTable :: PandocMonad m + => WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> StateT WriterState m Doc pandocTable opts headless aligns widths rawHeaders rawRows = do let isSimple = all (==0) widths let alignHeader alignment = case alignment of @@ -207,8 +212,9 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do else border return $ head'' $$ underline $$ body $$ bottom -gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> State WriterState Doc +gridTable :: PandocMonad m + => WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> StateT WriterState m Doc gridTable opts headless _aligns widths headers' rawRows = do let numcols = length headers' let widths' = if all (==0) widths @@ -235,7 +241,8 @@ gridTable opts headless _aligns widths headers' rawRows = do return $ border '-' $$ head'' $$ body $$ border '-' -- | Convert bullet list item (list of blocks) to haddock -bulletListItemToHaddock :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToHaddock :: PandocMonad m + => WriterOptions -> [Block] -> StateT WriterState m Doc bulletListItemToHaddock opts items = do contents <- blockListToHaddock opts items let sps = replicate (writerTabStop opts - 2) ' ' @@ -250,10 +257,11 @@ bulletListItemToHaddock opts items = do return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert ordered list item (a list of blocks) to haddock -orderedListItemToHaddock :: WriterOptions -- ^ options - -> String -- ^ list item marker - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc +orderedListItemToHaddock :: PandocMonad m + => WriterOptions -- ^ options + -> String -- ^ list item marker + -> [Block] -- ^ list item (list of blocks) + -> StateT WriterState m Doc orderedListItemToHaddock opts marker items = do contents <- blockListToHaddock opts items let sps = case length marker - writerTabStop opts of @@ -263,9 +271,10 @@ orderedListItemToHaddock opts marker items = do return $ hang (writerTabStop opts) start $ contents <> cr -- | Convert definition list item (label, list of blocks) to haddock -definitionListItemToHaddock :: WriterOptions - -> ([Inline],[[Block]]) - -> State WriterState Doc +definitionListItemToHaddock :: PandocMonad m + => WriterOptions + -> ([Inline],[[Block]]) + -> StateT WriterState m Doc definitionListItemToHaddock opts (label, defs) = do labelText <- inlineListToHaddock opts label defs' <- mapM (mapM (blockToHaddock opts)) defs @@ -273,19 +282,22 @@ definitionListItemToHaddock opts (label, defs) = do return $ nowrap (brackets labelText) <> cr <> contents <> cr -- | Convert list of Pandoc block elements to haddock -blockListToHaddock :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc +blockListToHaddock :: PandocMonad m + => WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> StateT WriterState m Doc blockListToHaddock opts blocks = mapM (blockToHaddock opts) blocks >>= return . cat -- | Convert list of Pandoc inline elements to haddock. -inlineListToHaddock :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToHaddock :: PandocMonad m + => WriterOptions -> [Inline] -> StateT WriterState m Doc inlineListToHaddock opts lst = mapM (inlineToHaddock opts) lst >>= return . cat -- | Convert Pandoc inline element to haddock. -inlineToHaddock :: WriterOptions -> Inline -> State WriterState Doc +inlineToHaddock :: PandocMonad m + => WriterOptions -> Inline -> StateT WriterState m Doc inlineToHaddock opts (Span (ident,_,_) ils) = do contents <- inlineListToHaddock opts ils if not (null ident) && null ils @@ -321,12 +333,12 @@ inlineToHaddock opts (Math mt str) = do let adjust x = case mt of DisplayMath -> cr <> x <> cr InlineMath -> x - adjust `fmap` (inlineListToHaddock opts $ texMathToInlines mt str) + adjust <$> (lift (texMathToInlines mt str) >>= inlineListToHaddock opts) inlineToHaddock _ (RawInline f str) | f == "haddock" = return $ text str | otherwise = return empty -- no line break in haddock (see above on CodeBlock) -inlineToHaddock _ (LineBreak) = return cr +inlineToHaddock _ LineBreak = return cr inlineToHaddock opts SoftBreak = case writerWrapText opts of WrapAuto -> return space diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 8f0d21cf5..41bca11b2 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings, FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings, FlexibleContexts, ScopedTypeVariables #-} {- | Module : Text.Pandoc.Writers.ICML @@ -15,10 +15,11 @@ into InDesign with File -> Place. -} module Text.Pandoc.Writers.ICML (writeICML) where import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError) import Text.Pandoc.XML -import Text.Pandoc.Readers.TeXMath (texMathToInlines) +import Text.Pandoc.Writers.Math (texMathToInlines) import Text.Pandoc.Writers.Shared -import Text.Pandoc.Shared (linesToPara, splitBy, fetchItem, warn) +import Text.Pandoc.Shared (linesToPara, splitBy) import Text.Pandoc.Options import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Pretty @@ -26,8 +27,11 @@ import Text.Pandoc.ImageSize import Data.List (isPrefixOf, isInfixOf, stripPrefix, intersperse) import Data.Text as Text (breakOnAll, pack) import Control.Monad.State +import Control.Monad.Except (runExceptT) import Network.URI (isURI) import qualified Data.Set as Set +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P type Style = [String] type Hyperlink = [(Int, String)] @@ -40,7 +44,7 @@ data WriterState = WriterState{ , maxListDepth :: Int } -type WS a = StateT WriterState IO a +type WS m = StateT WriterState m defaultWriterState :: WriterState defaultWriterState = WriterState{ @@ -121,9 +125,8 @@ subListParName = "subParagraph" footnoteName = "Footnote" citeName = "Cite" - -- | Convert Pandoc document to string in ICML format. -writeICML :: WriterOptions -> Pandoc -> IO String +writeICML :: PandocMonad m => WriterOptions -> Pandoc -> m String writeICML opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts @@ -283,13 +286,13 @@ hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs -- | Convert a list of Pandoc blocks to ICML. -blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc +blocksToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc blocksToICML opts style lst = do docs <- mapM (blockToICML opts style) lst return $ intersperseBrs docs -- | Convert a Pandoc block element to ICML. -blockToICML :: WriterOptions -> Style -> Block -> WS Doc +blockToICML :: PandocMonad m => WriterOptions -> Style -> Block -> WS m Doc blockToICML opts style (Plain lst) = parStyle opts style lst -- title beginning with fig: indicates that the image is a figure blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do @@ -359,7 +362,7 @@ blockToICML opts style (Div _ lst) = blocksToICML opts style lst blockToICML _ _ Null = return empty -- | Convert a list of lists of blocks to ICML list items. -listItemsToICML :: WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS Doc +listItemsToICML :: PandocMonad m => WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS m Doc listItemsToICML _ _ _ _ [] = return empty listItemsToICML opts listType style attribs (first:rest) = do st <- get @@ -374,7 +377,7 @@ listItemsToICML opts listType style attribs (first:rest) = do return $ intersperseBrs docs -- | Convert a list of blocks to ICML list items. -listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc +listItemToICML :: PandocMonad m => WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS m Doc listItemToICML opts style isFirst attribs item = let makeNumbStart (Just (beginsWith, numbStl, _)) = let doN DefaultStyle = [] @@ -401,7 +404,7 @@ listItemToICML opts style isFirst attribs item = return $ intersperseBrs (f : r) else blocksToICML opts stl' item -definitionListItemToICML :: WriterOptions -> Style -> ([Inline],[[Block]]) -> WS Doc +definitionListItemToICML :: PandocMonad m => WriterOptions -> Style -> ([Inline],[[Block]]) -> WS m Doc definitionListItemToICML opts style (term,defs) = do term' <- parStyle opts (defListTermName:style) term defs' <- mapM (blocksToICML opts (defListDefName:style)) defs @@ -409,11 +412,11 @@ definitionListItemToICML opts style (term,defs) = do -- | Convert a list of inline elements to ICML. -inlinesToICML :: WriterOptions -> Style -> [Inline] -> WS Doc +inlinesToICML :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst) -- | Convert an inline element to ICML. -inlineToICML :: WriterOptions -> Style -> Inline -> WS Doc +inlineToICML :: PandocMonad m => WriterOptions -> Style -> Inline -> WS m Doc inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst @@ -433,7 +436,8 @@ inlineToICML opts style SoftBreak = WrapPreserve -> charStyle style cr inlineToICML _ style LineBreak = charStyle style $ text lineSeparator inlineToICML opts style (Math mt str) = - cat <$> mapM (inlineToICML opts style) (texMathToInlines mt str) + lift (texMathToInlines mt str) >>= + (fmap cat . mapM (inlineToICML opts style)) inlineToICML _ _ (RawInline f str) | f == Format "icml" = return $ text str | otherwise = return empty @@ -452,7 +456,7 @@ inlineToICML opts style (Note lst) = footnoteToICML opts style lst inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst -- | Convert a list of block elements to an ICML footnote. -footnoteToICML :: WriterOptions -> Style -> [Block] -> WS Doc +footnoteToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc footnoteToICML opts style lst = let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls insertTab block = blockToICML opts (footnoteName:style) block @@ -483,7 +487,7 @@ intersperseBrs :: [Doc] -> Doc intersperseBrs = vcat . intersperse (selfClosingTag "Br" []) . filter (not . isEmpty) -- | Wrap a list of inline elements in an ICML Paragraph Style -parStyle :: WriterOptions -> Style -> [Inline] -> WS Doc +parStyle :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc parStyle opts style lst = let slipIn x y = if null y then x @@ -507,7 +511,7 @@ parStyle opts style lst = state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st }) -- | Wrap a Doc in an ICML Character Style. -charStyle :: Style -> Doc -> WS Doc +charStyle :: PandocMonad m => Style -> Doc -> WS m Doc charStyle style content = let (stlStr, attrs) = styleToStrAttr style doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content @@ -529,18 +533,18 @@ styleToStrAttr style = in (stlStr, attrs) -- | Assemble an ICML Image. -imageICML :: WriterOptions -> Style -> Attr -> Target -> WS Doc +imageICML :: PandocMonad m => WriterOptions -> Style -> Attr -> Target -> WS m Doc imageICML opts style attr (src, _) = do - res <- liftIO $ fetchItem (writerSourceURL opts) src + res <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src imgS <- case res of - Left (_) -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." + Left (_ :: PandocError) -> do + lift $ P.warning $ "Could not find image `" ++ src ++ "', skipping..." return def Right (img, _) -> do case imageSize img of Right size -> return size Left msg -> do - warn $ "Could not determine image size in `" ++ + lift $ P.warning $ "Could not determine image size in `" ++ src ++ "': " ++ msg return def let (ow, oh) = sizeInPoints imgS diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 88934eb44..67318a549 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -29,7 +29,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' format into LaTeX. -} -module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where +module Text.Pandoc.Writers.LaTeX ( + writeLaTeX + , writeBeamer + ) where import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Shared @@ -54,6 +57,7 @@ import Text.Pandoc.Slides import Text.Pandoc.Highlighting (highlight, styleToLaTeX, formatLaTeXInline, formatLaTeXBlock, toListingsLanguage) +import Text.Pandoc.Class (PandocMonad) data WriterState = WriterState { stInNote :: Bool -- true if we're in a note @@ -75,26 +79,46 @@ data WriterState = , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit , stInternalLinks :: [String] -- list of internal link targets , stUsesEuro :: Bool -- true if euro symbol used + , stBeamer :: Bool -- produce beamer } +startingState :: WriterOptions -> WriterState +startingState options = WriterState { + stInNote = False + , stInQuote = False + , stInMinipage = False + , stInHeading = False + , stNotes = [] + , stOLLevel = 1 + , stOptions = options + , stVerbInNote = False + , stTable = False + , stStrikeout = False + , stUrl = False + , stGraphics = False + , stLHS = False + , stBook = (case writerTopLevelDivision options of + TopLevelPart -> True + TopLevelChapter -> True + _ -> False) + , stCsquotes = False + , stHighlighting = False + , stIncremental = writerIncremental options + , stInternalLinks = [] + , stUsesEuro = False + , stBeamer = False } + -- | Convert Pandoc to LaTeX. -writeLaTeX :: WriterOptions -> Pandoc -> String -writeLaTeX options document = +writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeLaTeX options document = return $ + evalState (pandocToLaTeX options document) $ + startingState options + +-- | Convert Pandoc to LaTeX Beamer. +writeBeamer :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeBeamer options document = return $ evalState (pandocToLaTeX options document) $ - WriterState { stInNote = False, stInQuote = False, - stInMinipage = False, stInHeading = False, - stNotes = [], stOLLevel = 1, - stOptions = options, stVerbInNote = False, - stTable = False, stStrikeout = False, - stUrl = False, stGraphics = False, - stLHS = False, - stBook = (case writerTopLevelDivision options of - TopLevelPart -> True - TopLevelChapter -> True - _ -> False), - stCsquotes = False, stHighlighting = False, - stIncremental = writerIncremental options, - stInternalLinks = [], stUsesEuro = False } + (startingState options){ stBeamer = True } pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String pandocToLaTeX options (Pandoc meta blocks) = do @@ -143,7 +167,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do else case last blocks' of Header 1 _ il -> (init blocks', il) _ -> (blocks', []) - blocks''' <- if writerBeamer options + beamer <- gets stBeamer + blocks''' <- if beamer then toSlides blocks'' else return blocks'' body <- mapM (elementToLaTeX options) $ hierarchicalize blocks''' @@ -170,7 +195,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "body" main $ defField "title-meta" titleMeta $ defField "author-meta" (intercalate "; " authorsMeta) $ - defField "documentclass" (if writerBeamer options + defField "documentclass" (if beamer then ("beamer" :: String) else if stBook st then "book" @@ -185,10 +210,13 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "book-class" (stBook st) $ defField "euro" (stUsesEuro st) $ defField "listings" (writerListings options || stLHS st) $ - defField "beamer" (writerBeamer options) $ + defField "beamer" beamer $ (if stHighlighting st - then defField "highlighting-macros" (styleToLaTeX - $ writerHighlightStyle options ) + then case writerHighlightStyle options of + Just sty -> + defField "highlighting-macros" + (styleToLaTeX sty) + Nothing -> id else id) $ (case writerCiteMethod options of Natbib -> defField "biblio-title" biblioTitle . @@ -271,7 +299,7 @@ stringToLaTeX _ [] = return "" stringToLaTeX ctx (x:xs) = do opts <- gets stOptions rest <- stringToLaTeX ctx xs - let ligatures = writerTeXLigatures opts && ctx == TextString + let ligatures = isEnabled Ext_smart opts && ctx == TextString let isUrl = ctx == URLString when (x == '€') $ modify $ \st -> st{ stUsesEuro = True } @@ -384,7 +412,7 @@ blockToLaTeX :: Block -- ^ Block to convert -> State WriterState Doc blockToLaTeX Null = return empty blockToLaTeX (Div (identifier,classes,kvs) bs) = do - beamer <- writerBeamer `fmap` gets stOptions + beamer <- gets stBeamer ref <- toLabel identifier let linkAnchor = if null identifier then empty @@ -435,7 +463,7 @@ blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = d else figure $$ footnotes -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do - beamer <- writerBeamer `fmap` gets stOptions + beamer <- gets stBeamer if beamer then blockToLaTeX (RawBlock "latex" "\\pause") else inlineListToLaTeX [Str ".",Space,Str ".",Space,Str "."] @@ -444,7 +472,7 @@ blockToLaTeX (Para lst) = blockToLaTeX (LineBlock lns) = do blockToLaTeX $ linesToPara lns blockToLaTeX (BlockQuote lst) = do - beamer <- writerBeamer `fmap` gets stOptions + beamer <- gets stBeamer case lst of [b] | beamer && isListBlock b -> do oldIncremental <- gets stIncremental @@ -511,10 +539,11 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do return (flush $ linkAnchor $$ text (T.unpack h)) case () of _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes && - "literate" `elem` classes -> lhsCodeBlock - | writerListings opts -> listingsCodeBlock - | writerHighlight opts && not (null classes) -> highlightedCodeBlock - | otherwise -> rawCodeBlock + "literate" `elem` classes -> lhsCodeBlock + | writerListings opts -> listingsCodeBlock + | not (null classes) && isJust (writerHighlightStyle opts) + -> highlightedCodeBlock + | otherwise -> rawCodeBlock blockToLaTeX (RawBlock f x) | f == Format "latex" || f == Format "tex" = return $ text x @@ -522,7 +551,7 @@ blockToLaTeX (RawBlock f x) blockToLaTeX (BulletList []) = return empty -- otherwise latex error blockToLaTeX (BulletList lst) = do incremental <- gets stIncremental - beamer <- writerBeamer `fmap` gets stOptions + beamer <- gets stBeamer let inc = if beamer && incremental then "[<+->]" else "" items <- mapM listItemToLaTeX lst let spacing = if isTightList lst @@ -767,7 +796,8 @@ sectionHeader unnumbered ident level lst = do let topLevelDivision = if book && writerTopLevelDivision opts == TopLevelDefault then TopLevelChapter else writerTopLevelDivision opts - let level' = if writerBeamer opts && + beamer <- gets stBeamer + let level' = if beamer && topLevelDivision `elem` [TopLevelPart, TopLevelChapter] -- beamer has parts but no chapters then if level == 1 then -1 else level - 1 @@ -903,7 +933,8 @@ inlineToLaTeX (Code (_,classes,_) str) = do inHeading <- gets stInHeading case () of _ | writerListings opts && not inHeading -> listingsCode - | writerHighlight opts && not (null classes) -> highlightCode + | isJust (writerHighlightStyle opts) && not (null classes) + -> highlightCode | otherwise -> rawCode where listingsCode = do inNote <- gets stInNote @@ -937,11 +968,11 @@ inlineToLaTeX (Quoted qt lst) = do let inner = s1 <> contents <> s2 return $ case qt of DoubleQuote -> - if writerTeXLigatures opts + if isEnabled Ext_smart opts then text "``" <> inner <> text "''" else char '\x201C' <> inner <> char '\x201D' SingleQuote -> - if writerTeXLigatures opts + if isEnabled Ext_smart opts then char '`' <> inner <> char '\'' else char '\x2018' <> inner <> char '\x2019' inlineToLaTeX (Str str) = liftM text $ stringToLaTeX TextString str @@ -1016,9 +1047,9 @@ inlineToLaTeX (Note contents) = do (CodeBlock _ _ : _) -> cr _ -> empty let noteContents = nest 2 contents' <> optnl - opts <- gets stOptions + beamer <- gets stBeamer -- in beamer slides, display footnote from current overlay forward - let beamerMark = if writerBeamer opts + let beamerMark = if beamer then text "<.->" else empty modify $ \st -> st{ stNotes = noteContents : stNotes st } @@ -1316,10 +1347,6 @@ commonFromBcp47 x = fromIso $ head x fromIso "vi" = "vietnamese" fromIso _ = "" -deNote :: Inline -> Inline -deNote (Note _) = RawInline (Format "latex") "" -deNote x = x - pDocumentOptions :: P.Parsec String () [String] pDocumentOptions = do P.char '[' diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 98b08b08b..36ed5fab0 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -34,24 +34,27 @@ import Text.Pandoc.Templates import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options -import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Writers.Math import Text.Printf ( printf ) import Data.List ( stripPrefix, intersperse, intercalate ) import Data.Maybe (fromMaybe) import Text.Pandoc.Pretty import Text.Pandoc.Builder (deleteMeta) import Control.Monad.State +import Text.Pandoc.Error +import Control.Monad.Except (throwError) +import Text.Pandoc.Class (PandocMonad) type Notes = [[Block]] data WriterState = WriterState { stNotes :: Notes , stHasTables :: Bool } -- | Convert Pandoc to Man. -writeMan :: WriterOptions -> Pandoc -> String -writeMan opts document = evalState (pandocToMan opts document) (WriterState [] False) +writeMan :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeMan opts document = evalStateT (pandocToMan opts document) (WriterState [] False) -- | Return groff man representation of document. -pandocToMan :: WriterOptions -> Pandoc -> State WriterState String +pandocToMan :: PandocMonad m => WriterOptions -> Pandoc -> StateT WriterState m String pandocToMan opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts @@ -93,7 +96,7 @@ pandocToMan opts (Pandoc meta blocks) = do Just tpl -> return $ renderTemplate' tpl context -- | Return man representation of notes. -notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc notesToMan opts notes = if null notes then return empty @@ -101,7 +104,7 @@ notesToMan opts notes = return . (text ".SH NOTES" $$) . vcat -- | Return man representation of a note. -noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToMan :: PandocMonad m => WriterOptions -> Int -> [Block] -> StateT WriterState m Doc noteToMan opts num note = do contents <- blockListToMan opts note let marker = cr <> text ".SS " <> brackets (text (show num)) @@ -160,9 +163,10 @@ splitSentences xs = in if null rest then [sent] else sent : splitSentences rest -- | Convert Pandoc block element to man. -blockToMan :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc +blockToMan :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> StateT WriterState m Doc blockToMan _ Null = return empty blockToMan opts (Div _ bs) = blockListToMan opts bs blockToMan opts (Plain inlines) = @@ -236,7 +240,7 @@ blockToMan opts (DefinitionList items) = do return (vcat contents) -- | Convert bullet list item (list of blocks) to man. -bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToMan :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Doc bulletListItemToMan _ [] = return empty bulletListItemToMan opts ((Para first):rest) = bulletListItemToMan opts ((Plain first):rest) @@ -254,11 +258,12 @@ bulletListItemToMan opts (first:rest) = do return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" -- | Convert ordered list item (a list of blocks) to man. -orderedListItemToMan :: WriterOptions -- ^ options - -> String -- ^ order marker for list item - -> Int -- ^ number of spaces to indent - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc +orderedListItemToMan :: PandocMonad m + => WriterOptions -- ^ options + -> String -- ^ order marker for list item + -> Int -- ^ number of spaces to indent + -> [Block] -- ^ list item (list of blocks) + -> StateT WriterState m Doc orderedListItemToMan _ _ _ [] = return empty orderedListItemToMan opts num indent ((Para first):rest) = orderedListItemToMan opts num indent ((Plain first):rest) @@ -273,18 +278,19 @@ orderedListItemToMan opts num indent (first:rest) = do return $ first'' $$ rest'' -- | Convert definition list item (label, list of blocks) to man. -definitionListItemToMan :: WriterOptions - -> ([Inline],[[Block]]) - -> State WriterState Doc +definitionListItemToMan :: PandocMonad m + => WriterOptions + -> ([Inline],[[Block]]) + -> StateT WriterState m Doc definitionListItemToMan opts (label, defs) = do labelText <- inlineListToMan opts label contents <- if null defs then return empty else liftM vcat $ forM defs $ \blocks -> do - let (first, rest) = case blocks of - ((Para x):y) -> (Plain x,y) - (x:y) -> (x,y) - [] -> error "blocks is null" + (first, rest) <- case blocks of + ((Para x):y) -> return (Plain x,y) + (x:y) -> return (x,y) + [] -> throwError $ PandocSomeError "blocks is null" rest' <- liftM vcat $ mapM (\item -> blockToMan opts item) rest first' <- blockToMan opts first @@ -292,18 +298,19 @@ definitionListItemToMan opts (label, defs) = do return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents -- | Convert list of Pandoc block elements to man. -blockListToMan :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc +blockListToMan :: PandocMonad m + => WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> StateT WriterState m Doc blockListToMan opts blocks = mapM (blockToMan opts) blocks >>= (return . vcat) -- | Convert list of Pandoc inline elements to man. -inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToMan :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Doc inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) -- | Convert Pandoc inline element to man. -inlineToMan :: WriterOptions -> Inline -> State WriterState Doc +inlineToMan :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m Doc inlineToMan opts (Span _ ils) = inlineListToMan opts ils inlineToMan opts (Emph lst) = do contents <- inlineListToMan opts lst @@ -335,14 +342,14 @@ inlineToMan _ (Str str@('.':_)) = return $ afterBreak "\\&" <> text (escapeString str) inlineToMan _ (Str str) = return $ text $ escapeString str inlineToMan opts (Math InlineMath str) = - inlineListToMan opts $ texMathToInlines InlineMath str + lift (texMathToInlines InlineMath str) >>= inlineListToMan opts inlineToMan opts (Math DisplayMath str) = do - contents <- inlineListToMan opts $ texMathToInlines DisplayMath str + contents <- lift (texMathToInlines DisplayMath str) >>= inlineListToMan opts return $ cr <> text ".RS" $$ contents $$ text ".RE" inlineToMan _ (RawInline f str) | f == Format "man" = return $ text str | otherwise = return empty -inlineToMan _ (LineBreak) = return $ +inlineToMan _ LineBreak = return $ cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr inlineToMan _ SoftBreak = return space inlineToMan _ Space = return space diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index e3bb3eea0..e965528cc 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -46,8 +46,9 @@ import Data.Ord ( comparing ) import Text.Pandoc.Pretty import Control.Monad.Reader import Control.Monad.State -import Text.Pandoc.Writers.HTML (writeHtmlString) -import Text.Pandoc.Readers.TeXMath (texMathToInlines) +import Control.Monad.Except (throwError) +import Text.Pandoc.Writers.HTML (writeHtml5String) +import Text.Pandoc.Writers.Math (texMathToInlines) import Text.HTML.TagSoup (parseTags, isTagText, Tag(..)) import Network.URI (isURI) import Data.Default @@ -57,15 +58,17 @@ import qualified Data.Vector as V import qualified Data.Text as T import qualified Data.Set as Set import Network.HTTP ( urlEncode ) +import Text.Pandoc.Error +import Text.Pandoc.Class (PandocMonad) type Notes = [[Block]] type Ref = ([Inline], Target, Attr) type Refs = [Ref] -type MD = ReaderT WriterEnv (State WriterState) +type MD m = ReaderT WriterEnv (StateT WriterState m) -evalMD :: MD a -> WriterEnv -> WriterState -> a -evalMD md env st = evalState (runReaderT md env) st +evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a +evalMD md env st = evalStateT (runReaderT md env) st data WriterEnv = WriterEnv { envInList :: Bool , envPlain :: Bool @@ -96,7 +99,7 @@ instance Default WriterState } -- | Convert Pandoc to Markdown. -writeMarkdown :: WriterOptions -> Pandoc -> String +writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m String writeMarkdown opts document = evalMD (pandocToMarkdown opts{ writerWrapText = if isEnabled Ext_hard_line_breaks opts @@ -106,7 +109,7 @@ writeMarkdown opts document = -- | Convert Pandoc to plain text (like markdown, but without links, -- pictures, or inline formatting). -writePlain :: WriterOptions -> Pandoc -> String +writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m String writePlain opts document = evalMD (pandocToMarkdown opts document) def{ envPlain = True } def @@ -171,7 +174,7 @@ jsonToYaml (Number n) = text $ show n jsonToYaml _ = empty -- | Return markdown representation of document. -pandocToMarkdown :: WriterOptions -> Pandoc -> MD String +pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m String pandocToMarkdown opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts @@ -196,9 +199,9 @@ pandocToMarkdown opts (Pandoc meta blocks) = do | otherwise -> empty Nothing -> empty let headerBlocks = filter isHeaderBlock blocks - let toc = if writerTableOfContents opts - then tableOfContents opts headerBlocks - else empty + toc <- if writerTableOfContents opts + then tableOfContents opts headerBlocks + else return empty -- Strip off final 'references' header if markdown citations enabled let blocks' = if isEnabled Ext_citations opts then case reverse blocks of @@ -221,13 +224,14 @@ pandocToMarkdown opts (Pandoc meta blocks) = do Just tpl -> return $ renderTemplate' tpl context -- | Return markdown representation of reference key table. -refsToMarkdown :: WriterOptions -> Refs -> MD Doc +refsToMarkdown :: PandocMonad m => WriterOptions -> Refs -> MD m Doc refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat -- | Return markdown representation of a reference key. -keyToMarkdown :: WriterOptions +keyToMarkdown :: PandocMonad m + => WriterOptions -> Ref - -> MD Doc + -> MD m Doc keyToMarkdown opts (label, (src, tit), attr) = do label' <- inlineListToMarkdown opts label let tit' = if null tit @@ -238,7 +242,7 @@ keyToMarkdown opts (label, (src, tit), attr) = do <> linkAttributes opts attr -- | Return markdown representation of notes. -notesToMarkdown :: WriterOptions -> [[Block]] -> MD Doc +notesToMarkdown :: PandocMonad m => WriterOptions -> [[Block]] -> MD m Doc notesToMarkdown opts notes = do n <- gets stNoteNum notes' <- mapM (\(num, note) -> noteToMarkdown opts num note) (zip [n..] notes) @@ -246,7 +250,7 @@ notesToMarkdown opts notes = do return $ vsep notes' -- | Return markdown representation of a note. -noteToMarkdown :: WriterOptions -> Int -> [Block] -> MD Doc +noteToMarkdown :: PandocMonad m => WriterOptions -> Int -> [Block] -> MD m Doc noteToMarkdown opts num blocks = do contents <- blockListToMarkdown opts blocks let num' = text $ writerIdentifierPrefix opts ++ show num @@ -276,14 +280,16 @@ escapeString opts = escapeStringUsing markdownEscapes (if isEnabled Ext_tex_math_dollars opts then ('$':) else id) $ - "\\`*_[]#" + "\\`*_[]#" ++ + if isEnabled Ext_smart opts + then "\"'" + else "" -- | Construct table of contents from list of header blocks. -tableOfContents :: WriterOptions -> [Block] -> Doc +tableOfContents :: PandocMonad m => WriterOptions -> [Block] -> m Doc tableOfContents opts headers = - let opts' = opts { writerIgnoreNotes = True } - contents = BulletList $ map (elementToListItem opts) $ hierarchicalize headers - in evalMD (blockToMarkdown opts' contents) def def + let contents = BulletList $ map (elementToListItem opts) $ hierarchicalize headers + in evalMD (blockToMarkdown opts contents) def def -- | Converts an Element to a list item for a table of contents, elementToListItem :: WriterOptions -> Element -> [Block] @@ -292,8 +298,9 @@ elementToListItem opts (Sec lev _nums (ident,_,_) headerText subsecs) [ BulletList (map (elementToListItem opts) subsecs) | not (null subsecs) && lev < writerTOCDepth opts ] where headerLink = if null ident - then headerText - else [Link nullAttr headerText ('#':ident, "")] + then walk deNote headerText + else [Link nullAttr (walk deNote headerText) + ('#':ident, "")] elementToListItem _ (Blk _) = [] attrsToMarkdown :: Attr -> Doc @@ -334,7 +341,7 @@ beginsWithOrderedListMarker str = Left _ -> False Right _ -> True -notesAndRefs :: WriterOptions -> MD Doc +notesAndRefs :: PandocMonad m => WriterOptions -> MD m Doc notesAndRefs opts = do notes' <- reverse <$> gets stNotes >>= notesToMarkdown opts modify $ \s -> s { stNotes = [] } @@ -345,16 +352,17 @@ notesAndRefs opts = do if | writerReferenceLocation opts == EndOfDocument -> empty | isEmpty notes' && isEmpty refs' -> empty | otherwise -> blankline - + return $ (if isEmpty notes' then empty else blankline <> notes') <> (if isEmpty refs' then empty else blankline <> refs') <> endSpacing -- | Convert Pandoc block element to markdown. -blockToMarkdown :: WriterOptions -- ^ Options +blockToMarkdown :: PandocMonad m + => WriterOptions -- ^ Options -> Block -- ^ Block element - -> MD Doc + -> MD m Doc blockToMarkdown opts blk = local (\env -> env {envBlockLevel = envBlockLevel env + 1}) $ do doc <- blockToMarkdown' opts blk @@ -363,9 +371,10 @@ blockToMarkdown opts blk = then notesAndRefs opts >>= (\d -> return $ doc <> d) else return doc -blockToMarkdown' :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> MD Doc +blockToMarkdown' :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> MD m Doc blockToMarkdown' _ Null = return empty blockToMarkdown' opts (Div attrs ils) = do contents <- blockListToMarkdown opts ils @@ -526,8 +535,8 @@ blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do gridTable opts (all null headers) aligns widths rawHeaders rawRows | isEnabled Ext_raw_html opts -> fmap (id,) $ - return $ text $ writeHtmlString def - $ Pandoc nullMeta [t] + text <$> + (writeHtml5String def $ Pandoc nullMeta [t]) | otherwise -> return $ (id, text "[TABLE]") return $ nst $ tbl $$ blankline $$ caption'' $$ blankline blockToMarkdown' opts (BulletList items) = do @@ -550,7 +559,7 @@ blockToMarkdown' opts (DefinitionList items) = do contents <- inList $ mapM (definitionListItemToMarkdown opts) items return $ cat contents <> blankline -inList :: MD a -> MD a +inList :: Monad m => MD m a -> MD m a inList p = local (\env -> env {envInList = True}) p addMarkdownAttribute :: String -> String @@ -562,7 +571,7 @@ addMarkdownAttribute s = x /= "markdown"] _ -> s -pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> MD Doc +pipeTable :: PandocMonad m => Bool -> [Alignment] -> [Doc] -> [[Doc]] -> MD m Doc pipeTable headless aligns rawHeaders rawRows = do let sp = text " " let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty @@ -590,8 +599,8 @@ pipeTable headless aligns rawHeaders rawRows = do let body = vcat $ map torow rawRows return $ header $$ border $$ body -pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> MD Doc +pandocTable :: PandocMonad m => WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> MD m Doc pandocTable opts headless aligns widths rawHeaders rawRows = do let isSimple = all (==0) widths let alignHeader alignment = case alignment of @@ -642,8 +651,8 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do else border return $ head'' $$ underline $$ body $$ bottom -gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> MD Doc +gridTable :: PandocMonad m => WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> MD m Doc gridTable opts headless aligns widths headers' rawRows = do let numcols = length headers' let widths' = if all (==0) widths @@ -697,7 +706,7 @@ itemEndsWithTightList bs = _ -> False -- | Convert bullet list item (list of blocks) to markdown. -bulletListItemToMarkdown :: WriterOptions -> [Block] -> MD Doc +bulletListItemToMarkdown :: PandocMonad m => WriterOptions -> [Block] -> MD m Doc bulletListItemToMarkdown opts bs = do contents <- blockListToMarkdown opts bs let sps = replicate (writerTabStop opts - 2) ' ' @@ -709,10 +718,11 @@ bulletListItemToMarkdown opts bs = do return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert ordered list item (a list of blocks) to markdown. -orderedListItemToMarkdown :: WriterOptions -- ^ options +orderedListItemToMarkdown :: PandocMonad m + => WriterOptions -- ^ options -> String -- ^ list item marker -> [Block] -- ^ list item (list of blocks) - -> MD Doc + -> MD m Doc orderedListItemToMarkdown opts marker bs = do contents <- blockListToMarkdown opts bs let sps = case length marker - writerTabStop opts of @@ -726,9 +736,10 @@ orderedListItemToMarkdown opts marker bs = do return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert definition list item (label, list of blocks) to markdown. -definitionListItemToMarkdown :: WriterOptions +definitionListItemToMarkdown :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) - -> MD Doc + -> MD m Doc definitionListItemToMarkdown opts (label, defs) = do labelText <- inlineListToMarkdown opts label defs' <- mapM (mapM (blockToMarkdown opts)) defs @@ -758,9 +769,10 @@ definitionListItemToMarkdown opts (label, defs) = do vsep (map vsep defs') <> blankline -- | Convert list of Pandoc block elements to markdown. -blockListToMarkdown :: WriterOptions -- ^ Options +blockListToMarkdown :: PandocMonad m + => WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> MD Doc + -> MD m Doc blockListToMarkdown opts blocks = mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat -- insert comment between list and indented code block, or the @@ -787,25 +799,25 @@ blockListToMarkdown opts blocks = -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. -getReference :: Attr -> [Inline] -> Target -> MD [Inline] +getReference :: PandocMonad m => Attr -> [Inline] -> Target -> MD m [Inline] getReference attr label target = do st <- get case find (\(_,t,a) -> t == target && a == attr) (stRefs st) of Just (ref, _, _) -> return ref Nothing -> do - let label' = case find (\(l,_,_) -> l == label) (stRefs st) of - Just _ -> -- label is used; generate numerical label - case find (\n -> notElem [Str (show n)] - (map (\(l,_,_) -> l) (stRefs st))) - [1..(10000 :: Integer)] of - Just x -> [Str (show x)] - Nothing -> error "no unique label" - Nothing -> label + label' <- case find (\(l,_,_) -> l == label) (stRefs st) of + Just _ -> -- label is used; generate numerical label + case find (\n -> notElem [Str (show n)] + (map (\(l,_,_) -> l) (stRefs st))) + [1..(10000 :: Integer)] of + Just x -> return [Str (show x)] + Nothing -> throwError $ PandocSomeError "no unique label" + Nothing -> return label modify (\s -> s{ stRefs = (label', target, attr) : stRefs st }) return label' -- | Convert list of Pandoc inline elements to markdown. -inlineListToMarkdown :: WriterOptions -> [Inline] -> MD Doc +inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m Doc inlineListToMarkdown opts lst = do inlist <- asks envInList go (if inlist then avoidBadWrapsInList lst else lst) @@ -866,7 +878,7 @@ isRight (Right _) = True isRight (Left _) = False -- | Convert Pandoc inline element to markdown. -inlineToMarkdown :: WriterOptions -> Inline -> MD Doc +inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m Doc inlineToMarkdown opts (Span attrs ils) = do plain <- asks envPlain contents <- inlineListToMarkdown opts ils @@ -940,10 +952,14 @@ inlineToMarkdown opts (SmallCaps lst) = do else inlineListToMarkdown opts $ capitalize lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ "‘" <> contents <> "’" + return $ if isEnabled Ext_smart opts + then "'" <> contents <> "'" + else "‘" <> contents <> "’" inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ "“" <> contents <> "”" + return $ if isEnabled Ext_smart opts + then "\"" <> contents <> "\"" + else "“" <> contents <> "”" inlineToMarkdown opts (Code attr str) = do let tickGroups = filter (\s -> '`' `elem` s) $ group str let longest = if null tickGroups @@ -960,9 +976,13 @@ inlineToMarkdown opts (Code attr str) = do else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs inlineToMarkdown opts (Str str) = do isPlain <- asks envPlain - if isPlain - then return $ text str - else return $ text $ escapeString opts str + let str' = (if isEnabled Ext_smart opts + then unsmartify opts + else id) $ + if isPlain + then str + else escapeString opts str + return $ text str' inlineToMarkdown opts (Math InlineMath str) = case writerHTMLMathMethod opts of WebTeX url -> @@ -976,9 +996,9 @@ inlineToMarkdown opts (Math InlineMath str) = return $ "\\\\(" <> text str <> "\\\\)" | otherwise -> do plain <- asks envPlain - inlineListToMarkdown opts $ - (if plain then makeMathPlainer else id) $ - texMathToInlines InlineMath str + texMathToInlines InlineMath str >>= + inlineListToMarkdown opts . + (if plain then makeMathPlainer else id) inlineToMarkdown opts (Math DisplayMath str) = case writerHTMLMathMethod opts of WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` @@ -991,7 +1011,7 @@ inlineToMarkdown opts (Math DisplayMath str) = | isEnabled Ext_tex_math_double_backslash opts -> return $ "\\\\[" <> text str <> "\\\\]" | otherwise -> (\x -> cr <> x <> cr) `fmap` - inlineListToMarkdown opts (texMathToInlines DisplayMath str) + (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) inlineToMarkdown opts (RawInline f str) = do plain <- asks envPlain if not plain && @@ -1052,7 +1072,7 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - return $ text $ trim $ writeHtmlString def $ Pandoc nullMeta [Plain [lnk]] + (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [lnk]]) | otherwise = do plain <- asks envPlain linktext <- inlineListToMarkdown opts txt @@ -1091,7 +1111,7 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - return $ text $ trim $ writeHtmlString def $ Pandoc nullMeta [Plain [img]] + (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [img]]) | otherwise = do plain <- asks envPlain let txt = if null alternate || alternate == [Str source] @@ -1115,3 +1135,16 @@ makeMathPlainer = walk go where go (Emph xs) = Span nullAttr xs go x = x + +unsmartify :: WriterOptions -> String -> String +unsmartify opts ('\8217':xs) = '\'' : unsmartify opts xs +unsmartify opts ('\8230':xs) = "..." ++ unsmartify opts xs +unsmartify opts ('\8211':xs) + | isEnabled Ext_old_dashes opts = '-' : unsmartify opts xs + | otherwise = "--" ++ unsmartify opts xs +unsmartify opts ('\8212':xs) + | isEnabled Ext_old_dashes opts = "--" ++ unsmartify opts xs + | otherwise = "---" ++ unsmartify opts xs +unsmartify opts (x:xs) = x : unsmartify opts xs +unsmartify _ [] = [] + diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs new file mode 100644 index 000000000..b959ce972 --- /dev/null +++ b/src/Text/Pandoc/Writers/Math.hs @@ -0,0 +1,49 @@ +module Text.Pandoc.Writers.Math + ( texMathToInlines + , convertMath + ) +where + +import Text.Pandoc.Class +import Text.Pandoc.Definition +import Text.TeXMath (Exp, writePandoc, DisplayType(..), readTeX) + +-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. +-- Defaults to raw formula between @$@ or @$$@ characters if entire formula +-- can't be converted. +texMathToInlines :: PandocMonad m + => MathType + -> String -- ^ String to parse (assumes @'\n'@ line endings) + -> m [Inline] +texMathToInlines mt inp = do + res <- convertMath writePandoc mt inp + case res of + Right (Just ils) -> return ils + Right (Nothing) -> do + warning $ "Could not render TeX math as unicode, rendering as raw TeX:\n" ++ inp + return [mkFallback mt inp] + Left il -> return [il] + +mkFallback :: MathType -> String -> Inline +mkFallback mt str = Str (delim ++ str ++ delim) + where delim = case mt of + DisplayMath -> "$$" + InlineMath -> "$" + +-- | Converts a raw TeX math formula using a writer function, +-- issuing a warning and producing a fallback (a raw string) +-- on failure. +convertMath :: PandocMonad m + => (DisplayType -> [Exp] -> a) -> MathType -> String + -> m (Either Inline a) +convertMath writer mt str = do + case writer dt <$> readTeX str of + Right r -> return (Right r) + Left e -> do + warning $ "Could not convert TeX math, rendering as raw TeX:\n" ++ + str ++ "\n" ++ e + return (Left $ mkFallback mt str) + where dt = case mt of + DisplayMath -> DisplayBlock + InlineMath -> DisplayInline + diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 78d4651e7..dc6206e6c 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -42,6 +42,7 @@ import Data.List ( intersect, intercalate ) import Network.URI ( isURI ) import Control.Monad.Reader import Control.Monad.State +import Text.Pandoc.Class (PandocMonad) data WriterState = WriterState { stNotes :: Bool -- True if there are notes @@ -57,8 +58,8 @@ data WriterReader = WriterReader { type MediaWikiWriter = ReaderT WriterReader (State WriterState) -- | Convert Pandoc to MediaWiki. -writeMediaWiki :: WriterOptions -> Pandoc -> String -writeMediaWiki opts document = +writeMediaWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeMediaWiki opts document = return $ let initialState = WriterState { stNotes = False, stOptions = opts } env = WriterReader { options = opts, listLevel = [], useTags = False } in evalState (runReaderT (pandocToMediaWiki document) env) initialState @@ -402,7 +403,7 @@ inlineToMediaWiki (RawInline f str) | f == Format "html" = return str | otherwise = return "" -inlineToMediaWiki (LineBreak) = return "<br />\n" +inlineToMediaWiki LineBreak = return "<br />\n" inlineToMediaWiki SoftBreak = do wrapText <- gets (writerWrapText . stOptions) diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index 87e23aeeb..2421fd94d 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -34,6 +34,7 @@ import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) ) import Data.List ( intersperse ) import Text.Pandoc.Definition import Text.Pandoc.Pretty +import Text.Pandoc.Class (PandocMonad) prettyList :: [Doc] -> Doc prettyList ds = @@ -66,8 +67,8 @@ prettyBlock (Div attr blocks) = prettyBlock block = text $ show block -- | Prettyprint Pandoc document. -writeNative :: WriterOptions -> Pandoc -> String -writeNative opts (Pandoc meta blocks) = +writeNative :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeNative opts (Pandoc meta blocks) = return $ let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index ce4d456a3..5672719f9 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -29,7 +29,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where -import Data.IORef import Data.List ( isPrefixOf ) import Data.Maybe ( fromMaybe ) import Text.XML.Light.Output @@ -38,40 +37,59 @@ import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) ) -import Text.Pandoc.Shared ( stringify, fetchItem', warn, - getDefaultReferenceODT ) +import Text.Pandoc.Shared ( stringify ) import Text.Pandoc.ImageSize import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType ) import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Writers.Shared ( fixDisplayMath ) import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) -import Control.Monad (liftM) +import Control.Monad.State +import Control.Monad.Except (runExceptT) +import Text.Pandoc.Error (PandocError) import Text.Pandoc.XML import Text.Pandoc.Pretty -import qualified Control.Exception as E -import Data.Time.Clock.POSIX ( getPOSIXTime ) import System.FilePath ( takeExtension, takeDirectory, (<.>)) +import Text.Pandoc.Class ( PandocMonad ) +import qualified Text.Pandoc.Class as P + +data ODTState = ODTState { stEntries :: [Entry] + } + +type O m = StateT ODTState m -- | Produce an ODT file from a Pandoc document. -writeODT :: WriterOptions -- ^ Writer options +writeODT :: PandocMonad m + => WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert - -> IO B.ByteString -writeODT opts doc@(Pandoc meta _) = do + -> m B.ByteString +writeODT opts doc = + let initState = ODTState{ stEntries = [] + } + in + evalStateT (pandocToODT opts doc) initState + +-- | Produce an ODT file from a Pandoc document. +pandocToODT :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> O m B.ByteString +pandocToODT opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts let title = docTitle meta refArchive <- - case writerReferenceODT opts of - Just f -> liftM toArchive $ B.readFile f - Nothing -> getDefaultReferenceODT datadir + case writerReferenceDoc opts of + Just f -> liftM toArchive $ lift $ P.readFileLazy f + Nothing -> lift $ (toArchive . B.fromStrict) <$> + P.readDataFile datadir "reference.odt" -- handle formulas and pictures - picEntriesRef <- newIORef ([] :: [Entry]) - doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc - let newContents = writeOpenDocument opts{writerWrapText = WrapNone} doc' - epochtime <- floor `fmap` getPOSIXTime + -- picEntriesRef <- P.newIORef ([] :: [Entry]) + doc' <- walkM (transformPicMath opts) $ walk fixDisplayMath doc + newContents <- lift $ writeOpenDocument opts{writerWrapText = WrapNone} doc' + epochtime <- floor `fmap` (lift P.getPOSIXTime) let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents - picEntries <- readIORef picEntriesRef + picEntries <- gets stEntries let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive @@ -126,18 +144,18 @@ writeODT opts doc@(Pandoc meta _) = do return $ fromArchive archive'' -- | transform both Image and Math elements -transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline -transformPicMath opts entriesRef (Image attr@(id', cls, _) lab (src,t)) = do - res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src +transformPicMath :: PandocMonad m => WriterOptions ->Inline -> O m Inline +transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = do + res <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src case res of - Left (_ :: E.SomeException) -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." + Left (_ :: PandocError) -> do + lift $ P.warning $ "Could not find image `" ++ src ++ "', skipping..." return $ Emph lab Right (img, mbMimeType) -> do (ptX, ptY) <- case imageSize img of Right s -> return $ sizeInPoints s Left msg -> do - warn $ "Could not determine image size in `" ++ + lift $ P.warning $ "Could not determine image size in `" ++ src ++ "': " ++ msg return (100, 100) let dims = @@ -155,28 +173,28 @@ transformPicMath opts entriesRef (Image attr@(id', cls, _) lab (src,t)) = do Just dim -> Just $ Inch $ inInch opts dim Nothing -> Nothing let newattr = (id', cls, dims) - entries <- readIORef entriesRef + entries <- gets stEntries let extension = fromMaybe (takeExtension $ takeWhile (/='?') src) (mbMimeType >>= extensionFromMimeType) let newsrc = "Pictures/" ++ show (length entries) <.> extension let toLazy = B.fromChunks . (:[]) - epochtime <- floor `fmap` getPOSIXTime + epochtime <- floor `fmap` (lift P.getPOSIXTime) let entry = toEntry newsrc epochtime $ toLazy img - modifyIORef entriesRef (entry:) + modify $ \st -> st{ stEntries = entry : entries } return $ Image newattr lab (newsrc, t) -transformPicMath _ entriesRef (Math t math) = do - entries <- readIORef entriesRef +transformPicMath _ (Math t math) = do + entries <- gets stEntries let dt = if t == InlineMath then DisplayInline else DisplayBlock case writeMathML dt <$> readTeX math of Left _ -> return $ Math t math Right r -> do let conf = useShortEmptyTags (const False) defaultConfigPP let mathml = ppcTopElement conf r - epochtime <- floor `fmap` getPOSIXTime + epochtime <- floor `fmap` (lift $ P.getPOSIXTime) let dirname = "Formula-" ++ show (length entries) ++ "/" let fname = dirname ++ "content.xml" let entry = toEntry fname epochtime (fromStringLazy mathml) - modifyIORef entriesRef (entry:) + modify $ \st -> st{ stEntries = entry : entries } return $ RawInline (Format "opendocument") $ render Nothing $ inTags False "draw:frame" [("text:anchor-type", if t == DisplayMath @@ -189,4 +207,4 @@ transformPicMath _ entriesRef (Math t math) = do , ("xlink:show", "embed") , ("xlink:actuate", "onLoad")] -transformPicMath _ _ x = return x +transformPicMath _ x = return x diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 20c2c5cbc..bc0cfc300 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -35,34 +35,37 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Writers.HTML (writeHtml5String) import Text.Pandoc.Writers.Markdown (writeMarkdown) import Text.Pandoc.Pretty import Text.Pandoc.Compat.Time import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Error +import Control.Monad.Except (throwError) +import Text.Pandoc.Class (PandocMonad) -- | Convert Pandoc document to string in OPML format. -writeOPML :: WriterOptions -> Pandoc -> String -writeOPML opts (Pandoc meta blocks) = +writeOPML :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeOPML opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing meta' = B.setMeta "date" (B.str $ convertDate $ docDate meta) meta - Just metadata = metaToJSON opts - (Just . writeMarkdown def . Pandoc nullMeta) - (Just . trimr . writeMarkdown def . Pandoc nullMeta . - (\ils -> [Plain ils])) - meta' - main = render colwidth $ vcat (map (elementToOPML opts) elements) - context = defField "body" main metadata - in case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + metadata <- metaToJSON opts + (writeMarkdown def . Pandoc nullMeta) + (\ils -> trimr <$> (writeMarkdown def $ Pandoc nullMeta [Plain ils])) + meta' + main <- (render colwidth . vcat) <$> (mapM (elementToOPML opts) elements) + let context = defField "body" main metadata + return $ case writerTemplate opts of + Nothing -> main + Just tpl -> renderTemplate' tpl context -writeHtmlInlines :: [Inline] -> String -writeHtmlInlines ils = trim $ writeHtmlString def - $ Pandoc nullMeta [Plain ils] + +writeHtmlInlines :: PandocMonad m => [Inline] -> m String +writeHtmlInlines ils = + trim <$> (writeHtml5String def $ Pandoc nullMeta [Plain ils]) -- date format: RFC 822: Thu, 14 Jul 2005 23:41:05 GMT showDateTimeRFC822 :: UTCTime -> String @@ -78,17 +81,23 @@ convertDate ils = maybe "" showDateTimeRFC822 $ defaultTimeLocale "%F" =<< (normalizeDate $ stringify ils) -- | Convert an Element to OPML. -elementToOPML :: WriterOptions -> Element -> Doc -elementToOPML _ (Blk _) = empty -elementToOPML opts (Sec _ _num _ title elements) = - let isBlk (Blk _) = True +elementToOPML :: PandocMonad m => WriterOptions -> Element -> m Doc +elementToOPML _ (Blk _) = return empty +elementToOPML opts (Sec _ _num _ title elements) = do + let isBlk :: Element -> Bool + isBlk (Blk _) = True isBlk _ = False - fromBlk (Blk x) = x - fromBlk _ = error "fromBlk called on non-block" + + fromBlk :: PandocMonad m => Element -> m Block + fromBlk (Blk x) = return x + fromBlk _ = throwError $ PandocSomeError "fromBlk called on non-block" + (blocks, rest) = span isBlk elements - attrs = [("text", writeHtmlInlines title)] ++ - [("_note", writeMarkdown def (Pandoc nullMeta - (map fromBlk blocks))) - | not (null blocks)] - in inTags True "outline" attrs $ - vcat (map (elementToOPML opts) rest) + htmlIls <- writeHtmlInlines title + md <- if null blocks + then return [] + else do blks <- mapM fromBlk blocks + writeMarkdown def $ Pandoc nullMeta blks + let attrs = [("text", htmlIls)] ++ [("_note", md) | not (null blocks)] + o <- mapM (elementToOPML opts) rest + return $ inTags True "outline" attrs $ vcat o diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 8f0e037c5..59470c2f9 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Options import Text.Pandoc.XML import Text.Pandoc.Shared (linesToPara) import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Writers.Math import Text.Pandoc.Pretty import Text.Printf ( printf ) import Control.Arrow ( (***), (>>>) ) @@ -46,6 +46,7 @@ import qualified Data.Map as Map import Text.Pandoc.Writers.Shared import Data.List (sortBy) import Data.Ord (comparing) +import Text.Pandoc.Class (PandocMonad) -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block @@ -56,6 +57,8 @@ plainToPara x = x -- OpenDocument writer -- +type OD m = StateT WriterState m + data WriterState = WriterState { stNotes :: [Doc] , stTableStyles :: [Doc] @@ -88,40 +91,40 @@ defaultWriterState = when :: Bool -> Doc -> Doc when p a = if p then a else empty -addTableStyle :: Doc -> State WriterState () +addTableStyle :: PandocMonad m => Doc -> OD m () addTableStyle i = modify $ \s -> s { stTableStyles = i : stTableStyles s } -addNote :: Doc -> State WriterState () +addNote :: PandocMonad m => Doc -> OD m () addNote i = modify $ \s -> s { stNotes = i : stNotes s } -addParaStyle :: Doc -> State WriterState () +addParaStyle :: PandocMonad m => Doc -> OD m () addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s } -addTextStyle :: Set.Set TextStyle -> (String, Doc) -> State WriterState () +addTextStyle :: PandocMonad m => Set.Set TextStyle -> (String, Doc) -> OD m () addTextStyle attrs i = modify $ \s -> s { stTextStyles = Map.insert attrs i (stTextStyles s) } -addTextStyleAttr :: TextStyle -> State WriterState () +addTextStyleAttr :: PandocMonad m => TextStyle -> OD m () addTextStyleAttr t = modify $ \s -> s { stTextStyleAttr = Set.insert t (stTextStyleAttr s) } -increaseIndent :: State WriterState () +increaseIndent :: PandocMonad m => OD m () increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s } -resetIndent :: State WriterState () +resetIndent :: PandocMonad m => OD m () resetIndent = modify $ \s -> s { stIndentPara = (stIndentPara s) - 1 } -inTightList :: State WriterState a -> State WriterState a +inTightList :: PandocMonad m => OD m a -> OD m a inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r -> modify (\s -> s { stTight = False }) >> return r -setInDefinitionList :: Bool -> State WriterState () +setInDefinitionList :: PandocMonad m => Bool -> OD m () setInDefinitionList b = modify $ \s -> s { stInDefinition = b } -setFirstPara :: State WriterState () +setFirstPara :: PandocMonad m => OD m () setFirstPara = modify $ \s -> s { stFirstPara = True } -inParagraphTags :: Doc -> State WriterState Doc +inParagraphTags :: PandocMonad m => Doc -> OD m Doc inParagraphTags d | isEmpty d = return empty inParagraphTags d = do b <- gets stFirstPara @@ -137,7 +140,7 @@ inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)] inSpanTags :: String -> Doc -> Doc inSpanTags s = inTags False "text:span" [("text:style-name",s)] -withTextStyle :: TextStyle -> State WriterState a -> State WriterState a +withTextStyle :: PandocMonad m => TextStyle -> OD m a -> OD m a withTextStyle s f = do oldTextStyleAttr <- gets stTextStyleAttr addTextStyleAttr s @@ -145,7 +148,7 @@ withTextStyle s f = do modify $ \st -> st{ stTextStyleAttr = oldTextStyleAttr } return res -inTextStyle :: Doc -> State WriterState Doc +inTextStyle :: PandocMonad m => Doc -> OD m Doc inTextStyle d = do at <- gets stTextStyleAttr if Set.null at @@ -166,7 +169,7 @@ inTextStyle d = do return $ inTags False "text:span" [("text:style-name",styleName)] d -inHeaderTags :: Int -> Doc -> State WriterState Doc +inHeaderTags :: PandocMonad m => Int -> Doc -> OD m Doc inHeaderTags i d = return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i) , ("text:outline-level", show i)] d @@ -189,13 +192,13 @@ handleSpaces s rm [] = empty -- | Convert Pandoc document to string in OpenDocument format. -writeOpenDocument :: WriterOptions -> Pandoc -> String -writeOpenDocument opts (Pandoc meta blocks) = +writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeOpenDocument opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - render' = render colwidth - ((body, metadata),s) = flip runState + let render' = render colwidth + ((body, metadata),s) <- flip runStateT defaultWriterState $ do m <- metaToJSON opts (fmap (render colwidth) . blocksToOpenDocument opts) @@ -203,38 +206,41 @@ writeOpenDocument opts (Pandoc meta blocks) = meta b <- render' `fmap` blocksToOpenDocument opts blocks return (b, m) - styles = stTableStyles s ++ stParaStyles s ++ + let styles = stTableStyles s ++ stParaStyles s ++ map snd (reverse $ sortBy (comparing fst) $ Map.elems (stTextStyles s)) listStyle (n,l) = inTags True "text:list-style" [("style:name", "L" ++ show n)] (vcat l) - listStyles = map listStyle (stListStyles s) - automaticStyles = vcat $ reverse $ styles ++ listStyles - context = defField "body" body + let listStyles = map listStyle (stListStyles s) + let automaticStyles = vcat $ reverse $ styles ++ listStyles + let context = defField "body" body $ defField "automatic-styles" (render' automaticStyles) $ metadata - in case writerTemplate opts of - Nothing -> body - Just tpl -> renderTemplate' tpl context + return $ case writerTemplate opts of + Nothing -> body + Just tpl -> renderTemplate' tpl context -withParagraphStyle :: WriterOptions -> String -> [Block] -> State WriterState Doc +withParagraphStyle :: PandocMonad m + => WriterOptions -> String -> [Block] -> OD m Doc withParagraphStyle o s (b:bs) | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l | otherwise = go =<< blockToOpenDocument o b where go i = (<>) i <$> withParagraphStyle o s bs withParagraphStyle _ _ [] = return empty -inPreformattedTags :: String -> State WriterState Doc +inPreformattedTags :: PandocMonad m => String -> OD m Doc inPreformattedTags s = do n <- paraStyle [("style:parent-style-name","Preformatted_20_Text")] return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s -orderedListToOpenDocument :: WriterOptions -> Int -> [[Block]] -> State WriterState Doc +orderedListToOpenDocument :: PandocMonad m + => WriterOptions -> Int -> [[Block]] -> OD m Doc orderedListToOpenDocument o pn bs = vcat . map (inTagsIndented "text:list-item") <$> mapM (orderedItemToOpenDocument o pn . map plainToPara) bs -orderedItemToOpenDocument :: WriterOptions -> Int -> [Block] -> State WriterState Doc +orderedItemToOpenDocument :: PandocMonad m + => WriterOptions -> Int -> [Block] -> OD m Doc orderedItemToOpenDocument o n (b:bs) | OrderedList a l <- b = newLevel a l | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show n) <$> inlinesToOpenDocument o l @@ -254,7 +260,8 @@ isTightList (b:_) | Plain {} : _ <- b = True | otherwise = False -newOrderedListStyle :: Bool -> ListAttributes -> State WriterState (Int,Int) +newOrderedListStyle :: PandocMonad m + => Bool -> ListAttributes -> OD m (Int,Int) newOrderedListStyle b a = do ln <- (+) 1 . length <$> gets stListStyles let nbs = orderedListLevelStyle a (ln, []) @@ -262,7 +269,8 @@ newOrderedListStyle b a = do modify $ \s -> s { stListStyles = nbs : stListStyles s } return (ln,pn) -bulletListToOpenDocument :: WriterOptions -> [[Block]] -> State WriterState Doc +bulletListToOpenDocument :: PandocMonad m + => WriterOptions -> [[Block]] -> OD m Doc bulletListToOpenDocument o b = do ln <- (+) 1 . length <$> gets stListStyles (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln @@ -270,11 +278,13 @@ bulletListToOpenDocument o b = do is <- listItemsToOpenDocument ("P" ++ show pn) o b return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is -listItemsToOpenDocument :: String -> WriterOptions -> [[Block]] -> State WriterState Doc +listItemsToOpenDocument :: PandocMonad m + => String -> WriterOptions -> [[Block]] -> OD m Doc listItemsToOpenDocument s o is = vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is -deflistItemToOpenDocument :: WriterOptions -> ([Inline],[[Block]]) -> State WriterState Doc +deflistItemToOpenDocument :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) -> OD m Doc deflistItemToOpenDocument o (t,d) = do let ts = if isTightList d then "Definition_20_Term_20_Tight" else "Definition_20_Term" @@ -284,7 +294,8 @@ deflistItemToOpenDocument o (t,d) = do d' <- liftM vcat $ mapM (withParagraphStyle o ds . (map plainToPara)) d return $ t' $$ d' -inBlockQuote :: WriterOptions -> Int -> [Block] -> State WriterState Doc +inBlockQuote :: PandocMonad m + => WriterOptions -> Int -> [Block] -> OD m Doc inBlockQuote o i (b:bs) | BlockQuote l <- b = do increaseIndent ni <- paraStyle @@ -296,11 +307,11 @@ inBlockQuote o i (b:bs) inBlockQuote _ _ [] = resetIndent >> return empty -- | Convert a list of Pandoc blocks to OpenDocument. -blocksToOpenDocument :: WriterOptions -> [Block] -> State WriterState Doc +blocksToOpenDocument :: PandocMonad m => WriterOptions -> [Block] -> OD m Doc blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b -- | Convert a Pandoc block element to OpenDocument. -blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc +blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m Doc blockToOpenDocument o bs | Plain b <- bs = if null b then return empty @@ -370,17 +381,23 @@ blockToOpenDocument o bs captionDoc <- withParagraphStyle o "FigureCaption" [Para caption] return $ imageDoc $$ captionDoc -colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc +colHeadsToOpenDocument :: PandocMonad m + => WriterOptions -> String -> [String] -> [[Block]] + -> OD m Doc colHeadsToOpenDocument o tn ns hs = inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$> mapM (tableItemToOpenDocument o tn) (zip ns hs) -tableRowToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc +tableRowToOpenDocument :: PandocMonad m + => WriterOptions -> String -> [String] -> [[Block]] + -> OD m Doc tableRowToOpenDocument o tn ns cs = inTagsIndented "table:table-row" . vcat <$> mapM (tableItemToOpenDocument o tn) (zip ns cs) -tableItemToOpenDocument :: WriterOptions -> String -> (String,[Block])-> State WriterState Doc +tableItemToOpenDocument :: PandocMonad m + => WriterOptions -> String -> (String,[Block]) + -> OD m Doc tableItemToOpenDocument o tn (n,i) = let a = [ ("table:style-name" , tn ++ ".A1" ) , ("office:value-type", "string" ) @@ -389,10 +406,10 @@ tableItemToOpenDocument o tn (n,i) = withParagraphStyle o n (map plainToPara i) -- | Convert a list of inline elements to OpenDocument. -inlinesToOpenDocument :: WriterOptions -> [Inline] -> State WriterState Doc +inlinesToOpenDocument :: PandocMonad m => WriterOptions -> [Inline] -> OD m Doc inlinesToOpenDocument o l = hcat <$> toChunks o l -toChunks :: WriterOptions -> [Inline] -> State WriterState [Doc] +toChunks :: PandocMonad m => WriterOptions -> [Inline] -> OD m [Doc] toChunks _ [] = return [] toChunks o (x : xs) | isChunkable x = do @@ -413,7 +430,7 @@ isChunkable SoftBreak = True isChunkable _ = False -- | Convert an inline element to OpenDocument. -inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc +inlineToOpenDocument :: PandocMonad m => WriterOptions -> Inline -> OD m Doc inlineToOpenDocument o ils = case ils of Space -> return space @@ -432,7 +449,8 @@ inlineToOpenDocument o ils SmallCaps l -> withTextStyle SmallC $ inlinesToOpenDocument o l Quoted t l -> inQuotes t <$> inlinesToOpenDocument o l Code _ s -> inlinedCode $ preformatted s - Math t s -> inlinesToOpenDocument o (texMathToInlines t s) + Math t s -> lift (texMathToInlines t s) >>= + inlinesToOpenDocument o Cite _ l -> inlinesToOpenDocument o l RawInline f s -> if f == Format "opendocument" then return $ text s @@ -473,18 +491,18 @@ inlineToOpenDocument o ils addNote nn return nn -bulletListStyle :: Int -> State WriterState (Int,(Int,[Doc])) -bulletListStyle l = - let doStyles i = inTags True "text:list-level-style-bullet" - [ ("text:level" , show (i + 1) ) - , ("text:style-name" , "Bullet_20_Symbols") - , ("style:num-suffix", "." ) - , ("text:bullet-char", [bulletList !! i] ) - ] (listLevelStyle (1 + i)) - bulletList = map chr $ cycle [8226,8227,8259] - listElStyle = map doStyles [0..9] - in do pn <- paraListStyle l - return (pn, (l, listElStyle)) +bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc])) +bulletListStyle l = do + let doStyles i = inTags True "text:list-level-style-bullet" + [ ("text:level" , show (i + 1) ) + , ("text:style-name" , "Bullet_20_Symbols") + , ("style:num-suffix", "." ) + , ("text:bullet-char", [bulletList !! i] ) + ] (listLevelStyle (1 + i)) + bulletList = map chr $ cycle [8226,8227,8259] + listElStyle = map doStyles [0..9] + pn <- paraListStyle l + return (pn, (l, listElStyle)) orderedListLevelStyle :: ListAttributes -> (Int, [Doc]) -> (Int,[Doc]) orderedListLevelStyle (s,n, d) (l,ls) = @@ -538,10 +556,10 @@ tableStyle num wcs = columnStyles = map colStyle wcs in table $$ vcat columnStyles $$ cellStyle -paraStyle :: [(String,String)] -> State WriterState Int +paraStyle :: PandocMonad m => [(String,String)] -> OD m Int paraStyle attrs = do pn <- (+) 1 . length <$> gets stParaStyles - i <- (*) 0.5 . fromIntegral <$> gets stIndentPara :: State WriterState Double + i <- (*) (0.5 :: Double) . fromIntegral <$> gets stIndentPara b <- gets stInDefinition t <- gets stTight let styleAttr = [ ("style:name" , "P" ++ show pn) @@ -562,7 +580,7 @@ paraStyle attrs = do addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps return pn -paraListStyle :: Int -> State WriterState Int +paraListStyle :: PandocMonad m => Int -> OD m Int paraListStyle l = paraStyle [("style:parent-style-name","Text_20_body") ,("style:list-style-name", "L" ++ show l )] diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 4302459cc..09c924397 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -42,6 +42,7 @@ import Text.Pandoc.Templates (renderTemplate') import Data.Char ( isAlphaNum, toLower ) import Data.List ( isPrefixOf, intersect, intersperse, partition, transpose ) import Control.Monad.State +import Text.Pandoc.Class (PandocMonad) data WriterState = WriterState { stNotes :: [[Block]] @@ -52,8 +53,8 @@ data WriterState = } -- | Convert Pandoc to Org. -writeOrg :: WriterOptions -> Pandoc -> String -writeOrg opts document = +writeOrg :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeOrg opts document = return $ let st = WriterState { stNotes = [], stLinks = False, stImages = False, stHasMath = False, stOptions = opts } @@ -349,7 +350,7 @@ inlineToOrg (RawInline f@(Format f') str) = return $ if isRawFormat f then text str else "@@" <> text f' <> ":" <> text str <> "@@" -inlineToOrg (LineBreak) = return (text "\\\\" <> cr) +inlineToOrg LineBreak = return (text "\\\\" <> cr) inlineToOrg Space = return space inlineToOrg SoftBreak = do wrapText <- gets (writerWrapText . stOptions) diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 064434483..ee3ecd9cd 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -44,6 +44,7 @@ import Network.URI (isURI) import Text.Pandoc.Pretty import Control.Monad.State import Data.Char (isSpace, toLower) +import Text.Pandoc.Class (PandocMonad) type Refs = [([Inline], Target)] @@ -58,8 +59,8 @@ data WriterState = } -- | Convert Pandoc to RST. -writeRST :: WriterOptions -> Pandoc -> String -writeRST opts document = +writeRST :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeRST opts document = return $ let st = WriterState { stNotes = [], stLinks = [], stImages = [], stHasMath = False, stHasRawTeX = False, stOptions = opts, diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 8f942b4d0..77f01e4a1 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE ScopedTypeVariables #-} {- Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> @@ -27,38 +28,44 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to RTF (rich text format). -} -module Text.Pandoc.Writers.RTF ( writeRTF, writeRTFWithEmbeddedImages ) where +module Text.Pandoc.Writers.RTF ( writeRTF + ) where import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared -import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Writers.Math import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Walk +import Text.Pandoc.Class (warning) import Data.List ( isSuffixOf, intercalate ) import Data.Char ( ord, chr, isDigit ) import qualified Data.ByteString as B import qualified Data.Map as M import Text.Printf ( printf ) import Text.Pandoc.ImageSize +import Control.Monad.Except (throwError, runExceptT, lift) +import Text.Pandoc.Error +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P -- | Convert Image inlines into a raw RTF embedded image, read from a file, -- or a MediaBag, or the internet. -- If file not found or filetype not jpeg or png, leave the inline unchanged. -rtfEmbedImage :: WriterOptions -> Inline -> IO Inline +rtfEmbedImage :: PandocMonad m => WriterOptions -> Inline -> m Inline rtfEmbedImage opts x@(Image attr _ (src,_)) = do - result <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src + result <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src case result of Right (imgdata, Just mime) | mime == "image/jpeg" || mime == "image/png" -> do let bytes = map (printf "%02x") $ B.unpack imgdata - let filetype = case mime of - "image/jpeg" -> "\\jpegblip" - "image/png" -> "\\pngblip" - _ -> error "Unknown file type" + filetype <- case mime of + "image/jpeg" -> return "\\jpegblip" + "image/png" -> return "\\pngblip" + _ -> throwError $ PandocSomeError "Unknown file type" sizeSpec <- case imageSize imgdata of Left msg -> do - warn $ "Could not determine image size in `" ++ + warning $ "Could not determine image size in `" ++ src ++ "': " ++ msg return "" Right sz -> return $ "\\picw" ++ show xpx ++ @@ -70,56 +77,61 @@ rtfEmbedImage opts x@(Image attr _ (src,_)) = do (xpt, ypt) = desiredSizeInPoints opts attr sz let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++ concat bytes ++ "}" - return $ if B.null imgdata - then x - else RawInline (Format "rtf") raw - _ -> return x + if B.null imgdata + then do + warning $ "Image " ++ src ++ " contained no data, skipping." + return x + else return $ RawInline (Format "rtf") raw + | otherwise -> do + warning $ "Image " ++ src ++ " is not a jpeg or png, skipping." + return x + Right (_, Nothing) -> do + warning $ "Could not determine image type for " ++ src ++ ", skipping." + return x + Left ( e :: PandocError ) -> do + warning $ "Could not fetch image " ++ src ++ "\n" ++ show e + return x rtfEmbedImage _ x = return x --- | Convert Pandoc to a string in rich text format, with --- images embedded as encoded binary data. -writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String -writeRTFWithEmbeddedImages options doc = - writeRTF options `fmap` walkM (rtfEmbedImage options) doc - -- | Convert Pandoc to a string in rich text format. -writeRTF :: WriterOptions -> Pandoc -> String -writeRTF options (Pandoc meta@(Meta metamap) blocks) = +writeRTF :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeRTF options doc = do + -- handle images + Pandoc meta@(Meta metamap) blocks <- walkM (rtfEmbedImage options) doc let spacer = not $ all null $ docTitle meta : docDate meta : docAuthors meta - toPlain (MetaBlocks [Para ils]) = MetaInlines ils + let toPlain (MetaBlocks [Para ils]) = MetaInlines ils toPlain x = x - -- adjust title, author, date so we don't get para inside para - meta' = Meta $ M.adjust toPlain "title" + -- adjust title, author, date so we don't get para inside para + let meta' = Meta $ M.adjust toPlain "title" . M.adjust toPlain "author" . M.adjust toPlain "date" $ metamap - Just metadata = metaToJSON options - (Just . concatMap (blockToRTF 0 AlignDefault)) - (Just . inlineListToRTF) + metadata <- metaToJSON options + (fmap concat . mapM (blockToRTF 0 AlignDefault)) + (inlinesToRTF) meta' - body = concatMap (blockToRTF 0 AlignDefault) blocks - isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options + body <- blocksToRTF 0 AlignDefault blocks + let isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options isTOCHeader _ = False - context = defField "body" body + toc <- tableOfContents $ filter isTOCHeader blocks + let context = defField "body" body $ defField "spacer" spacer $ (if writerTableOfContents options - then defField "toc" - (tableOfContents $ filter isTOCHeader blocks) + then defField "toc" toc else id) $ metadata - in case writerTemplate options of + return $ case writerTemplate options of Just tpl -> renderTemplate' tpl context Nothing -> case reverse body of ('\n':_) -> body _ -> body ++ "\n" -- | Construct table of contents from list of header blocks. -tableOfContents :: [Block] -> String -tableOfContents headers = - let contentsTree = hierarchicalize headers - in concatMap (blockToRTF 0 AlignDefault) $ - [Header 1 nullAttr [Str "Contents"], - BulletList (map elementToListItem contentsTree)] +tableOfContents :: PandocMonad m => [Block] -> m String +tableOfContents headers = do + let contents = map elementToListItem $ hierarchicalize headers + blocksToRTF 0 AlignDefault $ + [Header 1 nullAttr [Str "Contents"], BulletList contents] elementToListItem :: Element -> [Block] elementToListItem (Blk _) = [] @@ -221,66 +233,81 @@ orderedMarkers indent (start, style, delim) = _ -> orderedListMarkers (start, LowerAlpha, Period) else orderedListMarkers (start, style, delim) +blocksToRTF :: PandocMonad m + => Int + -> Alignment + -> [Block] + -> m String +blocksToRTF indent align = fmap concat . mapM (blockToRTF indent align) + -- | Convert Pandoc block element to RTF. -blockToRTF :: Int -- ^ indent level +blockToRTF :: PandocMonad m + => Int -- ^ indent level -> Alignment -- ^ alignment -> Block -- ^ block to convert - -> String -blockToRTF _ _ Null = "" + -> m String +blockToRTF _ _ Null = return "" blockToRTF indent alignment (Div _ bs) = - concatMap (blockToRTF indent alignment) bs + blocksToRTF indent alignment bs blockToRTF indent alignment (Plain lst) = - rtfCompact indent 0 alignment $ inlineListToRTF lst + rtfCompact indent 0 alignment <$> inlinesToRTF lst blockToRTF indent alignment (Para lst) = - rtfPar indent 0 alignment $ inlineListToRTF lst + rtfPar indent 0 alignment <$> inlinesToRTF lst blockToRTF indent alignment (LineBlock lns) = blockToRTF indent alignment $ linesToPara lns blockToRTF indent alignment (BlockQuote lst) = - concatMap (blockToRTF (indent + indentIncrement) alignment) lst + blocksToRTF (indent + indentIncrement) alignment lst blockToRTF indent _ (CodeBlock _ str) = - rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) + return $ rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) blockToRTF _ _ (RawBlock f str) - | f == Format "rtf" = str - | otherwise = "" -blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ - concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst -blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ - zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst -blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ - concatMap (definitionListItemToRTF alignment indent) lst -blockToRTF indent _ HorizontalRule = + | f == Format "rtf" = return str + | otherwise = return "" +blockToRTF indent alignment (BulletList lst) = (spaceAtEnd . concat) <$> + mapM (listItemToRTF alignment indent (bulletMarker indent)) lst +blockToRTF indent alignment (OrderedList attribs lst) = + (spaceAtEnd . concat) <$> + mapM (\(x,y) -> listItemToRTF alignment indent x y) + (zip (orderedMarkers indent attribs) lst) +blockToRTF indent alignment (DefinitionList lst) = (spaceAtEnd . concat) <$> + mapM (definitionListItemToRTF alignment indent) lst +blockToRTF indent _ HorizontalRule = return $ rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" -blockToRTF indent alignment (Header level _ lst) = rtfPar indent 0 alignment $ - "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst -blockToRTF indent alignment (Table caption aligns sizes headers rows) = - (if all null headers - then "" - else tableRowToRTF True indent aligns sizes headers) ++ - concatMap (tableRowToRTF False indent aligns sizes) rows ++ - rtfPar indent 0 alignment (inlineListToRTF caption) +blockToRTF indent alignment (Header level _ lst) = do + contents <- inlinesToRTF lst + return $ rtfPar indent 0 alignment $ + "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ contents +blockToRTF indent alignment (Table caption aligns sizes headers rows) = do + caption' <- inlinesToRTF caption + header' <- if all null headers + then return "" + else tableRowToRTF True indent aligns sizes headers + rows' <- concat <$> mapM (tableRowToRTF False indent aligns sizes) rows + return $ header' ++ rows' ++ rtfPar indent 0 alignment caption' -tableRowToRTF :: Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> String -tableRowToRTF header indent aligns sizes' cols = +tableRowToRTF :: PandocMonad m + => Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> m String +tableRowToRTF header indent aligns sizes' cols = do let totalTwips = 6 * 1440 -- 6 inches - sizes = if all (== 0) sizes' + let sizes = if all (== 0) sizes' then take (length cols) $ repeat (1.0 / fromIntegral (length cols)) else sizes' - columns = concat $ zipWith (tableItemToRTF indent) aligns cols - rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) + columns <- concat <$> mapM (\(x,y) -> tableItemToRTF indent x y) + (zip aligns cols) + let rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) (0 :: Integer) sizes - cellDefs = map (\edge -> (if header + let cellDefs = map (\edge -> (if header then "\\clbrdrb\\brdrs" else "") ++ "\\cellx" ++ show edge) rightEdges - start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ + let start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ "\\trkeep\\intbl\n{\n" - end = "}\n\\intbl\\row}\n" - in start ++ columns ++ end + let end = "}\n\\intbl\\row}\n" + return $ start ++ columns ++ end -tableItemToRTF :: Int -> Alignment -> [Block] -> String -tableItemToRTF indent alignment item = - let contents = concatMap (blockToRTF indent alignment) item - in "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" +tableItemToRTF :: PandocMonad m => Int -> Alignment -> [Block] -> m String +tableItemToRTF indent alignment item = do + contents <- blocksToRTF indent alignment item + return $ "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. @@ -291,73 +318,92 @@ spaceAtEnd str = else str -- | Convert list item (list of blocks) to RTF. -listItemToRTF :: Alignment -- ^ alignment +listItemToRTF :: PandocMonad m + => Alignment -- ^ alignment -> Int -- ^ indent level -> String -- ^ list start marker -> [Block] -- ^ list item (list of blocks) - -> [Char] -listItemToRTF alignment indent marker [] = + -> m String +listItemToRTF alignment indent marker [] = return $ rtfCompact (indent + listIncrement) (0 - listIncrement) alignment (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") -listItemToRTF alignment indent marker list = - let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list - listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++ - show listIncrement ++ "\\tab" - insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = +listItemToRTF alignment indent marker list = do + (first:rest) <- mapM (blockToRTF (indent + listIncrement) alignment) list + let listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ + "\\tx" ++ show listIncrement ++ "\\tab" + let insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = listMarker ++ dropWhile isDigit xs insertListMarker ('\\':'f':'i':d:xs) | isDigit d = listMarker ++ dropWhile isDigit xs insertListMarker (x:xs) = x : insertListMarker xs insertListMarker [] = [] - -- insert the list marker into the (processed) first block - in insertListMarker first ++ concat rest + -- insert the list marker into the (processed) first block + return $ insertListMarker first ++ concat rest -- | Convert definition list item (label, list of blocks) to RTF. -definitionListItemToRTF :: Alignment -- ^ alignment +definitionListItemToRTF :: PandocMonad m + => Alignment -- ^ alignment -> Int -- ^ indent level -> ([Inline],[[Block]]) -- ^ list item (list of blocks) - -> [Char] -definitionListItemToRTF alignment indent (label, defs) = - let labelText = blockToRTF indent alignment (Plain label) - itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) $ - concat defs - in labelText ++ itemsText + -> m String +definitionListItemToRTF alignment indent (label, defs) = do + labelText <- blockToRTF indent alignment (Plain label) + itemsText <- blocksToRTF (indent + listIncrement) alignment (concat defs) + return $ labelText ++ itemsText -- | Convert list of inline items to RTF. -inlineListToRTF :: [Inline] -- ^ list of inlines to convert - -> String -inlineListToRTF lst = concatMap inlineToRTF lst +inlinesToRTF :: PandocMonad m + => [Inline] -- ^ list of inlines to convert + -> m String +inlinesToRTF lst = concat <$> mapM inlineToRTF lst -- | Convert inline item to RTF. -inlineToRTF :: Inline -- ^ inline to convert - -> String -inlineToRTF (Span _ lst) = inlineListToRTF lst -inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (SmallCaps lst) = "{\\scaps " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Quoted SingleQuote lst) = - "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" -inlineToRTF (Quoted DoubleQuote lst) = - "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" -inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" -inlineToRTF (Str str) = stringToRTF str -inlineToRTF (Math t str) = inlineListToRTF $ texMathToInlines t str -inlineToRTF (Cite _ lst) = inlineListToRTF lst +inlineToRTF :: PandocMonad m + => Inline -- ^ inline to convert + -> m String +inlineToRTF (Span _ lst) = inlinesToRTF lst +inlineToRTF (Emph lst) = do + contents <- inlinesToRTF lst + return $ "{\\i " ++ contents ++ "}" +inlineToRTF (Strong lst) = do + contents <- inlinesToRTF lst + return $ "{\\b " ++ contents ++ "}" +inlineToRTF (Strikeout lst) = do + contents <- inlinesToRTF lst + return $ "{\\strike " ++ contents ++ "}" +inlineToRTF (Superscript lst) = do + contents <- inlinesToRTF lst + return $ "{\\super " ++ contents ++ "}" +inlineToRTF (Subscript lst) = do + contents <- inlinesToRTF lst + return $ "{\\sub " ++ contents ++ "}" +inlineToRTF (SmallCaps lst) = do + contents <- inlinesToRTF lst + return $ "{\\scaps " ++ contents ++ "}" +inlineToRTF (Quoted SingleQuote lst) = do + contents <- inlinesToRTF lst + return $ "\\u8216'" ++ contents ++ "\\u8217'" +inlineToRTF (Quoted DoubleQuote lst) = do + contents <- inlinesToRTF lst + return $ "\\u8220\"" ++ contents ++ "\\u8221\"" +inlineToRTF (Code _ str) = return $ "{\\f1 " ++ (codeStringToRTF str) ++ "}" +inlineToRTF (Str str) = return $ stringToRTF str +inlineToRTF (Math t str) = texMathToInlines t str >>= inlinesToRTF +inlineToRTF (Cite _ lst) = inlinesToRTF lst inlineToRTF (RawInline f str) - | f == Format "rtf" = str - | otherwise = "" -inlineToRTF (LineBreak) = "\\line " -inlineToRTF SoftBreak = " " -inlineToRTF Space = " " -inlineToRTF (Link _ text (src, _)) = - "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ - "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n" + | f == Format "rtf" = return str + | otherwise = return "" +inlineToRTF (LineBreak) = return "\\line " +inlineToRTF SoftBreak = return " " +inlineToRTF Space = return " " +inlineToRTF (Link _ text (src, _)) = do + contents <- inlinesToRTF text + return $ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ + "\"}}{\\fldrslt{\\ul\n" ++ contents ++ "\n}}}\n" inlineToRTF (Image _ _ (source, _)) = - "{\\cf1 [image: " ++ source ++ "]\\cf0}" -inlineToRTF (Note contents) = - "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ - (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}" + return $ "{\\cf1 [image: " ++ source ++ "]\\cf0}" +inlineToRTF (Note contents) = do + body <- concat <$> mapM (blockToRTF 0 AlignDefault) contents + return $ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ + body ++ "}" diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index 9bd23ac3b..c589c0c36 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -41,6 +41,7 @@ import Text.Pandoc.Highlighting ( languages, languagesByExtension ) import Text.Pandoc.Pretty import Text.Pandoc.ImageSize import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class ( PandocMonad ) -- | Convert list of authors to a docbook <author> section authorToTEI :: WriterOptions -> [Inline] -> B.Inlines @@ -53,8 +54,8 @@ authorToTEI opts name' = inTagsSimple "author" (text $ escapeStringForXML name) -- | Convert Pandoc document to string in Docbook format. -writeTEI :: WriterOptions -> Pandoc -> String -writeTEI opts (Pandoc meta blocks) = +writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeTEI opts (Pandoc meta blocks) = return $ let elements = hierarchicalize blocks colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index f2b9aa15f..a66ffe88b 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -44,6 +44,9 @@ import Text.Pandoc.ImageSize import Network.URI ( isURI, unEscapeString ) import System.FilePath import qualified Data.Set as Set +import Control.Monad.Except (throwError) +import Text.Pandoc.Error +import Text.Pandoc.Class ( PandocMonad) data WriterState = WriterState { stStrikeout :: Bool -- document contains strikeout @@ -59,10 +62,12 @@ data WriterState = - generated .texi files don't work when run through texi2dvi -} +type TI m = StateT WriterState m + -- | Convert Pandoc to Texinfo. -writeTexinfo :: WriterOptions -> Pandoc -> String +writeTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> m String writeTexinfo options document = - evalState (pandocToTexinfo options $ wrapTop document) $ + evalStateT (pandocToTexinfo options $ wrapTop document) $ WriterState { stStrikeout = False, stSuperscript = False, stEscapeComma = False, stSubscript = False, stIdentifiers = Set.empty, stOptions = options} @@ -72,7 +77,7 @@ wrapTop :: Pandoc -> Pandoc wrapTop (Pandoc meta blocks) = Pandoc meta (Header 0 nullAttr (docTitle meta) : blocks) -pandocToTexinfo :: WriterOptions -> Pandoc -> State WriterState String +pandocToTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> TI m String pandocToTexinfo options (Pandoc meta blocks) = do let titlePage = not $ all null $ docTitle meta : docDate meta : docAuthors meta @@ -110,7 +115,7 @@ stringToTexinfo = escapeStringUsing texinfoEscapes , ('\x2019', "'") ] -escapeCommas :: State WriterState Doc -> State WriterState Doc +escapeCommas :: PandocMonad m => TI m Doc -> TI m Doc escapeCommas parser = do oldEscapeComma <- gets stEscapeComma modify $ \st -> st{ stEscapeComma = True } @@ -123,8 +128,9 @@ inCmd :: String -> Doc -> Doc inCmd cmd contents = char '@' <> text cmd <> braces contents -- | Convert Pandoc block element to Texinfo. -blockToTexinfo :: Block -- ^ Block to convert - -> State WriterState Doc +blockToTexinfo :: PandocMonad m + => Block -- ^ Block to convert + -> TI m Doc blockToTexinfo Null = return empty @@ -214,23 +220,27 @@ blockToTexinfo (Header 0 _ lst) = do return $ text "@node Top" $$ text "@top " <> txt <> blankline -blockToTexinfo (Header level _ lst) = do - node <- inlineListForNode lst - txt <- inlineListToTexinfo lst - idsUsed <- gets stIdentifiers - let id' = uniqueIdent lst idsUsed - modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed } - return $ if (level > 0) && (level <= 4) - then blankline <> text "@node " <> node $$ - text (seccmd level) <> txt $$ - text "@anchor" <> braces (text $ '#':id') - else txt - where - seccmd 1 = "@chapter " - seccmd 2 = "@section " - seccmd 3 = "@subsection " - seccmd 4 = "@subsubsection " - seccmd _ = error "illegal seccmd level" +blockToTexinfo (Header level _ lst) + | level < 1 || level > 4 = blockToTexinfo (Para lst) + | otherwise = do + node <- inlineListForNode lst + txt <- inlineListToTexinfo lst + idsUsed <- gets stIdentifiers + let id' = uniqueIdent lst idsUsed + modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed } + sec <- seccmd level + return $ if (level > 0) && (level <= 4) + then blankline <> text "@node " <> node $$ + text sec <> txt $$ + text "@anchor" <> braces (text $ '#':id') + else txt + where + seccmd :: PandocMonad m => Int -> TI m String + seccmd 1 = return "@chapter " + seccmd 2 = return "@section " + seccmd 3 = return "@subsection " + seccmd 4 = return "@subsubsection " + seccmd _ = throwError $ PandocSomeError "illegal seccmd level" blockToTexinfo (Table caption aligns widths heads rows) = do headers <- if all null heads @@ -256,28 +266,32 @@ blockToTexinfo (Table caption aligns widths heads rows) = do inCmd "caption" captionText $$ text "@end float" -tableHeadToTexinfo :: [Alignment] +tableHeadToTexinfo :: PandocMonad m + => [Alignment] -> [[Block]] - -> State WriterState Doc + -> TI m Doc tableHeadToTexinfo = tableAnyRowToTexinfo "@headitem " -tableRowToTexinfo :: [Alignment] +tableRowToTexinfo :: PandocMonad m + => [Alignment] -> [[Block]] - -> State WriterState Doc + -> TI m Doc tableRowToTexinfo = tableAnyRowToTexinfo "@item " -tableAnyRowToTexinfo :: String +tableAnyRowToTexinfo :: PandocMonad m + => String -> [Alignment] -> [[Block]] - -> State WriterState Doc + -> TI m Doc tableAnyRowToTexinfo itemtype aligns cols = zipWithM alignedBlock aligns cols >>= return . (text itemtype $$) . foldl (\row item -> row $$ (if isEmpty row then empty else text " @tab ") <> item) empty -alignedBlock :: Alignment +alignedBlock :: PandocMonad m + => Alignment -> [Block] - -> State WriterState Doc + -> TI m Doc -- XXX @flushleft and @flushright text won't get word wrapped. Since word -- wrapping is more important than alignment, we ignore the alignment. alignedBlock _ = blockListToTexinfo @@ -292,8 +306,9 @@ alignedBlock _ col = blockListToTexinfo col -} -- | Convert Pandoc block elements to Texinfo. -blockListToTexinfo :: [Block] - -> State WriterState Doc +blockListToTexinfo :: PandocMonad m + => [Block] + -> TI m Doc blockListToTexinfo [] = return empty blockListToTexinfo (x:xs) = do x' <- blockToTexinfo x @@ -335,15 +350,17 @@ collectNodes level (x:xs) = _ -> collectNodes level xs -makeMenuLine :: Block - -> State WriterState Doc +makeMenuLine :: PandocMonad m + => Block + -> TI m Doc makeMenuLine (Header _ _ lst) = do txt <- inlineListForNode lst return $ text "* " <> txt <> text "::" -makeMenuLine _ = error "makeMenuLine called with non-Header block" +makeMenuLine _ = throwError $ PandocSomeError "makeMenuLine called with non-Header block" -listItemToTexinfo :: [Block] - -> State WriterState Doc +listItemToTexinfo :: PandocMonad m + => [Block] + -> TI m Doc listItemToTexinfo lst = do contents <- blockListToTexinfo lst let spacer = case reverse lst of @@ -351,8 +368,9 @@ listItemToTexinfo lst = do _ -> empty return $ text "@item" $$ contents <> spacer -defListItemToTexinfo :: ([Inline], [[Block]]) - -> State WriterState Doc +defListItemToTexinfo :: PandocMonad m + => ([Inline], [[Block]]) + -> TI m Doc defListItemToTexinfo (term, defs) = do term' <- inlineListToTexinfo term let defToTexinfo bs = do d <- blockListToTexinfo bs @@ -363,13 +381,15 @@ defListItemToTexinfo (term, defs) = do return $ text "@item " <> term' $+$ vcat defs' -- | Convert list of inline elements to Texinfo. -inlineListToTexinfo :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc +inlineListToTexinfo :: PandocMonad m + => [Inline] -- ^ Inlines to convert + -> TI m Doc inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat -- | Convert list of inline elements to Texinfo acceptable for a node name. -inlineListForNode :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc +inlineListForNode :: PandocMonad m + => [Inline] -- ^ Inlines to convert + -> TI m Doc inlineListForNode = return . text . stringToTexinfo . filter (not . disallowedInNode) . stringify @@ -378,8 +398,9 @@ disallowedInNode :: Char -> Bool disallowedInNode c = c `elem` (".,:()" :: String) -- | Convert inline element to Texinfo -inlineToTexinfo :: Inline -- ^ Inline to convert - -> State WriterState Doc +inlineToTexinfo :: PandocMonad m + => Inline -- ^ Inline to convert + -> TI m Doc inlineToTexinfo (Span _ lst) = inlineListToTexinfo lst diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index f73876fd2..45f1780cf 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -41,6 +41,7 @@ import Text.Pandoc.XML ( escapeStringForXML ) import Data.List ( intercalate ) import Control.Monad.State import Data.Char ( isSpace ) +import Text.Pandoc.Class ( PandocMonad ) data WriterState = WriterState { stNotes :: [String] -- Footnotes @@ -50,8 +51,8 @@ data WriterState = WriterState { } -- | Convert Pandoc to Textile. -writeTextile :: WriterOptions -> Pandoc -> String -writeTextile opts document = +writeTextile :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeTextile opts document = return $ evalState (pandocToTextile opts document) WriterState { stNotes = [], stListLevel = [], stStartNum = Nothing, stUseTags = False } @@ -435,7 +436,7 @@ inlineToTextile opts (RawInline f str) isEnabled Ext_raw_tex opts = return str | otherwise = return "" -inlineToTextile _ (LineBreak) = return "\n" +inlineToTextile _ LineBreak = return "\n" inlineToTextile _ SoftBreak = return " " diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index 423928c8a..42b168418 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -45,6 +45,7 @@ import Network.URI ( isURI ) import Control.Monad ( zipWithM ) import Control.Monad.State ( modify, State, get, evalState ) --import Control.Monad.Reader ( ReaderT, runReaderT, ask, local ) +import Text.Pandoc.Class ( PandocMonad ) data WriterState = WriterState { stItemNum :: Int, @@ -55,8 +56,8 @@ instance Default WriterState where def = WriterState { stItemNum = 1, stIndent = "" } -- | Convert Pandoc to ZimWiki. -writeZimWiki :: WriterOptions -> Pandoc -> String -writeZimWiki opts document = evalState (pandocToZimWiki opts document) (WriterState 1 "") +writeZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeZimWiki opts document = return $ evalState (pandocToZimWiki opts document) (WriterState 1 "") -- | Return ZimWiki representation of document. pandocToZimWiki :: WriterOptions -> Pandoc -> State WriterState String @@ -317,7 +318,7 @@ inlineToZimWiki opts (RawInline f str) | f == Format "html" = do cont <- indentFromHTML opts str; return cont | otherwise = return "" -inlineToZimWiki _ (LineBreak) = return "\n" -- was \\\\ +inlineToZimWiki _ LineBreak = return "\n" -- was \\\\ inlineToZimWiki opts SoftBreak = case writerWrapText opts of |