diff options
author | Igor Pashev <pashev.igor@gmail.com> | 2021-12-29 15:00:59 +0200 |
---|---|---|
committer | Igor Pashev <pashev.igor@gmail.com> | 2021-12-29 15:00:59 +0200 |
commit | b4361712899fd0183fea5513180cb383979616de (patch) | |
tree | 688ab7ee2ab3a8cd32b4e37b506099aec95388f7 /src/Text | |
parent | 726ad97faebe59e024d68d293e663c02bbe423c8 (diff) | |
parent | d960282b105a6469c760b4308a3b81da723b7256 (diff) | |
download | pandoc-b4361712899fd0183fea5513180cb383979616de.tar.gz |
Merge https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text')
130 files changed, 7194 insertions, 4498 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 98b072ffb..9eb9c2cf3 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE TupleSections #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} @@ -25,9 +26,9 @@ module Text.Pandoc.App ( , applyFilters ) where import qualified Control.Exception as E -import Control.Monad ( (>=>), when ) +import Control.Monad ( (>=>), when, forM_ ) import Control.Monad.Trans ( MonadIO(..) ) -import Control.Monad.Except (throwError) +import Control.Monad.Except (throwError, catchError) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Lazy as BL @@ -38,17 +39,20 @@ import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TE +import qualified Data.Text.Encoding as TSE import qualified Data.Text.Encoding.Error as TE import qualified Data.Text.Encoding.Error as TSE import Network.URI (URI (..), parseURI) import System.Directory (doesDirectoryExist) import System.Exit (exitSuccess) -import System.FilePath ( takeBaseName, takeExtension ) +import System.FilePath ( takeBaseName, takeExtension) import System.IO (nativeNewline, stdout) import qualified System.IO as IO (Newline (..)) import Text.Pandoc import Text.Pandoc.Builder (setMeta) -import Text.Pandoc.MIME (getCharset) +import Text.Pandoc.MediaBag (mediaItems) +import Text.Pandoc.MIME (getCharset, MimeType) +import Text.Pandoc.Image (svgToPng) import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths) import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, IpynbOutput (..)) @@ -64,6 +68,7 @@ import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, defaultUserDataDir, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString) import Text.Pandoc.Readers.Markdown (yamlToMeta) +import Text.Pandoc.Readers.Custom (readCustom) import qualified Text.Pandoc.UTF8 as UTF8 #ifndef _WINDOWS import System.Posix.IO (stdOutput) @@ -94,40 +99,24 @@ convertWithOpts opts = do let sources = case optInputFiles opts of Just xs | not (optIgnoreArgs opts) -> xs _ -> ["-"] - - let runIO' :: PandocIO a -> IO a - runIO' f = do - (res, reports) <- runIOorExplode $ do - setTrace (optTrace opts) - setVerbosity verbosity - x <- f - rs <- getLog - return (x, rs) - case optLogFile opts of - Nothing -> return () - Just logfile -> BL.writeFile logfile (encodeLogMessages reports) - let isWarning msg = messageVerbosity msg == WARNING - when (optFailIfWarnings opts && any isWarning reports) $ - E.throwIO PandocFailOnWarningError - return res - - let eol = case optEol opts of - CRLF -> IO.CRLF - LF -> IO.LF - Native -> nativeNewline #ifdef _WINDOWS let istty = True #else istty <- liftIO $ queryTerminal stdOutput #endif - runIO' $ do + res <- runIO $ do + + setTrace (optTrace opts) + setVerbosity verbosity setUserDataDir datadir setResourcePath (optResourcePath opts) setInputFiles (fromMaybe ["-"] (optInputFiles opts)) setOutputFile (optOutputFile opts) + inputs <- readSources sources + -- assign reader and writer based on options and filenames readerName <- case optFrom opts of Just f -> return f @@ -151,21 +140,28 @@ convertWithOpts opts = do <> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`." _ -> return () - (reader :: Reader PandocIO, readerExts) <- getReader readerName - - let convertTabs = tabFilter (if optPreserveTabs opts || - readerNameBase == "t2t" || - readerNameBase == "man" - then 0 - else optTabStop opts) - - - let readSources :: [FilePath] -> PandocIO [(FilePath, Text)] - readSources srcs = - mapM (\fp -> do - t <- readSource fp - return (if fp == "-" then "" else fp, convertTabs t)) srcs + let makeSandboxed pureReader = + let files = maybe id (:) (optReferenceDoc opts) . + maybe id (:) (optEpubMetadata opts) . + maybe id (:) (optEpubCoverImage opts) . + maybe id (:) (optCSL opts) . + maybe id (:) (optCitationAbbreviations opts) $ + optEpubFonts opts ++ + optBibliography opts + in case pureReader of + TextReader r -> TextReader $ \o t -> sandbox files (r o t) + ByteStringReader r + -> ByteStringReader $ \o t -> sandbox files (r o t) + + (reader, readerExts) <- + if ".lua" `T.isSuffixOf` readerName + then return (TextReader (readCustom (T.unpack readerName)), mempty) + else if optSandbox opts + then case runPure (getReader readerName) of + Left e -> throwError e + Right (r, rexts) -> return (makeSandboxed r, rexts) + else getReader readerName outputSettings <- optToOutputSettings opts let format = outputFormat outputSettings @@ -224,7 +220,7 @@ convertWithOpts opts = do case optMetadataFiles opts of [] -> return mempty paths -> mconcat <$> - mapM (\path -> do raw <- readFileLazy path + mapM (\path -> do raw <- readFileStrict path yamlToMeta readerOpts (Just path) raw) paths let transforms = (case optShiftHeadingLevelBy opts of @@ -254,20 +250,11 @@ convertWithOpts opts = do _ -> Format format) :)) $ [] - let sourceToDoc :: [FilePath] -> PandocIO Pandoc - sourceToDoc sources' = - case reader of - TextReader r - | readerNameBase == "json" -> - mconcat <$> mapM (readSource >=> r readerOpts) sources' - | optFileScope opts -> - -- Read source and convert tabs (see #6709) - let readSource' = fmap convertTabs . readSource - in mconcat <$> mapM (readSource' >=> r readerOpts) sources' - | otherwise -> - readSources sources' >>= r readerOpts - ByteStringReader r -> - mconcat <$> mapM (readFile' >=> r readerOpts) sources' + let convertTabs = tabFilter (if optPreserveTabs opts || + readerNameBase == "t2t" || + readerNameBase == "man" + then 0 + else optTabStop opts) when (readerNameBase == "markdown_github" || @@ -293,8 +280,25 @@ convertWithOpts opts = do maybe id (setMeta "citation-abbreviations") (optCitationAbbreviations opts) $ mempty - doc <- sourceToDoc sources >>= - ( (if isJust (optExtractMedia opts) + doc <- (case reader of + TextReader r + | readerNameBase == "json" -> + mconcat <$> + mapM (inputToText convertTabs + >=> r readerOpts . (:[])) inputs + | optFileScope opts -> + mconcat <$> mapM + (inputToText convertTabs + >=> r readerOpts . (:[])) + inputs + | otherwise -> mapM (inputToText convertTabs) inputs + >>= r readerOpts + ByteStringReader r -> + mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs) + >>= + ( (if not (optSandbox opts) && + (isJust (optExtractMedia opts) + || writerNameBase == "docx") -- for fallback pngs then fillMediaBag else return) >=> return . adjustMetadata (metadataFromFile <>) @@ -305,14 +309,28 @@ convertWithOpts opts = do >=> maybe return extractMedia (optExtractMedia opts) ) - case writer of - ByteStringWriter f -> f writerOptions doc >>= writeFnBinary outputFile + when (writerNameBase == "docx" && not (optSandbox opts)) $ do + -- create fallback pngs for svgs + items <- mediaItems <$> getMediaBag + forM_ items $ \(fp, mt, bs) -> + case T.takeWhile (/=';') mt of + "image/svg+xml" -> do + res <- svgToPng (writerDpi writerOptions) bs + case res of + Right bs' -> do + let fp' = fp <> ".png" + insertMedia fp' (Just "image/png") bs' + Left e -> report $ CouldNotConvertImage (T.pack fp) (tshow e) + _ -> return () + + output <- case writer of + ByteStringWriter f -> BinaryOutput <$> f writerOptions doc TextWriter f -> case outputPdfProgram outputSettings of Just pdfProg -> do res <- makePDF pdfProg (optPdfEngineOpts opts) f writerOptions doc case res of - Right pdf -> writeFnBinary outputFile pdf + Right pdf -> return $ BinaryOutput pdf Left err' -> throwError $ PandocPDFError $ TL.toStrict (TE.decodeUtf8With TE.lenientDecode err') @@ -321,11 +339,32 @@ convertWithOpts opts = do | standalone = t | T.null t || T.last t /= '\n' = t <> T.singleton '\n' | otherwise = t - output <- ensureNl <$> f writerOptions doc - writerFn eol outputFile =<< - if optSelfContained opts && htmlFormat format - then makeSelfContained output - else return output + textOutput <- ensureNl <$> f writerOptions doc + if optSelfContained opts && htmlFormat format + then TextOutput <$> makeSelfContained textOutput + else return $ TextOutput textOutput + reports <- getLog + return (output, reports) + + case res of + Left e -> E.throwIO e + Right (output, reports) -> do + case optLogFile opts of + Nothing -> return () + Just logfile -> BL.writeFile logfile (encodeLogMessages reports) + let isWarning msg = messageVerbosity msg == WARNING + when (optFailIfWarnings opts && any isWarning reports) $ + E.throwIO PandocFailOnWarningError + let eol = case optEol opts of + CRLF -> IO.CRLF + LF -> IO.LF + Native -> nativeNewline + case output of + TextOutput t -> writerFn eol outputFile t + BinaryOutput bs -> writeFnBinary outputFile bs + +data PandocOutput = TextOutput Text | BinaryOutput BL.ByteString + deriving (Show) type Transform = Pandoc -> Pandoc @@ -344,49 +383,68 @@ adjustMetadata f (Pandoc meta bs) = Pandoc (f meta) bs applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc applyTransforms transforms d = return $ foldr ($) d transforms -readSource :: FilePath -> PandocIO Text -readSource src = case parseURI src of - Just u | uriScheme u `elem` ["http:","https:"] -> - readURI src - | uriScheme u == "file:" -> liftIO $ - readTextFile (uriPathToPath $ T.pack $ uriPath u) - _ -> liftIO $ readTextFile src - where readTextFile :: FilePath -> IO Text - readTextFile fp = do - bs <- if src == "-" - then BS.getContents - else BS.readFile fp - E.catch (return $! UTF8.toText bs) - (\e -> E.throwIO $ case e of - TSE.DecodeError _ (Just w) -> - case BS.elemIndex w bs of - Just offset -> - PandocUTF8DecodingError (T.pack fp) offset w - _ -> PandocUTF8DecodingError (T.pack fp) 0 w - _ -> PandocAppError (tshow e)) - -readURI :: FilePath -> PandocIO Text -readURI src = do - (bs, mt) <- openURL (T.pack src) +readSources :: (PandocMonad m, MonadIO m) + => [FilePath] -> m [(FilePath, (BS.ByteString, Maybe MimeType))] +readSources srcs = + mapM (\fp -> do t <- readSource fp + return (if fp == "-" then "" else fp, t)) srcs + +readSource :: (PandocMonad m, MonadIO m) + => FilePath -> m (BS.ByteString, Maybe MimeType) +readSource "-" = (,Nothing) <$> readStdinStrict +readSource src = + case parseURI src of + Just u | uriScheme u `elem` ["http:","https:"] -> openURL (T.pack src) + | uriScheme u == "file:" -> + (,Nothing) <$> + readFileStrict (uriPathToPath $ T.pack $ uriPath u) + _ -> (,Nothing) <$> readFileStrict src + +utf8ToText :: PandocMonad m => FilePath -> BS.ByteString -> m Text +utf8ToText fp bs = + case TSE.decodeUtf8' . dropBOM $ bs of + Left (TSE.DecodeError _ (Just w)) -> + case BS.elemIndex w bs of + Just offset -> throwError $ PandocUTF8DecodingError (T.pack fp) offset w + Nothing -> throwError $ PandocUTF8DecodingError (T.pack fp) 0 w + Left e -> throwError $ PandocAppError (tshow e) + Right t -> return t + where + dropBOM bs' = + if "\xEF\xBB\xBF" `BS.isPrefixOf` bs' + then BS.drop 3 bs' + else bs' + + +inputToText :: PandocMonad m + => (Text -> Text) + -> (FilePath, (BS.ByteString, Maybe MimeType)) + -> m (FilePath, Text) +inputToText convTabs (fp, (bs,mt)) = + (fp,) . convTabs . T.filter (/='\r') <$> case mt >>= getCharset of - Just "UTF-8" -> return $ UTF8.toText bs + Just "UTF-8" -> utf8ToText fp bs Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs Just charset -> throwError $ PandocUnsupportedCharsetError charset - Nothing -> liftIO $ -- try first as UTF-8, then as latin1 - E.catch (return $! UTF8.toText bs) - (\case - TSE.DecodeError{} -> - return $ T.pack $ B8.unpack bs - e -> E.throwIO e) - -readFile' :: MonadIO m => FilePath -> m BL.ByteString -readFile' "-" = liftIO BL.getContents -readFile' f = liftIO $ BL.readFile f - -writeFnBinary :: MonadIO m => FilePath -> BL.ByteString -> m () -writeFnBinary "-" = liftIO . BL.putStr -writeFnBinary f = liftIO . BL.writeFile (UTF8.encodePath f) - -writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m () -writerFn eol "-" = liftIO . UTF8.putStrWith eol -writerFn eol f = liftIO . UTF8.writeFileWith eol f + Nothing -> catchError + (utf8ToText fp bs) + (\case + PandocUTF8DecodingError{} -> do + report $ NotUTF8Encoded + (if null fp + then "input" + else fp) + return $ T.pack $ B8.unpack bs + e -> throwError e) + +inputToLazyByteString :: (FilePath, (BS.ByteString, Maybe MimeType)) + -> BL.ByteString +inputToLazyByteString (_, (bs,_)) = BL.fromStrict bs + +writeFnBinary :: FilePath -> BL.ByteString -> IO () +writeFnBinary "-" = BL.putStr +writeFnBinary f = BL.writeFile (UTF8.encodePath f) + +writerFn :: IO.Newline -> FilePath -> Text -> IO () +writerFn eol "-" = UTF8.putStrWith eol +writerFn eol f = UTF8.writeFileWith eol f diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index a6df12715..759f8ac35 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -33,10 +33,8 @@ import Data.Bifunctor (second) import Data.Char (toLower) import Data.List (intercalate, sort, foldl') #ifdef _WINDOWS -#if MIN_VERSION_base(4,12,0) import Data.List (isPrefixOf) #endif -#endif import Data.Maybe (fromMaybe, isJust) import Data.Text (Text) import Safe (tailDef) @@ -188,6 +186,11 @@ options = (\opt -> return opt { optFileScope = True })) "" -- "Parse input files before combining" + , Option "" ["sandbox"] + (NoArg + (\opt -> return opt { optSandbox = True })) + "" + , Option "s" ["standalone"] (NoArg (\opt -> return opt { optStandalone = True })) @@ -332,14 +335,8 @@ options = , Option "" ["syntax-definition"] (ReqArg - (\arg opt -> do - let tr c d = map (\x -> if x == c then d else x) - let arg' = case arg of -- see #4836 - -- HXT confuses Windows path with URI - _:':':'\\':_ -> - "file:///" ++ tr '\\' '/' arg - _ -> normalizePath arg - return opt{ optSyntaxDefinitions = arg' : + (\arg opt -> + return opt{ optSyntaxDefinitions = normalizePath arg : optSyntaxDefinitions opt }) "FILE") "" -- "Syntax definition (xml) file" @@ -576,10 +573,10 @@ options = (ReqArg (\arg opt -> case safeStrRead arg of - Just t | t >= 1 && t <= 6 -> + Just t | t >= 0 && t <= 6 -> return opt { optSlideLevel = Just t } _ -> E.throwIO $ PandocOptionError - "slide level must be a number between 1 and 6") + "slide level must be a number between 0 and 6") "NUMBER") "" -- "Force header level for slides" @@ -1079,7 +1076,6 @@ readMetaValue s -- beginning with \\ to \\?\UNC\. -- See #5127. normalizePath :: FilePath -> FilePath #ifdef _WINDOWS -#if MIN_VERSION_base(4,12,0) normalizePath fp = if "\\\\" `isPrefixOf` fp && not ("\\\\?\\" `isPrefixOf` fp) then "\\\\?\\UNC\\" ++ drop 2 fp @@ -1087,6 +1083,3 @@ normalizePath fp = #else normalizePath = id #endif -#else -normalizePath = id -#endif diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs index bdf8c6667..e5fe7ad81 100644 --- a/src/Text/Pandoc/App/FormatHeuristics.hs +++ b/src/Text/Pandoc/App/FormatHeuristics.hs @@ -54,6 +54,7 @@ formatFromFilePath x = ".lhs" -> Just "markdown+lhs" ".ltx" -> Just "latex" ".markdown" -> Just "markdown" + ".markua" -> Just "markua" ".mkdn" -> Just "markdown" ".mkd" -> Just "markdown" ".mdwn" -> Just "markdown" @@ -74,7 +75,6 @@ formatFromFilePath x = ".s5" -> Just "s5" ".t2t" -> Just "t2t" ".tei" -> Just "tei" - ".tei.xml" -> Just "tei" ".tex" -> Just "latex" ".texi" -> Just "texinfo" ".texinfo" -> Just "texinfo" diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index d54d932b7..c5fac7951 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -29,7 +29,7 @@ import Control.Monad.Except (MonadIO, liftIO, throwError, (>=>), foldM) import Control.Monad.State.Strict (StateT, modify, gets) import System.FilePath ( addExtension, (</>), takeExtension, takeDirectory ) import System.Directory ( canonicalizePath ) -import Data.Char (isLower, toLower) +import Data.Char (toLower) import Data.Maybe (fromMaybe) import GHC.Generics hiding (Meta) import Text.Pandoc.Filter (Filter (..)) @@ -40,11 +40,10 @@ import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault), ReferenceLocation (EndOfDocument), ObfuscationMethod (NoObfuscation), CiteMethod (Citeproc)) -import Text.Pandoc.Class (readFileLazy, fileExists, setVerbosity, report, +import Text.Pandoc.Class (readFileStrict, fileExists, setVerbosity, report, PandocMonad(lookupEnv), getUserDataDir) import Text.Pandoc.Error (PandocError (PandocParseError, PandocSomeError)) -import Text.Pandoc.Shared (camelCaseStrToHyphenated, defaultUserDataDir, - findM, ordNub) +import Text.Pandoc.Shared (defaultUserDataDir, findM, ordNub) import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Readers.Metadata (yamlMap) import Text.Pandoc.Class.PandocPure @@ -54,21 +53,18 @@ import Data.Default (def) import qualified Data.Text as T import qualified Data.Map as M import Text.Pandoc.Definition (Meta(..), MetaValue(..)) -import Data.Aeson (defaultOptions, Options(..)) +import Data.Aeson (defaultOptions, Options(..), Result(..), fromJSON, camelTo2) import Data.Aeson.TH (deriveJSON) import Control.Applicative ((<|>)) -import Data.YAML +import Data.Yaml -- | The type of line-endings to be used when writing plain-text. data LineEnding = LF | CRLF | Native deriving (Show, Generic) -instance FromYAML LineEnding where - parseYAML = withStr "LineEnding" $ \t -> - case T.toLower t of - "lf" -> return LF - "crlf" -> return CRLF - "native" -> return Native - _ -> fail $ "Unknown line ending type " ++ show t +-- see https://github.com/jgm/pandoc/pull/4083 +-- using generic deriving caused long compilation times +$(deriveJSON + defaultOptions{ constructorTagModifier = map toLower } ''LineEnding) -- | How to handle output blocks in ipynb. data IpynbOutput = @@ -77,13 +73,8 @@ data IpynbOutput = | IpynbOutputBest deriving (Show, Generic) -instance FromYAML IpynbOutput where - parseYAML = withStr "LineEnding" $ \t -> - case t of - "none" -> return IpynbOutputNone - "all" -> return IpynbOutputAll - "best" -> return IpynbOutputBest - _ -> fail $ "Unknown ipynb output type " ++ show t +$(deriveJSON + defaultOptions{ fieldLabelModifier = map toLower . drop 11 } ''IpynbOutput) -- | Data structure for command line options. data Opt = Opt @@ -160,11 +151,18 @@ data Opt = Opt , optCSL :: Maybe FilePath -- ^ CSL stylesheet , optBibliography :: [FilePath] -- ^ Bibliography files , optCitationAbbreviations :: Maybe FilePath -- ^ Citation abbreviations + , optSandbox :: Bool } deriving (Generic, Show) -instance FromYAML (Opt -> Opt) where - parseYAML (Mapping _ _ m) = chain doOpt (M.toList m) - parseYAML n = failAtNode n "Expected a mapping" +$(deriveJSON + defaultOptions{ fieldLabelModifier = camelTo2 '-' . drop 3 } ''Opt) + +instance FromJSON (Opt -> Opt) where + parseJSON (Object m) = + case fromJSON (Object m) of + Error err' -> fail err' + Success (m' :: M.Map Text Value) -> chain doOpt (M.toList m') + parseJSON _ = fail "Expected a mapping" data DefaultsState = DefaultsState { @@ -173,22 +171,21 @@ data DefaultsState = DefaultsState } deriving (Show) instance (PandocMonad m, MonadIO m) - => FromYAML (Opt -> StateT DefaultsState m Opt) where - parseYAML (Mapping _ _ m) = do - let opts = M.mapKeys toText m - dataDir <- case M.lookup "data-dir" opts of - Nothing -> return Nothing - Just v -> Just . unpack <$> parseYAML v - f <- parseOptions (M.toList m) - case M.lookup "defaults" opts of - Just v -> do - g <- parseDefaults v dataDir - return $ g >=> f >=> resolveVarsInOpt - Nothing -> return $ f >=> resolveVarsInOpt - where - toText (Scalar _ (SStr s)) = s - toText _ = "" - parseYAML n = failAtNode n "Expected a mapping" + => FromJSON (Opt -> StateT DefaultsState m Opt) where + parseJSON (Object o) = + case fromJSON (Object o) of + Error err' -> fail err' + Success (opts :: M.Map Text Value) -> do + dataDir <- case M.lookup "data-dir" opts of + Nothing -> return Nothing + Just v -> Just . unpack <$> parseJSON v + f <- parseOptions (M.toList opts) + case M.lookup "defaults" opts of + Just v -> do + g <- parseDefaults v dataDir + return $ g >=> f >=> resolveVarsInOpt + Nothing -> return $ f >=> resolveVarsInOpt + parseJSON _ = fail "Expected a mapping" resolveVarsInOpt :: forall m. (PandocMonad m, MonadIO m) => Opt -> StateT DefaultsState m Opt @@ -302,7 +299,7 @@ resolveVarsInOpt parseDefaults :: (PandocMonad m, MonadIO m) - => Node Pos + => Value -> Maybe FilePath -> Parser (Opt -> StateT DefaultsState m Opt) parseDefaults n dataDir = parseDefsNames n >>= \ds -> return $ \o -> do @@ -321,11 +318,11 @@ parseDefaults n dataDir = parseDefsNames n >>= \ds -> return $ \o -> do "Error: Circular defaults file reference in " ++ "'" ++ defsParent ++ "'" else foldM applyDefaults o defsChildren - where parseDefsNames x = (parseYAML x >>= \xs -> return $ map unpack xs) - <|> (parseYAML x >>= \x' -> return [unpack x']) + where parseDefsNames x = (parseJSON x >>= \xs -> return $ map unpack xs) + <|> (parseJSON x >>= \x' -> return [unpack x']) parseOptions :: Monad m - => [(Node Pos, Node Pos)] + => [(Text, Value)] -> Parser (Opt -> StateT DefaultsState m Opt) parseOptions ns = do f <- chain doOpt' ns @@ -335,267 +332,267 @@ chain :: Monad m => (a -> m (b -> b)) -> [a] -> m (b -> b) chain f = foldM g id where g o n = f n >>= \o' -> return $ o' . o -doOpt' :: (Node Pos, Node Pos) -> Parser (Opt -> Opt) -doOpt' (k',v) = do - k <- parseStringKey k' +doOpt' :: (Text, Value) -> Parser (Opt -> Opt) +doOpt' (k,v) = do case k of "defaults" -> return id - _ -> doOpt (k',v) + _ -> doOpt (k,v) -doOpt :: (Node Pos, Node Pos) -> Parser (Opt -> Opt) -doOpt (k',v) = do - k <- parseStringKey k' +doOpt :: (Text, Value) -> Parser (Opt -> Opt) +doOpt (k,v) = do case k of "tab-stop" -> - parseYAML v >>= \x -> return (\o -> o{ optTabStop = x }) + parseJSON v >>= \x -> return (\o -> o{ optTabStop = x }) "preserve-tabs" -> - parseYAML v >>= \x -> return (\o -> o{ optPreserveTabs = x }) + parseJSON v >>= \x -> return (\o -> o{ optPreserveTabs = x }) "standalone" -> - parseYAML v >>= \x -> return (\o -> o{ optStandalone = x }) + parseJSON v >>= \x -> return (\o -> o{ optStandalone = x }) "table-of-contents" -> - parseYAML v >>= \x -> return (\o -> o{ optTableOfContents = x }) + parseJSON v >>= \x -> return (\o -> o{ optTableOfContents = x }) "toc" -> - parseYAML v >>= \x -> return (\o -> o{ optTableOfContents = x }) + parseJSON v >>= \x -> return (\o -> o{ optTableOfContents = x }) "from" -> - parseYAML v >>= \x -> return (\o -> o{ optFrom = x }) + parseJSON v >>= \x -> return (\o -> o{ optFrom = x }) "reader" -> - parseYAML v >>= \x -> return (\o -> o{ optFrom = x }) + parseJSON v >>= \x -> return (\o -> o{ optFrom = x }) "to" -> - parseYAML v >>= \x -> return (\o -> o{ optTo = x }) + parseJSON v >>= \x -> return (\o -> o{ optTo = x }) "writer" -> - parseYAML v >>= \x -> return (\o -> o{ optTo = x }) + parseJSON v >>= \x -> return (\o -> o{ optTo = x }) "shift-heading-level-by" -> - parseYAML v >>= \x -> return (\o -> o{ optShiftHeadingLevelBy = x }) + parseJSON v >>= \x -> return (\o -> o{ optShiftHeadingLevelBy = x }) "template" -> - parseYAML v >>= \x -> return (\o -> o{ optTemplate = unpack <$> x }) + parseJSON v >>= \x -> return (\o -> o{ optTemplate = unpack <$> x }) "variables" -> - parseYAML v >>= \x -> return (\o -> o{ optVariables = + parseJSON v >>= \x -> return (\o -> o{ optVariables = x <> optVariables o }) -- Note: x comes first because <> for Context is left-biased union -- and we want to favor later default files. See #5988. "metadata" -> yamlToMeta v >>= \x -> return (\o -> o{ optMetadata = optMetadata o <> x }) "metadata-files" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optMetadataFiles = optMetadataFiles o <> map unpack x }) "metadata-file" -> -- allow either a list or a single value - (parseYAML v >>= \x -> return (\o -> o{ optMetadataFiles = + (parseJSON v >>= \x -> return (\o -> o{ optMetadataFiles = optMetadataFiles o <> map unpack x })) <|> - (parseYAML v >>= \x -> + (parseJSON v >>= \x -> return (\o -> o{ optMetadataFiles = optMetadataFiles o <>[unpack x] })) "output-file" -> - parseYAML v >>= \x -> return (\o -> o{ optOutputFile = unpack <$> x }) + parseJSON v >>= \x -> return (\o -> o{ optOutputFile = unpack <$> x }) "input-files" -> - parseYAML v >>= \x -> return (\o -> o{ optInputFiles = + parseJSON v >>= \x -> return (\o -> o{ optInputFiles = optInputFiles o <> (map unpack <$> x) }) "input-file" -> -- allow either a list or a single value - (parseYAML v >>= \x -> return (\o -> o{ optInputFiles = + (parseJSON v >>= \x -> return (\o -> o{ optInputFiles = optInputFiles o <> (map unpack <$> x) })) <|> - (parseYAML v >>= \x -> return (\o -> o{ optInputFiles = + (parseJSON v >>= \x -> return (\o -> o{ optInputFiles = optInputFiles o <> ((\z -> [unpack z]) <$> x) })) "number-sections" -> - parseYAML v >>= \x -> return (\o -> o{ optNumberSections = x }) + parseJSON v >>= \x -> return (\o -> o{ optNumberSections = x }) "number-offset" -> - parseYAML v >>= \x -> return (\o -> o{ optNumberOffset = x }) + parseJSON v >>= \x -> return (\o -> o{ optNumberOffset = x }) "section-divs" -> - parseYAML v >>= \x -> return (\o -> o{ optSectionDivs = x }) + parseJSON v >>= \x -> return (\o -> o{ optSectionDivs = x }) "incremental" -> - parseYAML v >>= \x -> return (\o -> o{ optIncremental = x }) + parseJSON v >>= \x -> return (\o -> o{ optIncremental = x }) "self-contained" -> - parseYAML v >>= \x -> return (\o -> o{ optSelfContained = x }) + parseJSON v >>= \x -> return (\o -> o{ optSelfContained = x }) "html-q-tags" -> - parseYAML v >>= \x -> return (\o -> o{ optHtmlQTags = x }) + parseJSON v >>= \x -> return (\o -> o{ optHtmlQTags = x }) "highlight-style" -> - parseYAML v >>= \x -> return (\o -> o{ optHighlightStyle = x }) + parseJSON v >>= \x -> return (\o -> o{ optHighlightStyle = x }) "syntax-definition" -> - (parseYAML v >>= \x -> + (parseJSON v >>= \x -> return (\o -> o{ optSyntaxDefinitions = optSyntaxDefinitions o <> map unpack x })) <|> - (parseYAML v >>= \x -> + (parseJSON v >>= \x -> return (\o -> o{ optSyntaxDefinitions = optSyntaxDefinitions o <> [unpack x] })) "syntax-definitions" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optSyntaxDefinitions = optSyntaxDefinitions o <> map unpack x }) "top-level-division" -> - parseYAML v >>= \x -> return (\o -> o{ optTopLevelDivision = x }) + parseJSON v >>= \x -> return (\o -> o{ optTopLevelDivision = x }) "html-math-method" -> - parseYAML v >>= \x -> return (\o -> o{ optHTMLMathMethod = x }) + parseJSON v >>= \x -> return (\o -> o{ optHTMLMathMethod = x }) "abbreviations" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optAbbreviations = unpack <$> x }) "reference-doc" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optReferenceDoc = unpack <$> x }) "epub-subdirectory" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optEpubSubdirectory = unpack x }) "epub-metadata" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optEpubMetadata = unpack <$> x }) "epub-fonts" -> - parseYAML v >>= \x -> return (\o -> o{ optEpubFonts = optEpubFonts o <> + parseJSON v >>= \x -> return (\o -> o{ optEpubFonts = optEpubFonts o <> map unpack x }) "epub-chapter-level" -> - parseYAML v >>= \x -> return (\o -> o{ optEpubChapterLevel = x }) + parseJSON v >>= \x -> return (\o -> o{ optEpubChapterLevel = x }) "epub-cover-image" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optEpubCoverImage = unpack <$> x }) "toc-depth" -> - parseYAML v >>= \x -> return (\o -> o{ optTOCDepth = x }) + parseJSON v >>= \x -> return (\o -> o{ optTOCDepth = x }) "dump-args" -> - parseYAML v >>= \x -> return (\o -> o{ optDumpArgs = x }) + parseJSON v >>= \x -> return (\o -> o{ optDumpArgs = x }) "ignore-args" -> - parseYAML v >>= \x -> return (\o -> o{ optIgnoreArgs = x }) + parseJSON v >>= \x -> return (\o -> o{ optIgnoreArgs = x }) "verbosity" -> - parseYAML v >>= \x -> return (\o -> o{ optVerbosity = x }) + parseJSON v >>= \x -> return (\o -> o{ optVerbosity = x }) "trace" -> - parseYAML v >>= \x -> return (\o -> o{ optTrace = x }) + parseJSON v >>= \x -> return (\o -> o{ optTrace = x }) "log-file" -> - parseYAML v >>= \x -> return (\o -> o{ optLogFile = unpack <$> x }) + parseJSON v >>= \x -> return (\o -> o{ optLogFile = unpack <$> x }) "fail-if-warnings" -> - parseYAML v >>= \x -> return (\o -> o{ optFailIfWarnings = x }) + parseJSON v >>= \x -> return (\o -> o{ optFailIfWarnings = x }) "reference-links" -> - parseYAML v >>= \x -> return (\o -> o{ optReferenceLinks = x }) + parseJSON v >>= \x -> return (\o -> o{ optReferenceLinks = x }) "reference-location" -> - parseYAML v >>= \x -> return (\o -> o{ optReferenceLocation = x }) + parseJSON v >>= \x -> return (\o -> o{ optReferenceLocation = x }) "dpi" -> - parseYAML v >>= \x -> return (\o -> o{ optDpi = x }) + parseJSON v >>= \x -> return (\o -> o{ optDpi = x }) "wrap" -> - parseYAML v >>= \x -> return (\o -> o{ optWrap = x }) + parseJSON v >>= \x -> return (\o -> o{ optWrap = x }) "columns" -> - parseYAML v >>= \x -> return (\o -> o{ optColumns = x }) + parseJSON v >>= \x -> return (\o -> o{ optColumns = x }) "filters" -> - parseYAML v >>= \x -> return (\o -> o{ optFilters = optFilters o <> x }) + parseJSON v >>= \x -> return (\o -> o{ optFilters = optFilters o <> x }) "citeproc" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> if x then return (\o -> o{ optFilters = CiteprocFilter : optFilters o }) else return id "email-obfuscation" -> - parseYAML v >>= \x -> return (\o -> o{ optEmailObfuscation = x }) + parseJSON v >>= \x -> return (\o -> o{ optEmailObfuscation = x }) "identifier-prefix" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optIdentifierPrefix = x }) "strip-empty-paragraphs" -> - parseYAML v >>= \x -> return (\o -> o{ optStripEmptyParagraphs = x }) + parseJSON v >>= \x -> return (\o -> o{ optStripEmptyParagraphs = x }) "indented-code-classes" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optIndentedCodeClasses = x }) "data-dir" -> - parseYAML v >>= \x -> return (\o -> o{ optDataDir = unpack <$> x }) + parseJSON v >>= \x -> return (\o -> o{ optDataDir = unpack <$> x }) "cite-method" -> - parseYAML v >>= \x -> return (\o -> o{ optCiteMethod = x }) + parseJSON v >>= \x -> return (\o -> o{ optCiteMethod = x }) "listings" -> - parseYAML v >>= \x -> return (\o -> o{ optListings = x }) + parseJSON v >>= \x -> return (\o -> o{ optListings = x }) "pdf-engine" -> - parseYAML v >>= \x -> return (\o -> o{ optPdfEngine = unpack <$> x }) + parseJSON v >>= \x -> return (\o -> o{ optPdfEngine = unpack <$> x }) "pdf-engine-opts" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optPdfEngineOpts = map unpack x }) "pdf-engine-opt" -> - (parseYAML v >>= \x -> + (parseJSON v >>= \x -> return (\o -> o{ optPdfEngineOpts = map unpack x })) <|> - (parseYAML v >>= \x -> + (parseJSON v >>= \x -> return (\o -> o{ optPdfEngineOpts = [unpack x] })) "slide-level" -> - parseYAML v >>= \x -> return (\o -> o{ optSlideLevel = x }) + parseJSON v >>= \x -> return (\o -> o{ optSlideLevel = x }) "atx-headers" -> - parseYAML v >>= \x -> return (\o -> o{ optSetextHeaders = not x }) + parseJSON v >>= \x -> return (\o -> o{ optSetextHeaders = not x }) "markdown-headings" -> - parseYAML v >>= \x -> return (\o -> + parseJSON v >>= \x -> return (\o -> case T.toLower x of "atx" -> o{ optSetextHeaders = False } "setext" -> o{ optSetextHeaders = True } _ -> o) "ascii" -> - parseYAML v >>= \x -> return (\o -> o{ optAscii = x }) + parseJSON v >>= \x -> return (\o -> o{ optAscii = x }) "default-image-extension" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optDefaultImageExtension = x }) "extract-media" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optExtractMedia = unpack <$> x }) "track-changes" -> - parseYAML v >>= \x -> return (\o -> o{ optTrackChanges = x }) + parseJSON v >>= \x -> return (\o -> o{ optTrackChanges = x }) "file-scope" -> - parseYAML v >>= \x -> return (\o -> o{ optFileScope = x }) + parseJSON v >>= \x -> return (\o -> o{ optFileScope = x }) "title-prefix" -> - parseYAML v >>= \x -> return (\o -> o{ optTitlePrefix = x, + parseJSON v >>= \x -> return (\o -> o{ optTitlePrefix = x, optStandalone = True }) "css" -> - (parseYAML v >>= \x -> return (\o -> o{ optCss = optCss o <> + (parseJSON v >>= \x -> return (\o -> o{ optCss = optCss o <> map unpack x })) <|> - (parseYAML v >>= \x -> return (\o -> o{ optCss = optCss o <> + (parseJSON v >>= \x -> return (\o -> o{ optCss = optCss o <> [unpack x] })) "bibliography" -> - (parseYAML v >>= \x -> return (\o -> + (parseJSON v >>= \x -> return (\o -> o{ optBibliography = optBibliography o <> map unpack x })) <|> - (parseYAML v >>= \x -> return (\o -> + (parseJSON v >>= \x -> return (\o -> o{ optBibliography = optBibliography o <> [unpack x] })) "csl" -> - parseYAML v >>= \x -> return (\o -> o{ optCSL = unpack <$> x }) + parseJSON v >>= \x -> return (\o -> o{ optCSL = unpack <$> x }) "citation-abbreviations" -> - parseYAML v >>= \x -> return (\o -> o{ optCitationAbbreviations = + parseJSON v >>= \x -> return (\o -> o{ optCitationAbbreviations = unpack <$> x }) "ipynb-output" -> - parseYAML v >>= \x -> return (\o -> o{ optIpynbOutput = x }) + parseJSON v >>= \x -> return (\o -> o{ optIpynbOutput = x }) "include-before-body" -> - (parseYAML v >>= \x -> + (parseJSON v >>= \x -> return (\o -> o{ optIncludeBeforeBody = optIncludeBeforeBody o <> map unpack x })) <|> - (parseYAML v >>= \x -> + (parseJSON v >>= \x -> return (\o -> o{ optIncludeBeforeBody = optIncludeBeforeBody o <> [unpack x] })) "include-after-body" -> - (parseYAML v >>= \x -> + (parseJSON v >>= \x -> return (\o -> o{ optIncludeAfterBody = optIncludeAfterBody o <> map unpack x })) <|> - (parseYAML v >>= \x -> + (parseJSON v >>= \x -> return (\o -> o{ optIncludeAfterBody = optIncludeAfterBody o <> [unpack x] })) "include-in-header" -> - (parseYAML v >>= \x -> + (parseJSON v >>= \x -> return (\o -> o{ optIncludeInHeader = optIncludeInHeader o <> map unpack x })) <|> - (parseYAML v >>= \x -> + (parseJSON v >>= \x -> return (\o -> o{ optIncludeInHeader = optIncludeInHeader o <> [unpack x] })) "resource-path" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optResourcePath = map unpack x <> optResourcePath o }) "request-headers" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optRequestHeaders = x }) "no-check-certificate" -> - parseYAML v >>= \x -> + parseJSON v >>= \x -> return (\o -> o{ optNoCheckCertificate = x }) "eol" -> - parseYAML v >>= \x -> return (\o -> o{ optEol = x }) + parseJSON v >>= \x -> return (\o -> o{ optEol = x }) "strip-comments" -> - parseYAML v >>= \x -> return (\o -> o { optStripComments = x }) - _ -> failAtNode k' $ "Unknown option " ++ show k + parseJSON v >>= \x -> return (\o -> o { optStripComments = x }) + "sandbox" -> + parseJSON v >>= \x -> return (\o -> o { optSandbox = x }) + _ -> fail $ "Unknown option " ++ show k -- | Defaults for command-line options. defaultOpts :: Opt @@ -673,20 +670,15 @@ defaultOpts = Opt , optCSL = Nothing , optBibliography = [] , optCitationAbbreviations = Nothing + , optSandbox = False } -parseStringKey :: Node Pos -> Parser Text -parseStringKey k = case k of - Scalar _ (SStr t) -> return t - Scalar _ _ -> failAtNode k "Non-string key" - _ -> failAtNode k "Non-scalar key" - -yamlToMeta :: Node Pos -> Parser Meta -yamlToMeta (Mapping _ _ m) = - either (fail . show) return $ runEverything (yamlMap pMetaString m) - where - pMetaString = pure . MetaString <$> P.manyChar P.anyChar - runEverything p = +yamlToMeta :: Value -> Parser Meta +yamlToMeta (Object o) = + either (fail . show) return $ runEverything (yamlMap pMetaString o) + where + pMetaString = pure . MetaString <$> P.manyChar P.anyChar + runEverything p = runPure (P.readWithM p (def :: P.ParserState) ("" :: Text)) >>= fmap (Meta . flip P.runF def) yamlToMeta _ = return mempty @@ -699,14 +691,12 @@ applyDefaults :: (PandocMonad m, MonadIO m) applyDefaults opt file = do setVerbosity $ optVerbosity opt modify $ \defsState -> defsState{ curDefaults = Just file } - inp <- readFileLazy file - case decode1 inp of + inp <- readFileStrict file + case decodeEither' inp of Right f -> f opt - Left (errpos, errmsg) -> throwError $ - PandocParseError $ T.pack $ - "Error parsing " ++ file ++ " line " ++ - show (posLine errpos) ++ " column " ++ - show (posColumn errpos) ++ ":\n" ++ errmsg + Left err' -> throwError $ + PandocParseError + $ T.pack $ Data.Yaml.prettyPrintParseException err' fullDefaultsPath :: (PandocMonad m, MonadIO m) => Maybe FilePath @@ -734,14 +724,3 @@ cyclic :: Ord a => [[a]] -> Bool cyclic = any hasDuplicate where hasDuplicate xs = length (ordNub xs) /= length xs - --- see https://github.com/jgm/pandoc/pull/4083 --- using generic deriving caused long compilation times -$(deriveJSON - defaultOptions{ fieldLabelModifier = drop 11 . map toLower } ''IpynbOutput) -$(deriveJSON - defaultOptions{ fieldLabelModifier = map toLower } ''LineEnding) -$(deriveJSON - defaultOptions{ fieldLabelModifier = - camelCaseStrToHyphenated . dropWhile isLower - } ''Opt) diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index 3864ab188..7b057713b 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -45,16 +45,16 @@ readUtf8File :: PandocMonad m => FilePath -> m T.Text readUtf8File = fmap UTF8.toText . readFileStrict -- | Settings specifying how document output should be produced. -data OutputSettings = OutputSettings +data OutputSettings m = OutputSettings { outputFormat :: T.Text - , outputWriter :: Writer PandocIO + , outputWriter :: Writer m , outputWriterName :: T.Text , outputWriterOptions :: WriterOptions , outputPdfProgram :: Maybe String } -- | Get output settings from command line options. -optToOutputSettings :: Opt -> PandocIO OutputSettings +optToOutputSettings :: (PandocMonad m, MonadIO m) => Opt -> m (OutputSettings m) optToOutputSettings opts = do let outputFile = fromMaybe "-" (optOutputFile opts) @@ -90,12 +90,31 @@ optToOutputSettings opts = do then writerName else T.toLower $ baseWriterName writerName - (writer :: Writer PandocIO, writerExts) <- + let makeSandboxed pureWriter = + let files = maybe id (:) (optReferenceDoc opts) . + maybe id (:) (optEpubMetadata opts) . + maybe id (:) (optEpubCoverImage opts) . + maybe id (:) (optCSL opts) . + maybe id (:) (optCitationAbbreviations opts) $ + optEpubFonts opts ++ + optBibliography opts + in case pureWriter of + TextWriter w -> TextWriter $ \o d -> sandbox files (w o d) + ByteStringWriter w + -> ByteStringWriter $ \o d -> sandbox files (w o d) + + + (writer, writerExts) <- if ".lua" `T.isSuffixOf` format then return (TextWriter - (\o d -> writeCustom (T.unpack writerName) o d) - :: Writer PandocIO, mempty) - else getWriter (T.toLower writerName) + (\o d -> writeCustom (T.unpack writerName) o d), mempty) + else if optSandbox opts + then + case runPure (getWriter writerName) of + Left e -> throwError e + Right (w, wexts) -> + return (makeSandboxed w, wexts) + else getWriter (T.toLower writerName) let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 246f54516..2530ef46f 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -7,13 +7,13 @@ module Text.Pandoc.Citeproc ( processCitations, getReferences, - getStyle ) where import Citeproc import Citeproc.Pandoc () -import Text.Pandoc.Citeproc.Locator (parseLocator) +import Text.Pandoc.Citeproc.Locator (parseLocator, toLocatorMap, + LocatorInfo(..)) import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences) import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..)) import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText) @@ -49,15 +49,16 @@ import qualified Data.Text as T import System.FilePath (takeExtension) import Safe (lastMay, initSafe) - processCitations :: PandocMonad m => Pandoc -> m Pandoc processCitations (Pandoc meta bs) = do style <- getStyle (Pandoc meta bs) - - mblang <- getLang meta + mblang <- getCiteprocLang meta let locale = Citeproc.mergeLocales mblang style - refs <- getReferences (Just locale) (Pandoc meta bs) + let addQuoteSpan (Quoted _ xs) = Span ("",["csl-quoted"],[]) xs + addQuoteSpan x = x + refs <- map (walk addQuoteSpan) <$> + getReferences (Just locale) (Pandoc meta bs) let otherIdsMap = foldr (\ref m -> case T.words . extractText <$> @@ -73,7 +74,9 @@ processCitations (Pandoc meta bs) = do let linkCites = maybe False truish $ lookupMeta "link-citations" meta - let opts = defaultCiteprocOptions{ linkCitations = linkCites } + let linkBib = maybe True truish $ lookupMeta "link-bibliography" meta + let opts = defaultCiteprocOptions{ linkCitations = linkCites + , linkBibliography = linkBib } let result = Citeproc.citeproc opts style mblang refs citations mapM_ (report . CiteprocWarning) (resultWarnings result) let sopts = styleOptions style @@ -88,13 +91,11 @@ processCitations (Pandoc meta bs) = do _ -> id) $ [] let bibs = mconcat $ map (\(ident, out) -> B.divWith ("ref-" <> ident,["csl-entry"],[]) . B.para . - walk (convertQuotes locale) . insertSpace $ out) (resultBibliography result) let moveNotes = styleIsNoteStyle sopts && maybe True truish (lookupMeta "notes-after-punctuation" meta) - let cits = map (walk (convertQuotes locale)) $ - resultCitations result + let cits = resultCitations result let metanocites = lookupMeta "nocite" meta let Pandoc meta'' bs' = @@ -105,9 +106,13 @@ processCitations (Pandoc meta bs) = do else id) . evalState (walkM insertResolvedCitations $ Pandoc meta' bs) $ cits - return $ Pandoc meta'' - $ insertRefs refkvs classes meta'' - (walk fixLinks $ B.toList bibs) bs' + return $ walk removeQuoteSpan + $ Pandoc meta'' + $ insertRefs refkvs classes meta'' (B.toList bibs) bs' + +removeQuoteSpan :: Inline -> Inline +removeQuoteSpan (Span ("",["csl-quoted"],[]) xs) = Span nullAttr xs +removeQuoteSpan x = x -- | Retrieve the CSL style specified by the csl or citation-style -- metadata field in a pandoc document, or the default CSL style @@ -162,10 +167,9 @@ getStyle (Pandoc meta _) = do -- Retrieve citeproc lang based on metadata. -getLang :: PandocMonad m => Meta -> m (Maybe Lang) -getLang meta = maybe (return Nothing) bcp47LangToIETF - ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= - metaValueToText) +getCiteprocLang :: PandocMonad m => Meta -> m (Maybe Lang) +getCiteprocLang meta = maybe (return Nothing) bcp47LangToIETF + ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= metaValueToText) -- | Get references defined inline in the metadata and via an external -- bibliography. Only references that are actually cited in the document @@ -177,7 +181,7 @@ getReferences mblocale (Pandoc meta bs) = do locale <- case mblocale of Just l -> return l Nothing -> do - mblang <- getLang meta + mblang <- getCiteprocLang meta case mblang of Just lang -> return $ either mempty id $ getLocale lang Nothing -> return mempty @@ -205,8 +209,7 @@ getReferences mblocale (Pandoc meta bs) = do Just fp -> getRefsFromBib locale idpred fp Nothing -> return [] Nothing -> return [] - return $ map (linkifyVariables . legacyDateRanges) - (externalRefs ++ inlineRefs) + return $ map legacyDateRanges (externalRefs ++ inlineRefs) -- note that inlineRefs can override externalRefs @@ -262,26 +265,9 @@ getRefs locale format idpred mbfp raw = do rs <- yamlToRefs idpred def{ readerExtensions = pandocExtensions } (T.unpack <$> mbfp) - (L.fromStrict raw) + raw return $ mapMaybe metaValueToReference rs --- localized quotes -convertQuotes :: Locale -> Inline -> Inline -convertQuotes locale (Quoted qt ils) = - case (M.lookup openterm terms, M.lookup closeterm terms) of - (Just ((_,oq):_), Just ((_,cq):_)) -> - Span ("",[],[]) (Str oq : ils ++ [Str cq]) - _ -> Quoted qt ils - where - terms = localeTerms locale - openterm = case qt of - DoubleQuote -> "open-quote" - SingleQuote -> "open-inner-quote" - closeterm = case qt of - DoubleQuote -> "close-quote" - SingleQuote -> "close-inner-quote" -convertQuotes _ x = x - -- assumes we walk in same order as query insertResolvedCitations :: Inline -> State [Inlines] Inline insertResolvedCitations (Cite cs ils) = do @@ -290,7 +276,7 @@ insertResolvedCitations (Cite cs ils) = do [] -> return (Cite cs ils) (x:xs) -> do put xs - return $ Cite cs (walk fixLinks $ B.toList x) + return $ Cite cs (B.toList x) insertResolvedCitations x = return x getCitations :: Locale @@ -318,17 +304,15 @@ fromPandocCitations :: Locale -> [CitationItem Inlines] fromPandocCitations locale otherIdsMap = concatMap go where + locmap = toLocatorMap locale go c = - let (loclab, suffix) = parseLocator locale (citationSuffix c) - (mblab, mbloc) = case loclab of - Just (loc, lab) -> (Just loc, Just lab) - Nothing -> (Nothing, Nothing) + let (mblocinfo, suffix) = parseLocator locmap (citationSuffix c) cit = CitationItem { citationItemId = fromMaybe (ItemId $ Pandoc.citationId c) (M.lookup (Pandoc.citationId c) otherIdsMap) - , citationItemLabel = mblab - , citationItemLocator = mbloc + , citationItemLabel = locatorLabel <$> mblocinfo + , citationItemLocator = locatorLoc <$> mblocinfo , citationItemType = NormalCite , citationItemPrefix = case citationPrefix c of [] -> Nothing @@ -368,6 +352,7 @@ formatFromExtension fp = case dropWhile (== '.') $ takeExtension fp of "bib" -> Just Format_biblatex "json" -> Just Format_json "yaml" -> Just Format_yaml + "yml" -> Just Format_yaml _ -> Nothing @@ -431,15 +416,6 @@ mvPunct moveNotes locale (Cite cs ils : Str "." : ys) mvPunct moveNotes locale (x:xs) = x : mvPunct moveNotes locale xs mvPunct _ _ [] = [] --- move https://doi.org etc. prefix inside link text (#6723): -fixLinks :: [Inline] -> [Inline] -fixLinks (Str t : Link attr [Str u1] (u2,tit) : xs) - | u2 == t <> u1 - = Link attr [Str (t <> u1)] (u2,tit) : fixLinks xs -fixLinks (x:xs) = x : fixLinks xs -fixLinks [] = [] - - endWithPunct :: Bool -> [Inline] -> Bool endWithPunct _ [] = False endWithPunct onlyFinal xs@(_:_) = @@ -535,29 +511,6 @@ legacyDateRanges ref = _ -> DateVal d go x = x -linkifyVariables :: Reference Inlines -> Reference Inlines -linkifyVariables ref = - ref{ referenceVariables = M.mapWithKey go $ referenceVariables ref } - where - go "URL" x = tolink "https://" x - go "DOI" x = tolink "https://doi.org/" (fixShortDOI x) - go "ISBN" x = tolink "https://worldcat.org/isbn/" x - go "PMID" x = tolink "https://www.ncbi.nlm.nih.gov/pubmed/" x - go "PMCID" x = tolink "https://www.ncbi.nlm.nih.gov/pmc/articles/" x - go _ x = x - fixShortDOI x = let x' = extractText x - in if "10/" `T.isPrefixOf` x' - then TextVal $ T.drop 3 x' - -- see https://shortdoi.org - else TextVal x' - tolink pref x = let x' = extractText x - x'' = if "://" `T.isInfixOf` x' - then x' - else pref <> x' - in if T.null x' - then x - else FancyVal (B.link x'' "" (B.str x')) - extractText :: Val Inlines -> Text extractText (TextVal x) = x extractText (FancyVal x) = toText x @@ -590,7 +543,7 @@ deNote (Note bs) = addParens [] = [] addParens (Cite (c:cs) ils : zs) | citationMode c == AuthorInText - = Cite (c:cs) (concatMap (noteAfterComma (needsPeriod zs)) ils) : + = Cite (c:cs) (addCommas (needsPeriod zs) ils) : addParens zs | otherwise = Cite (c:cs) (concatMap noteInParens ils) : addParens zs @@ -611,13 +564,19 @@ deNote (Note bs) = removeFinalPeriod ils ++ [Str ")"] noteInParens x = [x] - noteAfterComma needsPer (Span ("",["csl-note"],[]) ils) - | not (null ils) - = Str "," : Space : - if needsPer - then ils - else removeFinalPeriod ils - noteAfterComma _ x = [x] + -- We want to add a comma before a CSL note citation, but not + -- before the author name, and not before the first citation + -- if it doesn't begin with an author name. + addCommas = addCommas' True -- boolean == "at beginning" + + addCommas' _ _ [] = [] + addCommas' atBeginning needsPer + (Span ("",["csl-note"],[]) ils : rest) + | not (null ils) + = (if atBeginning then id else ([Str "," , Space] ++)) $ + (if needsPer then ils else removeFinalPeriod ils) ++ + addCommas' False needsPer rest + addCommas' _ needsPer (il : rest) = il : addCommas' False needsPer rest deNote x = x diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index c178de6e9..a8e5622ed 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -34,7 +34,7 @@ import Text.Pandoc.Class (runPure) import qualified Text.Pandoc.Walk as Walk import Citeproc.Types import Citeproc.Pandoc () -import Text.Pandoc.Citeproc.Util (toIETF) +import Text.Pandoc.Citeproc.Util (toIETF, splitStrWhen) import Text.Pandoc.Citeproc.Data (biblatexStringMap) import Data.Default import Data.Text (Text) @@ -48,13 +48,12 @@ import Control.Monad.RWS hiding ((<>)) import qualified Data.Sequence as Seq import Data.Char (isAlphaNum, isDigit, isLetter, isUpper, toLower, toUpper, - isLower, isPunctuation) + isLower, isPunctuation, isSpace) import Data.List (foldl', intercalate, intersperse) import Safe (readMay) import Text.Printf (printf) import Text.DocLayout (literal, hsep, nest, hang, Doc(..), braces, ($$), cr) - data Variant = Bibtex | Biblatex deriving (Show, Eq, Ord) @@ -527,9 +526,9 @@ itemToReference locale variant item = do let fixSeriesTitle [Str xs] | isNumber xs = [Str (ordinalize locale xs), Space, Str (resolveKey' lang "jourser")] fixSeriesTitle xs = xs - seriesTitle' <- (Just . B.fromList . fixSeriesTitle . - B.toList . resolveKey lang <$> - getTitle "series") <|> + + seriesTitle' <- (Just . B.fromList . fixSeriesTitle . B.toList + <$> getTitle "series") <|> return Nothing shortTitle' <- (Just <$> (guard (not hasMaintitle || isChapterlike) >> getTitle "shorttitle")) @@ -805,30 +804,34 @@ bibEntries = do skipMany nonEntry many (bibItem <* skipMany nonEntry) where nonEntry = bibSkip <|> + comment <|> try (char '@' >> (bibComment <|> bibPreamble <|> bibString)) bibSkip :: BibParser () -bibSkip = skipMany1 (satisfy (/='@')) +bibSkip = skipMany1 (satisfy (\c -> c /='@' && c /='%')) + +comment :: BibParser () +comment = char '%' *> void anyLine bibComment :: BibParser () bibComment = do cistring "comment" - spaces + spaces' void inBraces <|> bibSkip <|> return () bibPreamble :: BibParser () bibPreamble = do cistring "preamble" - spaces + spaces' void inBraces bibString :: BibParser () bibString = do cistring "string" - spaces + spaces' char '{' - spaces + spaces' (k,v) <- entField char '}' updateState (\(l,m) -> (l, Map.insert k v m)) @@ -842,9 +845,9 @@ inBraces = do char '{' res <- manyTill ( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\') - <|> (char '\\' >> ( (char '{' >> return "\\{") - <|> (char '}' >> return "\\}") - <|> return "\\")) + <|> (char '\\' >> (do c <- oneOf "{}" + return $ T.pack ['\\',c]) + <|> return "\\") <|> (braced <$> inBraces) ) (char '}') return $ T.concat res @@ -856,8 +859,9 @@ inQuotes :: BibParser Text inQuotes = do char '"' T.concat <$> manyTill - ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\') + ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\' && c /= '%') <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar) + <|> ("" <$ (char '%' >> anyLine)) <|> braced <$> inBraces ) (char '"') @@ -870,32 +874,35 @@ isBibtexKeyChar :: Char -> Bool isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()$/*@_+=-[]*&" :: [Char]) +spaces' :: BibParser () +spaces' = skipMany (void (satisfy isSpace) <|> comment) + bibItem :: BibParser Item bibItem = do char '@' pos <- getPosition enttype <- T.toLower <$> take1WhileP isLetter - spaces + spaces' char '{' - spaces + spaces' entid <- take1WhileP isBibtexKeyChar - spaces + spaces' char ',' - spaces - entfields <- entField `sepEndBy` (char ',' >> spaces) - spaces + spaces' + entfields <- entField `sepEndBy` (char ',' >> spaces') + spaces' char '}' return $ Item entid pos enttype (Map.fromList entfields) entField :: BibParser (Text, Text) entField = do k <- fieldName - spaces + spaces' char '=' - spaces + spaces' vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy` - try (spaces >> char '#' >> spaces) - spaces + try (spaces' >> char '#' >> spaces') + spaces' return (k, T.concat vs) resolveAlias :: Text -> Text @@ -984,8 +991,12 @@ getTitle f = do ils <- getField f utc <- gets untitlecase lang <- gets localeLang + let ils' = + if f == "series" + then resolveKey lang ils + else ils let processTitle = if utc then unTitlecase (Just lang) else id - return $ processTitle ils + return $ processTitle ils' getShortTitle :: Bool -> Text -> Bib (Maybe Inlines) getShortTitle requireColon f = do @@ -1253,20 +1264,6 @@ toName opts ils = do , nameStaticOrdering = False } -splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline] -splitStrWhen _ [] = [] -splitStrWhen p (Str xs : ys) = map Str (go xs) ++ splitStrWhen p ys - where go s = - let (w,z) = T.break p s - in if T.null z - then if T.null w - then [] - else [w] - else if T.null w - then (T.take 1 z : go (T.drop 1 z)) - else (w : T.take 1 z : go (T.drop 1 z)) -splitStrWhen p (x : ys) = x : splitStrWhen p ys - ordinalize :: Locale -> Text -> Text ordinalize locale n = let terms = localeTerms locale @@ -1460,14 +1457,14 @@ bookTrans z = _ -> [z] resolveKey :: Lang -> Inlines -> Inlines -resolveKey lang ils = Walk.walk go ils +resolveKey lang (Many ils) = Many $ fmap go ils where go (Str s) = Str $ resolveKey' lang s go x = x resolveKey' :: Lang -> Text -> Text resolveKey' lang k = case Map.lookup (langLanguage lang) biblatexStringMap >>= - Map.lookup (T.toLower k) of + Map.lookup k of Nothing -> k Just (x, _) -> either (const k) stringify $ parseLaTeX lang x diff --git a/src/Text/Pandoc/Citeproc/CslJson.hs b/src/Text/Pandoc/Citeproc/CslJson.hs index 862af5188..43c1a87ec 100644 --- a/src/Text/Pandoc/Citeproc/CslJson.hs +++ b/src/Text/Pandoc/Citeproc/CslJson.hs @@ -28,6 +28,7 @@ fromCslJson (CslSub x) = B.subscript (fromCslJson x) fromCslJson (CslSup x) = B.superscript (fromCslJson x) fromCslJson (CslNoCase x) = B.spanWith ("",["nocase"],[]) (fromCslJson x) fromCslJson (CslDiv t x) = B.spanWith ("",["csl-" <> t],[]) (fromCslJson x) +fromCslJson (CslLink u x) = B.link u "" (fromCslJson x) cslJsonToReferences :: ByteString -> Either String [Reference Inlines] cslJsonToReferences raw = diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs index f8931d7b5..0b8f79922 100644 --- a/src/Text/Pandoc/Citeproc/Locator.hs +++ b/src/Text/Pandoc/Citeproc/Locator.hs @@ -2,9 +2,13 @@ {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Citeproc.Locator - ( parseLocator ) + ( parseLocator + , toLocatorMap + , LocatorInfo(..) + , LocatorMap(..) ) where import Citeproc.Types +import Text.Pandoc.Citeproc.Util (splitStrWhen) import Data.Text (Text) import qualified Data.Text as T import Data.List (foldl') @@ -16,9 +20,17 @@ import Control.Monad (mzero) import qualified Data.Map as M import Data.Char (isSpace, isPunctuation, isDigit) -parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline]) -parseLocator locale inp = - case parse (pLocatorWords (toLocatorMap locale)) "suffix" $ splitInp inp of + +data LocatorInfo = + LocatorInfo{ locatorRaw :: Text + , locatorLabel :: Text + , locatorLoc :: Text + } + deriving (Show) + +parseLocator :: LocatorMap -> [Inline] -> (Maybe LocatorInfo, [Inline]) +parseLocator locmap inp = + case parse (pLocatorWords locmap) "suffix" $ splitInp inp of Right r -> r Left _ -> (Nothing, maybeAddComma inp) @@ -32,18 +44,16 @@ splitInp = splitStrWhen (\c -> isSpace c || (isPunctuation c && c /= ':')) type LocatorParser = Parsec [Inline] () pLocatorWords :: LocatorMap - -> LocatorParser (Maybe (Text, Text), [Inline]) + -> LocatorParser (Maybe LocatorInfo, [Inline]) pLocatorWords locMap = do optional $ pMatchChar "," (== ',') optional pSpace - (la, lo) <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap + info <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap s <- getInput -- rest is suffix - -- need to trim, otherwise "p. 9" and "9" will have 'different' locators later on - -- i.e. the first one will be " 9" return $ - if T.null la && T.null lo + if T.null (locatorLabel info) && T.null (locatorLoc info) then (Nothing, maybeAddComma s) - else (Just (la, T.strip lo), s) + else (Just info, s) maybeAddComma :: [Inline] -> [Inline] maybeAddComma [] = [] @@ -53,28 +63,30 @@ maybeAddComma ils@(Str t : _) , isPunctuation c = ils maybeAddComma ils = Str "," : Space : ils -pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text) +pLocatorDelimited :: LocatorMap -> LocatorParser LocatorInfo pLocatorDelimited locMap = try $ do _ <- pMatchChar "{" (== '{') skipMany pSpace -- gobble pre-spaces so label doesn't try to include them - (la, _) <- pLocatorLabelDelimited locMap + (rawlab, la, _) <- pLocatorLabelDelimited locMap -- we only care about balancing {} and [] (because of the outer [] scope); -- the rest can be anything let inner = do { t <- anyToken; return (True, stringify t) } gs <- many (pBalancedBraces [('{','}'), ('[',']')] inner) _ <- pMatchChar "}" (== '}') let lo = T.concat $ map snd gs - return (la, lo) + return $ LocatorInfo{ locatorLoc = lo, + locatorLabel = la, + locatorRaw = rawlab <> "{" <> lo <> "}" } -pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Bool) +pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Text, Bool) pLocatorLabelDelimited locMap - = pLocatorLabel' locMap lim <|> return ("page", True) + = pLocatorLabel' locMap lim <|> return ("", "page", True) where lim = stringify <$> anyToken -pLocatorIntegrated :: LocatorMap -> LocatorParser (Text, Text) +pLocatorIntegrated :: LocatorMap -> LocatorParser LocatorInfo pLocatorIntegrated locMap = try $ do - (la, wasImplicit) <- pLocatorLabelIntegrated locMap + (rawlab, la, wasImplicit) <- pLocatorLabelIntegrated locMap -- if we got the label implicitly, we have presupposed the first one is -- going to have a digit, so guarantee that. You _can_ have p. (a) -- because you specified it. @@ -84,17 +96,20 @@ pLocatorIntegrated locMap = try $ do g <- try $ pLocatorWordIntegrated (not wasImplicit) >>= modifier gs <- many (try $ pLocatorWordIntegrated False >>= modifier) let lo = T.concat (g:gs) - return (la, lo) + return $ LocatorInfo{ locatorLabel = la, + locatorLoc = lo, + locatorRaw = rawlab <> lo } -pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Bool) +pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Text, Bool) pLocatorLabelIntegrated locMap - = pLocatorLabel' locMap lim <|> (lookAhead digital >> return ("page", True)) + = pLocatorLabel' locMap lim <|> + (lookAhead digital >> return ("", "page", True)) where lim = try $ pLocatorWordIntegrated True >>= requireRomansOrDigits digital = try $ pLocatorWordIntegrated True >>= requireDigits pLocatorLabel' :: LocatorMap -> LocatorParser Text - -> LocatorParser (Text, Bool) + -> LocatorParser (Text, Text, Bool) pLocatorLabel' locMap lim = go "" where -- grow the match string until we hit the end @@ -105,9 +120,9 @@ pLocatorLabel' locMap lim = go "" t <- anyToken ts <- manyTill anyToken (try $ lookAhead lim) let s = acc <> stringify (t:ts) - case M.lookup (T.toCaseFold $ T.strip s) locMap of + case M.lookup (T.toCaseFold $ T.strip s) (unLocatorMap locMap) of -- try to find a longer one, or return this one - Just l -> go s <|> return (l, False) + Just l -> go s <|> return (s, l, False) Nothing -> go s -- hard requirement for a locator to have some real digits in it @@ -247,27 +262,16 @@ isLocatorSep ',' = True isLocatorSep ';' = True isLocatorSep _ = False -splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline] -splitStrWhen _ [] = [] -splitStrWhen p (Str xs : ys) = go (T.unpack xs) ++ splitStrWhen p ys - where - go [] = [] - go s = case break p s of - ([],[]) -> [] - (zs,[]) -> [Str $ T.pack zs] - ([],w:ws) -> Str (T.singleton w) : go ws - (zs,w:ws) -> Str (T.pack zs) : Str (T.singleton w) : go ws -splitStrWhen p (x : ys) = x : splitStrWhen p ys - -- -- Locator Map -- -type LocatorMap = M.Map Text Text +newtype LocatorMap = LocatorMap { unLocatorMap :: M.Map Text Text } + deriving (Show) toLocatorMap :: Locale -> LocatorMap toLocatorMap locale = - foldr go mempty locatorTerms + LocatorMap $ foldr go mempty locatorTerms where go tname locmap = case M.lookup tname (localeTerms locale) of diff --git a/src/Text/Pandoc/Citeproc/Util.hs b/src/Text/Pandoc/Citeproc/Util.hs index 6d8e01bc9..8bffc0f32 100644 --- a/src/Text/Pandoc/Citeproc/Util.hs +++ b/src/Text/Pandoc/Citeproc/Util.hs @@ -1,9 +1,21 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Citeproc.Util - ( toIETF ) + ( splitStrWhen + , toIETF ) where +import qualified Data.Text as T import Data.Text (Text) +import Text.Pandoc.Definition + +-- Split Str elements so that characters satisfying the +-- predicate each have their own Str. +splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline] +splitStrWhen p = foldr go [] + where + go (Str t) = (map Str (T.groupBy goesTogether t) ++) + go x = (x :) + goesTogether c d = not (p c || p d) toIETF :: Text -> Text toIETF "english" = "en-US" -- "en-EN" unavailable in CSL diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 2f28ac4dd..6394df251 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -19,6 +19,7 @@ module Text.Pandoc.Class , module Text.Pandoc.Class.PandocIO , module Text.Pandoc.Class.PandocMonad , module Text.Pandoc.Class.PandocPure + , module Text.Pandoc.Class.Sandbox , Translations ) where @@ -27,3 +28,4 @@ import Text.Pandoc.Class.PandocMonad import Text.Pandoc.Class.PandocIO import Text.Pandoc.Class.PandocPure import Text.Pandoc.Translations (Translations) +import Text.Pandoc.Class.Sandbox diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index f4cfc8682..305f07a01 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -30,6 +30,7 @@ module Text.Pandoc.Class.IO , openURL , readFileLazy , readFileStrict + , readStdinStrict , extractMedia ) where @@ -158,6 +159,11 @@ readFileLazy s = liftIOError BL.readFile s readFileStrict :: (PandocMonad m, MonadIO m) => FilePath -> m B.ByteString readFileStrict s = liftIOError B.readFile s +-- | Read the strict ByteString contents from stdin, raising +-- an error on failure. +readStdinStrict :: (PandocMonad m, MonadIO m) => m B.ByteString +readStdinStrict = liftIOError (const B.getContents) "stdin" + -- | Return a list of paths that match a glob, relative to the working -- directory. See 'System.FilePath.Glob' for the glob syntax. glob :: (PandocMonad m, MonadIO m) => String -> m [FilePath] diff --git a/src/Text/Pandoc/Class/PandocIO.hs b/src/Text/Pandoc/Class/PandocIO.hs index 63cb94155..61ee1f1c6 100644 --- a/src/Text/Pandoc/Class/PandocIO.hs +++ b/src/Text/Pandoc/Class/PandocIO.hs @@ -29,6 +29,7 @@ import Text.Pandoc.Class.PandocMonad import Text.Pandoc.Definition import Text.Pandoc.Error import qualified Text.Pandoc.Class.IO as IO +import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow) -- | Evaluate a 'PandocIO' operation. runIO :: PandocIO a -> IO (Either PandocError a) @@ -45,6 +46,9 @@ newtype PandocIO a = PandocIO { , Functor , Applicative , Monad + , MonadCatch + , MonadMask + , MonadThrow , MonadError PandocError ) @@ -58,6 +62,7 @@ instance PandocMonad PandocIO where openURL = IO.openURL readFileLazy = IO.readFileLazy readFileStrict = IO.readFileStrict + readStdinStrict = IO.readStdinStrict glob = IO.glob fileExists = IO.fileExists @@ -70,5 +75,5 @@ instance PandocMonad PandocIO where logOutput = IO.logOutput -- | Extract media from the mediabag into a directory. -extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc +extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc extractMedia = IO.extractMedia diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 439aec071..c15ce6444 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -117,6 +117,9 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m) -- | Read the strict ByteString contents from a file path, -- raising an error on failure. readFileStrict :: FilePath -> m B.ByteString + -- | Read the contents of stdin as a strict ByteString, raising + -- an error on failure. + readStdinStrict :: m B.ByteString -- | Return a list of paths that match a glob, relative to -- the working directory. See 'System.FilePath.Glob' for -- the glob syntax. @@ -451,7 +454,7 @@ getDefaultReferenceDocx = do "word/theme/theme1.xml"] let toLazy = BL.fromChunks . (:[]) let pathToEntry path = do - epochtime <- floor . utcTimeToPOSIXSeconds <$> getCurrentTime + epochtime <- floor . utcTimeToPOSIXSeconds <$> getTimestamp contents <- toLazy <$> readDataFile ("docx/" ++ path) return $ toEntry path epochtime contents datadir <- getUserDataDir @@ -674,6 +677,7 @@ instance (MonadTrans t, PandocMonad m, Functor (t m), openURL = lift . openURL readFileLazy = lift . readFileLazy readFileStrict = lift . readFileStrict + readStdinStrict = lift readStdinStrict glob = lift . glob fileExists = lift . fileExists getDataFileName = lift . getDataFileName @@ -691,6 +695,7 @@ instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where openURL = lift . openURL readFileLazy = lift . readFileLazy readFileStrict = lift . readFileStrict + readStdinStrict = lift readStdinStrict glob = lift . glob fileExists = lift . fileExists getDataFileName = lift . getDataFileName diff --git a/src/Text/Pandoc/Class/PandocPure.hs b/src/Text/Pandoc/Class/PandocPure.hs index 23c941839..290a6d97c 100644 --- a/src/Text/Pandoc/Class/PandocPure.hs +++ b/src/Text/Pandoc/Class/PandocPure.hs @@ -64,6 +64,7 @@ data PureState = PureState , stReferencePptx :: Archive , stReferenceODT :: Archive , stFiles :: FileTree + , stStdin :: B.ByteString , stUserDataFiles :: FileTree , stCabalDataFiles :: FileTree } @@ -80,6 +81,7 @@ instance Default PureState where , stReferencePptx = emptyArchive , stReferenceODT = emptyArchive , stFiles = mempty + , stStdin = mempty , stUserDataFiles = mempty , stCabalDataFiles = mempty } @@ -193,6 +195,8 @@ instance PandocMonad PandocPure where Just bs -> return bs Nothing -> throwError $ PandocResourceNotFound $ T.pack fp + readStdinStrict = getsPureState stStdin + glob s = do FileTree ftmap <- getsPureState stFiles return $ filter (match (compile s)) $ M.keys ftmap diff --git a/src/Text/Pandoc/Class/Sandbox.hs b/src/Text/Pandoc/Class/Sandbox.hs new file mode 100644 index 000000000..8bc0f1e77 --- /dev/null +++ b/src/Text/Pandoc/Class/Sandbox.hs @@ -0,0 +1,50 @@ +{- | +Module : Text.Pandoc.Class.Sandbox +Copyright : Copyright (C) 2021 John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : John MacFarlane (<jgm@berkeley.edu>) +Stability : alpha +Portability : portable + +This module provides a way to run PandocMonad actions in a sandbox +(pure context, with no IO allowed and access only to designated files). +-} + +module Text.Pandoc.Class.Sandbox + ( sandbox ) +where + +import Control.Monad (foldM) +import Control.Monad.Except (throwError) +import Control.Monad.IO.Class (MonadIO (liftIO)) +import Text.Pandoc.Class.PandocMonad +import Text.Pandoc.Class.PandocPure +import Text.Pandoc.Class.CommonState (CommonState(..)) +import Text.Pandoc.Logging (messageVerbosity) + +-- | Lift a PandocPure action into any instance of PandocMonad. +-- The main computation is done purely, but CommonState is preserved +-- continuously, and warnings are emitted after the action completes. +-- The parameter is a list of FilePaths which will be added to the +-- ersatz file system and be available for reading. +sandbox :: (PandocMonad m, MonadIO m) => [FilePath] -> PandocPure a -> m a +sandbox files action = do + oldState <- getCommonState + tree <- liftIO $ foldM addToFileTree mempty files + case runPure (do putCommonState oldState + modifyPureState $ \ps -> ps{ stFiles = tree } + result <- action + st <- getCommonState + return (st, result)) of + Left e -> throwError e + Right (st, result) -> do + putCommonState st + let verbosity = stVerbosity st + -- emit warnings, since these are not printed in runPure + let newMessages = reverse $ take + (length (stLog st) - length (stLog oldState)) (stLog st) + mapM_ logOutput + (filter ((<= verbosity) . messageVerbosity) newMessages) + return result + diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 9dee8356b..f16ad2997 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -171,34 +171,34 @@ handleError (Left e) = exitCode = case e of PandocIOError{} -> 1 + PandocFailOnWarningError{} -> 3 + PandocAppError{} -> 4 + PandocTemplateError{} -> 5 + PandocOptionError{} -> 6 + PandocUnknownReaderError{} -> 21 + PandocUnknownWriterError{} -> 22 + PandocUnsupportedExtensionError{} -> 23 + PandocCiteprocError{} -> 24 + PandocBibliographyError{} -> 25 + PandocEpubSubdirectoryError{} -> 31 + PandocPDFError{} -> 43 + PandocXMLError{} -> 44 + PandocPDFProgramNotFoundError{} -> 47 PandocHttpError{} -> 61 PandocShouldNeverHappenError{} -> 62 PandocSomeError{} -> 63 PandocParseError{} -> 64 PandocParsecError{} -> 65 PandocMakePDFError{} -> 66 - PandocOptionError{} -> 6 PandocSyntaxMapError{} -> 67 - PandocFailOnWarningError{} -> 3 - PandocPDFProgramNotFoundError{} -> 47 - PandocPDFError{} -> 43 - PandocXMLError{} -> 44 PandocFilterError{} -> 83 PandocLuaError{} -> 84 - PandocCouldNotFindDataFileError{} -> 97 - PandocResourceNotFound{} -> 99 - PandocTemplateError{} -> 5 - PandocAppError{} -> 4 - PandocEpubSubdirectoryError{} -> 31 PandocMacroLoop{} -> 91 PandocUTF8DecodingError{} -> 92 PandocIpynbDecodingError{} -> 93 PandocUnsupportedCharsetError{} -> 94 - PandocUnknownReaderError{} -> 21 - PandocUnknownWriterError{} -> 22 - PandocUnsupportedExtensionError{} -> 23 - PandocCiteprocError{} -> 24 - PandocBibliographyError{} -> 25 + PandocCouldNotFindDataFileError{} -> 97 + PandocResourceNotFound{} -> 99 err :: Int -> Text -> IO a err exitCode msg = do diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 9c55d0a7a..33f615740 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -40,31 +40,8 @@ import Data.Typeable (Typeable) import GHC.Generics (Generic) import Safe (readMay) import Text.Parsec -import Data.Aeson.TH (deriveJSON, defaultOptions) - -newtype Extensions = Extensions Integer - deriving (Show, Read, Eq, Ord, Data, Typeable, Generic) - -instance Semigroup Extensions where - (Extensions a) <> (Extensions b) = Extensions (a .|. b) -instance Monoid Extensions where - mempty = Extensions 0 - mappend = (<>) - -extensionsFromList :: [Extension] -> Extensions -extensionsFromList = foldr enableExtension emptyExtensions - -emptyExtensions :: Extensions -emptyExtensions = Extensions 0 - -extensionEnabled :: Extension -> Extensions -> Bool -extensionEnabled x (Extensions exts) = testBit exts (fromEnum x) - -enableExtension :: Extension -> Extensions -> Extensions -enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x)) - -disableExtension :: Extension -> Extensions -> Extensions -disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x)) +import Data.Aeson.TH (deriveJSON) +import Data.Aeson -- | Individually selectable syntax extensions. data Extension = @@ -74,6 +51,7 @@ data Extension = | Ext_angle_brackets_escapable -- ^ Make < and > escapable | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers; -- presupposes Ext_auto_identifiers + | Ext_attributes -- ^ Generic attribute syntax | Ext_auto_identifiers -- ^ Automatic identifiers for headers | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks @@ -105,6 +83,7 @@ data Extension = -- header identifiers; presupposes -- Ext_auto_identifiers | Ext_grid_tables -- ^ Grid tables (pandoc, reST) + | Ext_gutenberg -- ^ Use Project Gutenberg conventions for plain | Ext_hard_line_breaks -- ^ All newlines become hard line breaks | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored @@ -138,9 +117,11 @@ data Extension = | Ext_raw_markdown -- ^ Parse markdown in ipynb as raw markdown | Ext_rebase_relative_paths -- ^ Rebase relative image and link paths, -- relative to directory of containing file + | Ext_short_subsuperscripts -- ^ sub-&superscripts w/o closing char (v~i) | Ext_shortcut_reference_links -- ^ Shortcut reference links | Ext_simple_tables -- ^ Pandoc-style simple tables | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes + | Ext_sourcepos -- ^ Include source position attributes | Ext_space_in_atx_header -- ^ Require space between # and header text | Ext_spaced_reference_links -- ^ Allow space between two parts of ref link | Ext_startnum -- ^ Make start number of ordered list significant @@ -156,11 +137,42 @@ data Extension = | Ext_xrefs_name -- ^ Use xrefs with names | Ext_xrefs_number -- ^ Use xrefs with numbers | Ext_yaml_metadata_block -- ^ YAML metadata block - | Ext_gutenberg -- ^ Use Project Gutenberg conventions for plain - | Ext_attributes -- ^ Generic attribute syntax - | Ext_sourcepos -- ^ Include source position attributes deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) +$(deriveJSON defaultOptions{ constructorTagModifier = drop 4 } ''Extension) + +newtype Extensions = Extensions Integer + deriving (Show, Read, Eq, Ord, Data, Typeable, Generic) + +instance Semigroup Extensions where + (Extensions a) <> (Extensions b) = Extensions (a .|. b) +instance Monoid Extensions where + mempty = Extensions 0 + mappend = (<>) + +instance FromJSON Extensions where + parseJSON = + return . foldr enableExtension emptyExtensions . fromJSON + +instance ToJSON Extensions where + toJSON exts = toJSON $ + [ext | ext <- [minBound..maxBound], extensionEnabled ext exts] + +extensionsFromList :: [Extension] -> Extensions +extensionsFromList = foldr enableExtension emptyExtensions + +emptyExtensions :: Extensions +emptyExtensions = Extensions 0 + +extensionEnabled :: Extension -> Extensions -> Bool +extensionEnabled x (Extensions exts) = testBit exts (fromEnum x) + +enableExtension :: Extension -> Extensions -> Extensions +enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x)) + +disableExtension :: Extension -> Extensions -> Extensions +disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x)) + -- | Extensions to be used with pandoc-flavored markdown. pandocExtensions :: Extensions pandocExtensions = extensionsFromList @@ -286,14 +298,9 @@ multimarkdownExtensions = extensionsFromList , Ext_auto_identifiers , Ext_mmd_header_identifiers , Ext_implicit_figures - -- Note: MMD's syntax for superscripts and subscripts - -- is a bit more permissive than pandoc's, allowing - -- e^2 and a~1 instead of e^2^ and a~1~, so even with - -- these options we don't have full support for MMD - -- superscripts and subscripts, but there's no reason - -- not to include these: - , Ext_superscript + , Ext_short_subsuperscripts , Ext_subscript + , Ext_superscript , Ext_backtick_code_blocks , Ext_spaced_reference_links -- So far only in dev version of mmd: @@ -357,6 +364,7 @@ getDefaultExtensions "gfm" = extensionsFromList , Ext_task_lists , Ext_emoji , Ext_yaml_metadata_block + , Ext_footnotes ] getDefaultExtensions "commonmark" = extensionsFromList [Ext_raw_html] @@ -424,6 +432,8 @@ getDefaultExtensions "jats_archiving" = getDefaultExtensions "jats" getDefaultExtensions "jats_publishing" = getDefaultExtensions "jats" getDefaultExtensions "jats_articleauthoring" = getDefaultExtensions "jats" getDefaultExtensions "opml" = pandocExtensions -- affects notes +getDefaultExtensions "markua" = extensionsFromList + [] getDefaultExtensions _ = extensionsFromList [Ext_auto_identifiers] @@ -464,6 +474,7 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_gutenberg , Ext_smart , Ext_literate_haskell + , Ext_short_subsuperscripts , Ext_rebase_relative_paths ] getAll "markdown_strict" = allMarkdownExtensions @@ -475,6 +486,7 @@ getAllExtensions f = universalExtensions <> getAll f [ Ext_raw_markdown ] getAll "docx" = autoIdExtensions <> extensionsFromList [ Ext_empty_paragraphs + , Ext_native_numbering , Ext_styles ] getAll "opendocument" = extensionsFromList @@ -619,5 +631,3 @@ parseFormatSpec = parse formatSpec "" '+' -> (ext : extsToEnable, extsToDisable) _ -> (extsToEnable, ext : extsToDisable) -$(deriveJSON defaultOptions ''Extension) -$(deriveJSON defaultOptions ''Extensions) diff --git a/src/Text/Pandoc/Filter.hs b/src/Text/Pandoc/Filter.hs index 1209ceeb7..84015ed92 100644 --- a/src/Text/Pandoc/Filter.hs +++ b/src/Text/Pandoc/Filter.hs @@ -19,10 +19,9 @@ module Text.Pandoc.Filter ) where import System.CPUTime (getCPUTime) -import Data.Aeson.TH (deriveJSON, defaultOptions) +import Data.Aeson import GHC.Generics (Generic) -import Text.Pandoc.Class.PandocIO (PandocIO) -import Text.Pandoc.Class.PandocMonad (report, getVerbosity) +import Text.Pandoc.Class (report, getVerbosity, PandocMonad) import Text.Pandoc.Definition (Pandoc) import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Logging @@ -30,7 +29,6 @@ import Text.Pandoc.Citeproc (processCitations) import qualified Text.Pandoc.Filter.JSON as JSONFilter import qualified Text.Pandoc.Filter.Lua as LuaFilter import qualified Text.Pandoc.Filter.Path as Path -import Data.YAML import qualified Data.Text as T import System.FilePath (takeExtension) import Control.Applicative ((<|>)) @@ -43,9 +41,9 @@ data Filter = LuaFilter FilePath | CiteprocFilter -- built-in citeproc deriving (Show, Generic) -instance FromYAML Filter where - parseYAML node = - (withMap "Filter" $ \m -> do +instance FromJSON Filter where + parseJSON node = + (withObject "Filter" $ \m -> do ty <- m .: "type" fp <- m .:? "path" let missingPath = fail $ "Expected 'path' for filter of type " ++ show ty @@ -56,7 +54,7 @@ instance FromYAML Filter where "json" -> filterWithPath JSONFilter fp _ -> fail $ "Unknown filter type " ++ show (ty :: T.Text)) node <|> - (withStr "Filter" $ \t -> do + (withText "Filter" $ \t -> do let fp = T.unpack t if fp == "citeproc" then return CiteprocFilter @@ -65,12 +63,20 @@ instance FromYAML Filter where ".lua" -> LuaFilter fp _ -> JSONFilter fp) node +instance ToJSON Filter where + toJSON CiteprocFilter = object [ "type" .= String "citeproc" ] + toJSON (LuaFilter fp) = object [ "type" .= String "lua", + "path" .= String (T.pack fp) ] + toJSON (JSONFilter fp) = object [ "type" .= String "json", + "path" .= String (T.pack fp) ] + -- | Modify the given document using a filter. -applyFilters :: ReaderOptions +applyFilters :: (PandocMonad m, MonadIO m) + => ReaderOptions -> [Filter] -> [String] -> Pandoc - -> PandocIO Pandoc + -> m Pandoc applyFilters ropts filters args d = do expandedFilters <- mapM expandFilterPath filters foldM applyFilter d expandedFilters @@ -92,9 +98,7 @@ applyFilters ropts filters args d = do toMilliseconds picoseconds = picoseconds `div` 1000000000 -- | Expand paths of filters, searching the data directory. -expandFilterPath :: Filter -> PandocIO Filter +expandFilterPath :: (PandocMonad m, MonadIO m) => Filter -> m Filter expandFilterPath (LuaFilter fp) = LuaFilter <$> Path.expandFilterPath fp expandFilterPath (JSONFilter fp) = JSONFilter <$> Path.expandFilterPath fp expandFilterPath CiteprocFilter = return CiteprocFilter - -$(deriveJSON defaultOptions ''Filter) diff --git a/src/Text/Pandoc/Filter/Lua.hs b/src/Text/Pandoc/Filter/Lua.hs index c238e53d9..4e264261b 100644 --- a/src/Text/Pandoc/Filter/Lua.hs +++ b/src/Text/Pandoc/Filter/Lua.hs @@ -14,7 +14,8 @@ module Text.Pandoc.Filter.Lua (apply) where import Control.Exception (throw) import Control.Monad ((>=>)) import qualified Data.Text as T -import Text.Pandoc.Class.PandocIO (PandocIO) +import Text.Pandoc.Class (PandocMonad) +import Control.Monad.Trans (MonadIO) import Text.Pandoc.Definition (Pandoc) import Text.Pandoc.Error (PandocError (PandocFilterError, PandocLuaError)) import Text.Pandoc.Lua (Global (..), runLua, runFilterFile, setGlobals) @@ -23,11 +24,12 @@ import Text.Pandoc.Options (ReaderOptions) -- | Run the Lua filter in @filterPath@ for a transformation to the -- target format (first element in args). Pandoc uses Lua init files to -- setup the Lua interpreter. -apply :: ReaderOptions +apply :: (PandocMonad m, MonadIO m) + => ReaderOptions -> [String] -> FilePath -> Pandoc - -> PandocIO Pandoc + -> m Pandoc apply ropts args fp doc = do let format = case args of (x:_) -> x @@ -39,7 +41,8 @@ apply ropts args fp doc = do ] runFilterFile fp doc -forceResult :: FilePath -> Either PandocError Pandoc -> PandocIO Pandoc +forceResult :: (PandocMonad m, MonadIO m) + => FilePath -> Either PandocError Pandoc -> m Pandoc forceResult fp eitherResult = case eitherResult of Right x -> return x Left err -> throw . PandocFilterError (T.pack fp) $ case err of diff --git a/src/Text/Pandoc/Image.hs b/src/Text/Pandoc/Image.hs index e0c938938..cbc26c981 100644 --- a/src/Text/Pandoc/Image.hs +++ b/src/Text/Pandoc/Image.hs @@ -11,24 +11,25 @@ Portability : portable Functions for converting images. -} module Text.Pandoc.Image ( svgToPng ) where -import Text.Pandoc.Options (WriterOptions(..)) import Text.Pandoc.Process (pipeProcess) import qualified Data.ByteString.Lazy as L import System.Exit import Data.Text (Text) import Text.Pandoc.Shared (tshow) import qualified Control.Exception as E +import Control.Monad.IO.Class (MonadIO(liftIO)) -- | Convert svg image to png. rsvg-convert -- is used and must be available on the path. -svgToPng :: WriterOptions +svgToPng :: MonadIO m + => Int -- ^ DPI -> L.ByteString -- ^ Input image as bytestring - -> IO (Either Text L.ByteString) -svgToPng opts bs = do - let dpi = show $ writerDpi opts - E.catch + -> m (Either Text L.ByteString) +svgToPng dpi bs = do + let dpi' = show dpi + liftIO $ E.catch (do (exit, out) <- pipeProcess Nothing "rsvg-convert" - ["-f","png","-a","--dpi-x",dpi,"--dpi-y",dpi] + ["-f","png","-a","--dpi-x",dpi',"--dpi-y",dpi'] bs return $ if exit == ExitSuccess then Right out diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 193b8b61c..2268f29f7 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -24,7 +24,6 @@ module Text.Pandoc.Logging ( ) where import Control.Monad (mzero) -import Data.YAML (withStr, FromYAML(..)) import Data.Aeson import Data.Aeson.Encode.Pretty (Config (..), defConfig, encodePretty', keyOrder) @@ -53,13 +52,6 @@ instance FromJSON Verbosity where _ -> mzero parseJSON _ = mzero -instance FromYAML Verbosity where - parseYAML = withStr "Verbosity" $ \case - "ERROR" -> return ERROR - "WARNING" -> return WARNING - "INFO" -> return INFO - _ -> mzero - data LogMessage = SkippedContent Text SourcePos | IgnoredElement Text @@ -76,6 +68,7 @@ data LogMessage = | InlineNotRendered Inline | BlockNotRendered Block | DocxParserWarning Text + | PowerpointTemplateWarning Text | IgnoredIOError Text | CouldNotFetchResource Text Text | CouldNotDetermineImageSize Text Text @@ -104,6 +97,7 @@ data LogMessage = | ATXHeadingInLHS Int Text | EnvironmentVariableUndefined Text | DuplicateAttribute Text Text + | NotUTF8Encoded FilePath deriving (Show, Eq, Data, Ord, Typeable, Generic) instance ToJSON LogMessage where @@ -174,6 +168,8 @@ instance ToJSON LogMessage where ["contents" .= toJSON bl] DocxParserWarning s -> ["contents" .= s] + PowerpointTemplateWarning s -> + ["contents" .= s] IgnoredIOError s -> ["contents" .= s] CouldNotFetchResource fp s -> @@ -241,6 +237,8 @@ instance ToJSON LogMessage where DuplicateAttribute attr val -> ["attribute" .= attr ,"value" .= val] + NotUTF8Encoded src -> + ["source" .= src] showPos :: SourcePos -> Text showPos pos = Text.pack $ sn ++ "line " ++ @@ -291,6 +289,8 @@ showLogMessage msg = "Not rendering " <> Text.pack (show bl) DocxParserWarning s -> "Docx parser warning: " <> s + PowerpointTemplateWarning s -> + "Powerpoint template warning: " <> s IgnoredIOError s -> "IO Error (ignored): " <> s CouldNotFetchResource fp s -> @@ -365,6 +365,9 @@ showLogMessage msg = "Undefined environment variable " <> var <> " in defaults file." DuplicateAttribute attr val -> "Ignoring duplicate attribute " <> attr <> "=" <> tshow val <> "." + NotUTF8Encoded src -> + Text.pack src <> + " is not UTF-8 encoded: falling back to latin1." messageVerbosity :: LogMessage -> Verbosity messageVerbosity msg = @@ -386,6 +389,7 @@ messageVerbosity msg = InlineNotRendered{} -> INFO BlockNotRendered{} -> INFO DocxParserWarning{} -> INFO + PowerpointTemplateWarning{} -> WARNING IgnoredIOError{} -> WARNING CouldNotFetchResource{} -> WARNING CouldNotDetermineImageSize{} -> WARNING @@ -414,3 +418,4 @@ messageVerbosity msg = ATXHeadingInLHS{} -> WARNING EnvironmentVariableUndefined{}-> WARNING DuplicateAttribute{} -> WARNING + NotUTF8Encoded{} -> WARNING diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index f0e9e076b..2aa84b7fa 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -20,4 +20,4 @@ module Text.Pandoc.Lua import Text.Pandoc.Lua.Filter (runFilterFile) import Text.Pandoc.Lua.Global (Global (..), setGlobals) import Text.Pandoc.Lua.Init (runLua) -import Text.Pandoc.Lua.Marshaling () +import Text.Pandoc.Lua.Orphans () diff --git a/src/Text/Pandoc/Lua/ErrorConversion.hs b/src/Text/Pandoc/Lua/ErrorConversion.hs index 4e6880722..5cb1bf825 100644 --- a/src/Text/Pandoc/Lua/ErrorConversion.hs +++ b/src/Text/Pandoc/Lua/ErrorConversion.hs @@ -1,6 +1,5 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} {-# LANGUAGE LambdaCase #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.ErrorConversion Copyright : © 2020-2021 Albert Krewinkel @@ -13,49 +12,37 @@ Define how Lua errors are converted into @'PandocError'@ Haskell exceptions, and /vice versa/. -} module Text.Pandoc.Lua.ErrorConversion - ( errorConversion + ( addContextToException ) where -import Foreign.Lua (Lua (..), NumResults) +import HsLua (LuaError, LuaE, top) +import HsLua.Marshalling (resultToEither, runPeek) +import HsLua.Class.Peekable (PeekError (..)) import Text.Pandoc.Error (PandocError (PandocLuaError)) -import Text.Pandoc.Lua.Marshaling.PandocError (pushPandocError, peekPandocError) +import Text.Pandoc.Lua.Marshal.PandocError (pushPandocError, peekPandocError) -import qualified Control.Monad.Catch as Catch import qualified Data.Text as T -import qualified Foreign.Lua as Lua - --- | Conversions between Lua errors and Haskell exceptions, assuming --- that all exceptions are of type @'PandocError'@. -errorConversion :: Lua.ErrorConversion -errorConversion = Lua.ErrorConversion - { Lua.addContextToException = addContextToException - , Lua.alternative = alternative - , Lua.errorToException = errorToException - , Lua.exceptionToError = exceptionToError - } - --- | Convert a Lua error, which must be at the top of the stack, into a --- @'PandocError'@, popping the value from the stack. -errorToException :: forall a . Lua.State -> IO a -errorToException l = Lua.unsafeRunWith l $ do - err <- peekPandocError Lua.stackTop - Lua.pop 1 - Catch.throwM err - --- | Try the first op -- if it doesn't succeed, run the second. -alternative :: forall a . Lua a -> Lua a -> Lua a -alternative x y = Catch.try x >>= \case - Left (_ :: PandocError) -> y - Right x' -> return x' - --- | Add more context to an error -addContextToException :: forall a . String -> Lua a -> Lua a -addContextToException ctx op = op `Catch.catch` \case - PandocLuaError msg -> Catch.throwM $ PandocLuaError (T.pack ctx <> msg) - e -> Catch.throwM e - --- | Catch a @'PandocError'@ exception and raise it as a Lua error. -exceptionToError :: Lua NumResults -> Lua NumResults -exceptionToError op = op `Catch.catch` \e -> do - pushPandocError e - Lua.error +import qualified HsLua as Lua + +addContextToException :: () +addContextToException = undefined + +-- | Retrieve a @'PandocError'@ from the Lua stack. +popPandocError :: LuaE PandocError PandocError +popPandocError = do + errResult <- runPeek $ peekPandocError top + case resultToEither errResult of + Right x -> return x + Left err -> return $ PandocLuaError (T.pack err) + +-- Ensure conversions between Lua errors and 'PandocError' exceptions +-- are possible. +instance LuaError PandocError where + popException = popPandocError + pushException = pushPandocError + luaException = PandocLuaError . T.pack + +instance PeekError PandocError where + messageFromException = \case + PandocLuaError m -> T.unpack m + err -> show err diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs index 01bf90efa..9910424d8 100644 --- a/src/Text/Pandoc/Lua/Filter.hs +++ b/src/Text/Pandoc/Lua/Filter.hs @@ -1,4 +1,7 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE IncoherentInstances #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.Filter Copyright : © 2012-2021 John MacFarlane, @@ -9,245 +12,36 @@ Stability : alpha Types and functions for running Lua filters. -} -module Text.Pandoc.Lua.Filter ( LuaFilterFunction - , LuaFilter - , runFilterFile - , walkInlines - , walkInlineLists - , walkBlocks - , walkBlockLists - , module Text.Pandoc.Lua.Walk - ) where -import Control.Applicative ((<|>)) -import Control.Monad (mplus, (>=>)) -import Control.Monad.Catch (finally, try) -import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf, - showConstr, toConstr, tyconUQname) -import Data.Foldable (foldrM) -import Data.List (foldl') -import Data.Map (Map) -import Data.Maybe (fromMaybe) -import Foreign.Lua (Lua, Peekable, Pushable, StackIndex) +module Text.Pandoc.Lua.Filter + ( runFilterFile + ) where +import Control.Monad ((>=>), (<$!>)) +import HsLua as Lua import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError) -import Text.Pandoc.Lua.Marshaling () -import Text.Pandoc.Lua.Marshaling.List (List (..)) -import Text.Pandoc.Lua.Walk (SingletonsList (..)) -import Text.Pandoc.Walk (Walkable (walkM)) +import Text.Pandoc.Lua.ErrorConversion () +import Text.Pandoc.Lua.Marshal.AST +import Text.Pandoc.Lua.Marshal.Filter -import qualified Data.Map.Strict as Map -import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil -- | Transform document using the filter defined in the given file. -runFilterFile :: FilePath -> Pandoc -> Lua Pandoc +runFilterFile :: FilePath -> Pandoc -> LuaE PandocError Pandoc runFilterFile filterPath doc = do - top <- Lua.gettop + oldtop <- gettop stat <- LuaUtil.dofileWithTraceback filterPath if stat /= Lua.OK - then Lua.throwTopMessage + then throwErrorAsException else do - newtop <- Lua.gettop + newtop <- gettop -- Use the returned filters, or the implicitly defined global -- filter if nothing was returned. - luaFilters <- if newtop - top >= 1 - then Lua.peek Lua.stackTop - else Lua.pushglobaltable *> fmap (:[]) Lua.popValue + luaFilters <- forcePeek $ + if newtop - oldtop >= 1 + then peekList peekFilter top + else (:[]) <$!> (liftLua pushglobaltable *> peekFilter top) + settop oldtop runAll luaFilters doc -runAll :: [LuaFilter] -> Pandoc -> Lua Pandoc -runAll = foldr ((>=>) . walkMWithLuaFilter) return - --- | Filter function stored in the registry -newtype LuaFilterFunction = LuaFilterFunction Lua.Reference - --- | Collection of filter functions (at most one function per element --- constructor) -newtype LuaFilter = LuaFilter (Map String LuaFilterFunction) - -instance Peekable LuaFilter where - peek idx = do - let constrs = listOfInlinesFilterName - : listOfBlocksFilterName - : metaFilterName - : pandocFilterNames - ++ blockElementNames - ++ inlineElementNames - let go constr acc = do - Lua.getfield idx constr - filterFn <- registerFilterFunction - return $ case filterFn of - Nothing -> acc - Just fn -> Map.insert constr fn acc - LuaFilter <$> foldrM go Map.empty constrs - --- | Register the function at the top of the stack as a filter function in the --- registry. -registerFilterFunction :: Lua (Maybe LuaFilterFunction) -registerFilterFunction = do - isFn <- Lua.isfunction Lua.stackTop - if isFn - then Just . LuaFilterFunction <$> Lua.ref Lua.registryindex - else Nothing <$ Lua.pop 1 - --- | Retrieve filter function from registry and push it to the top of the stack. -pushFilterFunction :: LuaFilterFunction -> Lua () -pushFilterFunction (LuaFilterFunction fnRef) = - Lua.getref Lua.registryindex fnRef - --- | Fetch either a list of elements from the stack. If there is a single --- element instead of a list, fetch that element as a singleton list. If the top --- of the stack is nil, return the default element that was passed to this --- function. If none of these apply, raise an error. -elementOrList :: Peekable a => a -> Lua [a] -elementOrList x = do - let topOfStack = Lua.stackTop - elementUnchanged <- Lua.isnil topOfStack - if elementUnchanged - then [x] <$ Lua.pop 1 - else do - mbres <- peekEither topOfStack - case mbres of - Right res -> [res] <$ Lua.pop 1 - Left _ -> Lua.peekList topOfStack `finally` Lua.pop 1 - --- | Pop and return a value from the stack; if the value at the top of --- the stack is @nil@, return the fallback element. -popOption :: Peekable a => a -> Lua a -popOption fallback = fromMaybe fallback . Lua.fromOptional <$> Lua.popValue - --- | Apply filter on a sequence of AST elements. Both lists and single --- value are accepted as filter function return values. -runOnSequence :: (Data a, Peekable a, Pushable a) - => LuaFilter -> SingletonsList a -> Lua (SingletonsList a) -runOnSequence (LuaFilter fnMap) (SingletonsList xs) = - SingletonsList <$> mconcatMapM tryFilter xs - where - tryFilter :: (Data a, Peekable a, Pushable a) => a -> Lua [a] - tryFilter x = - let filterFnName = showConstr (toConstr x) - catchAllName = tyconUQname $ dataTypeName (dataTypeOf x) - in case Map.lookup filterFnName fnMap <|> Map.lookup catchAllName fnMap of - Just fn -> runFilterFunction fn x *> elementOrList x - Nothing -> return [x] - --- | Try filtering the given value without type error corrections on --- the return value. -runOnValue :: (Data a, Peekable a, Pushable a) - => String -> LuaFilter -> a -> Lua a -runOnValue filterFnName (LuaFilter fnMap) x = - case Map.lookup filterFnName fnMap of - Just fn -> runFilterFunction fn x *> popOption x - Nothing -> return x - --- | Push a value to the stack via a lua filter function. The filter function is --- called with given element as argument and is expected to return an element. --- Alternatively, the function can return nothing or nil, in which case the --- element is left unchanged. -runFilterFunction :: Pushable a => LuaFilterFunction -> a -> Lua () -runFilterFunction lf x = do - pushFilterFunction lf - Lua.push x - LuaUtil.callWithTraceback 1 1 - -walkMWithLuaFilter :: LuaFilter -> Pandoc -> Lua Pandoc -walkMWithLuaFilter f = - walkInlines f - >=> walkInlineLists f - >=> walkBlocks f - >=> walkBlockLists f - >=> walkMeta f - >=> walkPandoc f - -mconcatMapM :: (Monad m) => (a -> m [a]) -> [a] -> m [a] -mconcatMapM f = fmap mconcat . mapM f - -hasOneOf :: LuaFilter -> [String] -> Bool -hasOneOf (LuaFilter fnMap) = any (`Map.member` fnMap) - -contains :: LuaFilter -> String -> Bool -contains (LuaFilter fnMap) = (`Map.member` fnMap) - -walkInlines :: Walkable (SingletonsList Inline) a => LuaFilter -> a -> Lua a -walkInlines lf = - let f :: SingletonsList Inline -> Lua (SingletonsList Inline) - f = runOnSequence lf - in if lf `hasOneOf` inlineElementNames - then walkM f - else return - -walkInlineLists :: Walkable (List Inline) a => LuaFilter -> a -> Lua a -walkInlineLists lf = - let f :: List Inline -> Lua (List Inline) - f = runOnValue listOfInlinesFilterName lf - in if lf `contains` listOfInlinesFilterName - then walkM f - else return - -walkBlocks :: Walkable (SingletonsList Block) a => LuaFilter -> a -> Lua a -walkBlocks lf = - let f :: SingletonsList Block -> Lua (SingletonsList Block) - f = runOnSequence lf - in if lf `hasOneOf` blockElementNames - then walkM f - else return - -walkBlockLists :: Walkable (List Block) a => LuaFilter -> a -> Lua a -walkBlockLists lf = - let f :: List Block -> Lua (List Block) - f = runOnValue listOfBlocksFilterName lf - in if lf `contains` listOfBlocksFilterName - then walkM f - else return - -walkMeta :: LuaFilter -> Pandoc -> Lua Pandoc -walkMeta lf (Pandoc m bs) = do - m' <- runOnValue "Meta" lf m - return $ Pandoc m' bs - -walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc -walkPandoc (LuaFilter fnMap) = - case foldl' mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of - Just fn -> \x -> runFilterFunction fn x *> singleElement x - Nothing -> return - -constructorsFor :: DataType -> [String] -constructorsFor x = map show (dataTypeConstrs x) - -inlineElementNames :: [String] -inlineElementNames = "Inline" : constructorsFor (dataTypeOf (Str mempty)) - -blockElementNames :: [String] -blockElementNames = "Block" : constructorsFor (dataTypeOf (Para [])) - -listOfInlinesFilterName :: String -listOfInlinesFilterName = "Inlines" - -listOfBlocksFilterName :: String -listOfBlocksFilterName = "Blocks" - -metaFilterName :: String -metaFilterName = "Meta" - -pandocFilterNames :: [String] -pandocFilterNames = ["Pandoc", "Doc"] - -singleElement :: Peekable a => a -> Lua a -singleElement x = do - elementUnchanged <- Lua.isnil (-1) - if elementUnchanged - then x <$ Lua.pop 1 - else do - mbres <- peekEither (-1) - case mbres of - Right res -> res <$ Lua.pop 1 - Left err -> do - Lua.pop 1 - Lua.throwMessage - ("Error while trying to get a filter's return " <> - "value from Lua stack.\n" <> show err) - --- | Try to convert the value at the given stack index to a Haskell value. --- Returns @Left@ with an error message on failure. -peekEither :: Peekable a => StackIndex -> Lua (Either PandocError a) -peekEither = try . Lua.peek +runAll :: [Filter] -> Pandoc -> LuaE PandocError Pandoc +runAll = foldr ((>=>) . applyFully) return diff --git a/src/Text/Pandoc/Lua/Global.hs b/src/Text/Pandoc/Lua/Global.hs index 29b788f04..cf82890c6 100644 --- a/src/Text/Pandoc/Lua/Global.hs +++ b/src/Text/Pandoc/Lua/Global.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua Copyright : Copyright © 2017-2021 Albert Krewinkel @@ -14,19 +14,19 @@ module Text.Pandoc.Lua.Global , setGlobals ) where -import Data.Data (Data) -import Foreign.Lua (Lua, Peekable, Pushable) -import Foreign.Lua.Userdata ( ensureUserdataMetatable, pushAnyWithMetatable - , metatableName) +import HsLua as Lua +import HsLua.Module.Version (pushVersion) import Paths_pandoc (version) import Text.Pandoc.Class.CommonState (CommonState) -import Text.Pandoc.Definition (Pandoc (Pandoc), pandocTypesVersion) -import Text.Pandoc.Lua.Marshaling () -import Text.Pandoc.Lua.Util (addFunction) +import Text.Pandoc.Definition (Pandoc, pandocTypesVersion) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Lua.Marshal.CommonState (pushCommonState) +import Text.Pandoc.Lua.Marshal.Pandoc (pushPandoc) +import Text.Pandoc.Lua.Marshal.ReaderOptions (pushReaderOptionsReadonly) +import Text.Pandoc.Lua.Orphans () import Text.Pandoc.Options (ReaderOptions) import qualified Data.Text as Text -import qualified Foreign.Lua as Lua -- | Permissible global Lua variables. data Global = @@ -40,50 +40,30 @@ data Global = -- Cannot derive instance of Data because of CommonState -- | Set all given globals. -setGlobals :: [Global] -> Lua () +setGlobals :: [Global] -> LuaE PandocError () setGlobals = mapM_ setGlobal -setGlobal :: Global -> Lua () +setGlobal :: Global -> LuaE PandocError () setGlobal global = case global of -- This could be simplified if Global was an instance of Data. FORMAT format -> do Lua.push format Lua.setglobal "FORMAT" PANDOC_API_VERSION -> do - Lua.push pandocTypesVersion + pushVersion pandocTypesVersion Lua.setglobal "PANDOC_API_VERSION" PANDOC_DOCUMENT doc -> do - Lua.push (LazyPandoc doc) + pushPandoc doc Lua.setglobal "PANDOC_DOCUMENT" PANDOC_READER_OPTIONS ropts -> do - Lua.push ropts + pushReaderOptionsReadonly ropts Lua.setglobal "PANDOC_READER_OPTIONS" PANDOC_SCRIPT_FILE filePath -> do Lua.push filePath Lua.setglobal "PANDOC_SCRIPT_FILE" PANDOC_STATE commonState -> do - Lua.push commonState + pushCommonState commonState Lua.setglobal "PANDOC_STATE" PANDOC_VERSION -> do - Lua.push version + pushVersion version Lua.setglobal "PANDOC_VERSION" - --- | Readonly and lazy pandoc objects. -newtype LazyPandoc = LazyPandoc Pandoc - deriving (Data) - -instance Pushable LazyPandoc where - push lazyDoc = pushAnyWithMetatable pushPandocMetatable lazyDoc - where - pushPandocMetatable = ensureUserdataMetatable (metatableName lazyDoc) $ - addFunction "__index" indexLazyPandoc - -instance Peekable LazyPandoc where - peek = Lua.peekAny - -indexLazyPandoc :: LazyPandoc -> String -> Lua Lua.NumResults -indexLazyPandoc (LazyPandoc (Pandoc meta blks)) field = 1 <$ - case field of - "blocks" -> Lua.push blks - "meta" -> Lua.push meta - _ -> Lua.pushnil diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs index baa6f0295..835da1fc9 100644 --- a/src/Text/Pandoc/Lua/Init.hs +++ b/src/Text/Pandoc/Lua/Init.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua Copyright : Copyright © 2017-2021 Albert Krewinkel @@ -12,25 +14,24 @@ module Text.Pandoc.Lua.Init ( runLua ) where -import Control.Monad (when) -import Control.Monad.Catch (try) +import Control.Monad (forM, forM_, when) +import Control.Monad.Catch (throwM, try) import Control.Monad.Trans (MonadIO (..)) -import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr) -import Foreign.Lua (Lua) +import Data.Maybe (catMaybes) +import HsLua as Lua hiding (status, try) import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8) -import Text.Pandoc.Class.PandocMonad (readDataFile) -import Text.Pandoc.Class.PandocIO (PandocIO) -import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Class.PandocMonad (PandocMonad, readDataFile) +import Text.Pandoc.Error (PandocError (PandocLuaError)) import Text.Pandoc.Lua.Packages (installPandocPackageSearcher) import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, runPandocLua) -import Text.Pandoc.Lua.Util (throwTopMessageAsError') -import qualified Foreign.Lua as Lua -import qualified Text.Pandoc.Definition as Pandoc +import qualified Data.Text as T +import qualified Lua.LPeg as LPeg import qualified Text.Pandoc.Lua.Module.Pandoc as ModulePandoc -- | Run the lua interpreter, using pandoc's default way of environment -- initialization. -runLua :: Lua a -> PandocIO (Either PandocError a) +runLua :: (PandocMonad m, MonadIO m) + => LuaE PandocError a -> m (Either PandocError a) runLua luaOp = do enc <- liftIO $ getForeignEncoding <* setForeignEncoding utf8 res <- runPandocLua . try $ do @@ -39,12 +40,27 @@ runLua luaOp = do liftIO $ setForeignEncoding enc return res +-- | Modules that are loaded at startup and assigned to fields in the +-- pandoc module. +loadedModules :: [(Name, Name)] +loadedModules = + [ ("pandoc.List", "List") + , ("pandoc.mediabag", "mediabag") + , ("pandoc.path", "path") + , ("pandoc.system", "system") + , ("pandoc.types", "types") + , ("pandoc.utils", "utils") + , ("text", "text") + ] + -- | Initialize the lua state with all required values initLuaState :: PandocLua () initLuaState = do liftPandocLua Lua.openlibs installPandocPackageSearcher initPandocModule + installLpegSearcher + setGlobalModules loadInitScript "init.lua" where initPandocModule :: PandocLua () @@ -53,12 +69,16 @@ initLuaState = do ModulePandoc.pushModule -- register as loaded module liftPandocLua $ do - Lua.pushvalue Lua.stackTop - Lua.getfield Lua.registryindex Lua.loadedTableRegistryField - Lua.setfield (Lua.nthFromTop 2) "pandoc" - Lua.pop 1 - -- copy constructors into registry - putConstructorsInRegistry + Lua.getfield Lua.registryindex Lua.loaded + Lua.pushvalue (Lua.nth 2) + Lua.setfield (Lua.nth 2) "pandoc" + Lua.pop 1 -- remove LOADED table + -- load modules and add them to the `pandoc` module table. + liftPandocLua $ forM_ loadedModules $ \(pkgname, fieldname) -> do + Lua.getglobal "require" + Lua.pushName pkgname + Lua.call 1 1 + Lua.setfield (nth 2) fieldname -- assign module to global variable liftPandocLua $ Lua.setglobal "pandoc" @@ -66,38 +86,54 @@ initLuaState = do loadInitScript scriptFile = do script <- readDataFile scriptFile status <- liftPandocLua $ Lua.dostring script - when (status /= Lua.OK) . liftPandocLua $ - throwTopMessageAsError' - (("Couldn't load '" ++ scriptFile ++ "'.\n") ++) + when (status /= Lua.OK) . liftPandocLua $ do + err <- popException + let prefix = "Couldn't load '" <> T.pack scriptFile <> "':\n" + throwM . PandocLuaError . (prefix <>) $ case err of + PandocLuaError msg -> msg + _ -> T.pack $ show err + setGlobalModules :: PandocLua () + setGlobalModules = liftPandocLua $ do + let globalModules = + [ ("lpeg", LPeg.luaopen_lpeg_ptr) -- must be loaded first + , ("re", LPeg.luaopen_re_ptr) -- re depends on lpeg + ] + loadedBuiltInModules <- fmap catMaybes . forM globalModules $ + \(pkgname, luaopen) -> do + Lua.pushcfunction luaopen + usedBuiltIn <- Lua.pcall 0 1 Nothing >>= \case + OK -> do -- all good, loading succeeded + -- register as loaded module so later modules can rely on this + Lua.getfield Lua.registryindex Lua.loaded + Lua.pushvalue (Lua.nth 2) + Lua.setfield (Lua.nth 2) pkgname + Lua.pop 1 -- pop _LOADED + return True + _ -> do -- built-in library failed, load system lib + Lua.pop 1 -- ignore error message + -- Try loading via the normal package loading mechanism. + Lua.getglobal "require" + Lua.pushName pkgname + Lua.call 1 1 -- Throws an exception if loading failed again! + return False --- | AST elements are marshaled via normal constructor functions in the --- @pandoc@ module. However, accessing Lua globals from Haskell is --- expensive (due to error handling). Accessing the Lua registry is much --- cheaper, which is why the constructor functions are copied into the --- Lua registry and called from there. --- --- This function expects the @pandoc@ module to be at the top of the --- stack. -putConstructorsInRegistry :: PandocLua () -putConstructorsInRegistry = liftPandocLua $ do - constrsToReg $ Pandoc.Pandoc mempty mempty - constrsToReg $ Pandoc.Str mempty - constrsToReg $ Pandoc.Para mempty - constrsToReg $ Pandoc.Meta mempty - constrsToReg $ Pandoc.MetaList mempty - constrsToReg $ Pandoc.Citation mempty mempty mempty Pandoc.AuthorInText 0 0 - putInReg "Attr" -- used for Attr type alias - putInReg "ListAttributes" -- used for ListAttributes type alias - putInReg "List" -- pandoc.List - putInReg "SimpleTable" -- helper for backward-compatible table handling - where - constrsToReg :: Data a => a -> Lua () - constrsToReg = mapM_ (putInReg . showConstr) . dataTypeConstrs . dataTypeOf + -- Module on top of stack. Register as global + Lua.setglobal pkgname + return $ if usedBuiltIn then Just pkgname else Nothing + + -- Remove module entry from _LOADED table in registry if we used a + -- built-in library. This ensures that later calls to @require@ will + -- prefer the shared library, if any. + forM_ loadedBuiltInModules $ \pkgname -> do + Lua.getfield Lua.registryindex Lua.loaded + Lua.pushnil + Lua.setfield (Lua.nth 2) pkgname + Lua.pop 1 -- registry - putInReg :: String -> Lua () - putInReg name = do - Lua.push ("pandoc." ++ name) -- name in registry - Lua.push name -- in pandoc module - Lua.rawget (Lua.nthFromTop 3) - Lua.rawset Lua.registryindex + installLpegSearcher :: PandocLua () + installLpegSearcher = liftPandocLua $ do + Lua.getglobal' "package.searchers" + Lua.pushHaskellFunction $ Lua.state >>= liftIO . LPeg.lpeg_searcher + Lua.rawseti (Lua.nth 2) . (+1) . fromIntegral =<< Lua.rawlen (Lua.nth 2) + Lua.pop 1 -- remove 'package.searchers' from stack diff --git a/src/Text/Pandoc/Lua/Marshal/CommonState.hs b/src/Text/Pandoc/Lua/Marshal/CommonState.hs new file mode 100644 index 000000000..a8c0e28d2 --- /dev/null +++ b/src/Text/Pandoc/Lua/Marshal/CommonState.hs @@ -0,0 +1,70 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Lua.Marshal.CommonState + Copyright : © 2012-2021 John MacFarlane + © 2017-2021 Albert Krewinkel + License : GNU GPL, version 2 or above + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +Instances to marshal (push) and unmarshal (peek) the common state. +-} +module Text.Pandoc.Lua.Marshal.CommonState + ( typeCommonState + , peekCommonState + , pushCommonState + ) where + +import HsLua.Core +import HsLua.Marshalling +import HsLua.Packaging +import Text.Pandoc.Class (CommonState (..)) +import Text.Pandoc.Logging (LogMessage, showLogMessage) +import Text.Pandoc.Lua.Marshal.List (pushPandocList) + +-- | Lua type used for the @CommonState@ object. +typeCommonState :: LuaError e => DocumentedType e CommonState +typeCommonState = deftype "pandoc CommonState" [] + [ readonly "input_files" "input files passed to pandoc" + (pushPandocList pushString, stInputFiles) + + , readonly "output_file" "the file to which pandoc will write" + (maybe pushnil pushString, stOutputFile) + + , readonly "log" "list of log messages" + (pushPandocList (pushUD typeLogMessage), stLog) + + , readonly "request_headers" "headers to add for HTTP requests" + (pushPandocList (pushPair pushText pushText), stRequestHeaders) + + , readonly "resource_path" + "path to search for resources like included images" + (pushPandocList pushString, stResourcePath) + + , readonly "source_url" "absolute URL + dir of 1st source file" + (maybe pushnil pushText, stSourceURL) + + , readonly "user_data_dir" "directory to search for data files" + (maybe pushnil pushString, stUserDataDir) + + , readonly "trace" "controls whether tracing messages are issued" + (pushBool, stTrace) + + , readonly "verbosity" "verbosity level" + (pushString . show, stVerbosity) + ] + +peekCommonState :: LuaError e => Peeker e CommonState +peekCommonState = peekUD typeCommonState + +pushCommonState :: LuaError e => Pusher e CommonState +pushCommonState = pushUD typeCommonState + +typeLogMessage :: LuaError e => DocumentedType e LogMessage +typeLogMessage = deftype "pandoc LogMessage" + [ operation Index $ defun "__tostring" + ### liftPure showLogMessage + <#> udparam typeLogMessage "msg" "object" + =#> functionResult pushText "string" "stringified log message" + ] + mempty -- no members diff --git a/src/Text/Pandoc/Lua/Marshaling/Context.hs b/src/Text/Pandoc/Lua/Marshal/Context.hs index 606bdcfb2..17af936e1 100644 --- a/src/Text/Pandoc/Lua/Marshaling/Context.hs +++ b/src/Text/Pandoc/Lua/Marshal/Context.hs @@ -10,10 +10,10 @@ Marshaling instance for doctemplates Context and its components. -} -module Text.Pandoc.Lua.Marshaling.Context () where +module Text.Pandoc.Lua.Marshal.Context () where -import qualified Foreign.Lua as Lua -import Foreign.Lua (Pushable) +import qualified HsLua as Lua +import HsLua (Pushable) import Text.DocTemplates (Context(..), Val(..), TemplateTarget) import Text.DocLayout (render) diff --git a/src/Text/Pandoc/Lua/Marshal/PandocError.hs b/src/Text/Pandoc/Lua/Marshal/PandocError.hs new file mode 100644 index 000000000..d1c0ad4f4 --- /dev/null +++ b/src/Text/Pandoc/Lua/Marshal/PandocError.hs @@ -0,0 +1,51 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{- | + Module : Text.Pandoc.Lua.Marshal.PandocError + Copyright : © 2020-2021 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +Marshal of @'PandocError'@ values. +-} +module Text.Pandoc.Lua.Marshal.PandocError + ( peekPandocError + , pushPandocError + , typePandocError + ) + where + +import HsLua.Core (LuaError) +import HsLua.Marshalling (Peeker, Pusher, pushString, liftLua) +import HsLua.Packaging +import Text.Pandoc.Error (PandocError (PandocLuaError)) + +import qualified HsLua as Lua +import qualified Text.Pandoc.UTF8 as UTF8 + +-- | Lua userdata type definition for PandocError. +typePandocError :: LuaError e => DocumentedType e PandocError +typePandocError = deftype "PandocError" + [ operation Tostring $ defun "__tostring" + ### liftPure (show @PandocError) + <#> udparam typePandocError "obj" "PandocError object" + =#> functionResult pushString "string" "string representation of error." + ] + mempty -- no members + +-- | Peek a @'PandocError'@ element to the Lua stack. +pushPandocError :: LuaError e => Pusher e PandocError +pushPandocError = pushUD typePandocError + +-- | Retrieve a @'PandocError'@ from the Lua stack. +peekPandocError :: LuaError e => Peeker e PandocError +peekPandocError idx = Lua.retrieving "PandocError" $ + liftLua (Lua.ltype idx) >>= \case + Lua.TypeUserdata -> peekUD typePandocError idx + _ -> do + msg <- liftLua $ Lua.state >>= \l -> Lua.liftIO (Lua.popErrorMessage l) + return $ PandocLuaError (UTF8.toText msg) diff --git a/src/Text/Pandoc/Lua/Marshal/ReaderOptions.hs b/src/Text/Pandoc/Lua/Marshal/ReaderOptions.hs new file mode 100644 index 000000000..c20770dba --- /dev/null +++ b/src/Text/Pandoc/Lua/Marshal/ReaderOptions.hs @@ -0,0 +1,133 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} +{- | + Module : Text.Pandoc.Lua.Marshaling.ReaderOptions + Copyright : © 2012-2021 John MacFarlane + © 2017-2021 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +Marshaling instance for ReaderOptions and its components. +-} +module Text.Pandoc.Lua.Marshal.ReaderOptions + ( peekReaderOptions + , pushReaderOptions + , pushReaderOptionsReadonly + ) where + +import Data.Default (def) +import HsLua as Lua +import Text.Pandoc.Lua.Marshal.List (pushPandocList) +import Text.Pandoc.Options (ReaderOptions (..)) + +-- +-- Reader Options +-- + +-- | Retrieve a ReaderOptions value, either from a normal ReaderOptions +-- value, from a read-only object, or from a table with the same +-- keys as a ReaderOptions object. +peekReaderOptions :: LuaError e => Peeker e ReaderOptions +peekReaderOptions = retrieving "ReaderOptions" . \idx -> + liftLua (ltype idx) >>= \case + TypeUserdata -> choice [ peekUD typeReaderOptions + , peekUD typeReaderOptionsReadonly + ] + idx + TypeTable -> peekReaderOptionsTable idx + _ -> failPeek =<< + typeMismatchMessage "ReaderOptions userdata or table" idx + +-- | Pushes a ReaderOptions value as userdata object. +pushReaderOptions :: LuaError e => Pusher e ReaderOptions +pushReaderOptions = pushUD typeReaderOptions + +-- | Pushes a ReaderOptions object, but makes it read-only. +pushReaderOptionsReadonly :: LuaError e => Pusher e ReaderOptions +pushReaderOptionsReadonly = pushUD typeReaderOptionsReadonly + +-- | ReaderOptions object type for read-only values. +typeReaderOptionsReadonly :: LuaError e => DocumentedType e ReaderOptions +typeReaderOptionsReadonly = deftype "ReaderOptions (read-only)" + [ operation Tostring $ lambda + ### liftPure show + <#> udparam typeReaderOptions "opts" "options to print in native format" + =#> functionResult pushString "string" "Haskell representation" + , operation Newindex $ lambda + ### (failLua "This ReaderOptions value is read-only.") + =?> "Throws an error when called, i.e., an assignment is made." + ] + readerOptionsMembers + +-- | 'ReaderOptions' object type. +typeReaderOptions :: LuaError e => DocumentedType e ReaderOptions +typeReaderOptions = deftype "ReaderOptions" + [ operation Tostring $ lambda + ### liftPure show + <#> udparam typeReaderOptions "opts" "options to print in native format" + =#> functionResult pushString "string" "Haskell representation" + ] + readerOptionsMembers + +-- | Member properties of 'ReaderOptions' Lua values. +readerOptionsMembers :: LuaError e + => [Member e (DocumentedFunction e) ReaderOptions] +readerOptionsMembers = + [ property "abbreviations" "" + (pushSet pushText, readerAbbreviations) + (peekSet peekText, \opts x -> opts{ readerAbbreviations = x }) + , property "columns" "" + (pushIntegral, readerColumns) + (peekIntegral, \opts x -> opts{ readerColumns = x }) + , property "default_image_extension" "" + (pushText, readerDefaultImageExtension) + (peekText, \opts x -> opts{ readerDefaultImageExtension = x }) + , property "extensions" "" + (pushString . show, readerExtensions) + (peekRead, \opts x -> opts{ readerExtensions = x }) + , property "indented_code_classes" "" + (pushPandocList pushText, readerIndentedCodeClasses) + (peekList peekText, \opts x -> opts{ readerIndentedCodeClasses = x }) + , property "strip_comments" "" + (pushBool, readerStripComments) + (peekBool, \opts x -> opts{ readerStripComments = x }) + , property "standalone" "" + (pushBool, readerStandalone) + (peekBool, \opts x -> opts{ readerStandalone = x }) + , property "tab_stop" "" + (pushIntegral, readerTabStop) + (peekIntegral, \opts x -> opts{ readerTabStop = x }) + , property "track_changes" "" + (pushString . show, readerTrackChanges) + (peekRead, \opts x -> opts{ readerTrackChanges = x }) + ] + +-- | Retrieves a 'ReaderOptions' object from a table on the stack, using +-- the default values for all missing fields. +-- +-- Internally, this pushes the default reader options, sets each +-- key/value pair of the table in the userdata value, then retrieves the +-- object again. This will update all fields and complain about unknown +-- keys. +peekReaderOptionsTable :: LuaError e => Peeker e ReaderOptions +peekReaderOptionsTable idx = retrieving "ReaderOptions (table)" $ do + liftLua $ do + absidx <- absindex idx + pushUD typeReaderOptions def + let setFields = do + next absidx >>= \case + False -> return () -- all fields were copied + True -> do + pushvalue (nth 2) *> insert (nth 2) + settable (nth 4) -- set in userdata object + setFields + pushnil -- first key + setFields + peekUD typeReaderOptions top + +instance Pushable ReaderOptions where + push = pushReaderOptions diff --git a/src/Text/Pandoc/Lua/Marshal/Reference.hs b/src/Text/Pandoc/Lua/Marshal/Reference.hs new file mode 100644 index 000000000..ee297484e --- /dev/null +++ b/src/Text/Pandoc/Lua/Marshal/Reference.hs @@ -0,0 +1,107 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} +{- | + Module : Text.Pandoc.Lua.Marshaling.ReaderOptions + Copyright : © 2012-2021 John MacFarlane + © 2017-2021 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +Marshal citeproc 'Reference' values. +-} +module Text.Pandoc.Lua.Marshal.Reference + ( pushReference + ) where + +import Citeproc.Types + ( Date (..), DateParts (..), ItemId (..), Name (..), Reference (..) + , Val (..), Variable, fromVariable + ) +import Control.Monad (forM_) +import HsLua hiding (Name, Reference, pushName, peekName) +import Text.Pandoc.Builder (Inlines, toList) +import Text.Pandoc.Lua.Marshal.Inline (pushInlines) +import Text.Pandoc.Lua.Marshal.List (pushPandocList) + +import qualified Data.Map as Map +import qualified HsLua + +-- | Pushes a ReaderOptions value as userdata object. +pushReference :: LuaError e => Pusher e (Reference Inlines) +pushReference reference = do + pushAsTable [ ("id", pushItemId . referenceId) + , ("type", pushText . referenceType) + ] + reference + forM_ (Map.toList $ referenceVariables reference) $ \(var, val) -> do + pushVariable var + pushVal val + rawset (nth 3) + +-- | Pushes an 'ItemId' as a string. +pushItemId :: Pusher e ItemId +pushItemId = pushText . unItemId + +-- | Pushes a person's 'Name' as a table. +pushName :: LuaError e => Pusher e Name +pushName = pushAsTable + [ ("family" , pushTextOrNil . nameFamily) + , ("given" , pushTextOrNil . nameGiven) + , ("dropping-particle" , pushTextOrNil . nameDroppingParticle) + , ("non-dropping-particle" , pushTextOrNil . nameNonDroppingParticle) + , ("suffix" , pushTextOrNil . nameSuffix) + , ("literal" , pushTextOrNil . nameLiteral) + , ("comma-suffix" , pushBoolOrNil . nameCommaSuffix) + , ("static-ordering" , pushBoolOrNil . nameStaticOrdering) + ] + where + pushTextOrNil = \case + Nothing -> pushnil + Just xs -> pushText xs + +-- | Pushes a boolean, but uses @nil@ instead of @false@; table fields +-- are not set unless the value is true. +pushBoolOrNil :: Pusher e Bool +pushBoolOrNil = \case + False -> pushnil + True -> pushBool True + +-- | Pushes a 'Variable' as string. +pushVariable :: Pusher e Variable +pushVariable = pushText . fromVariable + +-- | Pushes a 'Val', i.e., a variable value. +pushVal :: LuaError e => Pusher e (Val Inlines) +pushVal = \case + TextVal t -> pushText t + FancyVal inlns -> pushInlines $ toList inlns + NumVal i -> pushIntegral i + NamesVal names -> pushPandocList pushName names + DateVal date -> pushDate date + +-- | Pushes a 'Date' as table. +pushDate :: LuaError e => Pusher e Date +pushDate = pushAsTable + [ ("date-parts", pushPandocList pushDateParts . dateParts) + , ("circa", pushBoolOrNil . dateCirca) + , ("season", maybe pushnil pushIntegral . dateSeason) + , ("literal", maybe pushnil pushText . dateLiteral) + ] + where + -- date parts are lists of Int values + pushDateParts (DateParts dp) = pushPandocList pushIntegral dp + +-- | Helper funtion to push an object as a table. +pushAsTable :: LuaError e + => [(HsLua.Name, a -> LuaE e ())] + -> a -> LuaE e () +pushAsTable props obj = do + createtable 0 (length props) + forM_ props $ \(name, pushValue) -> do + HsLua.pushName name + pushValue obj + rawset (nth 3) diff --git a/src/Text/Pandoc/Lua/Marshal/Sources.hs b/src/Text/Pandoc/Lua/Marshal/Sources.hs new file mode 100644 index 000000000..7b5262ab5 --- /dev/null +++ b/src/Text/Pandoc/Lua/Marshal/Sources.hs @@ -0,0 +1,46 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} +{- | +Module : Text.Pandoc.Lua.Marshaling.Sources +Copyright : © 2021 Albert Krewinkel +License : GNU GPL, version 2 or above +Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +Marshal 'Sources'. +-} +module Text.Pandoc.Lua.Marshal.Sources + ( pushSources + ) where + +import Data.Text (Text) +import HsLua as Lua +import Text.Pandoc.Lua.Marshal.List (newListMetatable) +import Text.Pandoc.Sources (Sources (..)) +import Text.Parsec (SourcePos, sourceName) + +-- | Pushes the 'Sources' as a list of lazy Lua objects. +pushSources :: LuaError e => Pusher e Sources +pushSources (Sources srcs) = do + pushList (pushUD typeSource) srcs + newListMetatable "pandoc Sources" $ do + pushName "__tostring" + pushHaskellFunction $ do + sources <- forcePeek $ peekList (peekUD typeSource) (nthBottom 1) + pushText . mconcat $ map snd sources + return 1 + rawset (nth 3) + setmetatable (nth 2) + +-- | Source object type. +typeSource :: LuaError e => DocumentedType e (SourcePos, Text) +typeSource = deftype "pandoc input source" + [ operation Tostring $ lambda + ### liftPure snd + <#> udparam typeSource "srcs" "Source to print in native format" + =#> functionResult pushText "string" "Haskell representation" + ] + [ readonly "name" "source name" + (pushString, sourceName . fst) + , readonly "text" "source text" + (pushText, snd) + ] diff --git a/src/Text/Pandoc/Lua/Marshaling.hs b/src/Text/Pandoc/Lua/Marshaling.hs deleted file mode 100644 index f517c7c27..000000000 --- a/src/Text/Pandoc/Lua/Marshaling.hs +++ /dev/null @@ -1,19 +0,0 @@ -{- | - Module : Text.Pandoc.Lua.Marshaling - Copyright : © 2012-2021 John MacFarlane - © 2017-2021 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - Stability : alpha - -Lua marshaling (pushing) and unmarshaling (peeking) instances. --} -module Text.Pandoc.Lua.Marshaling () where - -import Text.Pandoc.Lua.Marshaling.AST () -import Text.Pandoc.Lua.Marshaling.CommonState () -import Text.Pandoc.Lua.Marshaling.Context () -import Text.Pandoc.Lua.Marshaling.PandocError() -import Text.Pandoc.Lua.Marshaling.ReaderOptions () -import Text.Pandoc.Lua.Marshaling.Version () diff --git a/src/Text/Pandoc/Lua/Marshaling/AST.hs b/src/Text/Pandoc/Lua/Marshaling/AST.hs deleted file mode 100644 index 8e12d232c..000000000 --- a/src/Text/Pandoc/Lua/Marshaling/AST.hs +++ /dev/null @@ -1,378 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE LambdaCase #-} -{- | - Module : Text.Pandoc.Lua.Marshaling.AST - Copyright : © 2012-2021 John MacFarlane - © 2017-2021 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - Stability : alpha - -Marshaling/unmarshaling instances for document AST elements. --} -module Text.Pandoc.Lua.Marshaling.AST - ( LuaAttr (..) - , LuaListAttributes (..) - ) where - -import Control.Applicative ((<|>)) -import Control.Monad ((<$!>)) -import Foreign.Lua (Lua, Peekable, Pushable, StackIndex) -import Text.Pandoc.Definition -import Text.Pandoc.Error (PandocError) -import Text.Pandoc.Lua.Util (defineHowTo, pushViaConstructor) -import Text.Pandoc.Lua.Marshaling.CommonState () - -import qualified Control.Monad.Catch as Catch -import qualified Foreign.Lua as Lua -import qualified Text.Pandoc.Lua.Util as LuaUtil - -instance Pushable Pandoc where - push (Pandoc meta blocks) = - pushViaConstructor "Pandoc" blocks meta - -instance Peekable Pandoc where - peek idx = defineHowTo "get Pandoc value" $! Pandoc - <$!> LuaUtil.rawField idx "meta" - <*> LuaUtil.rawField idx "blocks" - -instance Pushable Meta where - push (Meta mmap) = - pushViaConstructor "Meta" mmap -instance Peekable Meta where - peek idx = defineHowTo "get Meta value" $! - Meta <$!> Lua.peek idx - -instance Pushable MetaValue where - push = pushMetaValue -instance Peekable MetaValue where - peek = peekMetaValue - -instance Pushable Block where - push = pushBlock - -instance Peekable Block where - peek = peekBlock - --- Inline -instance Pushable Inline where - push = pushInline - -instance Peekable Inline where - peek = peekInline - --- Citation -instance Pushable Citation where - push (Citation cid prefix suffix mode noteNum hash) = - pushViaConstructor "Citation" cid mode prefix suffix noteNum hash - -instance Peekable Citation where - peek idx = Citation - <$!> LuaUtil.rawField idx "id" - <*> LuaUtil.rawField idx "prefix" - <*> LuaUtil.rawField idx "suffix" - <*> LuaUtil.rawField idx "mode" - <*> LuaUtil.rawField idx "note_num" - <*> LuaUtil.rawField idx "hash" - -instance Pushable Alignment where - push = Lua.push . show -instance Peekable Alignment where - peek = Lua.peekRead - -instance Pushable CitationMode where - push = Lua.push . show -instance Peekable CitationMode where - peek = Lua.peekRead - -instance Pushable Format where - push (Format f) = Lua.push f -instance Peekable Format where - peek idx = Format <$!> Lua.peek idx - -instance Pushable ListNumberDelim where - push = Lua.push . show -instance Peekable ListNumberDelim where - peek = Lua.peekRead - -instance Pushable ListNumberStyle where - push = Lua.push . show -instance Peekable ListNumberStyle where - peek = Lua.peekRead - -instance Pushable MathType where - push = Lua.push . show -instance Peekable MathType where - peek = Lua.peekRead - -instance Pushable QuoteType where - push = Lua.push . show -instance Peekable QuoteType where - peek = Lua.peekRead - --- | Push an meta value element to the top of the lua stack. -pushMetaValue :: MetaValue -> Lua () -pushMetaValue = \case - MetaBlocks blcks -> pushViaConstructor "MetaBlocks" blcks - MetaBool bool -> Lua.push bool - MetaInlines inlns -> pushViaConstructor "MetaInlines" inlns - MetaList metalist -> pushViaConstructor "MetaList" metalist - MetaMap metamap -> pushViaConstructor "MetaMap" metamap - MetaString str -> Lua.push str - --- | Interpret the value at the given stack index as meta value. -peekMetaValue :: StackIndex -> Lua MetaValue -peekMetaValue idx = defineHowTo "get MetaValue" $ do - -- Get the contents of an AST element. - let elementContent :: Peekable a => Lua a - elementContent = Lua.peek idx - luatype <- Lua.ltype idx - case luatype of - Lua.TypeBoolean -> MetaBool <$!> Lua.peek idx - Lua.TypeString -> MetaString <$!> Lua.peek idx - Lua.TypeTable -> do - tag <- try $ LuaUtil.getTag idx - case tag of - Right "MetaBlocks" -> MetaBlocks <$!> elementContent - Right "MetaBool" -> MetaBool <$!> elementContent - Right "MetaMap" -> MetaMap <$!> elementContent - Right "MetaInlines" -> MetaInlines <$!> elementContent - Right "MetaList" -> MetaList <$!> elementContent - Right "MetaString" -> MetaString <$!> elementContent - Right t -> Lua.throwMessage ("Unknown meta tag: " <> t) - Left _ -> do - -- no meta value tag given, try to guess. - len <- Lua.rawlen idx - if len <= 0 - then MetaMap <$!> Lua.peek idx - else (MetaInlines <$!> Lua.peek idx) - <|> (MetaBlocks <$!> Lua.peek idx) - <|> (MetaList <$!> Lua.peek idx) - _ -> Lua.throwMessage "could not get meta value" - --- | Push a block element to the top of the Lua stack. -pushBlock :: Block -> Lua () -pushBlock = \case - BlockQuote blcks -> pushViaConstructor "BlockQuote" blcks - BulletList items -> pushViaConstructor "BulletList" items - CodeBlock attr code -> pushViaConstructor "CodeBlock" code (LuaAttr attr) - DefinitionList items -> pushViaConstructor "DefinitionList" items - Div attr blcks -> pushViaConstructor "Div" blcks (LuaAttr attr) - Header lvl attr inlns -> pushViaConstructor "Header" lvl inlns (LuaAttr attr) - HorizontalRule -> pushViaConstructor "HorizontalRule" - LineBlock blcks -> pushViaConstructor "LineBlock" blcks - OrderedList lstAttr list -> pushViaConstructor "OrderedList" list - (LuaListAttributes lstAttr) - Null -> pushViaConstructor "Null" - Para blcks -> pushViaConstructor "Para" blcks - Plain blcks -> pushViaConstructor "Plain" blcks - RawBlock f cs -> pushViaConstructor "RawBlock" f cs - Table attr blkCapt specs thead tbody tfoot -> - pushViaConstructor "Table" blkCapt specs thead tbody tfoot attr - --- | Return the value at the given index as block if possible. -peekBlock :: StackIndex -> Lua Block -peekBlock idx = defineHowTo "get Block value" $! do - tag <- LuaUtil.getTag idx - case tag of - "BlockQuote" -> BlockQuote <$!> elementContent - "BulletList" -> BulletList <$!> elementContent - "CodeBlock" -> withAttr CodeBlock <$!> elementContent - "DefinitionList" -> DefinitionList <$!> elementContent - "Div" -> withAttr Div <$!> elementContent - "Header" -> (\(lvl, LuaAttr attr, lst) -> Header lvl attr lst) - <$!> elementContent - "HorizontalRule" -> return HorizontalRule - "LineBlock" -> LineBlock <$!> elementContent - "OrderedList" -> (\(LuaListAttributes lstAttr, lst) -> - OrderedList lstAttr lst) - <$!> elementContent - "Null" -> return Null - "Para" -> Para <$!> elementContent - "Plain" -> Plain <$!> elementContent - "RawBlock" -> uncurry RawBlock <$!> elementContent - "Table" -> (\(attr, capt, colSpecs, thead, tbodies, tfoot) -> - Table (fromLuaAttr attr) - capt - colSpecs - thead - tbodies - tfoot) - <$!> elementContent - _ -> Lua.throwMessage ("Unknown block type: " <> tag) - where - -- Get the contents of an AST element. - elementContent :: Peekable a => Lua a - elementContent = LuaUtil.rawField idx "c" - -instance Pushable Caption where - push = pushCaption - -instance Peekable Caption where - peek = peekCaption - --- | Push Caption element -pushCaption :: Caption -> Lua () -pushCaption (Caption shortCaption longCaption) = do - Lua.newtable - LuaUtil.addField "short" (Lua.Optional shortCaption) - LuaUtil.addField "long" longCaption - --- | Peek Caption element -peekCaption :: StackIndex -> Lua Caption -peekCaption idx = Caption - <$!> (Lua.fromOptional <$!> LuaUtil.rawField idx "short") - <*> LuaUtil.rawField idx "long" - -instance Peekable ColWidth where - peek idx = do - width <- Lua.fromOptional <$!> Lua.peek idx - return $! maybe ColWidthDefault ColWidth width - -instance Pushable ColWidth where - push = \case - (ColWidth w) -> Lua.push w - ColWidthDefault -> Lua.pushnil - -instance Pushable Row where - push (Row attr cells) = Lua.push (attr, cells) - -instance Peekable Row where - peek = fmap (uncurry Row) . Lua.peek - -instance Pushable TableBody where - push (TableBody attr (RowHeadColumns rowHeadColumns) head' body) = do - Lua.newtable - LuaUtil.addField "attr" attr - LuaUtil.addField "row_head_columns" rowHeadColumns - LuaUtil.addField "head" head' - LuaUtil.addField "body" body - -instance Peekable TableBody where - peek idx = TableBody - <$!> LuaUtil.rawField idx "attr" - <*> (RowHeadColumns <$!> LuaUtil.rawField idx "row_head_columns") - <*> LuaUtil.rawField idx "head" - <*> LuaUtil.rawField idx "body" - -instance Pushable TableHead where - push (TableHead attr rows) = Lua.push (attr, rows) - -instance Peekable TableHead where - peek = fmap (uncurry TableHead) . Lua.peek - -instance Pushable TableFoot where - push (TableFoot attr cells) = Lua.push (attr, cells) - -instance Peekable TableFoot where - peek = fmap (uncurry TableFoot) . Lua.peek - -instance Pushable Cell where - push = pushCell - -instance Peekable Cell where - peek = peekCell - -pushCell :: Cell -> Lua () -pushCell (Cell attr align (RowSpan rowSpan) (ColSpan colSpan) contents) = do - Lua.newtable - LuaUtil.addField "attr" attr - LuaUtil.addField "alignment" align - LuaUtil.addField "row_span" rowSpan - LuaUtil.addField "col_span" colSpan - LuaUtil.addField "contents" contents - -peekCell :: StackIndex -> Lua Cell -peekCell idx = Cell - <$!> (fromLuaAttr <$!> LuaUtil.rawField idx "attr") - <*> LuaUtil.rawField idx "alignment" - <*> (RowSpan <$!> LuaUtil.rawField idx "row_span") - <*> (ColSpan <$!> LuaUtil.rawField idx "col_span") - <*> LuaUtil.rawField idx "contents" - --- | Push an inline element to the top of the lua stack. -pushInline :: Inline -> Lua () -pushInline = \case - Cite citations lst -> pushViaConstructor "Cite" lst citations - Code attr lst -> pushViaConstructor "Code" lst (LuaAttr attr) - Emph inlns -> pushViaConstructor "Emph" inlns - Underline inlns -> pushViaConstructor "Underline" inlns - Image attr alt (src,tit) -> pushViaConstructor "Image" alt src tit (LuaAttr attr) - LineBreak -> pushViaConstructor "LineBreak" - Link attr lst (src,tit) -> pushViaConstructor "Link" lst src tit (LuaAttr attr) - Note blcks -> pushViaConstructor "Note" blcks - Math mty str -> pushViaConstructor "Math" mty str - Quoted qt inlns -> pushViaConstructor "Quoted" qt inlns - RawInline f cs -> pushViaConstructor "RawInline" f cs - SmallCaps inlns -> pushViaConstructor "SmallCaps" inlns - SoftBreak -> pushViaConstructor "SoftBreak" - Space -> pushViaConstructor "Space" - Span attr inlns -> pushViaConstructor "Span" inlns (LuaAttr attr) - Str str -> pushViaConstructor "Str" str - Strikeout inlns -> pushViaConstructor "Strikeout" inlns - Strong inlns -> pushViaConstructor "Strong" inlns - Subscript inlns -> pushViaConstructor "Subscript" inlns - Superscript inlns -> pushViaConstructor "Superscript" inlns - --- | Return the value at the given index as inline if possible. -peekInline :: StackIndex -> Lua Inline -peekInline idx = defineHowTo "get Inline value" $ do - tag <- LuaUtil.getTag idx - case tag of - "Cite" -> uncurry Cite <$!> elementContent - "Code" -> withAttr Code <$!> elementContent - "Emph" -> Emph <$!> elementContent - "Underline" -> Underline <$!> elementContent - "Image" -> (\(LuaAttr !attr, !lst, !tgt) -> Image attr lst tgt) - <$!> elementContent - "Link" -> (\(LuaAttr !attr, !lst, !tgt) -> Link attr lst tgt) - <$!> elementContent - "LineBreak" -> return LineBreak - "Note" -> Note <$!> elementContent - "Math" -> uncurry Math <$!> elementContent - "Quoted" -> uncurry Quoted <$!> elementContent - "RawInline" -> uncurry RawInline <$!> elementContent - "SmallCaps" -> SmallCaps <$!> elementContent - "SoftBreak" -> return SoftBreak - "Space" -> return Space - "Span" -> withAttr Span <$!> elementContent - -- strict to Lua string is copied before gc - "Str" -> Str <$!> elementContent - "Strikeout" -> Strikeout <$!> elementContent - "Strong" -> Strong <$!> elementContent - "Subscript" -> Subscript <$!> elementContent - "Superscript"-> Superscript <$!> elementContent - _ -> Lua.throwMessage ("Unknown inline type: " <> tag) - where - -- Get the contents of an AST element. - elementContent :: Peekable a => Lua a - elementContent = LuaUtil.rawField idx "c" - -try :: Lua a -> Lua (Either PandocError a) -try = Catch.try - -withAttr :: (Attr -> a -> b) -> (LuaAttr, a) -> b -withAttr f (attributes, x) = f (fromLuaAttr attributes) x - --- | Wrapper for Attr -newtype LuaAttr = LuaAttr { fromLuaAttr :: Attr } - -instance Pushable LuaAttr where - push (LuaAttr (id', classes, kv)) = - pushViaConstructor "Attr" id' classes kv - -instance Peekable LuaAttr where - peek idx = defineHowTo "get Attr value" $! (LuaAttr <$!> Lua.peek idx) - --- | Wrapper for ListAttributes -newtype LuaListAttributes = LuaListAttributes ListAttributes - -instance Pushable LuaListAttributes where - push (LuaListAttributes (start, style, delimiter)) = - pushViaConstructor "ListAttributes" start style delimiter - -instance Peekable LuaListAttributes where - peek = defineHowTo "get ListAttributes value" . - fmap LuaListAttributes . Lua.peek diff --git a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs b/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs deleted file mode 100644 index 82e26b963..000000000 --- a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs +++ /dev/null @@ -1,24 +0,0 @@ -{- | - Module : Text.Pandoc.Lua.Marshaling.AnyValue - Copyright : © 2017-2021 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - Stability : alpha - -Helper type to work with raw Lua stack indices instead of unmarshaled -values. - -TODO: Most of this module should be abstracted, factored out, and go -into HsLua. --} -module Text.Pandoc.Lua.Marshaling.AnyValue (AnyValue (..)) where - -import Foreign.Lua (Peekable (peek), StackIndex) - --- | Dummy type to allow values of arbitrary Lua type. This just wraps --- stack indices, using it requires extra care. -newtype AnyValue = AnyValue StackIndex - -instance Peekable AnyValue where - peek = return . AnyValue diff --git a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs deleted file mode 100644 index 147197c5d..000000000 --- a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs +++ /dev/null @@ -1,102 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.Lua.Marshaling.CommonState - Copyright : © 2012-2021 John MacFarlane - © 2017-2021 Albert Krewinkel - License : GNU GPL, version 2 or above - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - Stability : alpha - -Instances to marshal (push) and unmarshal (peek) the common state. --} -module Text.Pandoc.Lua.Marshaling.CommonState () where - -import Foreign.Lua (Lua, Peekable, Pushable) -import Foreign.Lua.Types.Peekable (reportValueOnFailure) -import Foreign.Lua.Userdata (ensureUserdataMetatable, pushAnyWithMetatable, - toAnyWithName) -import Text.Pandoc.Class (CommonState (..)) -import Text.Pandoc.Logging (LogMessage, showLogMessage) -import Text.Pandoc.Lua.Marshaling.AnyValue (AnyValue (..)) - -import qualified Data.Map as Map -import qualified Data.Text as Text -import qualified Foreign.Lua as Lua -import qualified Text.Pandoc.Lua.Util as LuaUtil - --- | Name used by Lua for the @CommonState@ type. -commonStateTypeName :: String -commonStateTypeName = "Pandoc CommonState" - -instance Peekable CommonState where - peek idx = reportValueOnFailure commonStateTypeName - (`toAnyWithName` commonStateTypeName) idx - -instance Pushable CommonState where - push st = pushAnyWithMetatable pushCommonStateMetatable st - where - pushCommonStateMetatable = ensureUserdataMetatable commonStateTypeName $ do - LuaUtil.addFunction "__index" indexCommonState - LuaUtil.addFunction "__pairs" pairsCommonState - -indexCommonState :: CommonState -> AnyValue -> Lua Lua.NumResults -indexCommonState st (AnyValue idx) = Lua.ltype idx >>= \case - Lua.TypeString -> 1 <$ (Lua.peek idx >>= pushField) - _ -> 1 <$ Lua.pushnil - where - pushField :: Text.Text -> Lua () - pushField name = case lookup name commonStateFields of - Just pushValue -> pushValue st - Nothing -> Lua.pushnil - -pairsCommonState :: CommonState -> Lua Lua.NumResults -pairsCommonState st = do - Lua.pushHaskellFunction nextFn - Lua.pushnil - Lua.pushnil - return 3 - where - nextFn :: AnyValue -> AnyValue -> Lua Lua.NumResults - nextFn _ (AnyValue idx) = - Lua.ltype idx >>= \case - Lua.TypeNil -> case commonStateFields of - [] -> 2 <$ (Lua.pushnil *> Lua.pushnil) - (key, pushValue):_ -> 2 <$ (Lua.push key *> pushValue st) - Lua.TypeString -> do - key <- Lua.peek idx - case tail $ dropWhile ((/= key) . fst) commonStateFields of - [] -> 2 <$ (Lua.pushnil *> Lua.pushnil) - (nextKey, pushValue):_ -> 2 <$ (Lua.push nextKey *> pushValue st) - _ -> 2 <$ (Lua.pushnil *> Lua.pushnil) - -commonStateFields :: [(Text.Text, CommonState -> Lua ())] -commonStateFields = - [ ("input_files", Lua.push . stInputFiles) - , ("output_file", Lua.push . Lua.Optional . stOutputFile) - , ("log", Lua.push . stLog) - , ("request_headers", Lua.push . Map.fromList . stRequestHeaders) - , ("resource_path", Lua.push . stResourcePath) - , ("source_url", Lua.push . Lua.Optional . stSourceURL) - , ("user_data_dir", Lua.push . Lua.Optional . stUserDataDir) - , ("trace", Lua.push . stTrace) - , ("verbosity", Lua.push . show . stVerbosity) - ] - --- | Name used by Lua for the @CommonState@ type. -logMessageTypeName :: String -logMessageTypeName = "Pandoc LogMessage" - -instance Peekable LogMessage where - peek idx = reportValueOnFailure logMessageTypeName - (`toAnyWithName` logMessageTypeName) idx - -instance Pushable LogMessage where - push msg = pushAnyWithMetatable pushLogMessageMetatable msg - where - pushLogMessageMetatable = ensureUserdataMetatable logMessageTypeName $ - LuaUtil.addFunction "__tostring" tostringLogMessage - -tostringLogMessage :: LogMessage -> Lua Text.Text -tostringLogMessage = return . showLogMessage diff --git a/src/Text/Pandoc/Lua/Marshaling/List.hs b/src/Text/Pandoc/Lua/Marshaling/List.hs deleted file mode 100644 index 0446302a1..000000000 --- a/src/Text/Pandoc/Lua/Marshaling/List.hs +++ /dev/null @@ -1,43 +0,0 @@ -{-# LANGUAGE DeriveDataTypeable #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE UndecidableInstances #-} -{- | -Module : Text.Pandoc.Lua.Marshaling.List -Copyright : © 2012-2021 John MacFarlane - © 2017-2021 Albert Krewinkel -License : GNU GPL, version 2 or above -Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> -Stability : alpha - -Marshaling/unmarshaling instances for @pandoc.List@s. --} -module Text.Pandoc.Lua.Marshaling.List - ( List (..) - ) where - -import Data.Data (Data) -import Foreign.Lua (Peekable, Pushable) -import Text.Pandoc.Walk (Walkable (..)) -import Text.Pandoc.Lua.Util (defineHowTo, pushViaConstructor) - -import qualified Foreign.Lua as Lua - --- | List wrapper which is marshalled as @pandoc.List@. -newtype List a = List { fromList :: [a] } - deriving (Data, Eq, Show) - -instance Pushable a => Pushable (List a) where - push (List xs) = - pushViaConstructor "List" xs - -instance Peekable a => Peekable (List a) where - peek idx = defineHowTo "get List" $ do - xs <- Lua.peek idx - return $ List xs - --- List is just a wrapper, so we can reuse the walk instance for --- unwrapped Hasekll lists. -instance Walkable [a] b => Walkable (List a) b where - walkM f = walkM (fmap fromList . f . List) - query f = query (f . List) diff --git a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs b/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs deleted file mode 100644 index 70bd010a0..000000000 --- a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs +++ /dev/null @@ -1,73 +0,0 @@ -{- | - Module : Text.Pandoc.Lua.Marshaling.MediaBag - Copyright : © 2012-2021 John MacFarlane - © 2017-2021 Albert Krewinkel - License : GNU GPL, version 2 or above - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - Stability : alpha - -Instances to marshal (push) and unmarshal (peek) media data. --} -module Text.Pandoc.Lua.Marshaling.MediaBag (pushIterator) where - -import Foreign.Ptr (Ptr) -import Foreign.StablePtr (StablePtr, deRefStablePtr, newStablePtr) -import Foreign.Lua (Lua, NumResults, Peekable, Pushable, StackIndex) -import Foreign.Lua.Types.Peekable (reportValueOnFailure) -import Foreign.Lua.Userdata (ensureUserdataMetatable, pushAnyWithMetatable, - toAnyWithName) -import Text.Pandoc.MediaBag (MediaBag, mediaItems) -import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.Lua.Marshaling.AnyValue (AnyValue (..)) - -import qualified Data.ByteString.Lazy as BL -import qualified Foreign.Lua as Lua -import qualified Foreign.Storable as Storable - --- | A list of 'MediaBag' items. -newtype MediaItems = MediaItems [(String, MimeType, BL.ByteString)] - -instance Pushable MediaItems where - push = pushMediaItems - -instance Peekable MediaItems where - peek = peekMediaItems - --- | Push an iterator triple to be used with Lua's @for@ loop construct. --- Each iterator invocation returns a triple containing the item's --- filename, MIME type, and content. -pushIterator :: MediaBag -> Lua NumResults -pushIterator mb = do - Lua.pushHaskellFunction nextItem - Lua.push (MediaItems $ mediaItems mb) - Lua.pushnil - return 3 - --- | Lua type name for @'MediaItems'@. -mediaItemsTypeName :: String -mediaItemsTypeName = "pandoc MediaItems" - --- | Push a @MediaItems@ element to the stack. -pushMediaItems :: MediaItems -> Lua () -pushMediaItems xs = pushAnyWithMetatable pushMT xs - where - pushMT = ensureUserdataMetatable mediaItemsTypeName (return ()) - --- | Retrieve a @MediaItems@ element from the stack. -peekMediaItems :: StackIndex -> Lua MediaItems -peekMediaItems = reportValueOnFailure mediaItemsTypeName - (`toAnyWithName` mediaItemsTypeName) - --- | Retrieve a list of items from an iterator state, return the first --- item (if present), and advance the state. -nextItem :: Ptr (StablePtr MediaItems) -> AnyValue -> Lua NumResults -nextItem ptr _ = do - (MediaItems items) <- Lua.liftIO $ deRefStablePtr =<< Storable.peek ptr - case items of - [] -> 2 <$ (Lua.pushnil *> Lua.pushnil) - (key, mt, content):xs -> do - Lua.liftIO $ Storable.poke ptr =<< newStablePtr (MediaItems xs) - Lua.push key - Lua.push mt - Lua.push content - return 3 diff --git a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs b/src/Text/Pandoc/Lua/Marshaling/PandocError.hs deleted file mode 100644 index f698704e0..000000000 --- a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs +++ /dev/null @@ -1,65 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ScopedTypeVariables #-} -{- | - Module : Text.Pandoc.Lua.Marshaling.PandocError - Copyright : © 2020-2021 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - Stability : alpha - -Marshaling of @'PandocError'@ values. --} -module Text.Pandoc.Lua.Marshaling.PandocError - ( peekPandocError - , pushPandocError - ) - where - -import Foreign.Lua (Lua, Peekable, Pushable, StackIndex) -import Text.Pandoc.Error (PandocError (PandocLuaError)) - -import qualified Foreign.Lua as Lua -import qualified Foreign.Lua.Userdata as Lua -import qualified Text.Pandoc.Lua.Util as LuaUtil -import qualified Text.Pandoc.UTF8 as UTF8 - --- | Userdata name used by Lua for the @PandocError@ type. -pandocErrorName :: String -pandocErrorName = "pandoc error" - --- | Peek a @'PandocError'@ element to the Lua stack. -pushPandocError :: PandocError -> Lua () -pushPandocError = Lua.pushAnyWithMetatable pushPandocErrorMT - where - pushPandocErrorMT = Lua.ensureUserdataMetatable pandocErrorName $ - LuaUtil.addFunction "__tostring" __tostring - --- | Retrieve a @'PandocError'@ from the Lua stack. -peekPandocError :: StackIndex -> Lua PandocError -peekPandocError idx = Lua.ltype idx >>= \case - Lua.TypeUserdata -> do - errMb <- Lua.toAnyWithName idx pandocErrorName - return $ case errMb of - Just err -> err - Nothing -> PandocLuaError "could not retrieve original error" - _ -> do - Lua.pushvalue idx - msg <- Lua.state >>= \l -> Lua.liftIO (Lua.errorMessage l) - return $ PandocLuaError (UTF8.toText msg) - --- | Convert to string. -__tostring :: PandocError -> Lua String -__tostring = return . show - --- --- Instances --- - -instance Pushable PandocError where - push = pushPandocError - -instance Peekable PandocError where - peek = peekPandocError diff --git a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs deleted file mode 100644 index dd7bf2e61..000000000 --- a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs +++ /dev/null @@ -1,79 +0,0 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# OPTIONS_GHC -fno-warn-orphans #-} -{- | - Module : Text.Pandoc.Lua.Marshaling.ReaderOptions - Copyright : © 2012-2021 John MacFarlane - © 2017-2021 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - Stability : alpha - -Marshaling instance for ReaderOptions and its components. --} -module Text.Pandoc.Lua.Marshaling.ReaderOptions () where - -import Data.Data (showConstr, toConstr) -import Foreign.Lua (Lua, Pushable) -import Text.Pandoc.Extensions (Extensions) -import Text.Pandoc.Lua.Marshaling.AnyValue (AnyValue (..)) -import Text.Pandoc.Lua.Marshaling.CommonState () -import Text.Pandoc.Options (ReaderOptions (..), TrackChanges) - -import qualified Data.Set as Set -import qualified Data.Text as Text -import qualified Foreign.Lua as Lua -import qualified Text.Pandoc.Lua.Util as LuaUtil - --- --- Reader Options --- -instance Pushable Extensions where - push exts = Lua.push (show exts) - -instance Pushable TrackChanges where - push = Lua.push . showConstr . toConstr - -instance Pushable ReaderOptions where - push ro = do - let ReaderOptions - (extensions :: Extensions) - (standalone :: Bool) - (columns :: Int) - (tabStop :: Int) - (indentedCodeClasses :: [Text.Text]) - (abbreviations :: Set.Set Text.Text) - (defaultImageExtension :: Text.Text) - (trackChanges :: TrackChanges) - (stripComments :: Bool) - = ro - Lua.newtable - LuaUtil.addField "extensions" extensions - LuaUtil.addField "standalone" standalone - LuaUtil.addField "columns" columns - LuaUtil.addField "tab_stop" tabStop - LuaUtil.addField "indented_code_classes" indentedCodeClasses - LuaUtil.addField "abbreviations" abbreviations - LuaUtil.addField "default_image_extension" defaultImageExtension - LuaUtil.addField "track_changes" trackChanges - LuaUtil.addField "strip_comments" stripComments - - -- add metatable - let indexReaderOptions :: AnyValue -> AnyValue -> Lua Lua.NumResults - indexReaderOptions _tbl (AnyValue key) = do - Lua.ltype key >>= \case - Lua.TypeString -> Lua.peek key >>= \case - ("defaultImageExtension" :: Text.Text) - -> Lua.push defaultImageExtension - "indentedCodeClasses" -> Lua.push indentedCodeClasses - "stripComments" -> Lua.push stripComments - "tabStop" -> Lua.push tabStop - "trackChanges" -> Lua.push trackChanges - _ -> Lua.pushnil - _ -> Lua.pushnil - return 1 - Lua.newtable - LuaUtil.addFunction "__index" indexReaderOptions - Lua.setmetatable (Lua.nthFromTop 2) diff --git a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs b/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs deleted file mode 100644 index 6d43039fa..000000000 --- a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs +++ /dev/null @@ -1,59 +0,0 @@ -{- | - Module : Text.Pandoc.Lua.Marshaling.SimpleTable - Copyright : © 2020-2021 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - Stability : alpha - -Definition and marshaling of the 'SimpleTable' data type used as a -convenience type when dealing with tables. --} -module Text.Pandoc.Lua.Marshaling.SimpleTable - ( SimpleTable (..) - , peekSimpleTable - , pushSimpleTable - ) - where - -import Foreign.Lua (Lua, Peekable, Pushable, StackIndex) -import Text.Pandoc.Definition -import Text.Pandoc.Lua.Util (defineHowTo, pushViaConstructor, rawField) -import Text.Pandoc.Lua.Marshaling.AST () - -import qualified Foreign.Lua as Lua - --- | A simple (legacy-style) table. -data SimpleTable = SimpleTable - { simpleTableCaption :: [Inline] - , simpleTableAlignments :: [Alignment] - , simpleTableColumnWidths :: [Double] - , simpleTableHeader :: [[Block]] - , simpleTableBody :: [[[Block]]] - } - -instance Pushable SimpleTable where - push = pushSimpleTable - -instance Peekable SimpleTable where - peek = peekSimpleTable - --- | Push a simple table to the stack by calling the --- @pandoc.SimpleTable@ constructor. -pushSimpleTable :: SimpleTable -> Lua () -pushSimpleTable tbl = pushViaConstructor "SimpleTable" - (simpleTableCaption tbl) - (simpleTableAlignments tbl) - (simpleTableColumnWidths tbl) - (simpleTableHeader tbl) - (simpleTableBody tbl) - --- | Retrieve a simple table from the stack. -peekSimpleTable :: StackIndex -> Lua SimpleTable -peekSimpleTable idx = defineHowTo "get SimpleTable" $ - SimpleTable - <$> rawField idx "caption" - <*> rawField idx "aligns" - <*> rawField idx "widths" - <*> rawField idx "headers" - <*> rawField idx "rows" diff --git a/src/Text/Pandoc/Lua/Marshaling/Version.hs b/src/Text/Pandoc/Lua/Marshaling/Version.hs deleted file mode 100644 index 4f4ffac51..000000000 --- a/src/Text/Pandoc/Lua/Marshaling/Version.hs +++ /dev/null @@ -1,154 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ScopedTypeVariables #-} -{- | - Module : Text.Pandoc.Lua.Marshaling.Version - Copyright : © 2019-2021 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - Stability : alpha - -Marshaling of @'Version'@s. The marshaled elements can be compared using -default comparison operators (like @>@ and @<=@). --} -module Text.Pandoc.Lua.Marshaling.Version - ( peekVersion - , pushVersion - ) - where - -import Data.Text (Text) -import Data.Maybe (fromMaybe) -import Data.Version (Version (..), makeVersion, parseVersion, showVersion) -import Foreign.Lua (Lua, Optional (..), NumResults, - Peekable, Pushable, StackIndex) -import Foreign.Lua.Types.Peekable (reportValueOnFailure) -import Foreign.Lua.Userdata (ensureUserdataMetatable, pushAnyWithMetatable, - toAnyWithName) -import Safe (atMay, lastMay) -import Text.Pandoc.Lua.Marshaling.AnyValue (AnyValue (..)) -import Text.ParserCombinators.ReadP (readP_to_S) - -import qualified Foreign.Lua as Lua -import qualified Text.Pandoc.Lua.Util as LuaUtil - --- | Push a @'Version'@ element to the Lua stack. -pushVersion :: Version -> Lua () -pushVersion version = pushAnyWithMetatable pushVersionMT version - where - pushVersionMT = ensureUserdataMetatable versionTypeName $ do - LuaUtil.addFunction "__eq" __eq - LuaUtil.addFunction "__le" __le - LuaUtil.addFunction "__lt" __lt - LuaUtil.addFunction "__len" __len - LuaUtil.addFunction "__index" __index - LuaUtil.addFunction "__pairs" __pairs - LuaUtil.addFunction "__tostring" __tostring - -instance Pushable Version where - push = pushVersion - -peekVersion :: StackIndex -> Lua Version -peekVersion idx = Lua.ltype idx >>= \case - Lua.TypeString -> do - versionStr <- Lua.peek idx - let parses = readP_to_S parseVersion versionStr - case lastMay parses of - Just (v, "") -> return v - _ -> Lua.throwMessage $ "could not parse as Version: " ++ versionStr - - Lua.TypeUserdata -> - reportValueOnFailure versionTypeName - (`toAnyWithName` versionTypeName) - idx - Lua.TypeNumber -> do - n <- Lua.peek idx - return (makeVersion [n]) - - Lua.TypeTable -> - makeVersion <$> Lua.peek idx - - _ -> - Lua.throwMessage "could not peek Version" - -instance Peekable Version where - peek = peekVersion - --- | Name used by Lua for the @CommonState@ type. -versionTypeName :: String -versionTypeName = "HsLua Version" - -__eq :: Version -> Version -> Lua Bool -__eq v1 v2 = return (v1 == v2) - -__le :: Version -> Version -> Lua Bool -__le v1 v2 = return (v1 <= v2) - -__lt :: Version -> Version -> Lua Bool -__lt v1 v2 = return (v1 < v2) - --- | Get number of version components. -__len :: Version -> Lua Int -__len = return . length . versionBranch - --- | Access fields. -__index :: Version -> AnyValue -> Lua NumResults -__index v (AnyValue k) = do - ty <- Lua.ltype k - case ty of - Lua.TypeNumber -> do - n <- Lua.peek k - let versionPart = atMay (versionBranch v) (n - 1) - Lua.push (Lua.Optional versionPart) - return 1 - Lua.TypeString -> do - (str :: Text) <- Lua.peek k - if str == "must_be_at_least" - then 1 <$ Lua.pushHaskellFunction must_be_at_least - else 1 <$ Lua.pushnil - _ -> 1 <$ Lua.pushnil - --- | Create iterator. -__pairs :: Version -> Lua NumResults -__pairs v = do - Lua.pushHaskellFunction nextFn - Lua.pushnil - Lua.pushnil - return 3 - where - nextFn :: AnyValue -> Optional Int -> Lua Lua.NumResults - nextFn _ (Optional key) = - case key of - Nothing -> case versionBranch v of - [] -> 2 <$ (Lua.pushnil *> Lua.pushnil) - n:_ -> 2 <$ (Lua.push (1 :: Int) *> Lua.push n) - Just n -> case atMay (versionBranch v) n of - Nothing -> 2 <$ (Lua.pushnil *> Lua.pushnil) - Just b -> 2 <$ (Lua.push (n + 1) *> Lua.push b) - --- | Convert to string. -__tostring :: Version -> Lua String -__tostring v = return (showVersion v) - --- | Default error message when a version is too old. This message is --- formatted in Lua with the expected and actual versions as arguments. -versionTooOldMessage :: String -versionTooOldMessage = "expected version %s or newer, got %s" - --- | Throw an error if this version is older than the given version. --- FIXME: This function currently requires the string library to be --- loaded. -must_be_at_least :: Version -> Version -> Optional String -> Lua NumResults -must_be_at_least actual expected optMsg = do - let msg = fromMaybe versionTooOldMessage (fromOptional optMsg) - if expected <= actual - then return 0 - else do - Lua.getglobal' "string.format" - Lua.push msg - Lua.push (showVersion expected) - Lua.push (showVersion actual) - Lua.call 3 1 - Lua.error diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index 3eed50fca..fb055101e 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -1,103 +1,126 @@ +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Module.MediaBag Copyright : Copyright © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - Stability : alpha -The lua module @pandoc.mediabag@. +The Lua module @pandoc.mediabag@. -} module Text.Pandoc.Lua.Module.MediaBag - ( pushModule + ( documentedModule ) where import Prelude hiding (lookup) -import Control.Monad (zipWithM_) -import Foreign.Lua (Lua, NumResults, Optional) +import Data.Maybe (fromMaybe) +import HsLua ( LuaE, DocumentedFunction, Module (..) + , (<#>), (###), (=#>), (=?>), defun, functionResult + , optionalParameter , parameter) import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Class.PandocMonad (fetchItem, getMediaBag, modifyCommonState, setMediaBag) -import Text.Pandoc.Lua.Marshaling () -import Text.Pandoc.Lua.Marshaling.MediaBag (pushIterator) -import Text.Pandoc.Lua.PandocLua (PandocLua (..), liftPandocLua, addFunction) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Lua.Marshal.List (pushPandocList) +import Text.Pandoc.Lua.Orphans () +import Text.Pandoc.Lua.PandocLua (unPandocLua) import Text.Pandoc.MIME (MimeType) import qualified Data.ByteString.Lazy as BL -import qualified Data.Text as T -import qualified Foreign.Lua as Lua +import qualified HsLua as Lua import qualified Text.Pandoc.MediaBag as MB -- -- MediaBag submodule -- -pushModule :: PandocLua NumResults -pushModule = do - liftPandocLua Lua.newtable - addFunction "delete" delete - addFunction "empty" empty - addFunction "insert" insert - addFunction "items" items - addFunction "lookup" lookup - addFunction "list" list - addFunction "fetch" fetch - return 1 +documentedModule :: Module PandocError +documentedModule = Module + { moduleName = "pandoc.mediabag" + , moduleDescription = "mediabag access" + , moduleFields = [] + , moduleFunctions = + [ delete + , empty + , fetch + , insert + , items + , list + , lookup + ] + , moduleOperations = [] + } -- | Delete a single item from the media bag. -delete :: FilePath -> PandocLua NumResults -delete fp = 0 <$ modifyCommonState - (\st -> st { stMediaBag = MB.deleteMedia fp (stMediaBag st) }) +delete :: DocumentedFunction PandocError +delete = defun "delete" + ### (\fp -> unPandocLua $ modifyCommonState + (\st -> st { stMediaBag = MB.deleteMedia fp (stMediaBag st) })) + <#> parameter Lua.peekString "string" "filepath" "filename of item to delete" + =#> [] + -- | Delete all items from the media bag. -empty :: PandocLua NumResults -empty = 0 <$ modifyCommonState (\st -> st { stMediaBag = mempty }) +empty :: DocumentedFunction PandocError +empty = defun "empty" + ### unPandocLua (modifyCommonState (\st -> st { stMediaBag = mempty })) + =#> [] -- | Insert a new item into the media bag. -insert :: FilePath - -> Optional MimeType - -> BL.ByteString - -> PandocLua NumResults -insert fp optionalMime contents = do - mb <- getMediaBag - setMediaBag $ MB.insertMedia fp (Lua.fromOptional optionalMime) contents mb - return (Lua.NumResults 0) +insert :: DocumentedFunction PandocError +insert = defun "insert" + ### (\fp mmime contents -> unPandocLua $ do + mb <- getMediaBag + setMediaBag $ MB.insertMedia fp mmime contents mb + return (Lua.NumResults 0)) + <#> parameter Lua.peekString "string" "filepath" "item file path" + <#> optionalParameter Lua.peekText "string" "mimetype" "the item's MIME type" + <#> parameter Lua.peekLazyByteString "string" "contents" "binary contents" + =?> "Nothing" -- | Returns iterator values to be used with a Lua @for@ loop. -items :: PandocLua NumResults -items = getMediaBag >>= liftPandocLua . pushIterator +items :: DocumentedFunction PandocError +items = defun "items" + ### (do + mb <-unPandocLua getMediaBag + let pushItem (fp, mimetype, contents) = do + Lua.pushString fp + Lua.pushText mimetype + Lua.pushByteString $ BL.toStrict contents + return (Lua.NumResults 3) + Lua.pushIterator pushItem (MB.mediaItems mb)) + =?> "Iterator triple" -lookup :: FilePath - -> PandocLua NumResults -lookup fp = do - res <- MB.lookupMedia fp <$> getMediaBag - liftPandocLua $ case res of - Nothing -> 1 <$ Lua.pushnil - Just item -> do - Lua.push $ MB.mediaMimeType item - Lua.push $ MB.mediaContents item - return 2 +-- | Function to lookup a value in the mediabag. +lookup :: DocumentedFunction PandocError +lookup = defun "lookup" + ### (\fp -> unPandocLua (MB.lookupMedia fp <$> getMediaBag) >>= \case + Nothing -> 1 <$ Lua.pushnil + Just item -> 2 <$ do + Lua.pushText $ MB.mediaMimeType item + Lua.pushLazyByteString $ MB.mediaContents item) + <#> parameter Lua.peekString "string" "filepath" "path of item to lookup" + =?> "MIME type and contents" -list :: PandocLua NumResults -list = do - dirContents <- MB.mediaDirectory <$> getMediaBag - liftPandocLua $ do - Lua.newtable - zipWithM_ addEntry [1..] dirContents - return 1 +-- | Function listing all mediabag items. +list :: DocumentedFunction PandocError +list = defun "list" + ### (unPandocLua (MB.mediaDirectory <$> getMediaBag)) + =#> functionResult (pushPandocList pushEntry) "table" "list of entry triples" where - addEntry :: Lua.Integer -> (FilePath, MimeType, Int) -> Lua () - addEntry idx (fp, mimeType, contentLength) = do + pushEntry :: (FilePath, MimeType, Int) -> LuaE PandocError () + pushEntry (fp, mimeType, contentLength) = do Lua.newtable - Lua.push ("path" :: T.Text) *> Lua.push fp *> Lua.rawset (-3) - Lua.push ("type" :: T.Text) *> Lua.push mimeType *> Lua.rawset (-3) - Lua.push ("length" :: T.Text) *> Lua.push contentLength *> Lua.rawset (-3) - Lua.rawseti (-2) idx + Lua.pushName "path" *> Lua.pushString fp *> Lua.rawset (-3) + Lua.pushName "type" *> Lua.pushText mimeType *> Lua.rawset (-3) + Lua.pushName "length" *> Lua.pushIntegral contentLength *> Lua.rawset (-3) -fetch :: T.Text - -> PandocLua NumResults -fetch src = do - (bs, mimeType) <- fetchItem src - liftPandocLua . Lua.push $ maybe "" T.unpack mimeType - liftPandocLua $ Lua.push bs - return 2 -- returns 2 values: contents, mimetype +-- | Lua function to retrieve a new item. +fetch :: DocumentedFunction PandocError +fetch = defun "fetch" + ### (\src -> do + (bs, mimeType) <- unPandocLua $ fetchItem src + Lua.pushText $ fromMaybe "" mimeType + Lua.pushByteString bs + return 2) + <#> parameter Lua.peekText "string" "src" "URI to fetch" + =?> "Returns two string values: the fetched contents and the mimetype." diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index 5c14b3a30..20c2f5af5 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -1,5 +1,8 @@ +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} {- | Module : Text.Pandoc.Lua.Module.Pandoc Copyright : Copyright © 2017-2021 Albert Krewinkel @@ -12,32 +15,37 @@ Pandoc module for lua. -} module Text.Pandoc.Lua.Module.Pandoc ( pushModule + , documentedModule ) where import Prelude hiding (read) -import Control.Monad (when) +import Control.Monad (forM_, when) +import Control.Monad.Catch (catch, throwM) import Control.Monad.Except (throwError) +import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr) import Data.Default (Default (..)) import Data.Maybe (fromMaybe) -import Foreign.Lua (Lua, NumResults, Optional, Peekable, Pushable) +import Data.Proxy (Proxy (Proxy)) +import HsLua hiding (pushModule) +import HsLua.Class.Peekable (PeekError) import System.Exit (ExitCode (..)) import Text.Pandoc.Class.PandocIO (runIO) -import Text.Pandoc.Definition (Block, Inline) -import Text.Pandoc.Lua.Filter (LuaFilter, SingletonsList (..), walkInlines, - walkInlineLists, walkBlocks, walkBlockLists) -import Text.Pandoc.Lua.Marshaling () -import Text.Pandoc.Lua.Marshaling.List (List (..)) -import Text.Pandoc.Lua.PandocLua (PandocLua, addFunction, liftPandocLua, - loadDefaultModule) -import Text.Pandoc.Walk (Walkable) +import Text.Pandoc.Definition +import Text.Pandoc.Lua.Orphans () +import Text.Pandoc.Lua.Marshal.AST +import Text.Pandoc.Lua.Marshal.Filter (peekFilter) +import Text.Pandoc.Lua.Marshal.ReaderOptions ( peekReaderOptions + , pushReaderOptions) +import Text.Pandoc.Lua.Module.Utils (sha1) +import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua) import Text.Pandoc.Options (ReaderOptions (readerExtensions)) import Text.Pandoc.Process (pipeProcess) import Text.Pandoc.Readers (Reader (..), getReader) +import qualified HsLua as Lua import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BSL import qualified Data.Text as T -import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil import Text.Pandoc.Error @@ -45,55 +53,164 @@ import Text.Pandoc.Error -- module to be loadable. pushModule :: PandocLua NumResults pushModule = do - loadDefaultModule "pandoc" - addFunction "read" read - addFunction "pipe" pipe - addFunction "walk_block" walk_block - addFunction "walk_inline" walk_inline + liftPandocLua $ Lua.pushModule documentedModule return 1 -walkElement :: (Walkable (SingletonsList Inline) a, - Walkable (SingletonsList Block) a, - Walkable (List Inline) a, - Walkable (List Block) a) - => a -> LuaFilter -> PandocLua a -walkElement x f = liftPandocLua $ - walkInlines f x >>= walkInlineLists f >>= walkBlocks f >>= walkBlockLists f - -walk_inline :: Inline -> LuaFilter -> PandocLua Inline -walk_inline = walkElement - -walk_block :: Block -> LuaFilter -> PandocLua Block -walk_block = walkElement - -read :: T.Text -> Optional T.Text -> PandocLua NumResults -read content formatSpecOrNil = liftPandocLua $ do - let formatSpec = fromMaybe "markdown" (Lua.fromOptional formatSpecOrNil) - res <- Lua.liftIO . runIO $ - getReader formatSpec >>= \(rdr,es) -> - case rdr of - TextReader r -> - r def{ readerExtensions = es } content - _ -> throwError $ PandocSomeError - "Only textual formats are supported" - case res of - Right pd -> (1 :: NumResults) <$ Lua.push pd -- success, push Pandoc - Left (PandocUnknownReaderError f) -> Lua.raiseError $ - "Unknown reader: " <> f - Left (PandocUnsupportedExtensionError e f) -> Lua.raiseError $ - "Extension " <> e <> " not supported for " <> f - Left e -> Lua.raiseError $ show e - --- | Pipes input through a command. -pipe :: String -- ^ path to executable - -> [String] -- ^ list of arguments - -> BL.ByteString -- ^ input passed to process via stdin - -> PandocLua NumResults -pipe command args input = liftPandocLua $ do - (ec, output) <- Lua.liftIO $ pipeProcess Nothing command args input - case ec of - ExitSuccess -> 1 <$ Lua.push output - ExitFailure n -> Lua.raiseError (PipeError (T.pack command) n output) +documentedModule :: Module PandocError +documentedModule = Module + { moduleName = "pandoc" + , moduleDescription = T.unlines + [ "Lua functions for pandoc scripts; includes constructors for" + , "document elements, functions to parse text in a given" + , "format, and functions to filter and modify a subtree." + ] + , moduleFields = stringConstants ++ [inlineField, blockField] + , moduleOperations = [] + , moduleFunctions = mconcat + [ functions + , otherConstructors + , blockConstructors + , inlineConstructors + , metaValueConstructors + ] + } + +-- | Inline table field +inlineField :: Field PandocError +inlineField = Field + { fieldName = "Inline" + , fieldDescription = "Inline constructors, nested under 'constructors'." + -- the nesting happens for historical reasons and should probably be + -- changed. + , fieldPushValue = pushWithConstructorsSubtable inlineConstructors + } + +-- | @Block@ module field +blockField :: Field PandocError +blockField = Field + { fieldName = "Block" + , fieldDescription = "Inline constructors, nested under 'constructors'." + -- the nesting happens for historical reasons and should probably be + -- changed. + , fieldPushValue = pushWithConstructorsSubtable blockConstructors + } + +pushWithConstructorsSubtable :: [DocumentedFunction PandocError] + -> LuaE PandocError () +pushWithConstructorsSubtable constructors = do + newtable -- Field table + newtable -- constructor table + pushName "constructor" *> pushvalue (nth 2) *> rawset (nth 4) + forM_ constructors $ \fn -> do + pushName (functionName fn) + pushDocumentedFunction fn + rawset (nth 3) + pop 1 -- pop constructor table + +otherConstructors :: LuaError e => [DocumentedFunction e] +otherConstructors = + [ mkPandoc + , mkMeta + , mkAttr + , mkAttributeList + , mkBlocks + , mkCitation + , mkCell + , mkRow + , mkTableHead + , mkTableFoot + , mkInlines + , mkListAttributes + , mkSimpleTable + + , defun "ReaderOptions" + ### liftPure id + <#> parameter peekReaderOptions "ReaderOptions|table" "opts" "reader options" + =#> functionResult pushReaderOptions "ReaderOptions" "new object" + #? "Creates a new ReaderOptions value." + ] + +stringConstants :: [Field e] +stringConstants = + let constrs :: forall a. Data a => Proxy a -> [String] + constrs _ = map showConstr . dataTypeConstrs . dataTypeOf @a $ undefined + nullaryConstructors = mconcat + [ constrs (Proxy @ListNumberStyle) + , constrs (Proxy @ListNumberDelim) + , constrs (Proxy @QuoteType) + , constrs (Proxy @MathType) + , constrs (Proxy @Alignment) + , constrs (Proxy @CitationMode) + ] + toField s = Field + { fieldName = T.pack s + , fieldDescription = T.pack s + , fieldPushValue = pushString s + } + in map toField nullaryConstructors + +functions :: [DocumentedFunction PandocError] +functions = + [ defun "pipe" + ### (\command args input -> do + (ec, output) <- Lua.liftIO $ pipeProcess Nothing command args input + `catch` (throwM . PandocIOError "pipe") + case ec of + ExitSuccess -> 1 <$ Lua.pushLazyByteString output + ExitFailure n -> do + pushPipeError (PipeError (T.pack command) n output) + Lua.error) + <#> parameter peekString "string" "command" "path to executable" + <#> parameter (peekList peekString) "{string,...}" "args" + "list of arguments" + <#> parameter peekLazyByteString "string" "input" + "input passed to process via stdin" + =?> "output string, or error triple" + + , defun "read" + ### (\content mformatspec mreaderOptions -> do + let formatSpec = fromMaybe "markdown" mformatspec + readerOptions = fromMaybe def mreaderOptions + res <- Lua.liftIO . runIO $ getReader formatSpec >>= \case + (TextReader r, es) -> r readerOptions{ readerExtensions = es } + content + _ -> throwError $ PandocSomeError + "Only textual formats are supported" + case res of + Right pd -> return pd -- success, got a Pandoc document + Left (PandocUnknownReaderError f) -> + Lua.failLua . T.unpack $ "Unknown reader: " <> f + Left (PandocUnsupportedExtensionError e f) -> + Lua.failLua . T.unpack $ + "Extension " <> e <> " not supported for " <> f + Left e -> + throwM e) + <#> parameter peekText "string" "content" "text to parse" + <#> optionalParameter peekText "string" "formatspec" "format and extensions" + <#> optionalParameter peekReaderOptions "ReaderOptions" "reader_options" + "reader options" + =#> functionResult pushPandoc "Pandoc" "result document" + + , sha1 + + , defun "walk_block" + ### walkElement + <#> parameter peekBlockFuzzy "Block" "block" "element to traverse" + <#> parameter peekFilter "Filter" "lua_filter" "filter functions" + =#> functionResult pushBlock "Block" "modified Block" + + , defun "walk_inline" + ### walkElement + <#> parameter peekInlineFuzzy "Inline" "inline" "element to traverse" + <#> parameter peekFilter "Filter" "lua_filter" "filter functions" + =#> functionResult pushInline "Inline" "modified Inline" + ] + where + walkElement x f = + walkInlineSplicing f x + >>= walkInlinesStraight f + >>= walkBlockSplicing f + >>= walkBlocksStraight f data PipeError = PipeError { pipeErrorCommand :: T.Text @@ -101,29 +218,34 @@ data PipeError = PipeError , pipeErrorOutput :: BL.ByteString } -instance Peekable PipeError where - peek idx = - PipeError - <$> (Lua.getfield idx "command" *> Lua.peek (-1) <* Lua.pop 1) - <*> (Lua.getfield idx "error_code" *> Lua.peek (-1) <* Lua.pop 1) - <*> (Lua.getfield idx "output" *> Lua.peek (-1) <* Lua.pop 1) - -instance Pushable PipeError where - push pipeErr = do - Lua.newtable - LuaUtil.addField "command" (pipeErrorCommand pipeErr) - LuaUtil.addField "error_code" (pipeErrorCode pipeErr) - LuaUtil.addField "output" (pipeErrorOutput pipeErr) - pushPipeErrorMetaTable - Lua.setmetatable (-2) - where - pushPipeErrorMetaTable :: Lua () - pushPipeErrorMetaTable = do - v <- Lua.newmetatable "pandoc pipe error" - when v $ LuaUtil.addFunction "__tostring" pipeErrorMessage - - pipeErrorMessage :: PipeError -> Lua BL.ByteString - pipeErrorMessage (PipeError cmd errorCode output) = return $ mconcat +peekPipeError :: PeekError e => StackIndex -> LuaE e PipeError +peekPipeError idx = + PipeError + <$> (Lua.getfield idx "command" *> Lua.peek (-1) <* Lua.pop 1) + <*> (Lua.getfield idx "error_code" *> Lua.peek (-1) <* Lua.pop 1) + <*> (Lua.getfield idx "output" *> Lua.peek (-1) <* Lua.pop 1) + +pushPipeError :: PeekError e => Pusher e PipeError +pushPipeError pipeErr = do + Lua.newtable + LuaUtil.addField "command" (pipeErrorCommand pipeErr) + LuaUtil.addField "error_code" (pipeErrorCode pipeErr) + LuaUtil.addField "output" (pipeErrorOutput pipeErr) + pushPipeErrorMetaTable + Lua.setmetatable (-2) + where + pushPipeErrorMetaTable :: PeekError e => LuaE e () + pushPipeErrorMetaTable = do + v <- Lua.newmetatable "pandoc pipe error" + when v $ do + pushName "__tostring" + pushHaskellFunction pipeErrorMessage + rawset (nth 3) + + pipeErrorMessage :: PeekError e => LuaE e NumResults + pipeErrorMessage = do + (PipeError cmd errorCode output) <- peekPipeError (nthBottom 1) + pushByteString . BSL.toStrict . BSL.concat $ [ BSL.pack "Error running " , BSL.pack $ T.unpack cmd , BSL.pack " (error code " @@ -131,3 +253,4 @@ instance Pushable PipeError where , BSL.pack "): " , if output == mempty then BSL.pack "<no output>" else output ] + return (NumResults 1) diff --git a/src/Text/Pandoc/Lua/Module/System.hs b/src/Text/Pandoc/Lua/Module/System.hs index bd35babaf..e329a0125 100644 --- a/src/Text/Pandoc/Lua/Module/System.hs +++ b/src/Text/Pandoc/Lua/Module/System.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.Module.System Copyright : © 2019-2021 Albert Krewinkel @@ -9,25 +11,28 @@ Pandoc's system Lua module. -} module Text.Pandoc.Lua.Module.System - ( pushModule + ( documentedModule ) where -import Foreign.Lua (Lua, NumResults) -import Foreign.Lua.Module.System (arch, env, getwd, os, - with_env, with_tmpdir, with_wd) -import Text.Pandoc.Lua.Util (addFunction, addField) - -import qualified Foreign.Lua as Lua +import HsLua +import HsLua.Module.System + (arch, env, getwd, os, with_env, with_tmpdir, with_wd) -- | Push the pandoc.system module on the Lua stack. -pushModule :: Lua NumResults -pushModule = do - Lua.newtable - addField "arch" arch - addField "os" os - addFunction "environment" env - addFunction "get_working_directory" getwd - addFunction "with_environment" with_env - addFunction "with_temporary_directory" with_tmpdir - addFunction "with_working_directory" with_wd - return 1 +documentedModule :: LuaError e => Module e +documentedModule = Module + { moduleName = "pandoc.system" + , moduleDescription = "system functions" + , moduleFields = + [ arch + , os + ] + , moduleFunctions = + [ setName "environment" env + , setName "get_working_directory" getwd + , setName "with_environment" with_env + , setName "with_temporary_directory" with_tmpdir + , setName "with_working_directory" with_wd + ] + , moduleOperations = [] + } diff --git a/src/Text/Pandoc/Lua/Module/Types.hs b/src/Text/Pandoc/Lua/Module/Types.hs index bb4f02c3c..f16737f63 100644 --- a/src/Text/Pandoc/Lua/Module/Types.hs +++ b/src/Text/Pandoc/Lua/Module/Types.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Module.Types Copyright : © 2019-2021 Albert Krewinkel @@ -9,60 +10,33 @@ Pandoc data type constructors. -} module Text.Pandoc.Lua.Module.Types - ( pushModule + ( documentedModule ) where -import Data.Version (Version) -import Foreign.Lua (Lua, NumResults) -import Text.Pandoc.Definition -import Text.Pandoc.Lua.Marshaling.AST (LuaAttr, LuaListAttributes) -import Text.Pandoc.Lua.Marshaling.Version () -import Text.Pandoc.Lua.Util (addFunction) - -import qualified Foreign.Lua as Lua - --- | Push the pandoc.system module on the Lua stack. -pushModule :: Lua NumResults -pushModule = do - Lua.newtable - addFunction "Version" (return :: Version -> Lua Version) - pushCloneTable - Lua.setfield (Lua.nthFromTop 2) "clone" - return 1 - -pushCloneTable :: Lua NumResults -pushCloneTable = do - Lua.newtable - addFunction "Attr" cloneAttr - addFunction "Block" cloneBlock - addFunction "Citation" cloneCitation - addFunction "Inline" cloneInline - addFunction "Meta" cloneMeta - addFunction "MetaValue" cloneMetaValue - addFunction "ListAttributes" cloneListAttributes - addFunction "Pandoc" clonePandoc - return 1 - -cloneAttr :: LuaAttr -> Lua LuaAttr -cloneAttr = return - -cloneBlock :: Block -> Lua Block -cloneBlock = return - -cloneCitation :: Citation -> Lua Citation -cloneCitation = return - -cloneInline :: Inline -> Lua Inline -cloneInline = return - -cloneListAttributes :: LuaListAttributes -> Lua LuaListAttributes -cloneListAttributes = return - -cloneMeta :: Meta -> Lua Meta -cloneMeta = return - -cloneMetaValue :: MetaValue -> Lua MetaValue -cloneMetaValue = return - -clonePandoc :: Pandoc -> Lua Pandoc -clonePandoc = return +import HsLua ( Module (..), (###), (<#>), (=#>) + , defun, functionResult, parameter) +import HsLua.Module.Version (peekVersionFuzzy, pushVersion) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Lua.ErrorConversion () + +-- | Push the pandoc.types module on the Lua stack. +documentedModule :: Module PandocError +documentedModule = Module + { moduleName = "pandoc.types" + , moduleDescription = + "Constructors for types that are not part of the pandoc AST." + , moduleFields = [] + , moduleFunctions = + [ defun "Version" + ### return + <#> parameter peekVersionFuzzy "string|integer|{integer,...}|Version" + "version_specifier" + (mconcat [ "either a version string like `'2.7.3'`, " + , "a single integer like `2`, " + , "list of integers like `{2,7,3}`, " + , "or a Version object" + ]) + =#> functionResult pushVersion "Version" "A new Version object." + ] + , moduleOperations = [] + } diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs index 3ec3afc26..02307cf7a 100644 --- a/src/Text/Pandoc/Lua/Module/Utils.hs +++ b/src/Text/Pandoc/Lua/Module/Utils.hs @@ -1,5 +1,7 @@ -{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} {- | Module : Text.Pandoc.Lua.Module.Utils Copyright : Copyright © 2017-2021 Albert Krewinkel @@ -11,143 +13,194 @@ Utility module for Lua, exposing internal helper functions. -} module Text.Pandoc.Lua.Module.Utils - ( pushModule + ( documentedModule + , sha1 ) where import Control.Applicative ((<|>)) -import Control.Monad.Catch (try) +import Control.Monad ((<$!>)) import Data.Data (showConstr, toConstr) import Data.Default (def) +import Data.Maybe (fromMaybe) import Data.Version (Version) -import Foreign.Lua (Peekable, Lua, NumResults (..)) +import HsLua as Lua +import HsLua.Module.Version (peekVersionFuzzy, pushVersion) +import Text.Pandoc.Citeproc (getReferences) import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError) -import Text.Pandoc.Lua.Marshaling () -import Text.Pandoc.Lua.Marshaling.SimpleTable - ( SimpleTable (..) - , pushSimpleTable - ) -import Text.Pandoc.Lua.PandocLua (PandocLua, addFunction, liftPandocLua) +import Text.Pandoc.Lua.Marshal.AST +import Text.Pandoc.Lua.Marshal.Reference +import Text.Pandoc.Lua.PandocLua (PandocLua (unPandocLua)) import qualified Data.Digest.Pure.SHA as SHA import qualified Data.ByteString.Lazy as BSL +import qualified Data.Map as Map import qualified Data.Text as T -import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Filter.JSON as JSONFilter import qualified Text.Pandoc.Shared as Shared +import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.Pandoc.Writers.Shared as Shared -- | Push the "pandoc.utils" module to the Lua stack. -pushModule :: PandocLua NumResults -pushModule = do - liftPandocLua Lua.newtable - addFunction "blocks_to_inlines" blocksToInlines - addFunction "equals" equals - addFunction "from_simple_table" from_simple_table - addFunction "make_sections" makeSections - addFunction "normalize_date" normalizeDate - addFunction "run_json_filter" runJSONFilter - addFunction "sha1" sha1 - addFunction "stringify" stringify - addFunction "to_roman_numeral" toRomanNumeral - addFunction "to_simple_table" to_simple_table - addFunction "Version" (return :: Version -> Lua Version) - return 1 - --- | Squashes a list of blocks into inlines. -blocksToInlines :: [Block] -> Lua.Optional [Inline] -> PandocLua [Inline] -blocksToInlines blks optSep = liftPandocLua $ do - let sep = maybe Shared.defaultBlocksSeparator B.fromList - $ Lua.fromOptional optSep - return $ B.toList (Shared.blocksToInlinesWithSep sep blks) - --- | Convert list of Pandoc blocks into sections using Divs. -makeSections :: Bool -> Lua.Optional Int -> [Block] -> Lua [Block] -makeSections number baselevel = - return . Shared.makeSections number (Lua.fromOptional baselevel) - --- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We --- limit years to the range 1601-9999 (ISO 8601 accepts greater than --- or equal to 1583, but MS Word only accepts dates starting 1601). --- Returns nil instead of a string if the conversion failed. -normalizeDate :: T.Text -> Lua (Lua.Optional T.Text) -normalizeDate = return . Lua.Optional . Shared.normalizeDate - --- | Run a JSON filter on the given document. -runJSONFilter :: Pandoc - -> FilePath - -> Lua.Optional [String] - -> PandocLua Pandoc -runJSONFilter doc filterFile optArgs = do - args <- case Lua.fromOptional optArgs of - Just x -> return x - Nothing -> liftPandocLua $ do - Lua.getglobal "FORMAT" - (:[]) <$> Lua.popValue - JSONFilter.apply def args filterFile doc - --- | Calculate the hash of the given contents. -sha1 :: BSL.ByteString - -> Lua T.Text -sha1 = return . T.pack . SHA.showDigest . SHA.sha1 +documentedModule :: Module PandocError +documentedModule = Module + { moduleName = "pandoc.utils" + , moduleDescription = "pandoc utility functions" + , moduleFields = [] + , moduleOperations = [] + , moduleFunctions = + [ defun "blocks_to_inlines" + ### (\blks mSep -> do + let sep = maybe Shared.defaultBlocksSeparator B.fromList mSep + return $ B.toList (Shared.blocksToInlinesWithSep sep blks)) + <#> parameter (peekList peekBlock) "list of blocks" + "blocks" "" + <#> optionalParameter (peekList peekInline) "list of inlines" + "inline" "" + =#> functionResult pushInlines "list of inlines" "" + + , defun "equals" + ### equal + <#> parameter pure "AST element" "elem1" "" + <#> parameter pure "AST element" "elem2" "" + =#> functionResult pushBool "boolean" "true iff elem1 == elem2" + + , defun "make_sections" + ### liftPure3 Shared.makeSections + <#> parameter peekBool "boolean" "numbering" "add header numbers" + <#> parameter (\i -> (Nothing <$ peekNil i) <|> (Just <$!> peekIntegral i)) + "integer or nil" "baselevel" "" + <#> parameter (peekList peekBlock) "list of blocks" + "blocks" "document blocks to process" + =#> functionResult pushBlocks "list of Blocks" + "processes blocks" + + , defun "normalize_date" + ### liftPure Shared.normalizeDate + <#> parameter peekText "string" "date" "the date string" + =#> functionResult (maybe pushnil pushText) "string or nil" + "normalized date, or nil if normalization failed." + #? T.unwords + [ "Parse a date and convert (if possible) to \"YYYY-MM-DD\" format. We" + , "limit years to the range 1601-9999 (ISO 8601 accepts greater than" + , "or equal to 1583, but MS Word only accepts dates starting 1601)." + , "Returns nil instead of a string if the conversion failed." + ] + + , sha1 + + , defun "Version" + ### liftPure (id @Version) + <#> parameter peekVersionFuzzy + "version string, list of integers, or integer" + "v" "version description" + =#> functionResult pushVersion "Version" "new Version object" + #? "Creates a Version object." + + , defun "references" + ### (unPandocLua . getReferences Nothing) + <#> parameter peekPandoc "Pandoc" "doc" "document" + =#> functionResult (pushPandocList pushReference) "table" + "lift of references" + #? mconcat + [ "Get references defined inline in the metadata and via an external " + , "bibliography. Only references that are actually cited in the " + , "document (either with a genuine citation or with `nocite`) are " + , "returned. URL variables are converted to links." + ] + + , defun "run_json_filter" + ### (\doc filterPath margs -> do + args <- case margs of + Just xs -> return xs + Nothing -> do + Lua.getglobal "FORMAT" + (forcePeek ((:[]) <$!> peekString top) <* pop 1) + JSONFilter.apply def args filterPath doc + ) + <#> parameter peekPandoc "Pandoc" "doc" "input document" + <#> parameter peekString "filepath" "filter_path" "path to filter" + <#> optionalParameter (peekList peekString) "list of strings" + "args" "arguments to pass to the filter" + =#> functionResult pushPandoc "Pandoc" "filtered document" + + , defun "stringify" + ### stringify + <#> parameter pure "AST element" "elem" "some pandoc AST element" + =#> functionResult pushText "string" "stringified element" + + , defun "from_simple_table" + ### from_simple_table + <#> parameter peekSimpleTable "SimpleTable" "simple_tbl" "" + =?> "Simple table" + + , defun "to_roman_numeral" + ### liftPure Shared.toRomanNumeral + <#> parameter (peekIntegral @Int) "integer" "n" "number smaller than 4000" + =#> functionResult pushText "string" "roman numeral" + #? "Converts a number < 4000 to uppercase roman numeral." + + , defun "to_simple_table" + ### to_simple_table + <#> parameter peekTable "Block" "tbl" "a table" + =#> functionResult pushSimpleTable "SimpleTable" "SimpleTable object" + #? "Converts a table into an old/simple table." + + , defun "type" + ### (\idx -> getmetafield idx "__name" >>= \case + TypeString -> fromMaybe mempty <$> tostring top + _ -> ltype idx >>= typename) + <#> parameter pure "any" "object" "" + =#> functionResult pushByteString "string" "type of the given value" + #? ("Pandoc-friendly version of Lua's default `type` function, " <> + "returning the type of a value. If the argument has a " <> + "string-valued metafield `__name`, then it gives that string. " <> + "Otherwise it behaves just like the normal `type` function.") + ] + } + +-- | Documented Lua function to compute the hash of a string. +sha1 :: DocumentedFunction e +sha1 = defun "sha1" + ### liftPure (SHA.showDigest . SHA.sha1) + <#> parameter (fmap BSL.fromStrict . peekByteString) "string" "input" "" + =#> functionResult pushString "string" "hexadecimal hash value" + #? "Compute the hash of the given string value." + -- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link -- labels). -stringify :: AstElement -> PandocLua T.Text -stringify el = return $ case el of - PandocElement pd -> Shared.stringify pd - InlineElement i -> Shared.stringify i - BlockElement b -> Shared.stringify b - MetaElement m -> Shared.stringify m - CitationElement c -> Shared.stringify c - MetaValueElement m -> stringifyMetaValue m - _ -> mempty - -stringifyMetaValue :: MetaValue -> T.Text -stringifyMetaValue mv = case mv of - MetaBool b -> T.toLower $ T.pack (show b) - MetaString s -> s - _ -> Shared.stringify mv - -equals :: AstElement -> AstElement -> PandocLua Bool -equals e1 e2 = return (e1 == e2) - -data AstElement - = PandocElement Pandoc - | MetaElement Meta - | BlockElement Block - | InlineElement Inline - | MetaValueElement MetaValue - | AttrElement Attr - | ListAttributesElement ListAttributes - | CitationElement Citation - deriving (Eq, Show) - -instance Peekable AstElement where - peek idx = do - res <- try $ (PandocElement <$> Lua.peek idx) - <|> (InlineElement <$> Lua.peek idx) - <|> (BlockElement <$> Lua.peek idx) - <|> (AttrElement <$> Lua.peek idx) - <|> (ListAttributesElement <$> Lua.peek idx) - <|> (MetaElement <$> Lua.peek idx) - <|> (MetaValueElement <$> Lua.peek idx) - case res of - Right x -> return x - Left (_ :: PandocError) -> Lua.throwMessage - "Expected an AST element, but could not parse value as such." +stringify :: LuaError e => StackIndex -> LuaE e T.Text +stringify idx = forcePeek . retrieving "stringifyable element" $ + choice + [ (fmap Shared.stringify . peekPandoc) + , (fmap Shared.stringify . peekInline) + , (fmap Shared.stringify . peekBlock) + , (fmap Shared.stringify . peekCitation) + , (fmap stringifyMetaValue . peekMetaValue) + , (fmap (const "") . peekAttr) + , (fmap (const "") . peekListAttributes) + ] idx + where + stringifyMetaValue :: MetaValue -> T.Text + stringifyMetaValue mv = case mv of + MetaBool b -> T.toLower $ T.pack (show b) + MetaString s -> s + MetaList xs -> mconcat $ map stringifyMetaValue xs + MetaMap m -> mconcat $ map (stringifyMetaValue . snd) (Map.toList m) + _ -> Shared.stringify mv -- | Converts an old/simple table into a normal table block element. -from_simple_table :: SimpleTable -> Lua NumResults +from_simple_table :: SimpleTable -> LuaE PandocError NumResults from_simple_table (SimpleTable capt aligns widths head' body) = do Lua.push $ Table nullAttr - (Caption Nothing [Plain capt]) + (Caption Nothing [Plain capt | not (null capt)]) (zipWith (\a w -> (a, toColWidth w)) aligns widths) (TableHead nullAttr [blockListToRow head' | not (null head') ]) - [TableBody nullAttr 0 [] $ map blockListToRow body] + [TableBody nullAttr 0 [] $ map blockListToRow body | not (null body)] (TableFoot nullAttr []) return (NumResults 1) where @@ -159,17 +212,19 @@ from_simple_table (SimpleTable capt aligns widths head' body) = do toColWidth w = ColWidth w -- | Converts a table into an old/simple table. -to_simple_table :: Block -> Lua NumResults +to_simple_table :: Block -> LuaE PandocError SimpleTable to_simple_table = \case Table _attr caption specs thead tbodies tfoot -> do let (capt, aligns, widths, headers, rows) = Shared.toLegacyTable caption specs thead tbodies tfoot - pushSimpleTable $ SimpleTable capt aligns widths headers rows - return (NumResults 1) - blk -> - Lua.throwMessage $ - "Expected Table, got " <> showConstr (toConstr blk) <> "." - --- | Convert a number < 4000 to uppercase roman numeral. -toRomanNumeral :: Lua.Integer -> PandocLua T.Text -toRomanNumeral = return . Shared.toRomanNumeral . fromIntegral + return $ SimpleTable capt aligns widths headers rows + blk -> Lua.failLua $ mconcat + [ "Expected Table, got ", showConstr (toConstr blk), "." ] + +peekTable :: LuaError e => Peeker e Block +peekTable idx = peekBlock idx >>= \case + t@(Table {}) -> return t + b -> Lua.failPeek $ mconcat + [ "Expected Table, got " + , UTF8.fromString $ showConstr (toConstr b) + , "." ] diff --git a/src/Text/Pandoc/Lua/Orphans.hs b/src/Text/Pandoc/Lua/Orphans.hs new file mode 100644 index 000000000..d5b8f2c5d --- /dev/null +++ b/src/Text/Pandoc/Lua/Orphans.hs @@ -0,0 +1,116 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} +{-# LANGUAGE FlexibleInstances #-} +{- | + Module : Text.Pandoc.Lua.Orphans + Copyright : © 2012-2021 John MacFarlane + © 2017-2021 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +Orphan instances for Lua's Pushable and Peekable type classes. +-} +module Text.Pandoc.Lua.Orphans () where + +import Data.Version (Version) +import HsLua +import HsLua.Module.Version (peekVersionFuzzy) +import Text.Pandoc.Definition +import Text.Pandoc.Lua.Marshal.AST +import Text.Pandoc.Lua.Marshal.CommonState () +import Text.Pandoc.Lua.Marshal.Context () +import Text.Pandoc.Lua.Marshal.PandocError() +import Text.Pandoc.Lua.Marshal.ReaderOptions () +import Text.Pandoc.Lua.Marshal.Sources (pushSources) +import Text.Pandoc.Lua.ErrorConversion () +import Text.Pandoc.Sources (Sources) + +instance Pushable Pandoc where + push = pushPandoc + +instance Pushable Meta where + push = pushMeta + +instance Pushable MetaValue where + push = pushMetaValue + +instance Pushable Block where + push = pushBlock + +instance {-# OVERLAPPING #-} Pushable [Block] where + push = pushBlocks + +instance Pushable Alignment where + push = pushString . show + +instance Pushable CitationMode where + push = pushCitationMode + +instance Pushable Format where + push = pushFormat + +instance Pushable ListNumberDelim where + push = pushString . show + +instance Pushable ListNumberStyle where + push = pushString . show + +instance Pushable MathType where + push = pushMathType + +instance Pushable QuoteType where + push = pushQuoteType + +instance Pushable Cell where + push = pushCell + +instance Peekable Cell where + peek = forcePeek . peekCell + +instance Pushable Inline where + push = pushInline + +instance {-# OVERLAPPING #-} Pushable [Inline] where + push = pushInlines + +instance Pushable Citation where + push = pushCitation + +instance Pushable Row where + push = pushRow + +instance Pushable TableBody where + push = pushTableBody + +instance Pushable TableFoot where + push = pushTableFoot + +instance Pushable TableHead where + push = pushTableHead + +-- These instances exist only for testing. It's a hack to avoid making +-- the marshalling modules public. +instance Peekable Inline where + peek = forcePeek . peekInline + +instance Peekable Block where + peek = forcePeek . peekBlock + +instance Peekable Meta where + peek = forcePeek . peekMeta + +instance Peekable Pandoc where + peek = forcePeek . peekPandoc + +instance Peekable Row where + peek = forcePeek . peekRow + +instance Peekable Version where + peek = forcePeek . peekVersionFuzzy + +instance {-# OVERLAPPING #-} Peekable Attr where + peek = forcePeek . peekAttr + +instance Pushable Sources where + push = pushSources diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs index 2f1c139db..c36c3c670 100644 --- a/src/Text/Pandoc/Lua/Packages.hs +++ b/src/Text/Pandoc/Lua/Packages.hs @@ -1,3 +1,6 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} {- | Module : Text.Pandoc.Lua.Packages Copyright : Copyright © 2017-2021 Albert Krewinkel @@ -13,12 +16,13 @@ module Text.Pandoc.Lua.Packages ) where import Control.Monad (forM_) -import Foreign.Lua (NumResults) -import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, loadDefaultModule) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Lua.Marshal.List (pushListModule) +import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua) -import qualified Foreign.Lua as Lua -import qualified Foreign.Lua.Module.Path as Path -import qualified Foreign.Lua.Module.Text as Text +import qualified HsLua as Lua +import qualified HsLua.Module.Path as Path +import qualified HsLua.Module.Text as Text import qualified Text.Pandoc.Lua.Module.Pandoc as Pandoc import qualified Text.Pandoc.Lua.Module.MediaBag as MediaBag import qualified Text.Pandoc.Lua.Module.System as System @@ -30,8 +34,8 @@ installPandocPackageSearcher :: PandocLua () installPandocPackageSearcher = liftPandocLua $ do Lua.getglobal' "package.searchers" shiftArray - Lua.pushHaskellFunction pandocPackageSearcher - Lua.rawseti (Lua.nthFromTop 2) 1 + Lua.pushHaskellFunction $ Lua.toHaskellFunction pandocPackageSearcher + Lua.rawseti (Lua.nth 2) 1 Lua.pop 1 -- remove 'package.searchers' from stack where shiftArray = forM_ [4, 3, 2, 1] $ \i -> do @@ -39,22 +43,27 @@ installPandocPackageSearcher = liftPandocLua $ do Lua.rawseti (-2) (i + 1) -- | Load a pandoc module. -pandocPackageSearcher :: String -> PandocLua NumResults +pandocPackageSearcher :: String -> PandocLua Lua.NumResults pandocPackageSearcher pkgName = case pkgName of - "pandoc" -> pushWrappedHsFun Pandoc.pushModule - "pandoc.mediabag" -> pushWrappedHsFun MediaBag.pushModule - "pandoc.path" -> pushWrappedHsFun Path.pushModule - "pandoc.system" -> pushWrappedHsFun System.pushModule - "pandoc.types" -> pushWrappedHsFun Types.pushModule - "pandoc.utils" -> pushWrappedHsFun Utils.pushModule - "text" -> pushWrappedHsFun Text.pushModule - "pandoc.List" -> pushWrappedHsFun (loadDefaultModule pkgName) + "pandoc" -> pushModuleLoader Pandoc.documentedModule + "pandoc.mediabag" -> pushModuleLoader MediaBag.documentedModule + "pandoc.path" -> pushModuleLoader Path.documentedModule + "pandoc.system" -> pushModuleLoader System.documentedModule + "pandoc.types" -> pushModuleLoader Types.documentedModule + "pandoc.utils" -> pushModuleLoader Utils.documentedModule + "text" -> pushModuleLoader Text.documentedModule + "pandoc.List" -> pushWrappedHsFun . Lua.toHaskellFunction @PandocError $ + (Lua.NumResults 1 <$ pushListModule @PandocError) _ -> reportPandocSearcherFailure where + pushModuleLoader mdl = liftPandocLua $ do + Lua.pushHaskellFunction $ + Lua.NumResults 1 <$ Lua.pushModule @PandocError mdl + return (Lua.NumResults 1) pushWrappedHsFun f = liftPandocLua $ do Lua.pushHaskellFunction f return 1 reportPandocSearcherFailure = liftPandocLua $ do - Lua.push ("\n\t" <> pkgName <> "is not one of pandoc's default packages") - return (1 :: NumResults) + Lua.push ("\n\t" <> pkgName <> " is not one of pandoc's default packages") + return (Lua.NumResults 1) diff --git a/src/Text/Pandoc/Lua/PandocLua.hs b/src/Text/Pandoc/Lua/PandocLua.hs index 750e019b6..71fdf8d5c 100644 --- a/src/Text/Pandoc/Lua/PandocLua.hs +++ b/src/Text/Pandoc/Lua/PandocLua.hs @@ -22,27 +22,22 @@ module Text.Pandoc.Lua.PandocLua ( PandocLua (..) , runPandocLua , liftPandocLua - , addFunction - , loadDefaultModule ) where import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow) import Control.Monad.Except (MonadError (catchError, throwError)) -import Control.Monad.IO.Class (MonadIO (liftIO)) -import Foreign.Lua (Lua (..), NumResults, Pushable, ToHaskellFunction) -import Text.Pandoc.Class.PandocIO (PandocIO) -import Text.Pandoc.Class.PandocMonad (PandocMonad (..), readDefaultDataFile) -import Text.Pandoc.Error (PandocError (PandocLuaError)) +import Control.Monad.IO.Class (MonadIO) +import HsLua as Lua +import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) +import Text.Pandoc.Error (PandocError) import Text.Pandoc.Lua.Global (Global (..), setGlobals) -import Text.Pandoc.Lua.ErrorConversion (errorConversion) +import Text.Pandoc.Lua.Marshal.CommonState (peekCommonState) import qualified Control.Monad.Catch as Catch -import qualified Data.Text as T -import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Class.IO as IO -- | Type providing access to both, pandoc and Lua operations. -newtype PandocLua a = PandocLua { unPandocLua :: Lua a } +newtype PandocLua a = PandocLua { unPandocLua :: LuaE PandocError a } deriving ( Applicative , Functor @@ -54,16 +49,16 @@ newtype PandocLua a = PandocLua { unPandocLua :: Lua a } ) -- | Lift a @'Lua'@ operation into the @'PandocLua'@ type. -liftPandocLua :: Lua a -> PandocLua a +liftPandocLua :: LuaE PandocError a -> PandocLua a liftPandocLua = PandocLua -- | Evaluate a @'PandocLua'@ computation, running all contained Lua -- operations.. -runPandocLua :: PandocLua a -> PandocIO a +runPandocLua :: (PandocMonad m, MonadIO m) => PandocLua a -> m a runPandocLua pLua = do origState <- getCommonState globals <- defaultGlobals - (result, newState) <- liftIO . Lua.run' errorConversion . unPandocLua $ do + (result, newState) <- liftIO . Lua.run . unPandocLua $ do putCommonState origState liftPandocLua $ setGlobals globals r <- pLua @@ -72,38 +67,14 @@ runPandocLua pLua = do putCommonState newState return result -instance {-# OVERLAPPING #-} ToHaskellFunction (PandocLua NumResults) where - toHsFun _narg = unPandocLua - -instance Pushable a => ToHaskellFunction (PandocLua a) where - toHsFun _narg x = 1 <$ (unPandocLua x >>= Lua.push) - --- | Add a function to the table at the top of the stack, using the given name. -addFunction :: ToHaskellFunction a => String -> a -> PandocLua () -addFunction name fn = liftPandocLua $ do - Lua.push name - Lua.pushHaskellFunction fn - Lua.rawset (-3) - --- | Load a pure Lua module included with pandoc. Leaves the result on --- the stack and returns @NumResults 1@. --- --- The script is loaded from the default data directory. We do not load --- from data directories supplied via command line, as this could cause --- scripts to be executed even though they had not been passed explicitly. -loadDefaultModule :: String -> PandocLua NumResults -loadDefaultModule name = do - script <- readDefaultDataFile (name <> ".lua") - status <- liftPandocLua $ Lua.dostring script - if status == Lua.OK - then return (1 :: NumResults) - else do - msg <- liftPandocLua Lua.popValue - let err = "Error while loading `" <> name <> "`.\n" <> msg - throwError $ PandocLuaError (T.pack err) +instance {-# OVERLAPPING #-} Exposable PandocError (PandocLua NumResults) where + partialApply _narg = unPandocLua + +instance Pushable a => Exposable PandocError (PandocLua a) where + partialApply _narg x = 1 <$ (unPandocLua x >>= Lua.push) -- | Global variables which should always be set. -defaultGlobals :: PandocIO [Global] +defaultGlobals :: PandocMonad m => m [Global] defaultGlobals = do commonState <- getCommonState return @@ -127,6 +98,7 @@ instance PandocMonad PandocLua where readFileLazy = IO.readFileLazy readFileStrict = IO.readFileStrict + readStdinStrict = IO.readStdinStrict glob = IO.glob fileExists = IO.fileExists @@ -135,7 +107,7 @@ instance PandocMonad PandocLua where getCommonState = PandocLua $ do Lua.getglobal "PANDOC_STATE" - Lua.peek Lua.stackTop + forcePeek $ peekCommonState Lua.top putCommonState = PandocLua . setGlobals . (:[]) . PANDOC_STATE logOutput = IO.logOutput diff --git a/src/Text/Pandoc/Lua/Util.hs b/src/Text/Pandoc/Lua/Util.hs index 70a8a6d47..9c6f42b2b 100644 --- a/src/Text/Pandoc/Lua/Util.hs +++ b/src/Text/Pandoc/Lua/Util.hs @@ -1,6 +1,4 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Util Copyright : © 2012-2021 John MacFarlane, @@ -13,115 +11,34 @@ Lua utility functions. -} module Text.Pandoc.Lua.Util - ( getTag - , rawField - , addField - , addFunction - , addValue - , pushViaConstructor - , defineHowTo - , throwTopMessageAsError' + ( addField , callWithTraceback + , pcallWithTraceback , dofileWithTraceback ) where -import Control.Monad (unless, when) -import Data.Text (Text) -import Foreign.Lua ( Lua, NumArgs, NumResults, Peekable, Pushable, StackIndex - , Status, ToHaskellFunction ) -import qualified Foreign.Lua as Lua -import qualified Text.Pandoc.UTF8 as UTF8 - --- | Get value behind key from table at given index. -rawField :: Peekable a => StackIndex -> String -> Lua a -rawField idx key = do - absidx <- Lua.absindex idx - Lua.push key - Lua.rawget absidx - Lua.popValue +import Control.Monad (when) +import HsLua +import qualified HsLua as Lua -- | Add a value to the table at the top of the stack at a string-index. -addField :: Pushable a => String -> a -> Lua () -addField = addValue - --- | Add a key-value pair to the table at the top of the stack. -addValue :: (Pushable a, Pushable b) => a -> b -> Lua () -addValue key value = do +addField :: (LuaError e, Pushable a) => String -> a -> LuaE e () +addField key value = do Lua.push key Lua.push value - Lua.rawset (Lua.nthFromTop 3) - --- | Add a function to the table at the top of the stack, using the given name. -addFunction :: ToHaskellFunction a => String -> a -> Lua () -addFunction name fn = do - Lua.push name - Lua.pushHaskellFunction fn - Lua.rawset (-3) - --- | Helper class for pushing a single value to the stack via a lua function. --- See @pushViaCall@. -class PushViaCall a where - pushViaCall' :: String -> Lua () -> NumArgs -> a - -instance PushViaCall (Lua ()) where - pushViaCall' fn pushArgs num = do - Lua.push fn - Lua.rawget Lua.registryindex - pushArgs - Lua.call num 1 - -instance (Pushable a, PushViaCall b) => PushViaCall (a -> b) where - pushViaCall' fn pushArgs num x = - pushViaCall' fn (pushArgs *> Lua.push x) (num + 1) - --- | Push an value to the stack via a lua function. The lua function is called --- with all arguments that are passed to this function and is expected to return --- a single value. -pushViaCall :: PushViaCall a => String -> a -pushViaCall fn = pushViaCall' fn (return ()) 0 - --- | Call a pandoc element constructor within Lua, passing all given arguments. -pushViaConstructor :: PushViaCall a => String -> a -pushViaConstructor pandocFn = pushViaCall ("pandoc." ++ pandocFn) - --- | Get the tag of a value. This is an optimized and specialized version of --- @Lua.getfield idx "tag"@. It only checks for the field on the table at index --- @idx@ and on its metatable, also ignoring any @__index@ value on the --- metatable. -getTag :: StackIndex -> Lua String -getTag idx = do - -- push metatable or just the table - Lua.getmetatable idx >>= \hasMT -> unless hasMT (Lua.pushvalue idx) - Lua.push ("tag" :: Text) - Lua.rawget (Lua.nthFromTop 2) - Lua.tostring Lua.stackTop <* Lua.pop 2 >>= \case - Nothing -> Lua.throwMessage "untagged value" - Just x -> return (UTF8.toString x) - --- | Modify the message at the top of the stack before throwing it as an --- Exception. -throwTopMessageAsError' :: (String -> String) -> Lua a -throwTopMessageAsError' modifier = do - msg <- Lua.tostring' Lua.stackTop - Lua.pop 2 -- remove error and error string pushed by tostring' - Lua.throwMessage (modifier (UTF8.toString msg)) - --- | Mark the context of a Lua computation for better error reporting. -defineHowTo :: String -> Lua a -> Lua a -defineHowTo ctx op = Lua.errorConversion >>= \ec -> - Lua.addContextToException ec ("Could not " <> ctx <> ": ") op + Lua.rawset (Lua.nth 3) -- | Like @'Lua.pcall'@, but uses a predefined error handler which adds a -- traceback on error. -pcallWithTraceback :: NumArgs -> NumResults -> Lua Status +pcallWithTraceback :: LuaError e => NumArgs -> NumResults -> LuaE e Status pcallWithTraceback nargs nresults = do - let traceback' :: Lua NumResults + let traceback' :: LuaError e => LuaE e NumResults traceback' = do l <- Lua.state - msg <- Lua.tostring' (Lua.nthFromBottom 1) - Lua.traceback l (Just (UTF8.toString msg)) 2 + msg <- Lua.tostring' (Lua.nthBottom 1) + Lua.traceback l (Just msg) 2 return 1 - tracebackIdx <- Lua.absindex (Lua.nthFromTop (Lua.fromNumArgs nargs + 1)) + tracebackIdx <- Lua.absindex (Lua.nth (Lua.fromNumArgs nargs + 1)) Lua.pushHaskellFunction traceback' Lua.insert tracebackIdx result <- Lua.pcall nargs nresults (Just tracebackIdx) @@ -129,15 +46,15 @@ pcallWithTraceback nargs nresults = do return result -- | Like @'Lua.call'@, but adds a traceback to the error message (if any). -callWithTraceback :: NumArgs -> NumResults -> Lua () +callWithTraceback :: LuaError e => NumArgs -> NumResults -> LuaE e () callWithTraceback nargs nresults = do result <- pcallWithTraceback nargs nresults when (result /= Lua.OK) - Lua.throwTopMessage + Lua.throwErrorAsException -- | Run the given string as a Lua program, while also adding a traceback to the -- error message if an error occurs. -dofileWithTraceback :: FilePath -> Lua Status +dofileWithTraceback :: LuaError e => FilePath -> LuaE e Status dofileWithTraceback fp = do loadRes <- Lua.loadfile fp case loadRes of diff --git a/src/Text/Pandoc/Lua/Walk.hs b/src/Text/Pandoc/Lua/Walk.hs deleted file mode 100644 index d6d973496..000000000 --- a/src/Text/Pandoc/Lua/Walk.hs +++ /dev/null @@ -1,158 +0,0 @@ -{-# LANGUAGE DeriveTraversable #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{- | -Module : Text.Pandoc.Lua.Walk -Copyright : © 2012-2021 John MacFarlane, - © 2017-2021 Albert Krewinkel -License : GNU GPL, version 2 or above -Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> -Stability : alpha - -Walking documents in a filter-suitable way. --} -module Text.Pandoc.Lua.Walk - ( SingletonsList (..) - ) -where - -import Control.Monad ((<=<)) -import Text.Pandoc.Definition -import Text.Pandoc.Walk - --- | Helper type which allows to traverse trees in order, while splicing in --- trees. --- --- The only interesting use of this type is via it's '@Walkable@' instance. That --- instance makes it possible to walk a Pandoc document (or a subset thereof), --- while applying a function on each element of an AST element /list/, and have --- the resulting list spliced back in place of the original element. This is the --- traversal/splicing method used for Lua filters. -newtype SingletonsList a = SingletonsList { singletonsList :: [a] } - deriving (Functor, Foldable, Traversable) - --- --- SingletonsList Inline --- -instance {-# OVERLAPPING #-} Walkable (SingletonsList Inline) [Inline] where - walkM = walkSingletonsListM - query = querySingletonsList - -instance Walkable (SingletonsList Inline) Pandoc where - walkM = walkPandocM - query = queryPandoc - -instance Walkable (SingletonsList Inline) Citation where - walkM = walkCitationM - query = queryCitation - -instance Walkable (SingletonsList Inline) Inline where - walkM = walkInlineM - query = queryInline - -instance Walkable (SingletonsList Inline) Block where - walkM = walkBlockM - query = queryBlock - -instance Walkable (SingletonsList Inline) Row where - walkM = walkRowM - query = queryRow - -instance Walkable (SingletonsList Inline) TableHead where - walkM = walkTableHeadM - query = queryTableHead - -instance Walkable (SingletonsList Inline) TableBody where - walkM = walkTableBodyM - query = queryTableBody - -instance Walkable (SingletonsList Inline) TableFoot where - walkM = walkTableFootM - query = queryTableFoot - -instance Walkable (SingletonsList Inline) Caption where - walkM = walkCaptionM - query = queryCaption - -instance Walkable (SingletonsList Inline) Cell where - walkM = walkCellM - query = queryCell - -instance Walkable (SingletonsList Inline) MetaValue where - walkM = walkMetaValueM - query = queryMetaValue - -instance Walkable (SingletonsList Inline) Meta where - walkM f (Meta metamap) = Meta <$> walkM f metamap - query f (Meta metamap) = query f metamap - --- --- SingletonsList Block --- -instance {-# OVERLAPPING #-} Walkable (SingletonsList Block) [Block] where - walkM = walkSingletonsListM - query = querySingletonsList - -instance Walkable (SingletonsList Block) Pandoc where - walkM = walkPandocM - query = queryPandoc - -instance Walkable (SingletonsList Block) Citation where - walkM = walkCitationM - query = queryCitation - -instance Walkable (SingletonsList Block) Inline where - walkM = walkInlineM - query = queryInline - -instance Walkable (SingletonsList Block) Block where - walkM = walkBlockM - query = queryBlock - -instance Walkable (SingletonsList Block) Row where - walkM = walkRowM - query = queryRow - -instance Walkable (SingletonsList Block) TableHead where - walkM = walkTableHeadM - query = queryTableHead - -instance Walkable (SingletonsList Block) TableBody where - walkM = walkTableBodyM - query = queryTableBody - -instance Walkable (SingletonsList Block) TableFoot where - walkM = walkTableFootM - query = queryTableFoot - -instance Walkable (SingletonsList Block) Caption where - walkM = walkCaptionM - query = queryCaption - -instance Walkable (SingletonsList Block) Cell where - walkM = walkCellM - query = queryCell - -instance Walkable (SingletonsList Block) MetaValue where - walkM = walkMetaValueM - query = queryMetaValue - -instance Walkable (SingletonsList Block) Meta where - walkM f (Meta metamap) = Meta <$> walkM f metamap - query f (Meta metamap) = query f metamap - - -walkSingletonsListM :: (Monad m, Walkable (SingletonsList a) a) - => (SingletonsList a -> m (SingletonsList a)) - -> [a] -> m [a] -walkSingletonsListM f = - let f' = fmap singletonsList . f . SingletonsList . (:[]) <=< walkM f - in fmap mconcat . mapM f' - -querySingletonsList :: (Monoid c, Walkable (SingletonsList a) a) - => (SingletonsList a -> c) - -> [a] -> c -querySingletonsList f = - let f' x = f (SingletonsList [x]) `mappend` query f x - in mconcat . map f' diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 77c7069e9..dff8f7822 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -528,7 +528,7 @@ mimeTypesList = ,("wvx","video/x-ms-wvx") ,("wz","application/x-wingz") ,("xbm","image/x-xbitmap") - ,("xcf","application/x-xcf") + ,("xcf","image/x-xcf") ,("xht","application/xhtml+xml") ,("xhtml","application/xhtml+xml") ,("xlb","application/vnd.ms-excel") diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 098e484ee..eb4f3110c 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -80,7 +80,7 @@ insertMedia fp mbMime contents (MediaBag mediamap) = uri = parseURI fp newpath = if isRelative fp && isNothing uri - && ".." `notElem` splitPath fp + && ".." `notElem` splitDirectories fp then T.unpack fp' else showDigest (sha1 contents) <> "." <> ext fallback = case takeExtension fp of diff --git a/src/Text/Pandoc/Network/HTTP.hs b/src/Text/Pandoc/Network/HTTP.hs new file mode 100644 index 000000000..89f7f5544 --- /dev/null +++ b/src/Text/Pandoc/Network/HTTP.hs @@ -0,0 +1,18 @@ +{- | + Module : Text.Pandoc.Writers.Markdown.Inline + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Network.HTTP ( + urlEncode + ) where +import qualified Network.HTTP.Types as HTTP +import qualified Text.Pandoc.UTF8 as UTF8 +import qualified Data.Text as T + +urlEncode :: T.Text -> T.Text +urlEncode = UTF8.toText . HTTP.urlEncode True . UTF8.fromText diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 85d9aa103..6a3028b14 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -34,10 +34,10 @@ module Text.Pandoc.Options ( module Text.Pandoc.Extensions , defaultKaTeXURL ) where import Control.Applicative ((<|>)) -import Data.Char (toLower) import Data.Maybe (fromMaybe) import Data.Data (Data) import Data.Default +import Data.Char (toLower) import Data.Text (Text) import qualified Data.Set as Set import Data.Typeable (Typeable) @@ -46,10 +46,9 @@ import Skylighting (SyntaxMap, defaultSyntaxMap) import Text.DocTemplates (Context(..), Template) import Text.Pandoc.Extensions import Text.Pandoc.Highlighting (Style, pygments) -import Text.Pandoc.Shared (camelCaseStrToHyphenated) -import Data.Aeson.TH (deriveJSON, defaultOptions, Options(..), - SumEncoding(..)) -import Data.YAML +import Text.Pandoc.UTF8 (toStringLazy) +import Data.Aeson.TH (deriveJSON) +import Data.Aeson class HasSyntaxExtensions a where getExtensions :: a -> Extensions @@ -106,9 +105,9 @@ data HTMLMathMethod = PlainMath | KaTeX Text -- url of KaTeX files deriving (Show, Read, Eq, Data, Typeable, Generic) -instance FromYAML HTMLMathMethod where - parseYAML node = - (withMap "HTMLMathMethod" $ \m -> do +instance FromJSON HTMLMathMethod where + parseJSON node = + (withObject "HTMLMathMethod" $ \m -> do method <- m .: "method" mburl <- m .:? "url" case method :: Text of @@ -121,28 +120,48 @@ instance FromYAML HTMLMathMethod where "katex" -> return $ KaTeX $ fromMaybe defaultKaTeXURL mburl _ -> fail $ "Unknown HTML math method " ++ show method) node - <|> (withStr "HTMLMathMethod" $ \method -> - case method of - "plain" -> return PlainMath - "webtex" -> return $ WebTeX "" - "gladtex" -> return GladTeX - "mathml" -> return MathML - "mathjax" -> return $ MathJax defaultMathJaxURL - "katex" -> return $ KaTeX defaultKaTeXURL - _ -> fail $ "Unknown HTML math method " ++ show method) node + <|> (case node of + String "plain" -> return PlainMath + String "webtex" -> return $ WebTeX "" + String "gladtex" -> return GladTeX + String "mathml" -> return MathML + String "mathjax" -> return $ MathJax defaultMathJaxURL + String "katex" -> return $ KaTeX defaultKaTeXURL + _ -> fail $ "Unknown HTML math method " <> + toStringLazy (encode node)) + +instance ToJSON HTMLMathMethod where + toJSON PlainMath = String "plain" + toJSON (WebTeX "") = String "webtex" + toJSON (WebTeX url) = object ["method" .= String "webtex", + "url" .= String url] + toJSON GladTeX = String "gladtex" + toJSON MathML = String "mathml" + toJSON (MathJax "") = String "mathjax" + toJSON (MathJax url) = object ["method" .= String "mathjax", + "url" .= String url] + toJSON (KaTeX "") = String "katex" + toJSON (KaTeX url) = object ["method" .= String "katex", + "url" .= String url] data CiteMethod = Citeproc -- use citeproc to render them | Natbib -- output natbib cite commands | Biblatex -- output biblatex cite commands deriving (Show, Read, Eq, Data, Typeable, Generic) -instance FromYAML CiteMethod where - parseYAML = withStr "Citeproc" $ \t -> - case t of - "citeproc" -> return Citeproc - "natbib" -> return Natbib - "biblatex" -> return Biblatex - _ -> fail $ "Unknown citation method " ++ show t +instance FromJSON CiteMethod where + parseJSON v = + case v of + String "citeproc" -> return Citeproc + String "natbib" -> return Natbib + String "biblatex" -> return Biblatex + _ -> fail $ "Unknown citation method: " <> + toStringLazy (encode v) + +instance ToJSON CiteMethod where + toJSON Citeproc = String "citeproc" + toJSON Natbib = String "natbib" + toJSON Biblatex = String "biblatex" -- | Methods for obfuscating email addresses in HTML. data ObfuscationMethod = NoObfuscation @@ -150,13 +169,18 @@ data ObfuscationMethod = NoObfuscation | JavascriptObfuscation deriving (Show, Read, Eq, Data, Typeable, Generic) -instance FromYAML ObfuscationMethod where - parseYAML = withStr "Citeproc" $ \t -> - case t of - "none" -> return NoObfuscation - "references" -> return ReferenceObfuscation - "javascript" -> return JavascriptObfuscation - _ -> fail $ "Unknown obfuscation method " ++ show t +instance FromJSON ObfuscationMethod where + parseJSON v = + case v of + String "none" -> return NoObfuscation + String "references" -> return ReferenceObfuscation + String "javascript" -> return JavascriptObfuscation + _ -> fail $ "Unknown obfuscation method " ++ toStringLazy (encode v) + +instance ToJSON ObfuscationMethod where + toJSON NoObfuscation = String "none" + toJSON ReferenceObfuscation = String "references" + toJSON JavascriptObfuscation = String "javascript" -- | Varieties of HTML slide shows. data HTMLSlideVariant = S5Slides @@ -173,13 +197,22 @@ data TrackChanges = AcceptChanges | AllChanges deriving (Show, Read, Eq, Data, Typeable, Generic) -instance FromYAML TrackChanges where - parseYAML = withStr "TrackChanges" $ \t -> - case t of - "accept" -> return AcceptChanges - "reject" -> return RejectChanges - "all" -> return AllChanges - _ -> fail $ "Unknown track changes method " ++ show t +-- update in doc/filters.md if this changes: +instance FromJSON TrackChanges where + parseJSON v = + case v of + String "accept" -> return AcceptChanges + String "reject" -> return RejectChanges + String "all" -> return AllChanges + String "accept-changes" -> return AcceptChanges + String "reject-changes" -> return RejectChanges + String "all-changes" -> return AllChanges + _ -> fail $ "Unknown track changes method " <> toStringLazy (encode v) + +instance ToJSON TrackChanges where + toJSON AcceptChanges = String "accept-changes" + toJSON RejectChanges = String "reject-changes" + toJSON AllChanges = String "all-changes" -- | Options for wrapping text in the output. data WrapOption = WrapAuto -- ^ Automatically wrap to width @@ -187,14 +220,21 @@ data WrapOption = WrapAuto -- ^ Automatically wrap to width | WrapPreserve -- ^ Preserve wrapping of input source deriving (Show, Read, Eq, Data, Typeable, Generic) -instance FromYAML WrapOption where - parseYAML = withStr "WrapOption" $ \t -> - case t of - "auto" -> return WrapAuto - "none" -> return WrapNone - "preserve" -> return WrapPreserve - _ -> fail $ "Unknown wrap method " ++ show t - +instance FromJSON WrapOption where + parseJSON v = + case v of + String "auto" -> return WrapAuto + String "wrap-auto" -> return WrapAuto + String "none" -> return WrapNone + String "wrap-none" -> return WrapNone + String "preserve" -> return WrapPreserve + String "wrap-preserve" -> return WrapPreserve + _ -> fail $ "Unknown wrap method " <> toStringLazy (encode v) + +instance ToJSON WrapOption where + toJSON WrapAuto = "wrap-auto" + toJSON WrapNone = "wrap-none" + toJSON WrapPreserve = "wrap-preserve" -- | Options defining the type of top-level headers. data TopLevelDivision = TopLevelPart -- ^ Top-level headers become parts @@ -204,15 +244,24 @@ data TopLevelDivision = TopLevelPart -- ^ Top-level headers become parts -- heuristics deriving (Show, Read, Eq, Data, Typeable, Generic) -instance FromYAML TopLevelDivision where - parseYAML = withStr "TopLevelDivision" $ \t -> - case t of - "part" -> return TopLevelPart - "chapter" -> return TopLevelChapter - "section" -> return TopLevelSection - "default" -> return TopLevelDefault - _ -> fail $ "Unknown top level division " ++ show t - +instance FromJSON TopLevelDivision where + parseJSON v = + case v of + String "part" -> return TopLevelPart + String "top-level-part" -> return TopLevelPart + String "chapter" -> return TopLevelChapter + String "top-level-chapter" -> return TopLevelChapter + String "section" -> return TopLevelSection + String "top-level-section" -> return TopLevelSection + String "default" -> return TopLevelDefault + String "top-level-default" -> return TopLevelDefault + _ -> fail $ "Unknown top level division " <> toStringLazy (encode v) + +instance ToJSON TopLevelDivision where + toJSON TopLevelPart = "top-level-part" + toJSON TopLevelChapter = "top-level-chapter" + toJSON TopLevelSection = "top-level-section" + toJSON TopLevelDefault = "top-level-default" -- | Locations for footnotes and references in markdown output data ReferenceLocation = EndOfBlock -- ^ End of block @@ -220,14 +269,21 @@ data ReferenceLocation = EndOfBlock -- ^ End of block | EndOfDocument -- ^ at end of document deriving (Show, Read, Eq, Data, Typeable, Generic) -instance FromYAML ReferenceLocation where - parseYAML = withStr "ReferenceLocation" $ \t -> - case t of - "block" -> return EndOfBlock - "section" -> return EndOfSection - "document" -> return EndOfDocument - _ -> fail $ "Unknown reference location " ++ show t - +instance FromJSON ReferenceLocation where + parseJSON v = + case v of + String "block" -> return EndOfBlock + String "end-of-block" -> return EndOfBlock + String "section" -> return EndOfSection + String "end-of-section" -> return EndOfSection + String "document" -> return EndOfDocument + String "end-of-document" -> return EndOfDocument + _ -> fail $ "Unknown reference location " <> toStringLazy (encode v) + +instance ToJSON ReferenceLocation where + toJSON EndOfBlock = "end-of-block" + toJSON EndOfSection = "end-of-section" + toJSON EndOfDocument = "end-of-document" -- | Options for writers data WriterOptions = WriterOptions @@ -316,42 +372,9 @@ defaultKaTeXURL :: Text defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/" -- Update documentation in doc/filters.md if this is changed. -$(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseStrToHyphenated - } ''TrackChanges) - -$(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseStrToHyphenated - } ''WrapOption) - -$(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseStrToHyphenated . drop 8 - } ''TopLevelDivision) +$(deriveJSON defaultOptions{ fieldLabelModifier = + camelTo2 '-' . drop 6 } + ''ReaderOptions) -$(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseStrToHyphenated - } ''ReferenceLocation) - --- Update documentation in doc/filters.md if this is changed. -$(deriveJSON defaultOptions ''ReaderOptions) - -$(deriveJSON defaultOptions{ - constructorTagModifier = map toLower, - sumEncoding = TaggedObject{ - tagFieldName = "method", - contentsFieldName = "url" } - } ''HTMLMathMethod) - -$(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseStrToHyphenated - } ''CiteMethod) - -$(deriveJSON defaultOptions{ constructorTagModifier = - \case - "NoObfuscation" -> "none" - "ReferenceObfuscation" -> "references" - "JavascriptObfuscation" -> "javascript" - _ -> "none" - } ''ObfuscationMethod) - -$(deriveJSON defaultOptions ''HTMLSlideVariant) +$(deriveJSON defaultOptions{ constructorTagModifier = map toLower } + ''HTMLSlideVariant) diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index c4e30af34..9ff4bfb09 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -2,6 +2,7 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleContexts #-} {- | Module : Text.Pandoc.PDF Copyright : Copyright (C) 2012-2021 John MacFarlane @@ -50,13 +51,13 @@ import Text.Pandoc.Shared (inDirectory, stringify, tshow) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk (walkM) import Text.Pandoc.Writers.Shared (getField, metaToContext) +import Control.Monad.Catch (MonadMask) #ifdef _WINDOWS import Data.List (intercalate) #endif import Data.List (isPrefixOf, find) -import Text.Pandoc.Class.PandocIO (PandocIO, extractMedia, runIOorExplode) -import Text.Pandoc.Class.PandocMonad (fillMediaBag, getCommonState, getVerbosity, - putCommonState, report, setVerbosity) +import Text.Pandoc.Class (fillMediaBag, getVerbosity, + report, extractMedia, PandocMonad) import Text.Pandoc.Logging #ifdef _WINDOWS @@ -67,14 +68,15 @@ changePathSeparators = intercalate "/" . map (filter (/='\\')) . splitDirectories #endif -makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex, +makePDF :: (PandocMonad m, MonadIO m, MonadMask m) + => String -- ^ pdf creator (pdflatex, lualatex, xelatex, -- wkhtmltopdf, weasyprint, prince, context, pdfroff, -- or path to executable) -> [String] -- ^ arguments to pass to pdf creator - -> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer + -> (WriterOptions -> Pandoc -> m Text) -- ^ writer -> WriterOptions -- ^ options -> Pandoc -- ^ document - -> PandocIO (Either ByteString ByteString) + -> m (Either ByteString ByteString) makePDF program pdfargs writer opts doc = case takeBaseName program of "wkhtmltopdf" -> makeWithWkhtmltopdf program pdfargs writer opts doc @@ -86,57 +88,52 @@ makePDF program pdfargs writer opts doc = source <- writer opts doc let args = ["-ms", "-mpdfmark", "-mspdf", "-e", "-t", "-k", "-KUTF-8", "-i"] ++ pdfargs - verbosity <- getVerbosity - liftIO $ generic2pdf verbosity program args source + generic2pdf program args source baseProg -> do - commonState <- getCommonState - verbosity <- getVerbosity - -- latex has trouble with tildes in paths, which - -- you find in Windows temp dir paths with longer - -- user names (see #777) - let withTempDir templ action = do - tmp <- getTemporaryDirectory - uname <- E.catch - (do (ec, sout, _) <- readProcessWithExitCode "uname" ["-o"] "" - if ec == ExitSuccess - then return $ Just $ filter (not . isSpace) sout - else return Nothing) - (\(_ :: E.SomeException) -> return Nothing) - if '~' `elem` tmp || uname == Just "Cygwin" -- see #5451 - then withTempDirectory "." templ action - else withSystemTempDirectory templ action - (newCommonState, res) <- liftIO $ withTempDir "tex2pdf." $ \tmpdir' -> do + withTempDir "tex2pdf." $ \tmpdir' -> do #ifdef _WINDOWS -- note: we want / even on Windows, for TexLive let tmpdir = changePathSeparators tmpdir' #else let tmpdir = tmpdir' #endif - runIOorExplode $ do - putCommonState commonState - doc' <- handleImages opts tmpdir doc - source <- writer opts{ writerExtensions = -- disable use of quote - -- ligatures to avoid bad ligatures like ?` - disableExtension Ext_smart - (writerExtensions opts) } doc' - res <- case baseProg of - "context" -> context2pdf verbosity program pdfargs tmpdir source - "tectonic" -> tectonic2pdf verbosity program pdfargs tmpdir source - prog | prog `elem` ["pdflatex", "lualatex", "xelatex", "latexmk"] - -> tex2pdf verbosity program pdfargs tmpdir source - _ -> return $ Left $ UTF8.fromStringLazy - $ "Unknown program " ++ program - cs <- getCommonState - return (cs, res) - putCommonState newCommonState - return res + doc' <- handleImages opts tmpdir doc + source <- writer opts{ writerExtensions = -- disable use of quote + -- ligatures to avoid bad ligatures like ?` + disableExtension Ext_smart + (writerExtensions opts) } doc' + case baseProg of + "context" -> context2pdf program pdfargs tmpdir source + "tectonic" -> tectonic2pdf program pdfargs tmpdir source + prog | prog `elem` ["pdflatex", "lualatex", "xelatex", "latexmk"] + -> tex2pdf program pdfargs tmpdir source + _ -> return $ Left $ UTF8.fromStringLazy + $ "Unknown program " ++ program + +-- latex has trouble with tildes in paths, which +-- you find in Windows temp dir paths with longer +-- user names (see #777) +withTempDir :: (PandocMonad m, MonadMask m, MonadIO m) + => FilePath -> (FilePath -> m a) -> m a +withTempDir templ action = do + tmp <- liftIO getTemporaryDirectory + uname <- liftIO $ E.catch + (do (ec, sout, _) <- readProcessWithExitCode "uname" ["-o"] "" + if ec == ExitSuccess + then return $ Just $ filter (not . isSpace) sout + else return Nothing) + (\(_ :: E.SomeException) -> return Nothing) + if '~' `elem` tmp || uname == Just "Cygwin" -- see #5451 + then withTempDirectory "." templ action + else withSystemTempDirectory templ action -makeWithWkhtmltopdf :: String -- ^ wkhtmltopdf or path +makeWithWkhtmltopdf :: (PandocMonad m, MonadIO m) + => String -- ^ wkhtmltopdf or path -> [String] -- ^ arguments - -> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer + -> (WriterOptions -> Pandoc -> m Text) -- ^ writer -> WriterOptions -- ^ options -> Pandoc -- ^ document - -> PandocIO (Either ByteString ByteString) + -> m (Either ByteString ByteString) makeWithWkhtmltopdf program pdfargs writer opts doc@(Pandoc meta _) = do let mathArgs = case writerHTMLMathMethod opts of -- with MathJax, wait til all math is rendered: @@ -167,16 +164,18 @@ makeWithWkhtmltopdf program pdfargs writer opts doc@(Pandoc meta _) = do verbosity <- getVerbosity liftIO $ html2pdf verbosity program args source -handleImages :: WriterOptions +handleImages :: (PandocMonad m, MonadIO m) + => WriterOptions -> FilePath -- ^ temp dir to store images -> Pandoc -- ^ document - -> PandocIO Pandoc + -> m Pandoc handleImages opts tmpdir doc = fillMediaBag doc >>= extractMedia tmpdir >>= walkM (convertImages opts tmpdir) -convertImages :: WriterOptions -> FilePath -> Inline -> PandocIO Inline +convertImages :: (PandocMonad m, MonadIO m) + => WriterOptions -> FilePath -> Inline -> m Inline convertImages opts tmpdir (Image attr ils (src, tit)) = do img <- liftIO $ convertImage opts tmpdir $ T.unpack src newPath <- @@ -221,33 +220,32 @@ convertImage opts tmpdir fname = do mime = getMimeType fname doNothing = return (Right fname) -tectonic2pdf :: Verbosity -- ^ Verbosity level - -> String -- ^ tex program +tectonic2pdf :: (PandocMonad m, MonadIO m) + => String -- ^ tex program -> [String] -- ^ Arguments to the latex-engine -> FilePath -- ^ temp directory for output -> Text -- ^ tex source - -> PandocIO (Either ByteString ByteString) -tectonic2pdf verbosity program args tmpDir source = do - (exit, log', mbPdf) <- runTectonic verbosity program args tmpDir source + -> m (Either ByteString ByteString) +tectonic2pdf program args tmpDir source = do + (exit, log', mbPdf) <- runTectonic program args tmpDir source case (exit, mbPdf) of (ExitFailure _, _) -> return $ Left $ extractMsg log' (ExitSuccess, Nothing) -> return $ Left "" (ExitSuccess, Just pdf) -> do - missingCharacterWarnings verbosity log' + missingCharacterWarnings log' return $ Right pdf -tex2pdf :: Verbosity -- ^ Verbosity level - -> String -- ^ tex program +tex2pdf :: (PandocMonad m, MonadIO m) + => String -- ^ tex program -> [String] -- ^ Arguments to the latex-engine -> FilePath -- ^ temp directory for output -> Text -- ^ tex source - -> PandocIO (Either ByteString ByteString) -tex2pdf verbosity program args tmpDir source = do + -> m (Either ByteString ByteString) +tex2pdf program args tmpDir source = do let numruns | takeBaseName program == "latexmk" = 1 | "\\tableofcontents" `T.isInfixOf` source = 3 -- to get page numbers | otherwise = 2 -- 1 run won't give you PDF bookmarks - (exit, log', mbPdf) <- runTeXProgram verbosity program args numruns - tmpDir source + (exit, log', mbPdf) <- runTeXProgram program args numruns tmpDir source case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' @@ -260,11 +258,11 @@ tex2pdf verbosity program args tmpDir source = do return $ Left $ logmsg <> extramsg (ExitSuccess, Nothing) -> return $ Left "" (ExitSuccess, Just pdf) -> do - missingCharacterWarnings verbosity log' + missingCharacterWarnings log' return $ Right pdf -missingCharacterWarnings :: Verbosity -> ByteString -> PandocIO () -missingCharacterWarnings verbosity log' = do +missingCharacterWarnings :: PandocMonad m => ByteString -> m () +missingCharacterWarnings log' = do let ls = BC.lines log' let isMissingCharacterWarning = BC.isPrefixOf "Missing character: " let toCodePoint c @@ -275,7 +273,6 @@ missingCharacterWarnings verbosity log' = do | l <- ls , isMissingCharacterWarning l ] - setVerbosity verbosity mapM_ (report . MissingCharacter) warnings -- parsing output @@ -299,9 +296,10 @@ extractConTeXtMsg log' = do -- running tex programs -runTectonic :: Verbosity -> String -> [String] -> FilePath - -> Text -> PandocIO (ExitCode, ByteString, Maybe ByteString) -runTectonic verbosity program args' tmpDir' source = do +runTectonic :: (PandocMonad m, MonadIO m) + => String -> [String] -> FilePath + -> Text -> m (ExitCode, ByteString, Maybe ByteString) +runTectonic program args' tmpDir' source = do let getOutDir acc (a:b:xs) = if a `elem` ["-o", "--outdir"] then (reverse acc ++ xs, Just b) else getOutDir (b:a:acc) xs @@ -313,6 +311,7 @@ runTectonic verbosity program args' tmpDir' source = do let sourceBL = BL.fromStrict $ UTF8.fromText source let programArgs = ["--outdir", tmpDir] ++ args ++ ["-"] env <- liftIO getEnvironment + verbosity <- getVerbosity when (verbosity >= INFO) $ liftIO $ showVerboseInfo (Just tmpDir) program programArgs env (utf8ToText sourceBL) @@ -329,7 +328,9 @@ runTectonic verbosity program args' tmpDir' source = do -- read a pdf that has been written to a temporary directory, and optionally read -- logs -getResultingPDF :: Maybe String -> String -> PandocIO (Maybe ByteString, Maybe ByteString) +getResultingPDF :: (PandocMonad m, MonadIO m) + => Maybe String -> String + -> m (Maybe ByteString, Maybe ByteString) getResultingPDF logFile pdfFile = do pdfExists <- liftIO $ doesFileExist pdfFile pdf <- if pdfExists @@ -353,9 +354,10 @@ getResultingPDF logFile pdfFile = do -- Run a TeX program on an input bytestring and return (exit code, -- contents of stdout, contents of produced PDF if any). Rerun -- a fixed number of times to resolve references. -runTeXProgram :: Verbosity -> String -> [String] -> Int -> FilePath - -> Text -> PandocIO (ExitCode, ByteString, Maybe ByteString) -runTeXProgram verbosity program args numRuns tmpDir' source = do +runTeXProgram :: (PandocMonad m, MonadIO m) + => String -> [String] -> Int -> FilePath + -> Text -> m (ExitCode, ByteString, Maybe ByteString) +runTeXProgram program args numRuns tmpDir' source = do let isOutdirArg x = "-outdir=" `isPrefixOf` x || "-output-directory=" `isPrefixOf` x let tmpDir = @@ -378,6 +380,7 @@ runTeXProgram verbosity program args numRuns tmpDir' source = do ("TEXMFOUTPUT", tmpDir) : [(k,v) | (k,v) <- env' , k /= "TEXINPUTS" && k /= "TEXMFOUTPUT"] + verbosity <- getVerbosity when (verbosity >= INFO) $ liftIO $ UTF8.readFile file >>= showVerboseInfo (Just tmpDir) program programArgs env'' @@ -398,16 +401,17 @@ runTeXProgram verbosity program args numRuns tmpDir' source = do return (exit, fromMaybe out log', pdf) runTeX 1 -generic2pdf :: Verbosity - -> String +generic2pdf :: (PandocMonad m, MonadIO m) + => String -> [String] -> Text - -> IO (Either ByteString ByteString) -generic2pdf verbosity program args source = do - env' <- getEnvironment + -> m (Either ByteString ByteString) +generic2pdf program args source = do + env' <- liftIO getEnvironment + verbosity <- getVerbosity when (verbosity >= INFO) $ - showVerboseInfo Nothing program args env' source - (exit, out) <- E.catch + liftIO $ showVerboseInfo Nothing program args env' source + (exit, out) <- liftIO $ E.catch (pipeProcess (Just env') program args (BL.fromStrict $ UTF8.fromText source)) (handlePDFProgramNotFound program) @@ -454,19 +458,20 @@ html2pdf verbosity program args source = (ExitSuccess, Nothing) -> Left "" (ExitSuccess, Just pdf) -> Right pdf -context2pdf :: Verbosity -- ^ Verbosity level - -> String -- ^ "context" or path to it +context2pdf :: (PandocMonad m, MonadIO m) + => String -- ^ "context" or path to it -> [String] -- ^ extra arguments -> FilePath -- ^ temp directory for output -> Text -- ^ ConTeXt source - -> PandocIO (Either ByteString ByteString) -context2pdf verbosity program pdfargs tmpDir source = + -> m (Either ByteString ByteString) +context2pdf program pdfargs tmpDir source = do + verbosity <- getVerbosity liftIO $ inDirectory tmpDir $ do let file = "input.tex" BS.writeFile file $ UTF8.fromText source let programArgs = "--batchmode" : pdfargs ++ [file] env' <- getEnvironment - when (verbosity >= INFO) $ + when (verbosity >= INFO) $ liftIO $ UTF8.readFile file >>= showVerboseInfo (Just tmpDir) program programArgs env' (exit, out) <- E.catch diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 09445622d..cfda4bad2 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -599,9 +599,9 @@ parseFromString :: Monad m -> ParserT Sources st m r parseFromString parser str = do oldPos <- getPosition - setPosition $ initialPos "chunk" oldInput <- getInput setInput $ toSources str + setPosition $ initialPos $ sourceName oldPos <> "_chunk" result <- parser spaces setInput oldInput diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 5106f8058..dd3aecdc5 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -55,6 +55,7 @@ module Text.Pandoc.Readers , readCslJson , readBibTeX , readBibLaTeX + , readRTF -- * Miscellaneous , getReader , getDefaultExtensions @@ -102,6 +103,7 @@ import Text.Pandoc.Readers.Man import Text.Pandoc.Readers.CSV import Text.Pandoc.Readers.CslJson import Text.Pandoc.Readers.BibTeX +import Text.Pandoc.Readers.RTF import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Sources (ToSources(..), sourcesToText) @@ -149,6 +151,7 @@ readers = [("native" , TextReader readNative) ,("csljson" , TextReader readCslJson) ,("bibtex" , TextReader readBibTeX) ,("biblatex" , TextReader readBibLaTeX) + ,("rtf" , TextReader readRTF) ] -- | Retrieve reader, extensions based on formatSpec (format+extensions). diff --git a/src/Text/Pandoc/Readers/Custom.hs b/src/Text/Pandoc/Readers/Custom.hs new file mode 100644 index 000000000..9252a9e45 --- /dev/null +++ b/src/Text/Pandoc/Readers/Custom.hs @@ -0,0 +1,83 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Readers.Custom + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Supports custom parsers written in Lua which produce a Pandoc AST. +-} +module Text.Pandoc.Readers.Custom ( readCustom ) where +import Control.Exception +import Control.Monad (when) +import HsLua as Lua hiding (Operation (Div), render) +import Control.Monad.IO.Class (MonadIO) +import Text.Pandoc.Definition +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Logging +import Text.Pandoc.Lua (Global (..), runLua, setGlobals) +import Text.Pandoc.Lua.PandocLua +import Text.Pandoc.Lua.Marshal.Pandoc (peekPandoc) +import Text.Pandoc.Lua.Util (dofileWithTraceback, callWithTraceback, + pcallWithTraceback) +import Text.Pandoc.Options +import Text.Pandoc.Sources (ToSources(..), sourcesToText) +import qualified Data.Text as T + +-- | Convert custom markup to Pandoc. +readCustom :: (PandocMonad m, MonadIO m, ToSources s) + => FilePath -> ReaderOptions -> s -> m Pandoc +readCustom luaFile opts srcs = do + let globals = [ PANDOC_SCRIPT_FILE luaFile ] + res <- runLua $ do + setGlobals globals + stat <- dofileWithTraceback luaFile + -- check for error in lua script (later we'll change the return type + -- to handle this more gracefully): + when (stat /= Lua.OK) + Lua.throwErrorAsException + parseCustom + case res of + Left msg -> throw msg + Right doc -> return doc + where + parseCustom = do + let input = toSources srcs + getglobal "Reader" + push input + push opts + pcallWithTraceback 2 1 >>= \case + OK -> forcePeek $ peekPandoc top + ErrRun -> do + -- Caught a runtime error. Check if parsing might work if we + -- pass a string instead of a Sources list, then retry. + runPeek (peekText top) >>= \case + Failure {} -> + -- not a string error object. Bail! + throwErrorAsException + Success errmsg -> do + if "string expected, got pandoc Sources" `T.isInfixOf` errmsg + then do + pop 1 + _ <- unPandocLua $ do + report $ Deprecated "old Reader function signature" $ + T.unlines + [ "Reader functions should accept a sources list; " + , "functions expecting `string` input are deprecated. " + , "Use `tostring` to convert the first argument to a " + , "string." + ] + getglobal "Reader" + push $ sourcesToText input -- push sources as string + push opts + callWithTraceback 2 1 + forcePeek $ peekPandoc top + else + -- nothing we can do here + throwErrorAsException + _ -> -- not a runtime error, we won't be able to recover from that + throwErrorAsException diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index c49b82ccf..be90eb23e 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -19,7 +19,7 @@ import Data.Foldable (asum) import Data.Generics import Data.List (intersperse,elemIndex) import Data.List.NonEmpty (nonEmpty) -import Data.Maybe (fromMaybe,mapMaybe) +import Data.Maybe (catMaybes,fromMaybe,mapMaybe,maybeToList) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL @@ -316,7 +316,7 @@ List of all DocBook tags, with [x] indicating implemented, [ ] postcode - A postal code in an address [x] preface - Introductory matter preceding the first chapter of a book [ ] prefaceinfo - Meta-information for a Preface -[ ] primary - The primary word or phrase under which an index term should be +[x] primary - The primary word or phrase under which an index term should be sorted [ ] primaryie - A primary term in an index entry, not in the text [ ] printhistory - The printing history of a document @@ -385,7 +385,7 @@ List of all DocBook tags, with [x] indicating implemented, [o] screeninfo - Information about how a screen shot was produced [ ] screenshot - A representation of what the user sees or might see on a computer screen -[ ] secondary - A secondary word or phrase in an index term +[x] secondary - A secondary word or phrase in an index term [ ] secondaryie - A secondary term in an index entry, rather than in the text [x] sect1 - A top-level section of document [x] sect1info - Meta-information for a Sect1 @@ -461,7 +461,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] td - A table entry in an HTML table [x] term - The word or phrase being defined or described in a variable list [ ] termdef - An inline term definition -[ ] tertiary - A tertiary word or phrase in an index term +[x] tertiary - A tertiary word or phrase in an index term [ ] tertiaryie - A tertiary term in an index entry, rather than in the text [ ] textdata - Pointer to external text data [ ] textobject - A wrapper for a text description of an object and its @@ -829,7 +829,7 @@ parseBlock (Elem e) = "section" -> gets dbSectionLevel >>= sect . (+1) "simplesect" -> gets dbSectionLevel >>= - sectWith (attrValue "id" e,["unnumbered"],[]) . (+1) + sectWith(attrValue "id" e) ["unnumbered"] [] . (+1) "refsect1" -> sect 1 "refsect2" -> sect 2 "refsect3" -> sect 3 @@ -907,6 +907,7 @@ parseBlock (Elem e) = let classes' = case attrValue "language" e of "" -> [] x -> [x] + ++ ["numberLines" | attrValue "linenumbering" e == "numbered"] return $ codeBlockWith (attrValue "id" e, classes', []) $ trimNl $ strContentRecursive e parseBlockquote = do @@ -993,8 +994,8 @@ parseBlock (Elem e) = (TableHead nullAttr $ toHeaderRow headrows) [TableBody nullAttr 0 [] $ map toRow bodyrows] (TableFoot nullAttr []) - sect n = sectWith (attrValue "id" e,[],[]) n - sectWith attr n = do + sect n = sectWith(attrValue "id" e) [] [] n + sectWith elId classes attrs n = do isbook <- gets dbBook let n' = if isbook || n == 0 then n + 1 else n headerText <- case filterChild (named "title") e `mplus` @@ -1005,7 +1006,14 @@ parseBlock (Elem e) = modify $ \st -> st{ dbSectionLevel = n } b <- getBlocks e modify $ \st -> st{ dbSectionLevel = n - 1 } - return $ headerWith attr n' headerText <> b + return $ headerWith (elId, classes, maybeToList titleabbrevElAsAttr++attrs) n' headerText <> b + titleabbrevElAsAttr = do + txt <- case filterChild (named "titleabbrev") e `mplus` + (filterChild (named "info") e >>= + filterChild (named "titleabbrev")) of + Just t -> Just ("titleabbrev", strContentRecursive t) + Nothing -> Nothing + return txt lineItems = mapM getInlines $ filterChildren (named "line") e -- | Admonitions are parsed into a div. Following other Docbook tools that output HTML, -- we parse the optional title as a div with the @title@ class, and give the @@ -1079,6 +1087,17 @@ elementToStr :: Content -> Content elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x +childElTextAsAttr :: Text -> Element -> Maybe (Text, Text) +childElTextAsAttr n e = case findChild q e of + Nothing -> Nothing + Just childEl -> Just (n, strContentRecursive childEl) + where q = QName n (Just "http://docbook.org/ns/docbook") Nothing + +attrValueAsOptionalAttr :: Text -> Element -> Maybe (Text, Text) +attrValueAsOptionalAttr n e = case attrValue n e of + "" -> Nothing + _ -> Just (n, attrValue n e) + parseInline :: PandocMonad m => Content -> DB m Inlines parseInline (Text (CData _ s _)) = return $ text s parseInline (CRef ref) = @@ -1093,6 +1112,28 @@ parseInline (Elem e) = if ident /= "" || classes /= [] then innerInlines (spanWith (ident,classes,[])) else innerInlines id + "indexterm" -> do + let ident = attrValue "id" e + let classes = T.words $ attrValue "role" e + let attrs = + -- In DocBook, <primary>, <secondary>, <tertiary>, <see>, and <seealso> + -- have mixed content models. However, because we're representing these + -- elements in Pandoc's AST as attributes of a phrase, we flatten all + -- the descendant content of these elements. + [ childElTextAsAttr "primary" e + , childElTextAsAttr "secondary" e + , childElTextAsAttr "tertiary" e + , childElTextAsAttr "see" e + , childElTextAsAttr "seealso" e + , attrValueAsOptionalAttr "significance" e + , attrValueAsOptionalAttr "startref" e + , attrValueAsOptionalAttr "scope" e + , attrValueAsOptionalAttr "class" e + -- We don't do anything with the "pagenum" attribute, because these only + -- occur within literal <index> sections, which is not supported by Pandoc, + -- because Pandoc has no concept of pages. + ] + return $ spanWith (ident, ("indexterm" : classes), (catMaybes attrs)) mempty "equation" -> equation e displayMath "informalequation" -> equation e displayMath "inlineequation" -> equation e math diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index c06adf7e3..5c8f20c18 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -246,8 +246,8 @@ runToText _ = "" parPartToText :: ParPart -> T.Text parPartToText (PlainRun run) = runToText run -parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs -parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs +parPartToText (InternalHyperLink _ children) = T.concat $ map parPartToText children +parPartToText (ExternalHyperLink _ children) = T.concat $ map parPartToText children parPartToText _ = "" blacklistedCharStyles :: [CharStyleName] @@ -322,6 +322,7 @@ runToInlines (InlineDrawing fp title alt bs ext) = do (lift . lift) $ P.insertMedia fp Nothing bs return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" +runToInlines InlineDiagram = return $ spanWith ("", ["diagram"], []) $ text "[DIAGRAM]" extentToAttr :: Extent -> Attr extentToAttr (Just (w, h)) = @@ -434,18 +435,21 @@ parPartToInlines' (Drawing fp title alt bs ext) = do return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt parPartToInlines' Chart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" -parPartToInlines' (InternalHyperLink anchor runs) = do - ils <- smushInlines <$> mapM runToInlines runs +parPartToInlines' Diagram = + return $ spanWith ("", ["diagram"], []) $ text "[DIAGRAM]" +parPartToInlines' (InternalHyperLink anchor children) = do + ils <- smushInlines <$> mapM parPartToInlines' children return $ link ("#" <> anchor) "" ils -parPartToInlines' (ExternalHyperLink target runs) = do - ils <- smushInlines <$> mapM runToInlines runs +parPartToInlines' (ExternalHyperLink target children) = do + ils <- smushInlines <$> mapM parPartToInlines' children return $ link target "" ils parPartToInlines' (PlainOMath exps) = return $ math $ writeTeX exps -parPartToInlines' (Field info runs) = +parPartToInlines' (Field info children) = case info of - HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url runs - UnknownField -> smushInlines <$> mapM runToInlines runs + HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children + PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children + _ -> smushInlines <$> mapM parPartToInlines' children parPartToInlines' NullParPart = return mempty isAnchorSpan :: Inline -> Bool @@ -532,34 +536,36 @@ trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils extraAttr :: (Eq (StyleName a), HasStyleName a) => a -> Attr extraAttr s = ("", [], [("custom-style", fromStyleName $ getStyleName s)]) -parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks) -parStyleToTransform pPr = case pStyle pPr of - c@(getStyleName -> styleName):cs - | styleName `elem` divsToKeep -> do - let pPr' = pPr { pStyle = cs } - transform <- parStyleToTransform pPr' - return $ divWith ("", [normalizeToClassName styleName], []) . transform - | styleName `elem` listParagraphStyles -> do - let pPr' = pPr { pStyle = cs, indentation = Nothing} - transform <- parStyleToTransform pPr' - return $ divWith ("", [normalizeToClassName styleName], []) . transform - | otherwise -> do - let pPr' = pPr { pStyle = cs } - transform <- parStyleToTransform pPr' - styles <- asks (isEnabled Ext_styles . docxOptions) - return $ - (if styles then divWith (extraAttr c) else id) - . (if isBlockQuote c then blockQuote else id) - . transform - [] - | Just left <- indentation pPr >>= leftParIndent -> do - let pPr' = pPr { indentation = Nothing } - hang = fromMaybe 0 $ indentation pPr >>= hangingParIndent - transform <- parStyleToTransform pPr' - return $ if (left - hang) > 0 - then blockQuote . transform - else transform - | otherwise -> return id +paragraphStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks) +paragraphStyleToTransform pPr = + let stylenames = map getStyleName (pStyle pPr) + transform = if (`elem` listParagraphStyles) `any` stylenames || relativeIndent pPr <= 0 + then id + else blockQuote + in do + extStylesEnabled <- asks (isEnabled Ext_styles . docxOptions) + return $ foldr (\parStyle transform' -> + (parStyleToTransform extStylesEnabled parStyle) . transform' + ) transform (pStyle pPr) + +parStyleToTransform :: Bool -> ParStyle -> Blocks -> Blocks +parStyleToTransform extStylesEnabled parStyle@(getStyleName -> styleName) + | (styleName `elem` divsToKeep) || (styleName `elem` listParagraphStyles) = + divWith ("", [normalizeToClassName styleName], []) + | otherwise = + (if extStylesEnabled then divWith (extraAttr parStyle) else id) + . (if isBlockQuote parStyle then blockQuote else id) + +-- The relative indent is the indentation minus the indentation of the parent style. +-- This tells us whether this paragraph in particular was indented more and thus +-- should be considered a block quote. +relativeIndent :: ParagraphStyle -> Integer +relativeIndent pPr = + let pStyleLeft = fromMaybe 0 $ pStyleIndentation pPr >>= leftParIndent + pStyleHang = fromMaybe 0 $ pStyleIndentation pPr >>= hangingParIndent + left = fromMaybe pStyleLeft $ indentation pPr >>= leftParIndent + hang = fromMaybe pStyleHang $ indentation pPr >>= hangingParIndent + in (left - hang) - (pStyleLeft - pStyleHang) normalizeToClassName :: (FromStyleName a) => a -> T.Text normalizeToClassName = T.map go . fromStyleName @@ -578,7 +584,7 @@ bodyPartToBlocks (Paragraph pPr parparts) local (\s -> s{ docxInBidi = True }) (bodyPartToBlocks (Paragraph pPr' parparts)) | isCodeDiv pPr = do - transform <- parStyleToTransform pPr + transform <- paragraphStyleToTransform pPr return $ transform $ codeBlock $ @@ -605,7 +611,7 @@ bodyPartToBlocks (Paragraph pPr parparts) else prevParaIls <> space) <> ils' handleInsertion = do modify $ \s -> s {docxPrevPara = mempty} - transform <- parStyleToTransform pPr' + transform <- paragraphStyleToTransform pPr' return $ transform $ paraOrPlain ils'' opts <- asks docxOptions case (pChange pPr', readerTrackChanges opts) of @@ -620,7 +626,7 @@ bodyPartToBlocks (Paragraph pPr parparts) , AllChanges) -> do let attr = ("", ["paragraph-insertion"], addAuthorAndDate cAuthor cDate) insertMark = spanWith attr mempty - transform <- parStyleToTransform pPr' + transform <- paragraphStyleToTransform pPr' return $ transform $ paraOrPlain $ ils'' <> insertMark (Just (TrackedChange Deletion _), AcceptChanges) -> do @@ -632,7 +638,7 @@ bodyPartToBlocks (Paragraph pPr parparts) , AllChanges) -> do let attr = ("", ["paragraph-deletion"], addAuthorAndDate cAuthor cDate) insertMark = spanWith attr mempty - transform <- parStyleToTransform pPr' + transform <- paragraphStyleToTransform pPr' return $ transform $ paraOrPlain $ ils'' <> insertMark _ -> handleInsertion diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index 442bc3466..5f090b6be 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -21,8 +21,11 @@ import Text.Parsec import Text.Parsec.Text (Parser) type URL = T.Text +type Anchor = T.Text data FieldInfo = HyperlinkField URL + -- The boolean indicates whether the field is a hyperlink. + | PagerefField Anchor Bool | UnknownField deriving (Show) @@ -33,6 +36,8 @@ fieldInfo :: Parser FieldInfo fieldInfo = try (HyperlinkField <$> hyperlink) <|> + try ((uncurry PagerefField) <$> pageref) + <|> return UnknownField escapedQuote :: Parser T.Text @@ -72,3 +77,23 @@ hyperlink = do ("\\l", s) : _ -> farg <> "#" <> s _ -> farg return url + +-- See §17.16.5.45 +pagerefSwitch :: Parser (T.Text, T.Text) +pagerefSwitch = do + sw <- string "\\h" + spaces + farg <- fieldArgument + return (T.pack sw, farg) + +pageref :: Parser (Anchor, Bool) +pageref = do + many space + string "PAGEREF" + spaces + farg <- fieldArgument + switches <- spaces *> many pagerefSwitch + let isLink = case switches of + ("\\h", _) : _ -> True + _ -> False + return (farg, isLink) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index dbb16a821..87a3aebef 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -50,6 +50,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , archiveToDocxWithWarnings , getStyleNames , pHeading + , pStyleIndentation , constructBogusParStyleData , leftBiasedMergeRunStyle , rowsToRowspans @@ -92,14 +93,13 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes deriving Show data ReaderState = ReaderState { stateWarnings :: [T.Text] - , stateFldCharState :: FldCharState + , stateFldCharState :: [FldCharState] } deriving Show data FldCharState = FldCharOpen | FldCharFieldInfo FieldInfo - | FldCharContent FieldInfo [Run] - | FldCharClosed + | FldCharContent FieldInfo [ParPart] deriving (Show) data DocxError = DocxError @@ -194,11 +194,6 @@ data Notes = Notes NameSpaces data Comments = Comments NameSpaces (M.Map T.Text Element) deriving Show -data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer - , rightParIndent :: Maybe Integer - , hangingParIndent :: Maybe Integer} - deriving Show - data ChangeType = Insertion | Deletion deriving Show @@ -318,12 +313,13 @@ data ParPart = PlainRun Run | CommentStart CommentId Author (Maybe CommentDate) [BodyPart] | CommentEnd CommentId | BookMark BookMarkId Anchor - | InternalHyperLink Anchor [Run] - | ExternalHyperLink URL [Run] + | InternalHyperLink Anchor [ParPart] + | ExternalHyperLink URL [ParPart] | Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | Chart -- placeholder for now + | Diagram -- placeholder for now | PlainOMath [Exp] - | Field FieldInfo [Run] + | Field FieldInfo [ParPart] | NullParPart -- when we need to return nothing, but -- not because of an error. deriving Show @@ -333,6 +329,7 @@ data Run = Run RunStyle [RunElem] | Endnote [BodyPart] | InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | InlineChart -- placeholder + | InlineDiagram -- placeholder deriving Show data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen @@ -375,7 +372,7 @@ archiveToDocxWithWarnings archive = do , envDocXmlPath = docXmlPath } rState = ReaderState { stateWarnings = [] - , stateFldCharState = FldCharClosed + , stateFldCharState = [] } (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState case eitherDoc of @@ -437,6 +434,7 @@ getStyleNames = fmap getStyleName constructBogusParStyleData :: ParaStyleName -> ParStyle constructBogusParStyleData stName = ParStyle { headingLev = Nothing + , indent = Nothing , numInfo = Nothing , psParentStyle = Nothing , pStyleName = stName @@ -507,9 +505,7 @@ archiveToRelationships archive docXmlPath = filePathIsMedia :: FilePath -> Bool filePathIsMedia fp = - let (dir, _) = splitFileName fp - in - (dir == "word/media/") + "media" `elem` splitDirectories (takeDirectory fp) lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do @@ -673,20 +669,6 @@ elemToCell ns element | isElem ns "w" "tc" element = return $ Cell (fromMaybe 1 gridSpan) vMerge cellContents elemToCell _ _ = throwError WrongElem -elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation -elemToParIndentation ns element | isElem ns "w" "ind" element = - Just ParIndentation { - leftParIndent = - findAttrByName ns "w" "left" element >>= - stringToInteger - , rightParIndent = - findAttrByName ns "w" "right" element >>= - stringToInteger - , hangingParIndent = - findAttrByName ns "w" "hanging" element >>= - stringToInteger } -elemToParIndentation _ _ = Nothing - testBitMask :: Text -> Int -> Bool testBitMask bitMaskS n = case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of @@ -699,6 +681,9 @@ pHeading = getParStyleField headingLev . pStyle pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text) pNumInfo = getParStyleField numInfo . pStyle +pStyleIndentation :: ParagraphStyle -> Maybe ParIndentation +pStyleIndentation style = (getParStyleField indent . pStyle) style + elemToBodyPart :: NameSpaces -> Element -> D BodyPart elemToBodyPart ns element | isElem ns "w" "p" element @@ -715,28 +700,31 @@ elemToBodyPart ns element elemToBodyPart ns element | isElem ns "w" "p" element = do parstyle <- elemToParagraphStyle ns element <$> asks envParStyles - parparts <- mapD (elemToParPart ns) (elChildren element) + parparts' <- mapD (elemToParPart ns) (elChildren element) + fldCharState <- gets stateFldCharState + modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState} -- Word uses list enumeration for numbered headings, so we only -- want to infer a list from the styles if it is NOT a heading. - case pHeading parstyle of - Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do - levelInfo <- lookupLevel numId lvl <$> asks envNumbering - return $ ListItem parstyle numId lvl levelInfo parparts - _ -> let - hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle) - - hasSimpleTableField = fromMaybe False $ do - fldSimple <- findChildByName ns "w" "fldSimple" element - instr <- findAttrByName ns "w" "instr" fldSimple - pure ("Table" `elem` T.words instr) - - hasComplexTableField = fromMaybe False $ do - instrText <- findElementByName ns "w" "instrText" element - pure ("Table" `elem` T.words (strContent instrText)) - - in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField) - then return $ TblCaption parstyle parparts - else return $ Paragraph parstyle parparts + let parparts = parparts' ++ (openFldCharsToParParts fldCharState) in + case pHeading parstyle of + Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do + levelInfo <- lookupLevel numId lvl <$> asks envNumbering + return $ ListItem parstyle numId lvl levelInfo parparts + _ -> let + hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle) + + hasSimpleTableField = fromMaybe False $ do + fldSimple <- findChildByName ns "w" "fldSimple" element + instr <- findAttrByName ns "w" "instr" fldSimple + pure ("Table" `elem` T.words instr) + + hasComplexTableField = fromMaybe False $ do + instrText <- findElementByName ns "w" "instrText" element + pure ("Table" `elem` T.words (strContent instrText)) + + in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField) + then return $ TblCaption parstyle parparts + else return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do @@ -768,14 +756,30 @@ lookupRelationship docLocation relid rels = where pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels +openFldCharsToParParts :: [FldCharState] -> [ParPart] +openFldCharsToParParts [] = [] +openFldCharsToParParts (FldCharContent info children : ancestors) = case openFldCharsToParParts ancestors of + Field parentInfo siblings : _ -> [Field parentInfo $ siblings ++ [Field info $ reverse children]] + _ -> [Field info $ reverse children] +openFldCharsToParParts (_ : ancestors) = openFldCharsToParParts ancestors + +emptyFldCharContents :: [FldCharState] -> [FldCharState] +emptyFldCharContents = map + (\x -> case x of + FldCharContent info _ -> FldCharContent info [] + _ -> x) + expandDrawingId :: T.Text -> D (FilePath, B.ByteString) expandDrawingId s = do location <- asks envLocation target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships) case target of Just filepath -> do - bytes <- asks (lookup ("word/" ++ filepath) . envMedia) - case bytes of + media <- asks envMedia + let filepath' = case filepath of + ('/':rest) -> rest + _ -> "word/" ++ filepath + case lookup filepath' media of Just bs -> return (filepath, bs) Nothing -> throwError DocxError Nothing -> throwError DocxError @@ -789,44 +793,6 @@ getTitleAndAlt ns element = in (title, alt) elemToParPart :: NameSpaces -> Element -> D ParPart -elemToParPart ns element - | isElem ns "w" "r" element - , Just drawingElem <- findChildByName ns "w" "drawing" element - , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture" - , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem - = let (title, alt) = getTitleAndAlt ns drawingElem - a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" - drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrByName ns "r" "embed" - in - case drawing of - Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) - Nothing -> throwError WrongElem --- The two cases below are an attempt to deal with images in deprecated vml format. --- Todo: check out title and attr for deprecated format. -elemToParPart ns element - | isElem ns "w" "r" element - , Just _ <- findChildByName ns "w" "pict" element = - let drawing = findElement (elemName ns "v" "imagedata") element - >>= findAttrByName ns "r" "id" - in - case drawing of - Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing) - Nothing -> throwError WrongElem -elemToParPart ns element - | isElem ns "w" "r" element - , Just objectElem <- findChildByName ns "w" "object" element - , Just shapeElem <- findChildByName ns "v" "shape" objectElem - , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem - , Just drawingId <- findAttrByName ns "r" "id" imagedataElem - = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing) --- Chart -elemToParPart ns element - | isElem ns "w" "r" element - , Just drawingElem <- findChildByName ns "w" "drawing" element - , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart" - , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem - = return Chart {- The next one is a bit complicated. fldChar fields work by first having a <w:fldChar fldCharType="begin"> in a run, then a run with @@ -858,8 +824,13 @@ example (omissions and my comments in brackets): So we do this in a number of steps. If we encounter the fldchar begin tag, we start open a fldchar state variable (see state above). We add the instrtext to it as FieldInfo. Then we close that and start adding -the runs when we get to separate. Then when we get to end, we produce -the Field type with appropriate FieldInfo and Runs. +the children when we get to separate. Then when we get to end, we produce +the Field type with appropriate FieldInfo and ParParts. + +Since there can be nested fields, the fldchar state needs to be a stack, +so we can have multiple fldchars open at the same time. When a fldchar is +closed, we either add the resulting field to its parent or we return it if +there is no parent. -} elemToParPart ns element | isElem ns "w" "r" element @@ -867,78 +838,142 @@ elemToParPart ns element , Just fldCharType <- findAttrByName ns "w" "fldCharType" fldChar = do fldCharState <- gets stateFldCharState case fldCharState of - FldCharClosed | fldCharType == "begin" -> do - modify $ \st -> st {stateFldCharState = FldCharOpen} + _ | fldCharType == "begin" -> do + modify $ \st -> st {stateFldCharState = FldCharOpen : fldCharState} + return NullParPart + FldCharFieldInfo info : ancestors | fldCharType == "separate" -> do + modify $ \st -> st {stateFldCharState = FldCharContent info [] : ancestors} return NullParPart - FldCharFieldInfo info | fldCharType == "separate" -> do - modify $ \st -> st {stateFldCharState = FldCharContent info []} + -- Some fields have no content, since Pandoc doesn't understand any of those fields, we can just close it. + FldCharFieldInfo _ : ancestors | fldCharType == "end" -> do + modify $ \st -> st {stateFldCharState = ancestors} return NullParPart - FldCharContent info runs | fldCharType == "end" -> do - modify $ \st -> st {stateFldCharState = FldCharClosed} - return $ Field info $ reverse runs + [FldCharContent info children] | fldCharType == "end" -> do + modify $ \st -> st {stateFldCharState = []} + return $ Field info $ reverse children + FldCharContent info children : FldCharContent parentInfo siblings : ancestors | fldCharType == "end" -> + let parent = FldCharContent parentInfo $ (Field info (reverse children)) : siblings in do + modify $ \st -> st {stateFldCharState = parent : ancestors} + return NullParPart _ -> throwError WrongElem elemToParPart ns element | isElem ns "w" "r" element , Just instrText <- findChildByName ns "w" "instrText" element = do fldCharState <- gets stateFldCharState case fldCharState of - FldCharOpen -> do + FldCharOpen : ancestors -> do info <- eitherToD $ parseFieldInfo $ strContent instrText - modify $ \st -> st{stateFldCharState = FldCharFieldInfo info} + modify $ \st -> st {stateFldCharState = FldCharFieldInfo info : ancestors} return NullParPart _ -> return NullParPart -elemToParPart ns element +{- +There is an open fldchar, so we calculate the element and add it to the +children. For this we need to first change the fldchar state to an empty +stack to avoid descendants of children simply being added to the state instead +of to their direct parent element. This would happen in the case of a +w:hyperlink element for example. +-} +elemToParPart ns element = do + fldCharState <- gets stateFldCharState + case fldCharState of + FldCharContent info children : ancestors -> do + modify $ \st -> st {stateFldCharState = []} + parPart <- elemToParPart' ns element `catchError` \_ -> return NullParPart + modify $ \st -> st{stateFldCharState = FldCharContent info (parPart : children) : ancestors} + return NullParPart + _ -> elemToParPart' ns element + +elemToParPart' :: NameSpaces -> Element -> D ParPart +elemToParPart' ns element + | isElem ns "w" "r" element + , Just drawingElem <- findChildByName ns "w" "drawing" element + , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture" + , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem + = let (title, alt) = getTitleAndAlt ns drawingElem + a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" + drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem + >>= findAttrByName ns "r" "embed" + in + case drawing of + Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) + Nothing -> throwError WrongElem +-- The two cases below are an attempt to deal with images in deprecated vml format. +-- Todo: check out title and attr for deprecated format. +elemToParPart' ns element + | isElem ns "w" "r" element + , Just _ <- findChildByName ns "w" "pict" element = + let drawing = findElement (elemName ns "v" "imagedata") element + >>= findAttrByName ns "r" "id" + in + case drawing of + Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing) + Nothing -> throwError WrongElem +elemToParPart' ns element + | isElem ns "w" "r" element + , Just objectElem <- findChildByName ns "w" "object" element + , Just shapeElem <- findChildByName ns "v" "shape" objectElem + , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem + , Just drawingId <- findAttrByName ns "r" "id" imagedataElem + = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing) +-- Diagram +elemToParPart' ns element + | isElem ns "w" "r" element + , Just drawingElem <- findChildByName ns "w" "drawing" element + , d_ns <- "http://schemas.openxmlformats.org/drawingml/2006/diagram" + , Just _ <- findElement (QName "relIds" (Just d_ns) (Just "dgm")) drawingElem + = return Diagram +-- Chart +elemToParPart' ns element + | isElem ns "w" "r" element + , Just drawingElem <- findChildByName ns "w" "drawing" element + , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart" + , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem + = return Chart +elemToParPart' ns element | isElem ns "w" "r" element = do run <- elemToRun ns element - -- we check to see if we have an open FldChar in state that we're - -- recording. - fldCharState <- gets stateFldCharState - case fldCharState of - FldCharContent info runs -> do - modify $ \st -> st{stateFldCharState = FldCharContent info (run : runs)} - return NullParPart - _ -> return $ PlainRun run -elemToParPart ns element + return $ PlainRun run +elemToParPart' ns element | Just change <- getTrackedChange ns element = do runs <- mapD (elemToRun ns) (elChildren element) return $ ChangedRuns change runs -elemToParPart ns element +elemToParPart' ns element | isElem ns "w" "bookmarkStart" element , Just bmId <- findAttrByName ns "w" "id" element , Just bmName <- findAttrByName ns "w" "name" element = return $ BookMark bmId bmName -elemToParPart ns element +elemToParPart' ns element | isElem ns "w" "hyperlink" element , Just relId <- findAttrByName ns "r" "id" element = do location <- asks envLocation - runs <- mapD (elemToRun ns) (elChildren element) + children <- mapD (elemToParPart ns) (elChildren element) rels <- asks envRelationships case lookupRelationship location relId rels of Just target -> case findAttrByName ns "w" "anchor" element of - Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs - Nothing -> return $ ExternalHyperLink target runs - Nothing -> return $ ExternalHyperLink "" runs -elemToParPart ns element + Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) children + Nothing -> return $ ExternalHyperLink target children + Nothing -> return $ ExternalHyperLink "" children +elemToParPart' ns element | isElem ns "w" "hyperlink" element , Just anchor <- findAttrByName ns "w" "anchor" element = do - runs <- mapD (elemToRun ns) (elChildren element) - return $ InternalHyperLink anchor runs -elemToParPart ns element + children <- mapD (elemToParPart ns) (elChildren element) + return $ InternalHyperLink anchor children +elemToParPart' ns element | isElem ns "w" "commentRangeStart" element , Just cmtId <- findAttrByName ns "w" "id" element = do (Comments _ commentMap) <- asks envComments case M.lookup cmtId commentMap of Just cmtElem -> elemToCommentStart ns cmtElem Nothing -> throwError WrongElem -elemToParPart ns element +elemToParPart' ns element | isElem ns "w" "commentRangeEnd" element , Just cmtId <- findAttrByName ns "w" "id" element = return $ CommentEnd cmtId -elemToParPart ns element +elemToParPart' ns element | isElem ns "m" "oMath" element = fmap PlainOMath (eitherToD $ readOMML $ showElement element) -elemToParPart _ _ = throwError WrongElem +elemToParPart' _ _ = throwError WrongElem elemToCommentStart :: NameSpaces -> Element -> D ParPart elemToCommentStart ns element @@ -987,6 +1022,11 @@ childElemToRun ns element , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) element = return InlineChart childElemToRun ns element + | isElem ns "w" "drawing" element + , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/diagram" + , Just _ <- findElement (QName "relIds" (Just c_ns) (Just "dgm")) element + = return InlineDiagram +childElemToRun ns element | isElem ns "w" "footnoteReference" element , Just fnId <- findAttrByName ns "w" "id" element = do notes <- asks envNotes @@ -1071,8 +1111,7 @@ elemToParagraphStyle ns element sty in ParagraphStyle {pStyle = mapMaybe (`M.lookup` sty) style , indentation = - findChildByName ns "w" "ind" pPr >>= - elemToParIndentation ns + getIndentation ns element , dropCap = case findChildByName ns "w" "framePr" pPr >>= diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index 0d7271d6a..df942579a 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -21,6 +21,7 @@ module Text.Pandoc.Readers.Docx.Parse.Styles ( , CharStyle , ParaStyleId(..) , ParStyle(..) + , ParIndentation(..) , RunStyle(..) , HasStyleName , StyleName @@ -37,6 +38,7 @@ module Text.Pandoc.Readers.Docx.Parse.Styles ( , fromStyleName , fromStyleId , stringToInteger + , getIndentation , getNumInfo , elemToRunStyle , defaultRunStyle @@ -115,7 +117,13 @@ data RunStyle = RunStyle { isBold :: Maybe Bool } deriving Show +data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer + , rightParIndent :: Maybe Integer + , hangingParIndent :: Maybe Integer} + deriving Show + data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) + , indent :: Maybe ParIndentation , numInfo :: Maybe (T.Text, T.Text) , psParentStyle :: Maybe ParStyle , pStyleName :: ParaStyleName @@ -290,6 +298,22 @@ getHeaderLevel ns element , n > 0 = Just (styleName, fromInteger n) getHeaderLevel _ _ = Nothing +getIndentation :: NameSpaces -> Element -> Maybe ParIndentation +getIndentation ns el = do + indElement <- findChildByName ns "w" "pPr" el >>= + findChildByName ns "w" "ind" + return $ ParIndentation + { + leftParIndent = findAttrByName ns "w" "left" indElement <|> + findAttrByName ns "w" "start" indElement >>= + stringToInteger + , rightParIndent = findAttrByName ns "w" "right" indElement <|> + findAttrByName ns "w" "end" indElement >>= + stringToInteger + , hangingParIndent = findAttrByName ns "w" "hanging" indElement >>= + stringToInteger + } + getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a getElementStyleName ns el = coerce <$> ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") @@ -314,6 +338,7 @@ elemToParStyleData ns element parentStyle = Just $ ParStyle { headingLev = getHeaderLevel ns element + , indent = getIndentation ns element , numInfo = getNumInfo ns element , psParentStyle = parentStyle , pStyleName = styleName diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index fdf4f28e0..8aa2646b2 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -551,7 +551,7 @@ pFigure = try $ do let caption = fromMaybe mempty mbcap case B.toList <$> mbimg of Just [Image attr _ (url, tit)] -> - return $ B.para $ B.imageWith attr url ("fig:" <> tit) caption + return $ B.simpleFigureWith attr caption url tit _ -> mzero pCodeBlock :: PandocMonad m => TagParser m Blocks @@ -643,7 +643,7 @@ pQ = do case lookup "cite" attrs of Just url -> do let uid = fromMaybe mempty $ - lookup "name" attrs <> lookup "id" attrs + lookup "name" attrs <|> lookup "id" attrs let cls = maybe [] T.words $ lookup "class" attrs url' <- canonicalizeUrl url makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')]) @@ -705,20 +705,18 @@ pLineBreak = do pLink :: PandocMonad m => TagParser m Inlines pLink = try $ do - tag <- pSatisfy $ tagOpenLit "a" (const True) + tag@(TagOpen _ attr') <- pSatisfy $ tagOpenLit "a" (const True) let title = fromAttrib "title" tag - -- take id from id attribute if present, otherwise name - let uid = fromMaybe (fromAttrib "name" tag) $ - maybeFromAttrib "id" tag - let cls = T.words $ fromAttrib "class" tag + let attr = toAttr $ filter (\(k,_) -> k /= "title" && k /= "href") attr' lab <- mconcat <$> manyTill inline (pCloses "a") -- check for href; if href, then a link, otherwise a span case maybeFromAttrib "href" tag of Nothing -> - return $ extractSpaces (B.spanWith (uid, cls, [])) lab + return $ extractSpaces (B.spanWith attr) lab Just url' -> do url <- canonicalizeUrl url' - return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab + return $ extractSpaces + (B.linkWith attr (escapeURI url) title) lab pImage :: PandocMonad m => TagParser m Inlines pImage = do diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs index bd8d7c96c..a8cdf1de2 100644 --- a/src/Text/Pandoc/Readers/HTML/Parsing.hs +++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs @@ -30,11 +30,11 @@ module Text.Pandoc.Readers.HTML.Parsing ) where -import Control.Monad (guard, void, mzero) +import Control.Monad (void, mzero, mplus) import Data.Maybe (fromMaybe) import Data.Text (Text) import Text.HTML.TagSoup - ( Attribute, Tag (..), isTagText, isTagPosition, isTagOpen, isTagClose, (~==) ) + ( Attribute, Tag (..), isTagPosition, isTagOpen, isTagClose, (~==) ) import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Definition (Attr) import Text.Pandoc.Parsing @@ -118,9 +118,11 @@ pCloses tagtype = try $ do _ -> mzero pBlank :: PandocMonad m => TagParser m () -pBlank = try $ do - (TagText str) <- pSatisfy isTagText - guard $ T.all isSpace str +pBlank = void $ pSatisfy isBlank + where + isBlank (TagText t) = T.all isSpace t + isBlank (TagComment _) = True + isBlank _ = False pLocation :: PandocMonad m => TagParser m () pLocation = do @@ -218,9 +220,10 @@ maybeFromAttrib _ _ = Nothing mkAttr :: [(Text, Text)] -> Attr mkAttr attr = (attribsId, attribsClasses, attribsKV) - where attribsId = fromMaybe "" $ lookup "id" attr + where attribsId = fromMaybe "" $ lookup "id" attr `mplus` lookup "name" attr attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes - attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr + attribsKV = filter (\(k,_) -> k /= "class" && k /= "id" && k /= "name") + attr epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr toAttr :: [(Text, Text)] -> Attr diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index 6e62e12f5..b23a2abc8 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -16,7 +16,7 @@ HTML table parser. module Text.Pandoc.Readers.HTML.Table (pTable) where import Control.Applicative ((<|>)) -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, isJust) import Data.Either (lefts, rights) import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) @@ -27,12 +27,13 @@ import Text.Pandoc.Definition import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Parsing ( eof, lookAhead, many, many1, manyTill, option, optional - , optionMaybe, skipMany, try) + , optionMaybe, skipMany, try ) import Text.Pandoc.Readers.HTML.Parsing import Text.Pandoc.Readers.HTML.Types (TagParser) import Text.Pandoc.Shared (onlySimpleTableCells, safeRead) import qualified Data.Text as T import qualified Text.Pandoc.Builder as B +import Control.Monad (guard) -- | Parses a @<col>@ element, returning the column's width. -- An Either value is used: Left i means a "relative length" with @@ -183,11 +184,13 @@ pTableBody :: PandocMonad m -> TagParser m TableBody pTableBody block = try $ do skipMany pBlank - attribs <- option [] $ getAttribs <$> pSatisfy (matchTagOpen "tbody" []) - <* skipMany pBlank + mbattribs <- option Nothing $ Just . getAttribs <$> + pSatisfy (matchTagOpen "tbody" []) <* skipMany pBlank bodyheads <- many (pHeaderRow block) - (rowheads, rows) <- unzip <$> many1 (pRow block <* skipMany pBlank) + (rowheads, rows) <- unzip <$> many (pRow block <* skipMany pBlank) optional $ pSatisfy (matchTagClose "tbody") + guard $ isJust mbattribs || not (null bodyheads && null rows) + let attribs = fromMaybe [] mbattribs return $ TableBody (toAttr attribs) (foldr max 0 rowheads) bodyheads rows where getAttribs (TagOpen _ attribs) = attribs diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs index cd1093109..8e742a888 100644 --- a/src/Text/Pandoc/Readers/Ipynb.hs +++ b/src/Text/Pandoc/Readers/Ipynb.hs @@ -19,6 +19,7 @@ import Data.Char (isDigit) import Data.Maybe (fromMaybe) import Data.Digest.Pure.SHA (sha1, showDigest) import Text.Pandoc.Options +import Control.Applicative ((<|>)) import qualified Data.Scientific as Scientific import qualified Text.Pandoc.Builder as B import Text.Pandoc.Logging @@ -76,7 +77,10 @@ cellToBlocks opts lang c = do let Source ts = cellSource c let source = mconcat ts let kvs = jsonMetaToPairs (cellMetadata c) - let attachments = maybe mempty M.toList $ cellAttachments c + let attachments = case cellAttachments c of + Nothing -> mempty + Just (MimeAttachments m) -> M.toList m + let ident = fromMaybe mempty $ cellId c mapM_ addAttachment attachments case cellType c of Ipynb.Markdown -> do @@ -85,29 +89,34 @@ cellToBlocks opts lang c = do else do Pandoc _ bs <- walk fixImage <$> readMarkdown opts source return bs - return $ B.divWith ("",["cell","markdown"],kvs) + return $ B.divWith (ident,["cell","markdown"],kvs) $ B.fromList bs Ipynb.Heading lev -> do Pandoc _ bs <- readMarkdown opts (T.replicate lev "#" <> " " <> source) - return $ B.divWith ("",["cell","markdown"],kvs) + return $ B.divWith (ident,["cell","markdown"],kvs) $ B.fromList bs Ipynb.Raw -> do -- we use ipynb to indicate no format given (a wildcard in nbformat) - let format = fromMaybe "ipynb" $ lookup "format" kvs + let format = fromMaybe "ipynb" $ lookup "raw_mimetype" kvs <|> lookup "format" kvs let format' = case format of - "text/html" -> "html" - "text/latex" -> "latex" - "application/pdf" -> "latex" - "text/markdown" -> "markdown" - "text/x-rsrt" -> "rst" - _ -> format - return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source + "text/html" -> "html" + "slides" -> "html" + "text/latex" -> "latex" + "application/pdf" -> "latex" + "pdf" -> "latex" + "text/markdown" -> "markdown" + "text/x-rst" -> "rst" + "text/restructuredtext" -> "rst" + "text/asciidoc" -> "asciidoc" + _ -> format + return $ B.divWith (ident,["cell","raw"],kvs) + $ B.rawBlock format' source Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do outputBlocks <- mconcat <$> mapM outputToBlock outputs let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec - return $ B.divWith ("",["cell","code"],kvs') $ + return $ B.divWith (ident,["cell","code"],kvs') $ B.codeBlockWith ("",[lang],[]) source <> outputBlocks @@ -156,7 +165,7 @@ outputToBlock Err{ errName = ename, -- the output format. handleData :: PandocMonad m => JSONMeta -> MimeBundle -> m B.Blocks -handleData metadata (MimeBundle mb) = +handleData (JSONMeta metadata) (MimeBundle mb) = mconcat <$> mapM dataBlock (M.toList mb) where @@ -192,6 +201,9 @@ handleData metadata (MimeBundle mb) = dataBlock ("text/latex", TextualData t) = return $ B.rawBlock "latex" t + dataBlock ("text/markdown", TextualData t) + = return $ B.rawBlock "markdown" t + dataBlock ("text/plain", TextualData t) = return $ B.codeBlock t @@ -201,7 +213,7 @@ handleData metadata (MimeBundle mb) = dataBlock _ = return mempty jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue -jsonMetaToMeta = M.map valueToMetaValue +jsonMetaToMeta (JSONMeta m) = M.map valueToMetaValue m where valueToMetaValue :: Value -> MetaValue valueToMetaValue x@Object{} = @@ -220,11 +232,11 @@ jsonMetaToMeta = M.map valueToMetaValue valueToMetaValue Aeson.Null = MetaString "" jsonMetaToPairs :: JSONMeta -> [(Text, Text)] -jsonMetaToPairs = M.toList . M.map +jsonMetaToPairs (JSONMeta m) = M.toList . M.map (\case String t | not (T.all isDigit t) , t /= "true" , t /= "false" -> t - x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) + x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) $ m diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index 9cdbf1611..37e0d13bc 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -35,6 +35,7 @@ import Text.Pandoc.XML.Light import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) import Text.Pandoc.Sources (ToSources(..), sourcesToText) +import qualified Data.Foldable as DF type JATS m = StateT JATSState m @@ -226,9 +227,19 @@ parseBlock (Elem e) = mapM getInlines (filterChildren (const True) t) Nothing -> return mempty - img <- getGraphic (Just (capt, attrValue "id" e)) g - return $ para img + + let figAttributes = DF.toList $ + ("alt", ) . strContent <$> + filterChild (named "alt-text") e + + return $ simpleFigureWith + (attrValue "id" e, [], figAttributes) + capt + (attrValue "href" g) + (attrValue "title" g) + _ -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e + parseTable = do let isCaption x = named "title" x || named "caption" x capt <- case filterChild isCaption e of diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 27c018e73..20a2db76b 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -390,8 +390,8 @@ inlineCommands = M.unions unescapeURL . removeDoubleQuotes $ untokenize src) -- hyperref - , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$> - bracedUrl) + , ("url", (\url -> linkWith ("",["uri"],[]) url "" (str url)) + . unescapeURL . untokenize <$> bracedUrl) , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl) , ("href", do url <- bracedUrl sp @@ -893,7 +893,7 @@ blockCommands = M.fromList addMeta "bibliography" . splitBibs . untokenize)) , ("addbibresource", mempty <$ (skipopts *> braced >>= addMeta "bibliography" . splitBibs . untokenize)) - , ("endinput", mempty <$ skipMany anyTok) + , ("endinput", mempty <$ skipSameFileToks) -- includes , ("lstinputlisting", inputListing) , ("inputminted", inputMinted) @@ -924,6 +924,10 @@ blockCommands = M.fromList , ("epigraph", epigraph) ] +skipSameFileToks :: PandocMonad m => LP m () +skipSameFileToks = do + pos <- getPosition + skipMany $ infile (sourceName pos) environments :: PandocMonad m => M.Map Text (LP m Blocks) environments = M.union (tableEnvironments blocks inline) $ @@ -970,6 +974,7 @@ environments = M.union (tableEnvironments blocks inline) $ , ("toggletrue", braced >>= setToggle True) , ("togglefalse", braced >>= setToggle False) , ("iftoggle", try $ ifToggle >> block) + , ("CSLReferences", braced >> braced >> env "CSLReferences" blocks) ] filecontents :: PandocMonad m => LP m Blocks @@ -1109,24 +1114,28 @@ figure = try $ do addImageCaption :: PandocMonad m => Blocks -> LP m Blocks addImageCaption = walkM go - where go (Image attr@(_, cls, kvs) alt (src,tit)) + where go p@(Para [Image attr@(_, cls, kvs) _ (src, tit)]) | not ("fig:" `T.isPrefixOf` tit) = do st <- getState - let (alt', tit') = case sCaption st of - Just ils -> (toList ils, "fig:" <> tit) - Nothing -> (alt, tit) - attr' = case sLastLabel st of - Just lab -> (lab, cls, kvs) - Nothing -> attr - case attr' of - ("", _, _) -> return () - (ident, _, _) -> do - num <- getNextNumber sLastFigureNum - setState - st{ sLastFigureNum = num - , sLabels = M.insert ident - [Str (renderDottedNum num)] (sLabels st) } - return $ Image attr' alt' (src, tit') + case sCaption st of + Nothing -> return p + Just figureCaption -> do + let mblabel = sLastLabel st + let attr' = case mblabel of + Just lab -> (lab, cls, kvs) + Nothing -> attr + case attr' of + ("", _, _) -> return () + (ident, _, _) -> do + num <- getNextNumber sLastFigureNum + setState + st{ sLastFigureNum = num + , sLabels = M.insert ident + [Str (renderDottedNum num)] (sLabels st) } + + return $ SimpleFigure attr' + (maybe id removeLabel mblabel (B.toList figureCaption)) + (src, tit) go x = return x coloredBlock :: PandocMonad m => Text -> LP m Blocks diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs index 7b8bca4af..5938096fd 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Extensions (extensionEnabled, Extension(..)) import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy, manyTill, getInput, setInput, incSourceColumn, - option, many1, try) + option, many1) import Data.Char (isDigit) import Text.Pandoc.Highlighting (fromListingsLanguage,) import Data.Maybe (maybeToList, fromMaybe) @@ -56,8 +56,7 @@ dolabel = do let refstr = untokenize v updateState $ \st -> st{ sLastLabel = Just refstr } - return $ spanWith (refstr,[],[("label", refstr)]) - $ inBrackets $ str $ untokenize v + return $ spanWith (refstr,[],[("label", refstr)]) mempty doref :: PandocMonad m => Text -> LP m Inlines doref cls = do @@ -160,8 +159,8 @@ romanNumeralArg = spaces *> (parser <|> inBraces) accentWith :: PandocMonad m => LP m Inlines -> Char -> Maybe Char -> LP m Inlines -accentWith tok combiningAccent fallBack = try $ do - ils <- tok +accentWith tok combiningAccent fallBack = do + ils <- option mempty tok case toList ils of (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ -- try to normalize to the combined character: @@ -339,6 +338,7 @@ refCommands = M.fromList , ("cref", rawInlineOr "cref" $ doref "ref") -- from cleveref.sty , ("vref", rawInlineOr "vref" $ doref "ref+page") -- from varioref.sty , ("eqref", rawInlineOr "eqref" $ doref "eqref") -- from amsmath.sty + , ("autoref", rawInlineOr "autoref" $ doref "autoref") -- from hyperref.sty ] acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines) diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs index 5495a8e74..d40277eb5 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Macro.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs @@ -15,6 +15,8 @@ import Control.Applicative ((<|>), optional) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T +import qualified Data.List.NonEmpty as NonEmpty +import Data.List.NonEmpty (NonEmpty(..)) macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a macroDef constructor = do @@ -23,51 +25,91 @@ macroDef constructor = do guardDisabled Ext_latex_macros) <|> return mempty where commandDef = do - nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif + nameMacroPairs <- newcommand <|> + checkGlobal (letmacro <|> edefmacro <|> defmacro <|> newif) guardDisabled Ext_latex_macros <|> - mapM_ (\(name, macro') -> - updateState (\s -> s{ sMacros = M.insert name macro' - (sMacros s) })) nameMacroPairs + mapM_ insertMacro nameMacroPairs environmentDef = do mbenv <- newenvironment case mbenv of Nothing -> return () Just (name, macro1, macro2) -> guardDisabled Ext_latex_macros <|> - do updateState $ \s -> s{ sMacros = - M.insert name macro1 (sMacros s) } - updateState $ \s -> s{ sMacros = - M.insert ("end" <> name) macro2 (sMacros s) } + do insertMacro (name, macro1) + insertMacro ("end" <> name, macro2) -- @\newenvironment{envname}[n-args][default]{begin}{end}@ -- is equivalent to -- @\newcommand{\envname}[n-args][default]{begin}@ -- @\newcommand{\endenvname}@ +insertMacro :: PandocMonad m => (Text, Macro) -> LP m () +insertMacro (name, macro'@(Macro GlobalScope _ _ _ _)) = + updateState $ \s -> + s{ sMacros = NonEmpty.map (M.insert name macro') (sMacros s) } +insertMacro (name, macro'@(Macro GroupScope _ _ _ _)) = + updateState $ \s -> + s{ sMacros = M.insert name macro' (NonEmpty.head (sMacros s)) :| + NonEmpty.tail (sMacros s) } + +lookupMacro :: PandocMonad m => Text -> LP m Macro +lookupMacro name = do + macros :| _ <- sMacros <$> getState + case M.lookup name macros of + Just m -> return m + Nothing -> fail "Macro not found" + letmacro :: PandocMonad m => LP m [(Text, Macro)] letmacro = do controlSeq "let" - (name, contents) <- withVerbatimMode $ do + withVerbatimMode $ do Tok _ (CtrlSeq name) _ <- anyControlSeq optional $ symbol '=' spaces -- we first parse in verbatim mode, and then expand macros, -- because we don't want \let\foo\bar to turn into -- \let\foo hello if we have previously \def\bar{hello} + target <- anyControlSeq <|> singleChar + case target of + (Tok _ (CtrlSeq name') _) -> + (do m <- lookupMacro name' + pure [(name, m)]) + <|> pure [(name, + Macro GroupScope ExpandWhenDefined [] Nothing [target])] + _ -> pure [(name, Macro GroupScope ExpandWhenDefined [] Nothing [target])] + +checkGlobal :: PandocMonad m => LP m [(Text, Macro)] -> LP m [(Text, Macro)] +checkGlobal p = + (controlSeq "global" *> + (map (\(n, Macro _ expand arg optarg contents) -> + (n, Macro GlobalScope expand arg optarg contents)) <$> p)) + <|> p + +edefmacro :: PandocMonad m => LP m [(Text, Macro)] +edefmacro = do + scope <- (GroupScope <$ controlSeq "edef") + <|> (GlobalScope <$ controlSeq "xdef") + (name, contents) <- withVerbatimMode $ do + Tok _ (CtrlSeq name) _ <- anyControlSeq + -- we first parse in verbatim mode, and then expand macros, + -- because we don't want \let\foo\bar to turn into + -- \let\foo hello if we have previously \def\bar{hello} contents <- bracedOrToken return (name, contents) - contents' <- doMacros' 0 contents - return [(name, Macro ExpandWhenDefined [] Nothing contents')] + -- expand macros + contents' <- parseFromToks (many anyTok) contents + return [(name, Macro scope ExpandWhenDefined [] Nothing contents')] defmacro :: PandocMonad m => LP m [(Text, Macro)] defmacro = do -- we use withVerbatimMode, because macros are to be expanded -- at point of use, not point of definition - controlSeq "def" + scope <- (GroupScope <$ controlSeq "def") + <|> (GlobalScope <$ controlSeq "gdef") withVerbatimMode $ do Tok _ (CtrlSeq name) _ <- anyControlSeq argspecs <- many (argspecArg <|> argspecPattern) contents <- bracedOrToken - return [(name, Macro ExpandWhenUsed argspecs Nothing contents)] + return [(name, Macro scope ExpandWhenUsed argspecs Nothing contents)] -- \newif\iffoo' defines: -- \iffoo to be \iffalse @@ -82,16 +124,16 @@ newif = do -- \def\footrue{\def\iffoo\iftrue} -- \def\foofalse{\def\iffoo\iffalse} let base = T.drop 2 name - return [ (name, Macro ExpandWhenUsed [] Nothing + return [ (name, Macro GroupScope ExpandWhenUsed [] Nothing [Tok pos (CtrlSeq "iffalse") "\\iffalse"]) , (base <> "true", - Macro ExpandWhenUsed [] Nothing + Macro GroupScope ExpandWhenUsed [] Nothing [ Tok pos (CtrlSeq "def") "\\def" , Tok pos (CtrlSeq name) ("\\" <> name) , Tok pos (CtrlSeq "iftrue") "\\iftrue" ]) , (base <> "false", - Macro ExpandWhenUsed [] Nothing + Macro GroupScope ExpandWhenUsed [] Nothing [ Tok pos (CtrlSeq "def") "\\def" , Tok pos (CtrlSeq name) ("\\" <> name) , Tok pos (CtrlSeq "iffalse") "\\iffalse" @@ -138,14 +180,13 @@ newcommand = do : (contents' ++ [ Tok pos Symbol "}", Tok pos Symbol "}" ]) _ -> contents' - macros <- sMacros <$> getState - case M.lookup name macros of - Just macro - | mtype == "newcommand" -> do - report $ MacroAlreadyDefined txt pos - return [(name, macro)] - | mtype == "providecommand" -> return [(name, macro)] - _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)] + let macro = Macro GroupScope ExpandWhenUsed argspecs optarg contents + (do lookupMacro name + case mtype of + "providecommand" -> return [] + "renewcommand" -> return [(name, macro)] + _ -> [] <$ report (MacroAlreadyDefined txt pos)) + <|> pure [(name, macro)] newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro)) newenvironment = do @@ -164,17 +205,23 @@ newenvironment = do let argspecs = map (\i -> ArgNum i) [1..numargs] startcontents <- spaces >> bracedOrToken endcontents <- spaces >> bracedOrToken - macros <- sMacros <$> getState - case M.lookup name macros of - Just _ - | mtype == "newenvironment" -> do - report $ MacroAlreadyDefined name pos - return Nothing - | mtype == "provideenvironment" -> - return Nothing - _ -> return $ Just (name, - Macro ExpandWhenUsed argspecs optarg startcontents, - Macro ExpandWhenUsed [] Nothing endcontents) + -- we need the environment to be in a group so macros defined + -- inside behave correctly: + let bg = Tok pos (CtrlSeq "bgroup") "\\bgroup " + let eg = Tok pos (CtrlSeq "egroup") "\\egroup " + let result = (name, + Macro GroupScope ExpandWhenUsed argspecs optarg + (bg:startcontents), + Macro GroupScope ExpandWhenUsed [] Nothing + (endcontents ++ [eg])) + (do lookupMacro name + case mtype of + "provideenvironment" -> return Nothing + "renewenvironment" -> return (Just result) + _ -> do + report $ MacroAlreadyDefined name pos + return Nothing) + <|> return (Just result) bracketedNum :: PandocMonad m => LP m Int bracketedNum = do diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs index 5b49a0376..01edce7ed 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Math.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs @@ -142,14 +142,15 @@ newtheorem inline = do theoremEnvironment :: PandocMonad m => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks theoremEnvironment blocks opt name = do + resetCaption tmap <- sTheoremMap <$> getState case M.lookup name tmap of Nothing -> mzero Just tspec -> do optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt - mblabel <- option Nothing $ Just . untokenize <$> - try (spaces >> controlSeq "label" >> spaces >> braced) bs <- env name blocks + mblabel <- sLastLabel <$> getState + number <- if theoremNumber tspec then do @@ -169,9 +170,7 @@ theoremEnvironment blocks opt name = do Just ident -> updateState $ \s -> s{ sLabels = M.insert ident - (B.toList $ - theoremName tspec <> "\160" <> - str (renderDottedNum num)) (sLabels s) } + (B.toList $ str (renderDottedNum num)) (sLabels s) } Nothing -> return () return $ space <> B.text (renderDottedNum num) else return mempty @@ -181,13 +180,14 @@ theoremEnvironment blocks opt name = do RemarkStyle -> B.emph let title = titleEmph (theoremName tspec <> number) <> optTitle <> "." <> space - return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title + return $ divWith (fromMaybe "" mblabel, [name], []) + $ addTitle title + $ maybe id removeLabel mblabel $ case theoremStyle tspec of PlainStyle -> walk italicize bs _ -> bs - proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks proof blocks opt = do title <- option (B.text "Proof") opt diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 9dac4d6ef..9eb4a0cbc 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -45,6 +45,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing , isNewlineTok , isWordTok , isArgTok + , infile , spaces , spaces1 , tokTypeIn @@ -89,6 +90,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing , resetCaption , env , addMeta + , removeLabel ) where import Control.Applicative (many, (<|>)) @@ -102,6 +104,9 @@ import qualified Data.IntMap as IntMap import qualified Data.Map as M import qualified Data.Set as Set import Data.Text (Text) +import Data.Maybe (fromMaybe) +import Data.List.NonEmpty (NonEmpty(..)) +import qualified Data.List.NonEmpty as NonEmpty import qualified Data.Text as T import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) @@ -115,6 +120,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), ArgSpec (..), Tok (..), TokType (..)) import Text.Pandoc.Shared import Text.Parsec.Pos +import Text.Pandoc.Walk newtype DottedNum = DottedNum [Int] deriving (Show, Eq) @@ -146,7 +152,7 @@ data TheoremSpec = data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sMeta :: Meta , sQuoteContext :: QuoteContext - , sMacros :: M.Map Text Macro + , sMacros :: NonEmpty (M.Map Text Macro) , sContainers :: [Text] , sLogMessages :: [LogMessage] , sIdentifiers :: Set.Set Text @@ -173,7 +179,7 @@ defaultLaTeXState :: LaTeXState defaultLaTeXState = LaTeXState{ sOptions = def , sMeta = nullMeta , sQuoteContext = NoQuote - , sMacros = M.empty + , sMacros = M.empty :| [] , sContainers = [] , sLogMessages = [] , sIdentifiers = Set.empty @@ -220,8 +226,9 @@ instance HasIncludeFiles LaTeXState where dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s } instance HasMacros LaTeXState where - extractMacros st = sMacros st - updateMacros f st = st{ sMacros = f (sMacros st) } + extractMacros st = NonEmpty.head $ sMacros st + updateMacros f st = st{ sMacros = f (NonEmpty.head (sMacros st)) + :| NonEmpty.tail (sMacros st) } instance HasReaderOptions LaTeXState where extractReaderOptions = sOptions @@ -254,7 +261,7 @@ rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a) rawLaTeXParser toks retokenize parser valParser = do pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } - let lstate' = lstate { sMacros = extractMacros pstate } + let lstate' = lstate { sMacros = extractMacros pstate :| [] } let setStartPos = case toks of Tok pos _ _ : _ -> setPosition pos _ -> return () @@ -267,14 +274,14 @@ rawLaTeXParser toks retokenize parser valParser = do Right (endpos, toks') -> do res <- lift $ runParserT (do when retokenize $ do -- retokenize, applying macros - ts <- many (satisfyTok (const True)) + ts <- many anyTok setInput ts rawparser) lstate' "chunk" toks' case res of Left _ -> mzero Right ((val, raw), st) -> do - updateState (updateMacros (sMacros st <>)) + updateState (updateMacros ((NonEmpty.head (sMacros st)) <>)) let skipTilPos stopPos = do anyChar pos <- getPosition @@ -296,10 +303,10 @@ rawLaTeXParser toks retokenize parser valParser = do applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) => Text -> ParserT Sources s m Text applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> - do let retokenize = untokenize <$> many (satisfyTok (const True)) + do let retokenize = untokenize <$> many anyTok pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate - , sMacros = extractMacros pstate } + , sMacros = extractMacros pstate :| [] } res <- runParserT retokenize lstate "math" (tokenize "math" s) case res of Left e -> Prelude.fail (show e) @@ -552,10 +559,10 @@ doMacros' n inp = handleMacros n' spos name ts = do when (n' > 20) -- detect macro expansion loops $ throwError $ PandocMacroLoop name - macros <- sMacros <$> getState + (macros :| _ ) <- sMacros <$> getState case M.lookup name macros of Nothing -> trySpecialMacro name ts - Just (Macro expansionPoint argspecs optarg newtoks) -> do + Just (Macro _scope expansionPoint argspecs optarg newtoks) -> do let getargs' = do args <- (case expansionPoint of @@ -642,6 +649,9 @@ isArgTok :: Tok -> Bool isArgTok (Tok _ (Arg _) _) = True isArgTok _ = False +infile :: PandocMonad m => SourceName -> LP m Tok +infile reference = satisfyTok (\(Tok source _ _) -> (sourceName source) == reference) + spaces :: PandocMonad m => LP m () spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) @@ -745,10 +755,22 @@ primEscape = do bgroup :: PandocMonad m => LP m Tok bgroup = try $ do optional sp - symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup" + t <- symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup" + -- Add a copy of the macro table to the top of the macro stack, + -- private for this group. We inherit all the macros defined in + -- the parent group. + updateState $ \s -> s{ sMacros = NonEmpty.cons (NonEmpty.head (sMacros s)) + (sMacros s) } + return t + egroup :: PandocMonad m => LP m Tok -egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup" +egroup = do + t <- symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup" + -- remove the group's macro table from the stack + updateState $ \s -> s{ sMacros = fromMaybe (sMacros s) $ + NonEmpty.nonEmpty (NonEmpty.tail (sMacros s)) } + return t grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a grouped parser = try $ do @@ -921,6 +943,9 @@ getRawCommand name txt = do void $ count 4 braced "def" -> void $ manyTill anyTok braced + "vadjust" -> + void (manyTill anyTok braced) <|> + void (satisfyTok isPreTok) -- see #7531 _ | isFontSizeCommand name -> return () | otherwise -> do skipopts @@ -928,6 +953,10 @@ getRawCommand name txt = do void $ many braced return $ txt <> untokenize rawargs +isPreTok :: Tok -> Bool +isPreTok (Tok _ Word "pre") = True +isPreTok _ = False + isDigitTok :: Tok -> Bool isDigitTok (Tok _ Word t) = T.all isDigit t isDigitTok _ = False @@ -1017,7 +1046,16 @@ resetCaption = updateState $ \st -> st{ sCaption = Nothing , sLastLabel = Nothing } env :: PandocMonad m => Text -> LP m a -> LP m a -env name p = p <* end_ name +env name p = do + -- environments are groups as far as macros are concerned, + -- so we need a local copy of the macro table (see above, bgroup, egroup): + updateState $ \s -> s{ sMacros = NonEmpty.cons (NonEmpty.head (sMacros s)) + (sMacros s) } + result <- p + updateState $ \s -> s{ sMacros = fromMaybe (sMacros s) $ + NonEmpty.nonEmpty (NonEmpty.tail (sMacros s)) } + end_ name + return result tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines tokWith inlineParser = try $ spaces >> @@ -1031,3 +1069,16 @@ tokWith inlineParser = try $ spaces >> addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m () addMeta field val = updateState $ \st -> st{ sMeta = addMetaField field val $ sMeta st } + +-- remove label spans to avoid duplicated identifier +removeLabel :: Walkable [Inline] a => Text -> a -> a +removeLabel lbl = walk go + where + go (Span (_,_,kvs) _ : rest) + | Just lbl' <- lookup "label" kvs + , lbl' == lbl = go (dropWhile isSpaceOrSoftBreak rest) + go (x:xs) = x : go xs + go [] = [] + isSpaceOrSoftBreak Space = True + isSpaceOrSoftBreak SoftBreak = True + isSpaceOrSoftBreak _ = False diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index b8bf0ce7f..e4738a763 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -23,11 +23,15 @@ siunitxCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) siunitxCommands tok = M.fromList [ ("si", dosi tok) + , ("unit", dosi tok) -- v3 version of si , ("SI", doSI tok) + , ("qty", doSI tok) -- v3 version of SI , ("SIrange", doSIrange True tok) + , ("qtyrange", doSIrange True tok) -- v3 version of SIrange + , ("SIlist", doSIlist tok) + , ("qtylist", doSIlist tok) -- v3 version of SIlist , ("numrange", doSIrange False tok) , ("numlist", doSInumlist) - , ("SIlist", doSIlist tok) , ("num", doSInum) , ("ang", doSIang) ] diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs index f56728fe1..7d5c4f265 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs @@ -368,7 +368,9 @@ addTableCaption = walkM go ((_,classes,kvs), Just ident) -> (ident,classes,kvs) _ -> attr - return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf + return $ addAttrDiv attr' + $ maybe id removeLabel mblabel + $ Table nullAttr capt spec th tb tf go x = return x -- TODO: For now we add a Div to contain table attributes, since diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs index c20b72bc5..a4eae56db 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -15,6 +15,7 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) , Macro(..) , ArgSpec(..) , ExpansionPoint(..) + , MacroScope(..) , SourcePos ) where @@ -43,7 +44,10 @@ tokToText (Tok _ _ t) = t data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed deriving (Eq, Ord, Show) -data Macro = Macro ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok] +data MacroScope = GlobalScope | GroupScope + deriving (Eq, Ord, Show) + +data Macro = Macro MacroScope ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok] deriving Show data ArgSpec = ArgNum Int | Pattern [Tok] diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2dc7ddf52..b5017a433 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,5 @@ {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} @@ -21,14 +22,14 @@ module Text.Pandoc.Readers.Markdown ( import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isPunctuation, isSpace) +import Text.DocLayout (realLength) import Data.List (transpose, elemIndex, sortOn, foldl') -import Data.List.NonEmpty (NonEmpty(..)) import qualified Data.Map as M import Data.Maybe import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString as BS import System.FilePath (addExtension, takeExtension, takeDirectory) import qualified System.FilePath.Windows as Windows import qualified System.FilePath.Posix as Posix @@ -39,6 +40,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), report) import Text.Pandoc.Definition as Pandoc import Text.Pandoc.Emoji (emojiToInline) import Text.Pandoc.Error +import Safe.Foldable (maximumBounded) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Walk (walk) @@ -72,14 +74,12 @@ readMarkdown opts s = do yamlToMeta :: PandocMonad m => ReaderOptions -> Maybe FilePath - -> BL.ByteString + -> BS.ByteString -> m Meta yamlToMeta opts mbfp bstr = do let parser = do oldPos <- getPosition - case mbfp of - Nothing -> return () - Just fp -> setPosition $ initialPos fp + setPosition $ initialPos (fromMaybe "" mbfp) meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr setPosition oldPos return $ runF meta defaultParserState @@ -95,7 +95,7 @@ yamlToRefs :: PandocMonad m => (Text -> Bool) -> ReaderOptions -> Maybe FilePath - -> BL.ByteString + -> BS.ByteString -> m [MetaValue] yamlToRefs idpred opts mbfp bstr = do let parser = do @@ -198,6 +198,7 @@ inlinesInBalancedBrackets = go openBrackets = (() <$ (escapedChar <|> code <|> + math <|> rawHtmlInline <|> rawLaTeXInline') >> go openBrackets) <|> @@ -326,6 +327,7 @@ referenceKey :: PandocMonad m => MarkdownParser m (F Blocks) referenceKey = try $ do pos <- getPosition skipNonindentSpaces + notFollowedBy (void cite) (_,raw) <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') @@ -829,7 +831,7 @@ listLineCommon :: PandocMonad m => MarkdownParser m Text listLineCommon = T.concat <$> manyTill ( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`']) <|> fmap snd (withRaw code) - <|> fmap snd (htmlTag isCommentTag) + <|> fmap (renderTags . (:[]) . fst) (htmlTag isCommentTag) <|> countChar 1 anyChar ) newline @@ -1013,19 +1015,18 @@ normalDefinitionList = do para :: PandocMonad m => MarkdownParser m (F Blocks) para = try $ do exts <- getOption readerExtensions - let implicitFigures x - | extensionEnabled Ext_implicit_figures exts = do - x' <- x - case B.toList x' of - [Image attr alt (src,tit)] - | not (null alt) -> - -- the fig: at beginning of title indicates a figure - return $ B.singleton - $ Image attr alt (src, "fig:" <> tit) - _ -> return x' - | otherwise = x - result <- implicitFigures . trimInlinesF <$> inlines1 - option (B.plain <$> result) + + result <- trimInlinesF <$> inlines1 + let figureOr constr inlns = + case B.toList inlns of + [Image attr figCaption (src, tit)] + | extensionEnabled Ext_implicit_figures exts + , not (null figCaption) -> do + B.simpleFigureWith attr (B.fromList figCaption) src tit + + _ -> constr inlns + + option (figureOr B.plain <$> result) $ try $ do newline (mempty <$ blanklines) @@ -1047,7 +1048,7 @@ para = try $ do if divLevel > 0 then lookAhead divFenceEnd else mzero - return $ B.para <$> result + return $ figureOr B.para <$> result plain :: PandocMonad m => MarkdownParser m (F Blocks) plain = fmap B.plain . trimInlinesF <$> inlines1 @@ -1124,7 +1125,12 @@ rawHtmlBlocks = do let selfClosing = "/>" `T.isSuffixOf` raw -- we don't want '<td> text' to be a code block: skipMany spaceChar - indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0 + tabStop <- getOption readerTabStop + indentlevel <- option 0 $ + do blankline + sum <$> many ( (1 <$ char ' ') + <|> + (tabStop <$ char '\t') ) -- try to find closing tag -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags @@ -1355,26 +1361,30 @@ pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak - let heads' = take (length aligns) <$> heads + let cellContents = parseFromString' pipeTableCell . trim + let numcols = length aligns + let heads' = take numcols heads lines' <- many pipeTableRow - let lines'' = map (take (length aligns) <$>) lines' - let maxlength = maximum $ - fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'') - numColumns <- getOption readerColumns - let widths = if maxlength > numColumns + let lines'' = map (take numcols) lines' + let lineWidths = map (sum . map realLength) (heads' : lines'') + columns <- getOption readerColumns + -- add numcols + 1 for the pipes themselves + let widths = if maximumBounded (sum seplengths : lineWidths) + (numcols + 1) > columns then map (\len -> fromIntegral len / fromIntegral (sum seplengths)) seplengths else replicate (length aligns) 0.0 - return (aligns, widths, toHeaderRow <$> heads', map toRow <$> sequence lines'') + (headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads' + (rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines'' + return (aligns, widths, toHeaderRow <$> headCells, map toRow <$> rows) sepPipe :: PandocMonad m => MarkdownParser m () sepPipe = try $ do char '|' <|> char '+' notFollowedBy blankline --- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks]) +-- parse a row, returning raw cell contents +pipeTableRow :: PandocMonad m => MarkdownParser m [Text] pipeTableRow = try $ do scanForPipe skipMany spaceChar @@ -1382,13 +1392,11 @@ pipeTableRow = try $ do -- split into cells let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') <|> void (noneOf "|\n\r") - let cellContents = withRaw (many chunk) >>= - parseFromString' pipeTableCell . trim . snd - cells <- cellContents `sepEndBy1` char '|' + cells <- (snd <$> withRaw (many chunk)) `sepEndBy1` char '|' -- surrounding pipes needed for a one-column table: guard $ not (length cells == 1 && not openPipe) blankline - return $ sequence cells + return cells pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks) pipeTableCell = @@ -1692,21 +1700,29 @@ strikeout = fmap B.strikeout <$> superscript :: PandocMonad m => MarkdownParser m (F Inlines) superscript = do - guardEnabled Ext_superscript fmap B.superscript <$> try (do char '^' - mconcat <$> many1Till (do notFollowedBy spaceChar - notFollowedBy newline - inline) (char '^')) + mconcat <$> (try regularSuperscript <|> try mmdShortSuperscript)) + where regularSuperscript = many1Till (do guardEnabled Ext_superscript + notFollowedBy spaceChar + notFollowedBy newline + inline) (char '^') + mmdShortSuperscript = do guardEnabled Ext_short_subsuperscripts + result <- T.pack <$> many1 alphaNum + return $ return $ return $ B.str result subscript :: PandocMonad m => MarkdownParser m (F Inlines) subscript = do - guardEnabled Ext_subscript fmap B.subscript <$> try (do char '~' - mconcat <$> many1Till (do notFollowedBy spaceChar - notFollowedBy newline - inline) (char '~')) + mconcat <$> (try regularSubscript <|> mmdShortSubscript)) + where regularSubscript = many1Till (do guardEnabled Ext_subscript + notFollowedBy spaceChar + notFollowedBy newline + inline) (char '~') + mmdShortSubscript = do guardEnabled Ext_short_subsuperscripts + result <- T.pack <$> many1 alphaNum + return $ return $ return $ B.str result whitespace :: PandocMonad m => MarkdownParser m (F Inlines) whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" @@ -1768,7 +1784,6 @@ endline = try $ do reference :: PandocMonad m => MarkdownParser m (F Inlines, Text) reference = do guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^") - guardDisabled Ext_citations <|> notFollowedBy' (string "[@") withRaw $ trimInlinesF <$> inlinesInBalancedBrackets parenthesizedChars :: PandocMonad m => MarkdownParser m Text @@ -2187,6 +2202,7 @@ normalCite = try $ do citations <- citeList spnl char ']' + notFollowedBy (oneOf "{([") -- not a link or a bracketed span return citations suffix :: PandocMonad m => MarkdownParser m (F Inlines) @@ -2200,7 +2216,7 @@ suffix = try $ do prefix :: PandocMonad m => MarkdownParser m (F Inlines) prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' + manyTill (notFollowedBy (char ';') >> inline) (char ']' <|> lookAhead (try $ do optional (try (char ';' >> spnl)) citeKey True diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 825e4a2eb..9348a8053 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -201,7 +201,12 @@ para = do contents <- trimInlines . mconcat <$> many1 inline if F.all (==Space) contents then return mempty - else return $ B.para contents + else case B.toList contents of + -- For the MediaWiki format all images are considered figures + [Image attr figureCaption (src, title)] -> + return $ B.simpleFigureWith + attr (B.fromList figureCaption) src title + _ -> return $ B.para contents table :: PandocMonad m => MWParser m Blocks table = do @@ -631,7 +636,7 @@ image = try $ do let attr = ("", [], kvs) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.imageWith attr fname ("fig:" <> stringify caption) caption + return $ B.imageWith attr fname (stringify caption) caption imageOption :: PandocMonad m => MWParser m Text imageOption = try $ char '|' *> opt diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs index cbc523b25..7991dca5c 100644 --- a/src/Text/Pandoc/Readers/Metadata.hs +++ b/src/Text/Pandoc/Readers/Metadata.hs @@ -17,102 +17,62 @@ module Text.Pandoc.Readers.Metadata ( yamlMetaBlock, yamlMap ) where -import Control.Monad + import Control.Monad.Except (throwError) -import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString as B import qualified Data.Map as M -import Data.Maybe import Data.Text (Text) import qualified Data.Text as T -import qualified Data.YAML as YAML -import qualified Data.YAML.Event as YE +import qualified Data.Yaml as Yaml +import Data.Aeson (Value(..), Object, Result(..), fromJSON, (.:?), withObject) +import Data.Aeson.Types (parse) +import Text.Pandoc.Shared (tshow) import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) -import Text.Pandoc.Definition +import Text.Pandoc.Definition hiding (Null) import Text.Pandoc.Error -import Text.Pandoc.Parsing hiding (tableWith) -import Text.Pandoc.Shared -import qualified Data.Text.Lazy as TL +import Text.Pandoc.Parsing hiding (tableWith, parse) + + import qualified Text.Pandoc.UTF8 as UTF8 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st) => ParserT Sources st m (Future st MetaValue) - -> BL.ByteString + -> B.ByteString -> ParserT Sources st m (Future st Meta) yamlBsToMeta pMetaValue bstr = do - case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of - Right (YAML.Doc (YAML.Mapping _ _ o):_) - -> fmap Meta <$> yamlMap pMetaValue o + case Yaml.decodeAllEither' bstr of + Right (Object o:_) -> fmap Meta <$> yamlMap pMetaValue o Right [] -> return . return $ mempty - Right [YAML.Doc (YAML.Scalar _ YAML.SNull)] - -> return . return $ mempty - -- the following is what we get from a comment: - Right [YAML.Doc (YAML.Scalar _ (YAML.SUnknown _ ""))] - -> return . return $ mempty + Right [Null] -> return . return $ mempty Right _ -> Prelude.fail "expected YAML object" - Left (yamlpos, err') - -> do pos <- getPosition - setPosition $ incSourceLine - (setSourceColumn pos (YE.posColumn yamlpos)) - (YE.posLine yamlpos - 1) - Prelude.fail err' - -fakePos :: YAML.Pos -fakePos = YAML.Pos (-1) (-1) 1 0 - -lookupYAML :: Text - -> YAML.Node YE.Pos - -> Maybe (YAML.Node YE.Pos) -lookupYAML t (YAML.Mapping _ _ m) = - M.lookup (YAML.Scalar fakePos (YAML.SUnknown YE.untagged t)) m - `mplus` - M.lookup (YAML.Scalar fakePos (YAML.SStr t)) m -lookupYAML _ _ = Nothing + Left err' -> do + throwError $ PandocParseError + $ T.pack $ Yaml.prettyPrintParseException err' -- Returns filtered list of references. yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st) => ParserT Sources st m (Future st MetaValue) -> (Text -> Bool) -- ^ Filter for id - -> BL.ByteString + -> B.ByteString -> ParserT Sources st m (Future st [MetaValue]) yamlBsToRefs pMetaValue idpred bstr = - case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of - Right (YAML.Doc o@YAML.Mapping{}:_) - -> case lookupYAML "references" o of - Just (YAML.Sequence _ _ ns) -> do - let g n = case lookupYAML "id" n of - Just n' -> - case nodeToKey n' of - Nothing -> False - Just t -> idpred t || - case lookupYAML "other-ids" n of - Just (YAML.Sequence _ _ ns') -> - let ts' = mapMaybe nodeToKey ns' - in any idpred ts' - _ -> False - Nothing -> False - sequence <$> - mapM (yamlToMetaValue pMetaValue) (filter g ns) - Just _ -> - Prelude.fail "expecting sequence in 'references' field" - Nothing -> - Prelude.fail "expecting 'references' field" - - Right [] -> return . return $ mempty - Right [YAML.Doc (YAML.Scalar _ YAML.SNull)] - -> return . return $ mempty - Right _ -> Prelude.fail "expecting YAML object" - Left (yamlpos, err') - -> do pos <- getPosition - setPosition $ incSourceLine - (setSourceColumn pos (YE.posColumn yamlpos)) - (YE.posLine yamlpos - 1) - Prelude.fail err' - - -nodeToKey :: YAML.Node YE.Pos -> Maybe Text -nodeToKey (YAML.Scalar _ (YAML.SStr t)) = Just t -nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t -nodeToKey _ = Nothing + case Yaml.decodeAllEither' bstr of + Right (Object m : _) -> do + let isSelected (String t) = idpred t + isSelected _ = False + let hasSelectedId (Object o) = + case parse (withObject "ref" (.:? "id")) (Object o) of + Success (Just id') -> isSelected id' + _ -> False + hasSelectedId _ = False + case parse (withObject "metadata" (.:? "references")) (Object m) of + Success (Just refs) -> sequence <$> + mapM (yamlToMetaValue pMetaValue) (filter hasSelectedId refs) + _ -> return $ return [] + Right _ -> return . return $ [] + Left err' -> do + throwError $ PandocParseError + $ T.pack $ Yaml.prettyPrintParseException err' normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st) => ParserT Sources st m (Future st MetaValue) @@ -133,47 +93,36 @@ normalizeMetaValue pMetaValue x = isSpaceChar '\t' = True isSpaceChar _ = False -checkBoolean :: Text -> Maybe Bool -checkBoolean t - | t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE" = Just True - | t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" = Just False - | otherwise = Nothing - yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st) => ParserT Sources st m (Future st MetaValue) - -> YAML.Node YE.Pos + -> Value -> ParserT Sources st m (Future st MetaValue) -yamlToMetaValue pMetaValue (YAML.Scalar _ x) = - case x of - YAML.SStr t -> normalizeMetaValue pMetaValue t - YAML.SBool b -> return $ return $ MetaBool b - YAML.SFloat d -> return $ return $ MetaString $ tshow d - YAML.SInt i -> return $ return $ MetaString $ tshow i - YAML.SUnknown _ t -> - case checkBoolean t of - Just b -> return $ return $ MetaBool b - Nothing -> normalizeMetaValue pMetaValue t - YAML.SNull -> return $ return $ MetaString "" - -yamlToMetaValue pMetaValue (YAML.Sequence _ _ xs) = - fmap MetaList . sequence - <$> mapM (yamlToMetaValue pMetaValue) xs -yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) = - fmap MetaMap <$> yamlMap pMetaValue o -yamlToMetaValue _ _ = return $ return $ MetaString "" +yamlToMetaValue pMetaValue v = + case v of + String t -> normalizeMetaValue pMetaValue t + Bool b -> return $ return $ MetaBool b + Number d -> normalizeMetaValue pMetaValue $ + case fromJSON v of + Success (x :: Int) -> tshow x + _ -> tshow d + Null -> return $ return $ MetaString "" + Array{} -> do + case fromJSON v of + Error err' -> throwError $ PandocParseError $ T.pack err' + Success xs -> fmap MetaList . sequence <$> + mapM (yamlToMetaValue pMetaValue) xs + Object o -> fmap MetaMap <$> yamlMap pMetaValue o yamlMap :: (PandocMonad m, HasLastStrPosition st) => ParserT Sources st m (Future st MetaValue) - -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos) + -> Object -> ParserT Sources st m (Future st (M.Map Text MetaValue)) yamlMap pMetaValue o = do - kvs <- forM (M.toList o) $ \(key, v) -> do - k <- maybe (throwError $ PandocParseError - "Non-string key in YAML mapping") - return $ nodeToKey key - return (k, v) - let kvs' = filter (not . ignorable . fst) kvs - fmap M.fromList . sequence <$> mapM toMeta kvs' + case fromJSON (Object o) of + Error err' -> throwError $ PandocParseError $ T.pack err' + Success (m' :: M.Map Text Value) -> do + let kvs = filter (not . ignorable . fst) $ M.toList m' + fmap M.fromList . sequence <$> mapM toMeta kvs where ignorable t = "_" `T.isSuffixOf` t toMeta (k, v) = do @@ -194,7 +143,7 @@ yamlMetaBlock parser = try $ do -- by including --- and ..., we allow yaml blocks with just comments: let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines - yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml + yamlBsToMeta parser $ UTF8.fromText rawYaml stopLine :: Monad m => ParserT Sources st m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index f18d2f9a7..9a689b0e8 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -474,15 +474,16 @@ figure = try $ do figCaption = fromMaybe mempty $ blockAttrCaption figAttrs figKeyVals = blockAttrKeyValues figAttrs attr = (figLabel, mempty, figKeyVals) - figTitle = (if isFigure then withFigPrefix else id) figName - in - B.para . B.imageWith attr imgSrc figTitle <$> figCaption - - withFigPrefix :: Text -> Text - withFigPrefix cs = - if "fig:" `T.isPrefixOf` cs - then cs - else "fig:" <> cs + in if isFigure + then (\c -> + B.simpleFigureWith + attr c imgSrc (unstackFig figName)) <$> figCaption + else B.para . B.imageWith attr imgSrc figName <$> figCaption + unstackFig :: Text -> Text + unstackFig figName = + if "fig:" `T.isPrefixOf` figName + then T.drop 4 figName + else figName -- | Succeeds if looking at the end of the current paragraph endOfParagraph :: Monad m => OrgParser m () @@ -889,7 +890,10 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- T.concat <$> many (listContinuation markerLength) - contents <- parseFromString blocks $ firstLine <> blank <> rest + contents <- parseFromString (do initial <- paraOrPlain <|> pure mempty + subsequent <- blocks + return $ initial <> subsequent) + (firstLine <> blank <> rest) return (maybe id (prependInlines . checkboxToInlines) box <$> contents) -- | Prepend inlines to blocks, adding them to the first paragraph or diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs index 2dcbecb1d..1c4f253cc 100644 --- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -41,6 +41,7 @@ documentTree :: PandocMonad m -> OrgParser m (F Inlines) -> OrgParser m (F Headline) documentTree blocks inline = do + properties <- option mempty propertiesDrawer initialBlocks <- blocks headlines <- sequence <$> manyTill (headline blocks inline 1) eof title <- fmap docTitle . orgStateMeta <$> getState @@ -54,7 +55,7 @@ documentTree blocks inline = do , headlineText = B.fromList title' , headlineTags = mempty , headlinePlanning = emptyPlanning - , headlineProperties = mempty + , headlineProperties = properties , headlineContents = initialBlocks' , headlineChildren = headlines' } @@ -163,8 +164,15 @@ unprunedHeadlineToBlocks hdln st = in if not usingSelectedTags || any (`Set.member` orgStateSelectTags st) (headlineTags rootNode') then do headlineBlocks <- headlineToBlocks rootNode' + -- add metadata from root node :PROPERTIES: + updateState $ \s -> + s{ orgStateMeta = foldr + (\(PropertyKey k, PropertyValue v) m -> + B.setMeta k v <$> m) + (orgStateMeta s) + (headlineProperties rootNode') } -- ignore first headline, it's the document's title - return . drop 1 . B.toList $ headlineBlocks + return $ drop 1 $ B.toList headlineBlocks else do headlineBlocks <- mconcat <$> mapM headlineToBlocks (headlineChildren rootNode') return . B.toList $ headlineBlocks diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 6862dd71e..617f98a10 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -31,11 +31,10 @@ import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.TeXMath (DisplayType (..), readTeX, writePandoc) import Text.Pandoc.Sources (ToSources(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap - -import Control.Monad (guard, mplus, mzero, unless, void, when) +import Safe (lastMay) +import Control.Monad (guard, mplus, mzero, unless, when, void) import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) -import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T @@ -148,31 +147,177 @@ endline = try $ do -- Citations -- --- The state of citations is a bit confusing due to the lack of an official --- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the --- first to be implemented here and is almost identical to Markdown's citation --- syntax. The org-ref package is in wide use to handle citations, but the --- syntax is a bit limiting and not quite as simple to write. The --- semi-official Org-mode citation syntax is based on John MacFarlane's Pandoc --- sytax and Org-oriented enhancements contributed by Richard Lawrence and --- others. It's dubbed Berkeley syntax due the place of activity of its main --- contributors. All this should be consolidated once an official Org-mode --- citation syntax has emerged. +-- We first try to parse official org-cite citations, then fall +-- back to org-ref citations (which are still in wide use). + +-- | A citation in org-cite style +orgCite :: PandocMonad m => OrgParser m (F [Citation]) +orgCite = try $ do + string "[cite" + (sty, _variants) <- citeStyle + char ':' + spnl + globalPref <- option mempty (try (citePrefix <* char ';')) + items <- citeItems + globalSuff <- option mempty (try (char ';' *> citeSuffix)) + spnl + char ']' + return $ adjustCiteStyle sty . + addPrefixToFirstItem globalPref . + addSuffixToLastItem globalSuff $ items + +adjustCiteStyle :: CiteStyle -> (F [Citation]) -> (F [Citation]) +adjustCiteStyle sty cs = do + cs' <- cs + case cs' of + [] -> return [] + (d:ds) -- TODO needs refinement + -> case sty of + TextStyle -> return $ d{ citationMode = AuthorInText + , citationSuffix = dropWhile (== Space) + (citationSuffix d)} : ds + NoAuthorStyle -> return $ d{ citationMode = SuppressAuthor } : ds + _ -> return (d:ds) + +addPrefixToFirstItem :: (F Inlines) -> (F [Citation]) -> (F [Citation]) +addPrefixToFirstItem aff cs = do + cs' <- cs + aff' <- aff + case cs' of + [] -> return [] + (d:ds) -> return (d{ citationPrefix = + B.toList aff' <> citationPrefix d }:ds) + +addSuffixToLastItem :: (F Inlines) -> (F [Citation]) -> (F [Citation]) +addSuffixToLastItem aff cs = do + cs' <- cs + aff' <- aff + case lastMay cs' of + Nothing -> return cs' + Just d -> + return (init cs' ++ [d{ citationSuffix = + citationSuffix d <> B.toList aff' }]) + +citeItems :: PandocMonad m => OrgParser m (F [Citation]) +citeItems = sequence <$> citeItem `sepBy1` (char ';') + +citeItem :: PandocMonad m => OrgParser m (F Citation) +citeItem = do + pref <- citePrefix + itemKey <- orgCiteKey + suff <- citeSuffix + return $ do + pre' <- pref + suf' <- suff + return Citation + { citationId = itemKey + , citationPrefix = B.toList pre' + , citationSuffix = B.toList suf' + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + +orgCiteKey :: PandocMonad m => OrgParser m Text +orgCiteKey = do + char '@' + T.pack <$> many1 (satisfy orgCiteKeyChar) + +orgCiteKeyChar :: Char -> Bool +orgCiteKeyChar c = + isAlphaNum c || c `elem` ['.',':','?','!','`','\'','/','*','@','+','|', + '(',')','{','}','<','>','&','_','^','$','#', + '%','~','-'] + +rawAffix :: PandocMonad m => Bool -> OrgParser m Text +rawAffix isPrefix = snd <$> withRaw + (many + (affixChar + <|> + try (void (char '[' >> rawAffix isPrefix >> char ']')))) + where + affixChar = void $ satisfy $ \c -> + not (c == '^' || c == ';' || c == '[' || c == ']') && + (not isPrefix || c /= '@') + +citePrefix :: PandocMonad m => OrgParser m (F Inlines) +citePrefix = + rawAffix True >>= parseFromString (trimInlinesF . mconcat <$> many inline) + +citeSuffix :: PandocMonad m => OrgParser m (F Inlines) +citeSuffix = + rawAffix False >>= parseFromString parseSuffix + where + parseSuffix = do + hasSpace <- option False + (True <$ try (spaceChar >> skipSpaces >> lookAhead nonspaceChar)) + ils <- trimInlinesF . mconcat <$> many inline + return $ if hasSpace + then (B.space <>) <$> ils + else ils + +citeStyle :: PandocMonad m => OrgParser m (CiteStyle, [CiteVariant]) +citeStyle = option (DefStyle, []) $ do + sty <- option DefStyle $ try $ char '/' *> orgCiteStyle + variants <- option [] $ try $ char '/' *> orgCiteVariants + return (sty, variants) + +orgCiteStyle :: PandocMonad m => OrgParser m CiteStyle +orgCiteStyle = choice $ map try + [ NoAuthorStyle <$ string "noauthor" + , NoAuthorStyle <$ string "na" + , LocatorsStyle <$ string "locators" + , LocatorsStyle <$ char 'l' + , NociteStyle <$ string "nocite" + , NociteStyle <$ char 'n' + , TextStyle <$ string "text" + , TextStyle <$ char 't' + ] + +orgCiteVariants :: PandocMonad m => OrgParser m [CiteVariant] +orgCiteVariants = + (fullnameVariant `sepBy1` (char '-')) <|> (many1 onecharVariant) + where + fullnameVariant = choice $ map try + [ Bare <$ string "bare" + , Caps <$ string "caps" + , Full <$ string "full" + ] + onecharVariant = choice + [ Bare <$ char 'b' + , Caps <$ char 'c' + , Full <$ char 'f' + ] + +data CiteStyle = + NoAuthorStyle + | LocatorsStyle + | NociteStyle + | TextStyle + | DefStyle + deriving Show + +data CiteVariant = + Caps + | Bare + | Full + deriving Show + + +spnl :: PandocMonad m => OrgParser m () +spnl = + skipSpaces *> optional (newline *> notFollowedBy blankline *> skipSpaces) cite :: PandocMonad m => OrgParser m (F Inlines) -cite = try $ berkeleyCite <|> do +cite = do guardEnabled Ext_citations - (cs, raw) <- withRaw $ choice - [ pandocOrgCite + (cs, raw) <- withRaw $ try $ choice + [ orgCite , orgRefCite - , berkeleyTextualCite ] return $ flip B.cite (B.text raw) <$> cs --- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). -pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) -pandocOrgCite = try $ - char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' +-- org-ref orgRefCite :: PandocMonad m => OrgParser m (F [Citation]) orgRefCite = try $ choice @@ -201,100 +346,6 @@ normalOrgRefCite = try $ do , citationHash = 0 } --- | Read an Berkeley-style Org-mode citation. Berkeley citation style was --- develop and adjusted to Org-mode style by John MacFarlane and Richard --- Lawrence, respectively, both philosophers at UC Berkeley. -berkeleyCite :: PandocMonad m => OrgParser m (F Inlines) -berkeleyCite = try $ do - bcl <- berkeleyCitationList - return $ do - parens <- berkeleyCiteParens <$> bcl - prefix <- berkeleyCiteCommonPrefix <$> bcl - suffix <- berkeleyCiteCommonSuffix <$> bcl - citationList <- berkeleyCiteCitations <$> bcl - return $ - if parens - then toCite - . maybe id (alterFirst . prependPrefix) prefix - . maybe id (alterLast . appendSuffix) suffix - $ citationList - else maybe mempty (<> " ") prefix - <> toListOfCites (map toInTextMode citationList) - <> maybe mempty (", " <>) suffix - where - toCite :: [Citation] -> Inlines - toCite cs = B.cite cs mempty - - toListOfCites :: [Citation] -> Inlines - toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty) - - toInTextMode :: Citation -> Citation - toInTextMode c = c { citationMode = AuthorInText } - - alterFirst, alterLast :: (a -> a) -> [a] -> [a] - alterFirst _ [] = [] - alterFirst f (c:cs) = f c : cs - alterLast f = reverse . alterFirst f . reverse - - prependPrefix, appendSuffix :: Inlines -> Citation -> Citation - prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c } - appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf } - -data BerkeleyCitationList = BerkeleyCitationList - { berkeleyCiteParens :: Bool - , berkeleyCiteCommonPrefix :: Maybe Inlines - , berkeleyCiteCommonSuffix :: Maybe Inlines - , berkeleyCiteCitations :: [Citation] - } -berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList) -berkeleyCitationList = try $ do - char '[' - parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] - char ':' - skipSpaces - commonPrefix <- optionMaybe (try $ citationListPart <* char ';') - citations <- citeList - commonSuffix <- optionMaybe (try citationListPart) - char ']' - return (BerkeleyCitationList parens - <$> sequence commonPrefix - <*> sequence commonSuffix - <*> citations) - where - citationListPart :: PandocMonad m => OrgParser m (F Inlines) - citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do - notFollowedBy' $ citeKey False - notFollowedBy (oneOf ";]") - inline - -berkeleyBareTag :: PandocMonad m => OrgParser m () -berkeleyBareTag = try $ void berkeleyBareTag' - -berkeleyParensTag :: PandocMonad m => OrgParser m () -berkeleyParensTag = try . void $ enclosedByPair1 '(' ')' berkeleyBareTag' - -berkeleyBareTag' :: PandocMonad m => OrgParser m () -berkeleyBareTag' = try $ void (string "cite") - -berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation]) -berkeleyTextualCite = try $ do - (suppressAuthor, key) <- citeKey False - returnF . return $ Citation - { citationId = key - , citationPrefix = mempty - , citationSuffix = mempty - , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText - , citationNoteNum = 0 - , citationHash = 0 - } - --- The following is what a Berkeley-style bracketed textual citation parser --- would look like. However, as these citations are a subset of Pandoc's Org --- citation style, this isn't used. --- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation]) --- berkeleyBracketedTextualCite = try . (fmap head) $ --- enclosedByPair1 '[' ']' berkeleyTextualCite - -- | Read a link-like org-ref style citation. The citation includes pre and -- post text. However, multiple citations are not possible due to limitations -- in the syntax. @@ -345,39 +396,6 @@ orgRefCiteMode = , ("citeyear", SuppressAuthor) ] -citeList :: PandocMonad m => OrgParser m (F [Citation]) -citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) - -citation :: PandocMonad m => OrgParser m (F Citation) -citation = try $ do - pref <- prefix - (suppress_author, key) <- citeKey False - suff <- suffix - return $ do - x <- pref - y <- suff - return Citation - { citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - where - prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False))) - suffix = try $ do - hasSpace <- option False (notFollowedBy nonspaceChar >> return True) - skipSpaces - rest <- trimInlinesF . mconcat <$> - many (notFollowedBy (oneOf ";]") *> inline) - return $ if hasSpace - then (B.space <>) <$> rest - else rest - footnote :: PandocMonad m => OrgParser m (F Inlines) footnote = try $ do note <- inlineNote <|> referencedNote diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index a1b21046a..ccb6744e7 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -27,13 +27,13 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Shared (blocksToInlines, safeRead) +import Text.Pandoc.Network.HTTP (urlEncode) import Control.Monad (mzero, void) import Data.List (intercalate, intersperse) import Data.Map (Map) import Data.Maybe (fromMaybe) import Data.Text (Text) -import Network.HTTP (urlEncode) import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.Text as T @@ -188,7 +188,7 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend -- inefficient replacePlain = try $ (\x -> T.concat . flip intersperse x) <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack) + replaceUrl = try $ (\x -> T.concat . flip intersperse x . urlEncode) <$> sequence [tillSpecifier 'h', rest] justAppend = try $ (<>) <$> rest diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 3990f0cb5..88471eb0a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -466,14 +466,11 @@ includeDirective top fields body = do let classes = maybe [] T.words (lookup "class" fields) let ident = maybe "" trimr $ lookup "name" fields let parser = - case lookup "code" fields of + case lookup "code" fields `mplus` lookup "literal" fields of Just lang -> (codeblock ident classes fields (trimr lang) False . sourcesToText) <$> getInput - Nothing -> - case lookup "literal" fields of - Just _ -> B.rawBlock "rst" . sourcesToText <$> getInput - Nothing -> parseBlocks + Nothing -> parseBlocks let isLiteral = isJust (lookup "code" fields `mplus` lookup "literal" fields) let selectLines = (case trim <$> lookup "end-before" fields of @@ -728,8 +725,8 @@ directive' = do "figure" -> do (caption, legend) <- parseFromString' extractCaption body' let src = escapeURI $ trim top - return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" - caption) <> legend + return $ B.simpleFigureWith + (imgAttr "figclass") caption src "" <> legend "image" -> do let src = escapeURI $ trim top let alt = B.str $ maybe "image" trim $ lookup "alt" fields @@ -922,14 +919,22 @@ addNewRole roleText fields = do (baseRole, baseFmt, baseAttr) = getBaseRole (parentRole, Nothing, nullAttr) customRoles fmt = if parentRole == "raw" then lookup "format" fields else baseFmt - annotate :: [Text] -> [Text] - annotate = maybe id (:) $ - if baseRole == "code" - then lookup "language" fields - else Nothing - attr = let (ident, classes, keyValues) = baseAttr - -- nub in case role name & language class are the same - in (ident, nub . (role :) . annotate $ classes, keyValues) + + updateClasses :: [Text] -> [Text] + updateClasses oldClasses = let + + codeLanguageClass = if baseRole == "code" + then maybeToList (lookup "language" fields) + else [] + + -- if no ":class:" field is given, the default is the role name + classFieldClasses = maybe [role] T.words (lookup "class" fields) + + -- nub in case role name & language class are the same + in nub (classFieldClasses ++ codeLanguageClass ++ oldClasses) + + attr = let (ident, baseClasses, keyValues) = baseAttr + in (ident, updateClasses baseClasses, keyValues) -- warn about syntax we ignore forM_ fields $ \(key, _) -> case key of @@ -1158,10 +1163,11 @@ referenceNames = do let rn = try $ do string ".. _" ref <- quotedReferenceName - <|> manyChar ( noneOf ":\n" + <|> manyChar ( noneOf "\\:\n" <|> try (char '\n' <* string " " <* notFollowedBy blankline) + <|> try (char '\\' *> char ':') <|> try (char ':' <* lookAhead alphaNum) ) char ':' diff --git a/src/Text/Pandoc/Readers/RTF.hs b/src/Text/Pandoc/Readers/RTF.hs new file mode 100644 index 000000000..3938681f4 --- /dev/null +++ b/src/Text/Pandoc/Readers/RTF.hs @@ -0,0 +1,1351 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Readers.RTF + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane (<jgm@berkeley.edu>) + Stability : alpha + Portability : portable + +Conversion of RTF documents 'Pandoc' document. +We target version 1.5 of the RTF spec. +-} +module Text.Pandoc.Readers.RTF (readRTF) where + +import qualified Data.IntMap as IntMap +import qualified Data.Sequence as Seq +import Control.Monad +import Control.Monad.Except (throwError) +import Data.List (find, foldl') +import Data.Word (Word8, Word16) +import Data.Default +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.Read as TR +import Text.Pandoc.Builder (Blocks, Inlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class.PandocMonad (PandocMonad (..), insertMedia) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Parsing +import Text.Pandoc.Shared (safeRead, tshow) +import Data.Char (isAlphaNum, chr, isAscii, isLetter, isSpace, ord) +import qualified Data.ByteString.Lazy as BL +import Data.Digest.Pure.SHA (sha1, showDigest) +import Data.Maybe (mapMaybe, fromMaybe) +import Safe (lastMay, initSafe, headDef) +-- import Debug.Trace + +-- TODO: +-- [ ] more complex table features +-- + +-- | Read RTF from an input string and return a Pandoc document. +readRTF :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc +readRTF opts s = do + let sources = toSources s + parsed <- readWithM parseRTF def{ sOptions = opts } sources + case parsed of + Left e -> throwError e + Right d -> return d + +data CharSet = ANSI | Mac | Pc | Pca + deriving (Show, Eq) + +-- first index is the list (or override) id, second is the list level +type ListTable = IntMap.IntMap ListLevelTable +type ListLevelTable = IntMap.IntMap ListType + +data RTFState = RTFState { sOptions :: ReaderOptions + , sCharSet :: CharSet + , sGroupStack :: [Properties] + , sListStack :: [List] + , sCurrentCell :: Blocks + , sTableRows :: [TableRow] -- reverse order + , sTextContent :: [(Properties, Text)] + , sMetadata :: [(Text, Inlines)] + , sFontTable :: FontTable + , sStylesheet :: Stylesheet + , sListTable :: ListTable + , sListOverrideTable :: ListTable + , sEatChars :: Int + } deriving (Show) + +instance Default RTFState where + def = RTFState { sOptions = def + , sCharSet = ANSI + , sGroupStack = [] + , sListStack = [] + , sCurrentCell = mempty + , sTableRows = [] + , sTextContent = [] + , sMetadata = [] + , sFontTable = mempty + , sStylesheet = mempty + , sListTable = mempty + , sListOverrideTable = mempty + , sEatChars = 0 + } + +type FontTable = IntMap.IntMap FontFamily + +data FontFamily = + Roman | Swiss | Modern | Script | Decor | Tech | Bidi + deriving (Show, Eq) + +data StyleType = ParagraphStyle | SectionStyle | CharStyle | TableStyle + deriving (Show, Eq) + +data Style = + Style { styleNum :: Int + , styleType :: StyleType + , styleBasedOn :: Maybe Int + , styleName :: Text + , styleFormatting :: [Tok] + } deriving (Show, Eq) + +type Stylesheet = IntMap.IntMap Style + +data PictType = + Emfblip | Pngblip | Jpegblip + deriving (Show, Eq) + +data Pict = + Pict { picType :: Maybe PictType + , picWidth :: Maybe Int + , picHeight :: Maybe Int + , picWidthGoal :: Maybe Int + , picHeightGoal :: Maybe Int + , picBinary :: Bool + , picData :: Text + , picName :: Text + , picBytes :: BL.ByteString + } deriving (Show, Eq) + +instance Default Pict where + def = Pict { picType = Nothing + , picWidth = Nothing + , picHeight = Nothing + , picWidthGoal = Nothing + , picHeightGoal = Nothing + , picBinary = False + , picData = mempty + , picName = mempty + , picBytes = mempty } + +data Properties = + Properties + { gBold :: Bool + , gItalic :: Bool + , gCaps :: Bool + , gDeleted :: Bool + , gSub :: Bool + , gSuper :: Bool + , gSmallCaps :: Bool + , gUnderline :: Bool + , gHyperlink :: Maybe Text + , gAnchor :: Maybe Text + , gImage :: Maybe Pict + , gFontFamily :: Maybe FontFamily + , gHidden :: Bool + , gUC :: Int -- number of ansi chars to skip after unicode char + , gFootnote :: Maybe Blocks + , gOutlineLevel :: Maybe ListLevel + , gListOverride :: Maybe Override + , gListLevel :: Maybe Int + , gInTable :: Bool + } deriving (Show, Eq) + +instance Default Properties where + def = Properties { gBold = False + , gItalic = False + , gCaps = False + , gDeleted = False + , gSub = False + , gSuper = False + , gSmallCaps = False + , gUnderline = False + , gHyperlink = Nothing + , gAnchor = Nothing + , gImage = Nothing + , gFontFamily = Nothing + , gHidden = False + , gUC = 1 + , gFootnote = Nothing + , gOutlineLevel = Nothing + , gListOverride = Nothing + , gListLevel = Nothing + , gInTable = False + } + +type RTFParser m = ParserT Sources RTFState m + +data ListType = Bullet | Ordered ListAttributes + deriving (Show, Eq) + +type Override = Int + +type ListLevel = Int + +data List = + List Override ListLevel ListType [Blocks] -- items in reverse order + deriving (Show, Eq) + +newtype TableRow = TableRow [Blocks] -- cells in reverse order + deriving (Show, Eq) + +parseRTF :: PandocMonad m => RTFParser m Pandoc +parseRTF = do + skipMany nl + toks <- many tok + -- return $! traceShowId toks + bs <- (case toks of + -- if we start with {\rtf1...}, parse that and ignore + -- what follows (which in certain cases can be non-RTF content) + rtftok@(Tok _ (Grouped (Tok _ (ControlWord "rtf" (Just 1)) : _))) : _ + -> foldM processTok mempty [rtftok] + _ -> foldM processTok mempty toks) + >>= emitBlocks + unclosed <- closeContainers + let doc = B.doc $ bs <> unclosed + kvs <- sMetadata <$> getState + pure $ foldr (uncurry B.setMeta) doc kvs + +data Tok = Tok SourcePos TokContents + deriving (Show, Eq) + +data TokContents = + ControlWord Text (Maybe Int) + | ControlSymbol Char + | UnformattedText Text + | BinData BL.ByteString + | HexVal Word8 + | Grouped [Tok] + deriving (Show, Eq) + +tok :: PandocMonad m => RTFParser m Tok +tok = do + pos <- getPosition + Tok pos <$> ((controlThing <|> unformattedText <|> grouped) <* skipMany nl) + where + controlThing = do + char '\\' *> + ( binData + <|> (ControlWord <$> letterSequence <*> (parameter <* optional delimChar)) + <|> (HexVal <$> hexVal) + <|> (ControlSymbol <$> anyChar) ) + binData = try $ do + string "bin" <* notFollowedBy letter + n <- fromMaybe 0 <$> parameter + spaces + -- NOTE: We assume here that if the document contains binary + -- data, it will not be valid UTF-8 and hence it will have been + -- read as latin1, so we can recover the data in the following + -- way. This is probably not completely reliable, but I don't + -- know if we can do better without making this reader take + -- a ByteString input. + dat <- BL.pack . map (fromIntegral . ord) <$> count n anyChar + return $ BinData dat + parameter = do + hyph <- string "-" <|> pure "" + rest <- many digit + let pstr = T.pack $ hyph <> rest + return $ safeRead pstr + hexVal = do + char '\'' + x <- hexDigit + y <- hexDigit + return $ hexToWord (T.pack [x,y]) + letterSequence = T.pack <$> many1 (satisfy (\c -> isAscii c && isLetter c)) + unformattedText = + UnformattedText . T.pack . mconcat <$> + many1 ( many1 (satisfy (not . isSpecial)) + <|> ("" <$ nl)) + grouped = Grouped <$> (char '{' *> skipMany nl *> manyTill tok (char '}')) + +nl :: PandocMonad m => RTFParser m () +nl = void (char '\n' <|> char '\r') + +isSpecial :: Char -> Bool +isSpecial '{' = True +isSpecial '}' = True +isSpecial '\\' = True +isSpecial '\n' = True +isSpecial _ = False + +delimChar :: PandocMonad m => RTFParser m Char +delimChar = satisfy (\c -> not (isAlphaNum c || isSpecial c)) + +modifyGroup :: PandocMonad m + => (Properties -> Properties) + -> RTFParser m () +modifyGroup f = + updateState $ \st -> + st{ sGroupStack = + case sGroupStack st of + [] -> [] + (x:xs) -> f x : xs } + +addFormatting :: (Properties, Text) -> Inlines +addFormatting (_, "\n") = B.linebreak +addFormatting (props, _) | gHidden props = mempty +addFormatting (props, _) | Just bs <- gFootnote props = B.note bs +addFormatting (props, txt) = + (if gBold props then B.strong else id) . + (if gItalic props then B.emph else id) . + (if gDeleted props then B.strikeout else id) . + (if gSub props then B.subscript else id) . + (if gSuper props then B.superscript else id) . + (if gSmallCaps props then B.smallcaps else id) . + (if gUnderline props then B.underline else id) . + (case gHyperlink props of + Nothing -> id + Just linkdest -> B.link linkdest mempty) . + (case gAnchor props of + Nothing -> id + Just ident -> B.spanWith (ident,[],[])) . + (case gFontFamily props of + Just Modern -> B.code + _ -> case gImage props of + Just pict -> + let attr = ("",[], + (case picWidthGoal pict of + Nothing -> [] + Just w -> [("width", tshow (fromIntegral w / 1440 + :: Double) + <> "in")]) ++ + (case picHeightGoal pict of + Nothing -> [] + Just h -> [("height", tshow (fromIntegral h / 1440 + :: Double) + <> "in")])) + in B.imageWith attr (picName pict) "" . B.text + Nothing -> B.text) . + (if gCaps props then T.toUpper else id) + $ txt + +addText :: PandocMonad m => Text -> RTFParser m () +addText t = do + gs <- sGroupStack <$> getState + let props = case gs of + (x:_) -> x + _ -> def + updateState (\s -> s{ sTextContent = (props, t) : sTextContent s }) + +inGroup :: PandocMonad m => RTFParser m a -> RTFParser m a +inGroup p = do + updateState $ \st -> + st{ sGroupStack = + case sGroupStack st of + [] -> [def] + (x:xs) -> (x:x:xs) } -- inherit current group's properties + result <- p + updateState $ \st -> + st{ sGroupStack = + case sGroupStack st of + [] -> [] -- should not happen + (_:xs) -> xs } + return result + +getStyleFormatting :: PandocMonad m => Int -> RTFParser m [Tok] +getStyleFormatting stynum = do + stylesheet <- sStylesheet <$> getState + case IntMap.lookup stynum stylesheet of + Nothing -> return [] + Just sty -> + case styleBasedOn sty of + Just i -> (<> styleFormatting sty) <$> getStyleFormatting i + Nothing -> return $ styleFormatting sty + +isMetadataField :: Text -> Bool +isMetadataField "title" = True +isMetadataField "subject" = True +isMetadataField "author" = True +isMetadataField "manager" = True +isMetadataField "company" = True +isMetadataField "operator" = True +isMetadataField "category" = True +isMetadataField "keywords" = True +isMetadataField "comment" = True +isMetadataField "doccomm" = True +isMetadataField "hlinkbase" = True +isMetadataField "generator" = True +isMetadataField _ = False + +isHeaderFooter :: Text -> Bool +isHeaderFooter "header" = True +isHeaderFooter "headerl" = True +isHeaderFooter "headerr" = True +isHeaderFooter "headerf" = True +isHeaderFooter "footer" = True +isHeaderFooter "footerl" = True +isHeaderFooter "footerr" = True +isHeaderFooter "footerf" = True +isHeaderFooter _ = False + +boolParam :: Maybe Int -> Bool +boolParam (Just 0) = False +boolParam _ = True + +isUnderline :: Text -> Bool +isUnderline "ul" = True +isUnderline "uld" = True +isUnderline "uldash" = True +isUnderline "uldashd" = True +isUnderline "uldashdd" = True +isUnderline "uldb" = True +isUnderline "ulth" = True +isUnderline "ulthd" = True +isUnderline "ulthdash" = True +isUnderline "ulw" = True +isUnderline "ulwave" = True +isUnderline _ = False + +processTok :: PandocMonad m => Blocks -> Tok -> RTFParser m Blocks +processTok bs (Tok pos tok') = do + setPosition pos + case tok' of + HexVal{} -> return () + UnformattedText{} -> return () + _ -> updateState $ \s -> s{ sEatChars = 0 } + case tok' of + Grouped (Tok _ (ControlSymbol '*') : toks) -> + bs <$ (do oldTextContent <- sTextContent <$> getState + processTok mempty (Tok pos (Grouped toks)) + updateState $ \st -> st{ sTextContent = oldTextContent }) + Grouped (Tok _ (ControlWord "fonttbl" _) : toks) -> inGroup $ do + updateState $ \s -> s{ sFontTable = processFontTable toks } + pure bs + Grouped (Tok _ (ControlWord "field" _) : toks) -> + inGroup $ handleField bs toks + Grouped (Tok _ (ControlWord "pict" _) : toks) -> + bs <$ inGroup (handlePict toks) + Grouped (Tok _ (ControlWord "stylesheet" _) : toks) -> + bs <$ inGroup (handleStylesheet toks) + Grouped (Tok _ (ControlWord "listtext" _) : _) -> do + -- eject any previous list items...sometimes TextEdit + -- doesn't put in a \par + emitBlocks bs + Grouped (Tok _ (ControlWord "pgdsc" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "colortbl" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "listtable" _) : toks) -> + bs <$ inGroup (handleListTable toks) + Grouped (Tok _ (ControlWord "listoverridetable" _) : toks) -> + bs <$ inGroup (handleListOverrideTable toks) + Grouped (Tok _ (ControlWord "wgrffmtfilter" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "themedata" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "colorschememapping" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "datastore" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "latentstyles" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "pntxta" _) : _) -> pure bs -- TODO + Grouped (Tok _ (ControlWord "pntxtb" _) : _) -> pure bs -- TODO + Grouped (Tok _ (ControlWord "xmlnstbl" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "filetbl" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "expandedcolortbl" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "listtables" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "revtbl" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "bkmkstart" _) + : Tok _ (UnformattedText t) : _) -> do + -- TODO ideally we'd put the span around bkmkstart/end, but this + -- is good for now: + modifyGroup (\g -> g{ gAnchor = Just $ T.strip t }) + pure bs + Grouped (Tok _ (ControlWord "bkmkend" _) : _) -> do + modifyGroup (\g -> g{ gAnchor = Nothing }) + pure bs + Grouped (Tok _ (ControlWord f _) : _) | isHeaderFooter f -> pure bs + Grouped (Tok _ (ControlWord "footnote" _) : toks) -> do + noteBs <- inGroup $ processDestinationToks toks + modifyGroup (\g -> g{ gFootnote = Just noteBs }) + addText "*" + modifyGroup (\g -> g{ gFootnote = Nothing }) + return bs + Grouped (Tok _ (ControlWord "info" _) : toks) -> + bs <$ inGroup (processDestinationToks toks) + Grouped (Tok _ (ControlWord f _) : toks) | isMetadataField f -> inGroup $ do + foldM_ processTok mempty toks + annotatedToks <- reverse . sTextContent <$> getState + updateState $ \s -> s{ sTextContent = [] } + let ils = B.trimInlines . mconcat $ map addFormatting annotatedToks + updateState $ \s -> s{ sMetadata = (f, ils) : sMetadata s } + pure bs + Grouped toks -> inGroup (foldM processTok bs toks) + UnformattedText t -> bs <$ do + -- return $! traceShowId $! (pos, t) + eatChars <- sEatChars <$> getState + case eatChars of + 0 -> addText t + n | n < T.length t -> do + updateState $ \s -> s{ sEatChars = 0 } + addText (T.drop n t) + | otherwise -> do + updateState $ \s -> s{ sEatChars = n - T.length t } + HexVal n -> bs <$ do + eatChars <- sEatChars <$> getState + if eatChars == 0 + then do + charset <- sCharSet <$> getState + case charset of + ANSI -> addText (T.singleton $ ansiToChar n) + Mac -> addText (T.singleton $ macToChar n) + Pc -> addText (T.singleton $ pcToChar n) + Pca -> addText (T.singleton $ pcaToChar n) + else updateState $ \s -> s{ sEatChars = eatChars - 1 } + ControlWord "ansi" _ -> bs <$ + updateState (\s -> s{ sCharSet = ANSI }) + ControlWord "mac" _ -> bs <$ + updateState (\s -> s{ sCharSet = Mac }) + ControlWord "pc" _ -> bs <$ + updateState (\s -> s{ sCharSet = Pc }) + ControlWord "pca" _ -> bs <$ + updateState (\s -> s{ sCharSet = Pca }) + ControlWord "outlinelevel" mbp -> bs <$ + modifyGroup (\g -> g{ gOutlineLevel = mbp }) + ControlWord "ls" mbp -> bs <$ + modifyGroup (\g -> g{ gListOverride = mbp }) + ControlWord "ilvl" mbp -> bs <$ + modifyGroup (\g -> g{ gListLevel = mbp }) + ControlSymbol '\\' -> bs <$ addText "\\" + ControlSymbol '{' -> bs <$ addText "{" + ControlSymbol '}' -> bs <$ addText "}" + ControlSymbol '~' -> bs <$ addText "\x00a0" + ControlSymbol '-' -> bs <$ addText "\x00ad" + ControlSymbol '_' -> bs <$ addText "\x2011" + ControlWord "trowd" _ -> bs <$ do -- add new row + updateState $ \s -> s{ sTableRows = TableRow [] : sTableRows s + , sCurrentCell = mempty } + ControlWord "cell" _ -> bs <$ do + new <- emitBlocks mempty + curCell <- (<> new) . sCurrentCell <$> getState + updateState $ \s -> s{ sTableRows = + case sTableRows s of + TableRow cs : rs -> + TableRow (curCell : cs) : rs + [] -> [TableRow [curCell]] -- shouldn't happen + , sCurrentCell = mempty } + ControlWord "intbl" _ -> bs <$ modifyGroup (\g -> g{ gInTable = True }) + ControlWord "plain" _ -> bs <$ modifyGroup (const def) + ControlWord "lquote" _ -> bs <$ addText "\x2018" + ControlWord "rquote" _ -> bs <$ addText "\x2019" + ControlWord "ldblquote" _ -> bs <$ addText "\x201C" + ControlWord "rdblquote" _ -> bs <$ addText "\x201D" + ControlWord "emdash" _ -> bs <$ addText "\x2014" + ControlWord "emspace" _ -> bs <$ addText "\x2003" + ControlWord "enspace" _ -> bs <$ addText "\x2002" + ControlWord "endash" _ -> bs <$ addText "\x2013" + ControlWord "bullet" _ -> bs <$ addText "\x2022" + ControlWord "tab" _ -> bs <$ addText "\t" + ControlWord "line" _ -> bs <$ addText "\n" + ControlSymbol '\n' -> bs <$ addText "\n" + ControlSymbol '\r' -> bs <$ addText "\n" + ControlWord "uc" (Just i) -> bs <$ modifyGroup (\g -> g{ gUC = i }) + ControlWord "cs" (Just n) -> do + getStyleFormatting n >>= foldM processTok bs + ControlWord "s" (Just n) -> do + getStyleFormatting n >>= foldM processTok bs + ControlWord "ds" (Just n) -> do + getStyleFormatting n >>= foldM processTok bs + ControlWord "f" (Just i) -> bs <$ do + fontTable <- sFontTable <$> getState + modifyGroup (\g -> g{ gFontFamily = IntMap.lookup i fontTable }) + ControlWord "u" (Just i) -> bs <$ do + st <- getState + let curgroup = case sGroupStack st of + [] -> def + (x:_) -> x + updateState $ \s -> s{ sEatChars = gUC curgroup } + -- "RTF control words generally accept signed 16-bit numbers as + -- arguments. For this reason, Unicode values greater than 32767 + -- must be expressed as negative numbers." + let codepoint :: Word16 + codepoint = fromIntegral i + addText (T.singleton (chr $ fromIntegral codepoint)) + ControlWord "caps" mbp -> bs <$ + modifyGroup (\g -> g{ gCaps = boolParam mbp }) + ControlWord "deleted" mbp -> bs <$ + modifyGroup (\g -> g{ gDeleted = boolParam mbp }) + ControlWord "b" mbp -> bs <$ + modifyGroup (\g -> g{ gBold = boolParam mbp }) + ControlWord "i" mbp -> bs <$ + modifyGroup (\g -> g{ gItalic = boolParam mbp }) + ControlWord "sub" mbp -> bs <$ + modifyGroup (\g -> g{ gSub = boolParam mbp }) + ControlWord "super" mbp -> bs <$ + modifyGroup (\g -> g{ gSuper = boolParam mbp }) + ControlWord "up" mbp -> bs <$ + modifyGroup (\g -> g{ gSuper = boolParam mbp }) + ControlWord "strike" mbp -> bs <$ + modifyGroup (\g -> g{ gDeleted = boolParam mbp }) + ControlWord "strikedl" mbp -> bs <$ + modifyGroup (\g -> g{ gDeleted = boolParam mbp }) + ControlWord "striked" mbp -> bs <$ + modifyGroup (\g -> g{ gDeleted = boolParam mbp }) + ControlWord "scaps" mbp -> bs <$ + modifyGroup (\g -> g{ gSmallCaps = boolParam mbp }) + ControlWord "v" mbp -> bs <$ + modifyGroup (\g -> g{ gHidden = boolParam mbp }) + ControlWord x mbp | isUnderline x -> bs <$ + modifyGroup (\g -> g{ gUnderline = boolParam mbp }) + ControlWord "ulnone" _ -> bs <$ + modifyGroup (\g -> g{ gUnderline = False }) + ControlWord "pard" _ -> bs <$ do + modifyGroup (const def) + getStyleFormatting 0 >>= foldM processTok bs + ControlWord "par" _ -> emitBlocks bs + _ -> pure bs + +processDestinationToks :: PandocMonad m => [Tok] -> RTFParser m Blocks +processDestinationToks toks = do + textContent <- sTextContent <$> getState + liststack <- sListStack <$> getState + updateState $ \s -> s{ sTextContent = mempty + , sListStack = [] } + result <- inGroup $ + foldM processTok mempty toks >>= emitBlocks + unclosed <- closeContainers + updateState $ \s -> s{ sTextContent = textContent + , sListStack = liststack } + return $ result <> unclosed + +-- close lists >= level +closeLists :: PandocMonad m => Int -> RTFParser m Blocks +closeLists lvl = do + lists <- sListStack <$> getState + case lists of + (List _ lvl' lt items : rest) | lvl' >= lvl -> do + let newlist = (case lt of + Bullet -> B.bulletList + Ordered listAttr -> B.orderedListWith listAttr) + (reverse items) + updateState $ \s -> s{ sListStack = rest } + case rest of + [] -> do + updateState $ \s -> s{ sListStack = rest } + pure newlist + (List lo lvl'' lt' [] : rest') -> do -- should not happen + updateState $ \s -> s{ sListStack = + List lo lvl'' lt' [newlist] : rest' } + closeLists lvl + (List lo lvl'' lt' (i:is) : rest') -> do + updateState $ \s -> s{ sListStack = + List lo lvl'' lt' (i <> newlist : is) : rest' } + closeLists lvl + _ -> pure mempty + +closeTable :: PandocMonad m => RTFParser m Blocks +closeTable = do + rawrows <- sTableRows <$> getState + if null rawrows + then return mempty + else do + let getCells (TableRow cs) = reverse cs + let rows = map getCells . reverse $ rawrows + updateState $ \s -> s{ sCurrentCell = mempty + , sTableRows = [] } + return $ B.simpleTable [] rows + +closeContainers :: PandocMonad m => RTFParser m Blocks +closeContainers = do + tbl <- closeTable + lists <- closeLists 0 + return $ tbl <> lists + +trimFinalLineBreak :: Inlines -> Inlines +trimFinalLineBreak ils = + case Seq.viewr (B.unMany ils) of + rest Seq.:> LineBreak -> B.Many rest + _ -> ils + +emitBlocks :: PandocMonad m => Blocks -> RTFParser m Blocks +emitBlocks bs = do + annotatedToks <- reverse . sTextContent <$> getState + updateState $ \s -> s{ sTextContent = [] } + let justCode = def{ gFontFamily = Just Modern } + let prop = case annotatedToks of + [] -> def + ((p,_):_) -> p + tbl <- if gInTable prop || null annotatedToks + then pure mempty + else closeTable + new <- + case annotatedToks of + [] -> pure mempty + _ | Just lst <- gListOverride prop + -> do + let level = fromMaybe 0 $ gListLevel prop + listOverrideTable <- sListOverrideTable <$> getState + let listType = fromMaybe Bullet $ + IntMap.lookup lst listOverrideTable >>= IntMap.lookup level + lists <- sListStack <$> getState + -- get para contents of list item + let newbs = B.para . B.trimInlines . trimFinalLineBreak . mconcat $ + map addFormatting annotatedToks + case lists of + (List lo parentlevel _lt items : cs) + | lo == lst + , parentlevel == level + -- add another item to existing list + -> do updateState $ \s -> + s{ sListStack = + List lo level listType (newbs:items) : cs } + pure mempty + | lo /= lst || level < parentlevel + -- close parent list and add new list + -> do new <- closeLists level -- close open lists > level + updateState $ \s -> + s{ sListStack = List lst level listType [newbs] : + sListStack s } + pure new + _ -> do -- add new list (level > parentlevel) + updateState $ \s -> + s{ sListStack = List lst level listType [newbs] : + sListStack s } + pure mempty + | Just lvl <- gOutlineLevel prop + -> do + lists <- closeLists 0 + pure $ lists <> + B.header (lvl + 1) + (B.trimInlines . mconcat $ map addFormatting + $ removeCommonFormatting + annotatedToks) + | all ((== justCode) . fst) annotatedToks + -> do + lists <- closeLists 0 + pure $ lists <> + B.codeBlock (mconcat $ map snd annotatedToks) + | all (T.all isSpace . snd) annotatedToks + -> closeLists 0 + | otherwise -> do + lists <- closeLists 0 + pure $ lists <> + B.para (B.trimInlines . trimFinalLineBreak . mconcat + $ map addFormatting annotatedToks) + if gInTable prop + then do + updateState $ \s -> s{ sCurrentCell = sCurrentCell s <> new } + pure bs + else do + pure $ bs <> tbl <> new + +-- Headers often have a style applied. We usually want to remove +-- this, because headers will have their own styling in the target +-- format. +removeCommonFormatting :: [(Properties, Text)] -> [(Properties, Text)] +removeCommonFormatting = + (\ts -> + if all (gBold . fst) ts + then map (\(p,t) -> (p{ gBold = False }, t)) ts + else ts) . + (\ts -> + if all (gItalic . fst) ts + then map (\(p,t) -> (p{ gItalic = False }, t)) ts + else ts) + + +-- {\field{\*\fldinst{HYPERLINK "http://pandoc.org"}}{\fldrslt foo}} +handleField :: PandocMonad m => Blocks -> [Tok] -> RTFParser m Blocks +handleField bs + (Tok _ + (Grouped + (Tok _ (ControlSymbol '*') + :Tok _ (ControlWord "fldinst" Nothing) + :Tok _ (Grouped (Tok _ (UnformattedText insttext):rest)) + :_)) + :linktoks) + | Just linkdest <- getHyperlink insttext + = do let linkdest' = case rest of + (Tok _ (ControlSymbol '\\') + : Tok _ (UnformattedText t) + : _) | Just bkmrk <- T.stripPrefix "l" t + -> "#" <> unquote bkmrk + _ -> linkdest + modifyGroup $ \g -> g{ gHyperlink = Just linkdest' } + result <- foldM processTok bs linktoks + modifyGroup $ \g -> g{ gHyperlink = Nothing } + return result +handleField bs _ = pure bs + +unquote :: Text -> Text +unquote = T.dropWhile (=='"') . T.dropWhileEnd (=='"') . T.strip + +handleListTable :: PandocMonad m => [Tok] -> RTFParser m () +handleListTable toks = do + mapM_ handleList toks + +handleList :: PandocMonad m => Tok -> RTFParser m () +handleList (Tok _ (Grouped (Tok _ (ControlWord "list" _) : toks))) = do + let listid = headDef 0 [n | Tok _ (ControlWord "listid" (Just n)) <- toks] + let levels = [ts | Tok _ (Grouped (Tok _ (ControlWord "listlevel" _) : ts)) + <- toks] + tbl <- foldM handleListLevel mempty (zip [0..] levels) + updateState $ \s -> s{ sListTable = IntMap.insert listid tbl $ sListTable s } +handleList _ = return () + +handleListLevel :: PandocMonad m + => ListLevelTable + -> (Int, [Tok]) + -> RTFParser m ListLevelTable +handleListLevel levelTable (lvl, toks) = do + let start = headDef 1 + [n | Tok _ (ControlWord "levelstartat" (Just n)) <- toks] + let mbNumberStyle = + case [n | Tok _ (ControlWord "levelnfc" (Just n)) <- toks] of + [] -> Nothing + (0:_) -> Just Decimal + (1:_) -> Just UpperRoman + (2:_) -> Just LowerRoman + (3:_) -> Just UpperAlpha + (4:_) -> Just LowerAlpha + (23:_) -> Nothing + (255:_) -> Nothing + _ -> Just DefaultStyle + let listType = case mbNumberStyle of + Nothing -> Bullet + Just numStyle -> Ordered (start,numStyle,Period) + return $ IntMap.insert lvl listType levelTable + +handleListOverrideTable :: PandocMonad m => [Tok] -> RTFParser m () +handleListOverrideTable toks = mapM_ handleListOverride toks + +handleListOverride :: PandocMonad m => Tok -> RTFParser m () +handleListOverride + (Tok _ (Grouped (Tok _ (ControlWord "listoverride" _) : toks))) = do + let listid = headDef 0 [n | Tok _ (ControlWord "listid" (Just n)) <- toks] + let lsn = headDef 0 [n | Tok _ (ControlWord "ls" (Just n)) <- toks] + -- TODO override stuff, esp. start num -- for now we just handle indirection + listTable <- sListTable <$> getState + case IntMap.lookup listid listTable of + Nothing -> return () + Just tbl -> updateState $ \s -> + s{ sListOverrideTable = IntMap.insert lsn tbl $ + sListOverrideTable s } +handleListOverride _ = return () + +handleStylesheet :: PandocMonad m => [Tok] -> RTFParser m () +handleStylesheet toks = do + let styles = mapMaybe parseStyle toks + updateState $ \s -> s{ sStylesheet = IntMap.fromList + $ zip (map styleNum styles) styles } + +parseStyle :: Tok -> Maybe Style +parseStyle (Tok _ (Grouped toks)) = do + let (styType, styNum, rest) = + case toks of + Tok _ (ControlWord "s" (Just n)) : ts -> (ParagraphStyle, n, ts) + Tok _ (ControlWord "ds" (Just n)) : ts -> (SectionStyle, n, ts) + Tok _ (ControlWord "cs" (Just n)) : ts -> (CharStyle, n, ts) + Tok _ (ControlWord "ts" (Just n)) : ts -> (TableStyle, n, ts) + _ -> (ParagraphStyle, 0, toks) + let styName = case lastMay rest of + Just (Tok _ (UnformattedText t)) -> T.dropWhileEnd (==';') t + _ -> mempty + let isBasedOn (Tok _ (ControlWord "sbasedon" (Just _))) = True + isBasedOn _ = False + let styBasedOn = case find isBasedOn toks of + Just (Tok _ (ControlWord "sbasedon" (Just i))) -> Just i + _ -> Nothing + let isStyleControl (Tok _ (ControlWord x _)) = + x `elem` ["cs", "s", "ds", "additive", "sbasedon", "snext", + "sautoupd", "shidden", "keycode", "alt", "shift", + "ctrl", "fn"] + isStyleControl _ = False + let styFormatting = filter (not . isStyleControl) (initSafe rest) + return $ Style{ styleNum = styNum + , styleType = styType + , styleBasedOn = styBasedOn + , styleName = styName + , styleFormatting = styFormatting + } +parseStyle _ = Nothing + +hexToWord :: Text -> Word8 +hexToWord t = case TR.hexadecimal t of + Left _ -> 0 + Right (x,_) -> x + + +handlePict :: PandocMonad m => [Tok] -> RTFParser m () +handlePict toks = do + let pict = foldl' getPictData def toks + let altText = "image" + let bytes = + if picBinary pict + then picBytes pict + else BL.pack $ map hexToWord $ T.chunksOf 2 $ picData pict + let (mimetype, ext) = + case picType pict of + Just Emfblip -> (Just "image/x-emf", ".emf") + Just Pngblip -> (Just "image/png", ".png") + Just Jpegblip -> (Just "image/jpeg", ".jpg") + Nothing -> (Nothing, "") + case mimetype of + Just mt -> do + let pictname = showDigest (sha1 bytes) <> ext + insertMedia pictname (Just mt) bytes + modifyGroup $ \g -> g{ gImage = Just pict{ picName = T.pack pictname, + picBytes = bytes } } + addText altText + modifyGroup $ \g -> g{ gImage = Nothing } + _ -> return () + where + getPictData :: Pict -> Tok -> Pict + getPictData pict (Tok _ tok') = + case tok' of + ControlWord "emfblip" _-> pict{ picType = Just Emfblip } + ControlWord "pngblip" _-> pict{ picType = Just Pngblip } + ControlWord "jpegblip" _-> pict{ picType = Just Jpegblip } + ControlWord "picw" (Just w) -> pict{ picWidth = Just w } + ControlWord "pich" (Just h) -> pict{ picHeight = Just h } + ControlWord "picwgoal" (Just w) -> pict{ picWidthGoal = Just w } + ControlWord "pichgoal" (Just h) -> pict{ picHeightGoal = Just h } + BinData d | not (BL.null d) + -> pict{ picBinary = True, picBytes = picBytes pict <> d } + UnformattedText t -> pict{ picData = t } + _ -> pict + + +getHyperlink :: Text -> Maybe Text +getHyperlink t = + case T.stripPrefix "HYPERLINK" (T.strip t) of + Nothing -> Nothing + Just rest -> Just $ unquote rest + +processFontTable :: [Tok] -> FontTable +processFontTable = snd . foldl' go (0, mempty) + where + go (fontnum, tbl) (Tok _ tok') = + case tok' of + (ControlWord "f" (Just i)) -> (i, tbl) + (ControlWord "fnil" _) -> (fontnum, tbl) + (ControlWord "froman" _) -> (fontnum, IntMap.insert fontnum Roman tbl) + (ControlWord "fswiss" _) -> (fontnum, IntMap.insert fontnum Swiss tbl) + (ControlWord "fmodern" _) -> (fontnum, IntMap.insert fontnum Modern tbl) + (ControlWord "fscript" _) -> (fontnum, IntMap.insert fontnum Script tbl) + (ControlWord "fdecor" _) -> (fontnum, IntMap.insert fontnum Decor tbl) + (ControlWord "ftech" _) -> (fontnum, IntMap.insert fontnum Tech tbl) + (ControlWord "fbidi" _) -> (fontnum, IntMap.insert fontnum Bidi tbl) + (Grouped ts) -> foldl' go (fontnum, tbl) ts + _ -> (fontnum, tbl) + + +ansiToChar :: Word8 -> Char +ansiToChar i = chr $ + case i of + 128 -> 8364 + 130 -> 8218 + 131 -> 402 + 132 -> 8222 + 133 -> 8230 + 134 -> 8224 + 135 -> 8225 + 136 -> 710 + 137 -> 8240 + 138 -> 352 + 139 -> 8249 + 140 -> 338 + 142 -> 381 + 145 -> 8216 + 146 -> 8217 + 147 -> 8220 + 148 -> 8221 + 149 -> 8226 + 150 -> 8211 + 151 -> 8212 + 152 -> 732 + 153 -> 8482 + 154 -> 353 + 155 -> 8250 + 156 -> 339 + 158 -> 382 + 159 -> 376 + 173 -> 0xAD + _ -> fromIntegral i + +macToChar :: Word8 -> Char +macToChar i = chr $ + case i of + 0x80 -> 0xC4 + 0x81 -> 0xC5 + 0x82 -> 0xC7 + 0x83 -> 0xC9 + 0x84 -> 0xD1 + 0x85 -> 0xD6 + 0x86 -> 0xDC + 0x87 -> 0xE1 + 0x88 -> 0xE0 + 0x89 -> 0xE2 + 0x8A -> 0xE4 + 0x8B -> 0xE3 + 0x8C -> 0xE5 + 0x8D -> 0xE7 + 0x8E -> 0xE9 + 0x8F -> 0xE8 + 0x90 -> 0xEA + 0x91 -> 0xEB + 0x92 -> 0xED + 0x93 -> 0xEC + 0x94 -> 0xEE + 0x95 -> 0xEF + 0x96 -> 0xF1 + 0x97 -> 0xF3 + 0x98 -> 0xF2 + 0x99 -> 0xF4 + 0x9A -> 0xF6 + 0x9B -> 0xF5 + 0x9C -> 0xFA + 0x9D -> 0xF9 + 0x9E -> 0xFB + 0x9F -> 0xFC + 0xA0 -> 0xDD + 0xA1 -> 0xB0 + 0xA2 -> 0xA2 + 0xA3 -> 0xA3 + 0xA4 -> 0xA7 + 0xA5 -> 0xD7 + 0xA6 -> 0xB6 + 0xA7 -> 0xDF + 0xA8 -> 0xAE + 0xA9 -> 0xA9 + 0xAA -> 0xB2 + 0xAB -> 0xB4 + 0xAC -> 0xA8 + 0xAD -> 0xB3 + 0xAE -> 0xC6 + 0xAF -> 0xD8 + 0xB0 -> 0xB9 + 0xB1 -> 0xB1 + 0xB2 -> 0xBC + 0xB3 -> 0xBD + 0xB4 -> 0xA5 + 0xB5 -> 0xB5 + 0xBA -> 0xBE + 0xBB -> 0xAA + 0xBC -> 0xBA + 0xBE -> 0xE6 + 0xBF -> 0xF8 + 0xC0 -> 0xBF + 0xC1 -> 0xA1 + 0xC2 -> 0xAC + 0xC3 -> 0x0141 + 0xC4 -> 0x0192 + 0xC5 -> 0x02CB + 0xC7 -> 0xAB + 0xC8 -> 0xBB + 0xC9 -> 0xA6 + 0xCA -> 0xA0 + 0xCB -> 0xC0 + 0xCC -> 0xC3 + 0xCD -> 0xD5 + 0xCE -> 0x0152 + 0xCF -> 0x0153 + 0xD0 -> 0xAD + 0xD4 -> 0x0142 + 0xD6 -> 0xF7 + 0xD8 -> 0xFF + 0xD9 -> 0x0178 + 0xDB -> 0xA4 + 0xDC -> 0xD0 + 0xDD -> 0xF0 + 0xDE -> 0xDE + 0xDF -> 0xFE + 0xE0 -> 0xFD + 0xE1 -> 0xB7 + 0xE5 -> 0xC2 + 0xE6 -> 0xCA + 0xE7 -> 0xC1 + 0xE8 -> 0xCB + 0xE9 -> 0xC8 + 0xEA -> 0xCD + 0xEB -> 0xCE + 0xEC -> 0xCF + 0xED -> 0xCC + 0xEE -> 0xD3 + 0xEF -> 0xD4 + 0xF1 -> 0xD2 + 0xF2 -> 0xDA + 0xF3 -> 0xDB + 0xF4 -> 0xD9 + 0xF5 -> 0x0131 + 0xF6 -> 0x02C6 + 0xF7 -> 0x02DC + 0xF8 -> 0xAF + 0xF9 -> 0x02D8 + 0xFA -> 0x02D9 + 0xFB -> 0x02DA + 0xFC -> 0xB8 + 0xFD -> 0x02DD + 0xFE -> 0x02DB + 0xFF -> 0x02C7 + _ -> fromIntegral i + +pcToChar :: Word8 -> Char +pcToChar i = chr $ + case i of + 0x80 -> 0xc7 + 0x81 -> 0xfc + 0x82 -> 0xe9 + 0x83 -> 0xe2 + 0x84 -> 0xe4 + 0x85 -> 0xe0 + 0x86 -> 0xe5 + 0x87 -> 0xe7 + 0x88 -> 0xea + 0x89 -> 0xeb + 0x8a -> 0xe8 + 0x8b -> 0xef + 0x8c -> 0xee + 0x8d -> 0xec + 0x8e -> 0xc4 + 0x8f -> 0xc5 + 0x90 -> 0xc9 + 0x91 -> 0xe6 + 0x92 -> 0xc6 + 0x93 -> 0xf4 + 0x94 -> 0xf6 + 0x95 -> 0xf2 + 0x96 -> 0xfb + 0x97 -> 0xf9 + 0x98 -> 0xff + 0x99 -> 0xd6 + 0x9a -> 0xdc + 0x9b -> 0xa2 + 0x9c -> 0xa3 + 0x9d -> 0xa5 + 0x9e -> 0x20a7 + 0x9f -> 0x0192 + 0xa0 -> 0xe1 + 0xa1 -> 0xed + 0xa2 -> 0xf3 + 0xa3 -> 0xfa + 0xa4 -> 0xf1 + 0xa5 -> 0xd1 + 0xa6 -> 0xaa + 0xa7 -> 0xba + 0xa8 -> 0xbf + 0xa9 -> 0x2310 + 0xaa -> 0xac + 0xab -> 0xbd + 0xac -> 0xbc + 0xad -> 0xa1 + 0xae -> 0xab + 0xaf -> 0xbb + 0xb0 -> 0x2591 + 0xb1 -> 0x2592 + 0xb2 -> 0x2593 + 0xb3 -> 0x2502 + 0xb4 -> 0x2524 + 0xb5 -> 0x2561 + 0xb6 -> 0x2562 + 0xb7 -> 0x2556 + 0xb8 -> 0x2555 + 0xb9 -> 0x2563 + 0xba -> 0x2551 + 0xbb -> 0x2557 + 0xbc -> 0x255d + 0xbd -> 0x255c + 0xbe -> 0x255b + 0xbf -> 0x2510 + 0xc0 -> 0x2514 + 0xc1 -> 0x2534 + 0xc2 -> 0x252c + 0xc3 -> 0x251c + 0xc4 -> 0x2500 + 0xc5 -> 0x253c + 0xc6 -> 0x255e + 0xc7 -> 0x255f + 0xc8 -> 0x255a + 0xc9 -> 0x2554 + 0xca -> 0x2569 + 0xcb -> 0x2566 + 0xcc -> 0x2560 + 0xcd -> 0x2550 + 0xce -> 0x256c + 0xcf -> 0x2567 + 0xd0 -> 0x2568 + 0xd1 -> 0x2564 + 0xd2 -> 0x2565 + 0xd3 -> 0x2559 + 0xd4 -> 0x2558 + 0xd5 -> 0x2552 + 0xd6 -> 0x2553 + 0xd7 -> 0x256b + 0xd8 -> 0x256a + 0xd9 -> 0x2518 + 0xda -> 0x250c + 0xdb -> 0x2588 + 0xdc -> 0x2584 + 0xdd -> 0x258c + 0xde -> 0x2590 + 0xdf -> 0x2580 + 0xe0 -> 0x03b1 + 0xe1 -> 0xdf + 0xe2 -> 0x0393 + 0xe3 -> 0x03c0 + 0xe4 -> 0x03a3 + 0xe5 -> 0x03c3 + 0xe6 -> 0xb5 + 0xe7 -> 0x03c4 + 0xe8 -> 0x03a6 + 0xe9 -> 0x0398 + 0xea -> 0x03a9 + 0xeb -> 0x03b4 + 0xec -> 0x221e + 0xed -> 0x03c6 + 0xee -> 0x03b5 + 0xef -> 0x2229 + 0xf0 -> 0x2261 + 0xf1 -> 0xb1 + 0xf2 -> 0x2265 + 0xf3 -> 0x2264 + 0xf4 -> 0x2320 + 0xf5 -> 0x2321 + 0xf6 -> 0xf7 + 0xf7 -> 0x2248 + 0xf8 -> 0xb0 + 0xf9 -> 0x2219 + 0xfa -> 0xb7 + 0xfb -> 0x221a + 0xfc -> 0x207f + 0xfd -> 0xb2 + 0xfe -> 0x25a0 + 0xff -> 0xa0 + _ -> fromIntegral i + +pcaToChar :: Word8 -> Char +pcaToChar i = chr $ + case i of + 0x80 -> 0x00c7 + 0x81 -> 0x00fc + 0x82 -> 0x00e9 + 0x83 -> 0x00e2 + 0x84 -> 0x00e4 + 0x85 -> 0x00e0 + 0x86 -> 0x00e5 + 0x87 -> 0x00e7 + 0x88 -> 0x00ea + 0x89 -> 0x00eb + 0x8a -> 0x00e8 + 0x8b -> 0x00ef + 0x8c -> 0x00ee + 0x8d -> 0x00ec + 0x8e -> 0x00c4 + 0x8f -> 0x00c5 + 0x90 -> 0x00c9 + 0x91 -> 0x00e6 + 0x92 -> 0x00c6 + 0x93 -> 0x00f4 + 0x94 -> 0x00f6 + 0x95 -> 0x00f2 + 0x96 -> 0x00fb + 0x97 -> 0x00f9 + 0x98 -> 0x00ff + 0x99 -> 0x00d6 + 0x9a -> 0x00dc + 0x9b -> 0x00f8 + 0x9c -> 0x00a3 + 0x9d -> 0x00d8 + 0x9e -> 0x00d7 + 0x9f -> 0x0192 + 0xa0 -> 0x00e1 + 0xa1 -> 0x00ed + 0xa2 -> 0x00f3 + 0xa3 -> 0x00fa + 0xa4 -> 0x00f1 + 0xa5 -> 0x00d1 + 0xa6 -> 0x00aa + 0xa7 -> 0x00ba + 0xa8 -> 0x00bf + 0xa9 -> 0x00ae + 0xaa -> 0x00ac + 0xab -> 0x00bd + 0xac -> 0x00bc + 0xad -> 0x00a1 + 0xae -> 0x00ab + 0xaf -> 0x00bb + 0xb0 -> 0x2591 + 0xb1 -> 0x2592 + 0xb2 -> 0x2593 + 0xb3 -> 0x2502 + 0xb4 -> 0x2524 + 0xb5 -> 0x00c1 + 0xb6 -> 0x00c2 + 0xb7 -> 0x00c0 + 0xb8 -> 0x00a9 + 0xb9 -> 0x2563 + 0xba -> 0x2551 + 0xbb -> 0x2557 + 0xbc -> 0x255d + 0xbd -> 0x00a2 + 0xbe -> 0x00a5 + 0xbf -> 0x2510 + 0xc0 -> 0x2514 + 0xc1 -> 0x2534 + 0xc2 -> 0x252c + 0xc3 -> 0x251c + 0xc4 -> 0x2500 + 0xc5 -> 0x253c + 0xc6 -> 0x00e3 + 0xc7 -> 0x00c3 + 0xc8 -> 0x255a + 0xc9 -> 0x2554 + 0xca -> 0x2569 + 0xcb -> 0x2566 + 0xcc -> 0x2560 + 0xcd -> 0x2550 + 0xce -> 0x256c + 0xcf -> 0x00a4 + 0xd0 -> 0x00f0 + 0xd1 -> 0x00d0 + 0xd2 -> 0x00ca + 0xd3 -> 0x00cb + 0xd4 -> 0x00c8 + 0xd5 -> 0x0131 + 0xd6 -> 0x00cd + 0xd7 -> 0x00ce + 0xd8 -> 0x00cf + 0xd9 -> 0x2518 + 0xda -> 0x250c + 0xdb -> 0x2588 + 0xdc -> 0x2584 + 0xdd -> 0x00a6 + 0xde -> 0x00cc + 0xdf -> 0x2580 + 0xe0 -> 0x00d3 + 0xe1 -> 0x00df + 0xe2 -> 0x00d4 + 0xe3 -> 0x00d2 + 0xe4 -> 0x00f5 + 0xe5 -> 0x00d5 + 0xe6 -> 0x00b5 + 0xe7 -> 0x00fe + 0xe8 -> 0x00de + 0xe9 -> 0x00da + 0xea -> 0x00db + 0xeb -> 0x00d9 + 0xec -> 0x00fd + 0xed -> 0x00dd + 0xee -> 0x00af + 0xef -> 0x00b4 + 0xf0 -> 0x00ad + 0xf1 -> 0x00b1 + 0xf2 -> 0x2017 + 0xf3 -> 0x00be + 0xf4 -> 0x00b6 + 0xf5 -> 0x00a7 + 0xf6 -> 0x00f7 + 0xf7 -> 0x00b8 + 0xf8 -> 0x00b0 + 0xf9 -> 0x00a8 + 0xfa -> 0x00b7 + 0xfb -> 0x00b9 + 0xfc -> 0x00b3 + 0xfd -> 0x00b2 + 0xfe -> 0x25a0 + 0xff -> 0x00a0 + _ -> fromIntegral i diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 3bbab4bbe..bd73c37dc 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -60,18 +60,6 @@ convertTags :: PandocMonad m => [Tag T.Text] -> m [Tag T.Text] convertTags [] = return [] convertTags (t@TagOpen{}:ts) | fromAttrib "data-external" t == "1" = (t:) <$> convertTags ts -convertTags (t@(TagOpen tagname as):ts) - | any (isSourceAttribute tagname) as - = do - as' <- mapM processAttribute as - rest <- convertTags ts - return $ TagOpen tagname as' : rest - where processAttribute (x,y) = - if isSourceAttribute tagname (x,y) - then do - enc <- getDataURI (fromAttrib "type" t) y - return (x, enc) - else return (x,y) convertTags (t@(TagOpen "script" as):TagClose "script":ts) = case fromAttrib "src" t of "" -> (t:) <$> convertTags ts @@ -125,6 +113,18 @@ convertTags (t@(TagOpen "link" as):ts) = return $ TagOpen "link" (("href",makeDataURI (mime, bs)) : [(x,y) | (x,y) <- as, x /= "href"]) : rest +convertTags (t@(TagOpen tagname as):ts) + | any (isSourceAttribute tagname) as + = do + as' <- mapM processAttribute as + rest <- convertTags ts + return $ TagOpen tagname as' : rest + where processAttribute (x,y) = + if isSourceAttribute tagname (x,y) + then do + enc <- getDataURI (fromAttrib "type" t) y + return (x, enc) + else return (x,y) convertTags (t:ts) = (t:) <$> convertTags ts cssURLs :: PandocMonad m diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 920edca7b..50abe6937 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -25,6 +25,7 @@ module Text.Pandoc.Shared ( ordNub, findM, -- * Text processing + inquotes, tshow, elemText, notElemText, @@ -68,7 +69,6 @@ module Text.Pandoc.Shared ( makeMeta, eastAsianLineBreakFilter, htmlSpanLikeElements, - splitSentences, filterIpynbOutput, -- * TagSoup HTML handling renderTags', @@ -187,6 +187,10 @@ findM p = foldr go (pure Nothing) -- Text processing -- +-- | Wrap double quotes around a Text +inquotes :: T.Text -> T.Text +inquotes txt = T.cons '\"' (T.snoc txt '\"') + tshow :: Show a => a -> T.Text tshow = T.pack . show @@ -709,33 +713,6 @@ eastAsianLineBreakFilter = bottomUp go htmlSpanLikeElements :: Set.Set T.Text htmlSpanLikeElements = Set.fromList ["kbd", "mark", "dfn"] --- | Returns the first sentence in a list of inlines, and the rest. -breakSentence :: [Inline] -> ([Inline], [Inline]) -breakSentence [] = ([],[]) -breakSentence xs = - let isSentenceEndInline (Str ys) - | Just (_, c) <- T.unsnoc ys = c == '.' || c == '?' - isSentenceEndInline LineBreak = True - isSentenceEndInline _ = False - (as, bs) = break isSentenceEndInline xs - in case bs of - [] -> (as, []) - [c] -> (as ++ [c], []) - (c:Space:cs) -> (as ++ [c], cs) - (c:SoftBreak:cs) -> (as ++ [c], cs) - (Str ".":Str s@(T.uncons -> Just (')',_)):cs) - -> (as ++ [Str ".", Str s], cs) - (x@(Str (T.stripPrefix ".)" -> Just _)):cs) -> (as ++ [x], cs) - (LineBreak:x@(Str (T.uncons -> Just ('.',_))):cs) -> (as ++[LineBreak], x:cs) - (c:cs) -> (as ++ [c] ++ ds, es) - where (ds, es) = breakSentence cs - --- | Split a list of inlines into sentences. -splitSentences :: [Inline] -> [[Inline]] -splitSentences xs = - let (sent, rest) = breakSentence xs - in if null rest then [sent] else sent : splitSentences rest - -- | Process ipynb output cells. If mode is Nothing, -- remove all output. If mode is Just format, select -- best output for the format. If format is not ipynb, @@ -755,17 +732,17 @@ filterIpynbOutput mode = walk go where rank (RawBlock (Format "html") _) | fmt == Format "html" = 1 :: Int - | fmt == Format "markdown" = 2 - | otherwise = 3 + | fmt == Format "markdown" = 3 + | otherwise = 4 rank (RawBlock (Format "latex") _) | fmt == Format "latex" = 1 - | fmt == Format "markdown" = 2 - | otherwise = 3 + | fmt == Format "markdown" = 3 + | otherwise = 4 rank (RawBlock f _) | fmt == f = 1 - | otherwise = 3 - rank (Para [Image{}]) = 1 - rank _ = 2 + | otherwise = 4 + rank (Para [Image{}]) = 2 + rank _ = 3 removeANSI (CodeBlock attr code) = CodeBlock attr (removeANSIEscapes code) removeANSI x = x diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs index 0c7d7ab23..b0476a0ab 100644 --- a/src/Text/Pandoc/Translations.hs +++ b/src/Text/Pandoc/Translations.hs @@ -31,13 +31,13 @@ module Text.Pandoc.Translations ( where import Data.Aeson.Types (Value(..), FromJSON(..)) import qualified Data.Aeson.Types as Aeson -import qualified Data.HashMap.Strict as HM import qualified Data.Map as M import qualified Data.Text as T -import qualified Data.YAML as YAML +import qualified Data.Yaml as Yaml import GHC.Generics (Generic) import Text.Pandoc.Shared (safeRead) import qualified Text.Pandoc.UTF8 as UTF8 +import Data.Yaml (prettyPrintParseException) data Term = Abstract @@ -74,17 +74,9 @@ instance FromJSON Term where show t parseJSON invalid = Aeson.typeMismatch "Term" invalid -instance YAML.FromYAML Term where - parseYAML (YAML.Scalar _ (YAML.SStr t)) = - case safeRead t of - Just t' -> pure t' - Nothing -> Prelude.fail $ "Invalid Term name " ++ - show t - parseYAML invalid = YAML.typeMismatch "Term" invalid - instance FromJSON Translations where - parseJSON (Object hm) = do - xs <- mapM addItem (HM.toList hm) + parseJSON o@(Object{}) = do + xs <- parseJSON o >>= mapM addItem . M.toList return $ Translations (M.fromList xs) where addItem (k,v) = case safeRead k of @@ -95,27 +87,12 @@ instance FromJSON Translations where inv -> Aeson.typeMismatch "String" inv parseJSON invalid = Aeson.typeMismatch "Translations" invalid -instance YAML.FromYAML Translations where - parseYAML = YAML.withMap "Translations" $ - \tr -> Translations .M.fromList <$> mapM addItem (M.toList tr) - where addItem (n@(YAML.Scalar _ (YAML.SStr k)), v) = - case safeRead k of - Nothing -> YAML.typeMismatch "Term" n - Just t -> - case v of - (YAML.Scalar _ (YAML.SStr s)) -> - return (t, T.strip s) - n' -> YAML.typeMismatch "String" n' - addItem (n, _) = YAML.typeMismatch "String" n - lookupTerm :: Term -> Translations -> Maybe T.Text lookupTerm t (Translations tm) = M.lookup t tm readTranslations :: T.Text -> Either T.Text Translations readTranslations s = - case YAML.decodeStrict $ UTF8.fromText s of - Left (pos,err') -> Left $ T.pack $ err' ++ - " (line " ++ show (YAML.posLine pos) ++ " column " ++ - show (YAML.posColumn pos) ++ ")" + case Yaml.decodeAllEither' $ UTF8.fromText s of + Left err' -> Left $ T.pack $ prettyPrintParseException err' Right (t:_) -> Right t Right [] -> Left "empty YAML document" diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 4d5921faf..e154f0535 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -96,7 +96,7 @@ hGetContents :: Handle -> IO Text hGetContents = fmap toText . B.hGetContents -- | Convert UTF8-encoded ByteString to Text, also --- removing '\r' characters. +-- removing '\\r' characters. toText :: B.ByteString -> Text toText = T.decodeUtf8 . filterCRs . dropBOM where dropBOM bs = @@ -106,12 +106,12 @@ toText = T.decodeUtf8 . filterCRs . dropBOM filterCRs = B.filter (/='\r') -- | Convert UTF8-encoded ByteString to String, also --- removing '\r' characters. +-- removing '\\r' characters. toString :: B.ByteString -> String toString = T.unpack . toText -- | Convert UTF8-encoded ByteString to Text, also --- removing '\r' characters. +-- removing '\\r' characters. toTextLazy :: BL.ByteString -> TL.Text toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM where dropBOM bs = @@ -121,7 +121,7 @@ toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM filterCRs = BL.filter (/='\r') -- | Convert UTF8-encoded ByteString to String, also --- removing '\r' characters. +-- removing '\\r' characters. toStringLazy :: BL.ByteString -> String toStringLazy = TL.unpack . toTextLazy diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index c348477c2..960b9074c 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -51,6 +51,7 @@ module Text.Pandoc.Writers , writeLaTeX , writeMan , writeMarkdown + , writeMarkua , writeMediaWiki , writeMs , writeMuse @@ -190,6 +191,7 @@ writers = [ ,("csljson" , TextWriter writeCslJson) ,("bibtex" , TextWriter writeBibTeX) ,("biblatex" , TextWriter writeBibLaTeX) + ,("markua" , TextWriter writeMarkua) ] -- | Retrieve writer, extensions based on formatSpec (format+extensions). diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index ab7e5f1a9..24438370a 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -21,7 +21,7 @@ AsciiDoc: <http://www.methods.co.nz/asciidoc/> module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc, writeAsciiDoctor) where import Control.Monad.State.Strict import Data.Char (isPunctuation, isSpace) -import Data.List (intercalate, intersperse) +import Data.List (delete, intercalate, intersperse) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (fromMaybe, isJust) import qualified Data.Set as Set @@ -149,9 +149,8 @@ blockToAsciiDoc opts (Div (id',"section":_,_) blockToAsciiDoc opts (Plain inlines) = do contents <- inlineListToAsciiDoc opts inlines return $ contents <> blankline -blockToAsciiDoc opts (Para [Image attr alternate (src,tgt)]) +blockToAsciiDoc opts (SimpleFigure attr alternate (src, tit)) -- image::images/logo.png[Company logo, title="blah"] - | Just tit <- T.stripPrefix "fig:" tgt = (\args -> "image::" <> args <> blankline) <$> imageArguments opts attr alternate src tit blockToAsciiDoc opts (Para inlines) = do @@ -193,7 +192,10 @@ blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ flush ( then "...." $$ literal str $$ "...." else attrs $$ "----" $$ literal str $$ "----") <> blankline - where attrs = "[" <> literal (T.intercalate "," ("source" : classes)) <> "]" + where attrs = "[" <> literal (T.intercalate "," classes') <> "]" + classes' = if "numberLines" `elem` classes + then "source%linesnum" : delete "numberLines" classes + else "source" : classes blockToAsciiDoc opts (BlockQuote blocks) = do contents <- blockListToAsciiDoc opts blocks let isBlock (BlockQuote _) = True @@ -546,6 +548,7 @@ inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do -- or my@email.com[email john] linktext <- inlineListToAsciiDoc opts txt let isRelative = T.all (/= ':') src + let needsPassthrough = "--" `T.isInfixOf` src let prefix = if isRelative then text "link:" else empty @@ -553,9 +556,16 @@ inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do let useAuto = case txt of [Str s] | escapeURI s == srcSuffix -> True _ -> False - return $ if useAuto - then literal srcSuffix - else prefix <> literal src <> "[" <> linktext <> "]" + return $ + if needsPassthrough + then + if useAuto + then "link:++" <> literal srcSuffix <> "++[]" + else "link:++" <> literal src <> "++[" <> linktext <> "]" + else + if useAuto + then literal srcSuffix + else prefix <> literal src <> "[" <> linktext <> "]" inlineToAsciiDoc opts (Image attr alternate (src, tit)) = ("image:" <>) <$> imageArguments opts attr alternate src tit inlineToAsciiDoc opts (Note [Para inlines]) = diff --git a/src/Text/Pandoc/Writers/Blaze.hs b/src/Text/Pandoc/Writers/Blaze.hs new file mode 100644 index 000000000..0e3bd0f98 --- /dev/null +++ b/src/Text/Pandoc/Writers/Blaze.hs @@ -0,0 +1,139 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.Shared + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Render blaze-html Html to DocLayout document (so it can be wrapped). +-} +module Text.Pandoc.Writers.Blaze ( layoutMarkup ) +where +import Text.Blaze +import qualified Data.ByteString as S +import Data.List (isInfixOf) +import Data.Text.Encoding (decodeUtf8) +import qualified Data.Text as T +import Data.Text (Text) +import Text.DocLayout hiding (Text, Empty) +import Text.Blaze.Internal (ChoiceString(..), getText, MarkupM(..)) + +layoutMarkup :: Markup -> Doc T.Text +layoutMarkup = go True mempty + where + go :: Bool -> Doc T.Text -> MarkupM b -> Doc T.Text + go wrap attrs (Parent _ open close content) = + let open' = getText open + in literal open' + <> attrs + <> char '>' + <> (if allowsWrap open' + then go wrap mempty content + else flush $ go False mempty content) + <> literal (getText close) + go wrap attrs (CustomParent tag content) = + char '<' + <> fromChoiceString wrap tag + <> attrs + <> char '>' + <> go wrap mempty content + <> literal "</" + <> fromChoiceString wrap tag + <> char '>' + go _wrap attrs (Leaf _ begin end _) = + literal (getText begin) + <> attrs + <> literal (getText end) + go wrap attrs (CustomLeaf tag close _) = + char '<' + <> fromChoiceString wrap tag + <> attrs + <> (if close then literal " />" else char '>') + go wrap attrs (AddAttribute rawkey _ value h) = + go wrap + (space' wrap + <> literal (getText rawkey) + <> char '=' + <> doubleQuotes (fromChoiceString wrap value) + <> attrs) h + go wrap attrs (AddCustomAttribute key value h) = + go wrap + (space' wrap + <> fromChoiceString wrap key + <> char '=' + <> doubleQuotes (fromChoiceString wrap value) + <> attrs) h + go wrap _ (Content content _) = fromChoiceString wrap content + go wrap _ (Comment comment _) = + literal "<!--" + <> space' wrap + <> fromChoiceString wrap comment + <> space' wrap + <> "-->" + go wrap attrs (Append h1 h2) = go wrap attrs h1 <> go wrap attrs h2 + go _ _ (Empty _) = mempty + space' wrap = if wrap then space else char ' ' + +allowsWrap :: T.Text -> Bool +allowsWrap t = + not (t == "<pre" || t == "<style" || t == "<script" || t == "<textarea") + +fromChoiceString :: Bool -- ^ Allow wrapping + -> ChoiceString -- ^ String to render + -> Doc Text -- ^ Resulting builder +fromChoiceString wrap (Static s) = withWrap wrap $ getText s +fromChoiceString wrap (String s) = withWrap wrap $ + escapeMarkupEntities $ T.pack s +fromChoiceString wrap (Text s) = withWrap wrap $ escapeMarkupEntities s +fromChoiceString wrap (ByteString s) = withWrap wrap $ decodeUtf8 s +fromChoiceString _wrap (PreEscaped x) = -- don't wrap! + case x of + String s -> literal $ T.pack s + Text s -> literal s + s -> fromChoiceString False s +fromChoiceString wrap (External x) = case x of + -- Check that the sequence "</" is *not* in the external data. + String s -> if "</" `isInfixOf` s then mempty else withWrap wrap (T.pack s) + Text s -> if "</" `T.isInfixOf` s then mempty else withWrap wrap s + ByteString s -> if "</" `S.isInfixOf` s then mempty else withWrap wrap (decodeUtf8 s) + s -> fromChoiceString wrap s +fromChoiceString wrap (AppendChoiceString x y) = + fromChoiceString wrap x <> fromChoiceString wrap y +fromChoiceString _ EmptyChoiceString = mempty + +withWrap :: Bool -> Text -> Doc Text +withWrap wrap + | wrap = mconcat . toChunks + | otherwise = literal + +toChunks :: Text -> [Doc Text] +toChunks = map toDoc . T.groupBy sameStatus + where + toDoc t = + if T.any (== ' ') t + then space + else if T.any (== '\n') t + then cr + else literal t + sameStatus c d = + (c == ' ' && d == ' ') || + (c == '\n' && d == '\n') || + (c /= ' ' && d /= ' ' && c /= '\n' && d /= '\n') + + +-- | Escape predefined XML entities in a text value +-- +escapeMarkupEntities :: Text -- ^ Text to escape + -> Text -- ^ Resulting Doc +escapeMarkupEntities = T.concatMap escape + where + escape :: Char -> Text + escape '<' = "<" + escape '>' = ">" + escape '&' = "&" + escape '"' = """ + escape '\'' = "'" + escape x = T.singleton x diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 3cafcefba..13970cbc3 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -162,10 +162,7 @@ blockToConTeXt (Div attr@(_,"section":_,_) innerContents <- blockListToConTeXt xs return $ header' $$ innerContents $$ footer' blockToConTeXt (Plain lst) = inlineListToConTeXt lst --- title beginning with fig: indicates that the image is a figure -blockToConTeXt (Para [Image attr txt (src,tgt)]) - | Just _ <- T.stripPrefix "fig:" tgt - = do +blockToConTeXt (SimpleFigure attr txt (src, _)) = do capt <- inlineListToConTeXt txt img <- inlineToConTeXt (Image attr txt (src, "")) let (ident, _, _) = attr diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 58c4bb5be..da212ab4e 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,5 +1,8 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} {- | Module : Text.Pandoc.Writers.Custom Copyright : Copyright (C) 2012-2021 John MacFarlane @@ -10,7 +13,7 @@ Portability : portable Conversion of 'Pandoc' documents to custom markup using -a lua writer. +a Lua writer. -} module Text.Pandoc.Writers.Custom ( writeCustom ) where import Control.Arrow ((***)) @@ -20,49 +23,51 @@ import Data.List (intersperse) import qualified Data.Map as M import qualified Data.Text as T import Data.Text (Text, pack) -import Foreign.Lua (Lua, Pushable) +import HsLua as Lua hiding (Operation (Div), render) +import HsLua.Class.Peekable (PeekError) import Text.DocLayout (render, literal) -import Text.Pandoc.Class.PandocIO (PandocIO) +import Control.Monad.IO.Class (MonadIO) import Text.Pandoc.Definition import Text.Pandoc.Lua (Global (..), runLua, setGlobals) import Text.Pandoc.Lua.Util (addField, dofileWithTraceback) import Text.Pandoc.Options +import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared -import qualified Foreign.Lua as Lua - attrToMap :: Attr -> M.Map T.Text T.Text attrToMap (id',classes,keyvals) = M.fromList $ ("id", id') : ("class", T.unwords classes) : keyvals -newtype Stringify a = Stringify a +newtype Stringify e a = Stringify a -instance Pushable (Stringify Format) where +instance Pushable (Stringify e Format) where push (Stringify (Format f)) = Lua.push (T.toLower f) -instance Pushable (Stringify [Inline]) where - push (Stringify ils) = Lua.push =<< inlineListToCustom ils +instance PeekError e => Pushable (Stringify e [Inline]) where + push (Stringify ils) = Lua.push =<< + changeErrorType ((inlineListToCustom @e) ils) -instance Pushable (Stringify [Block]) where - push (Stringify blks) = Lua.push =<< blockListToCustom blks +instance PeekError e => Pushable (Stringify e [Block]) where + push (Stringify blks) = Lua.push =<< + changeErrorType ((blockListToCustom @e) blks) -instance Pushable (Stringify MetaValue) where - push (Stringify (MetaMap m)) = Lua.push (fmap Stringify m) - push (Stringify (MetaList xs)) = Lua.push (map Stringify xs) +instance PeekError e => Pushable (Stringify e MetaValue) where + push (Stringify (MetaMap m)) = Lua.push (fmap (Stringify @e) m) + push (Stringify (MetaList xs)) = Lua.push (map (Stringify @e) xs) push (Stringify (MetaBool x)) = Lua.push x push (Stringify (MetaString s)) = Lua.push s - push (Stringify (MetaInlines ils)) = Lua.push (Stringify ils) - push (Stringify (MetaBlocks bs)) = Lua.push (Stringify bs) + push (Stringify (MetaInlines ils)) = Lua.push (Stringify @e ils) + push (Stringify (MetaBlocks bs)) = Lua.push (Stringify @e bs) -instance Pushable (Stringify Citation) where +instance PeekError e => Pushable (Stringify e Citation) where push (Stringify cit) = do Lua.createtable 6 0 addField "citationId" $ citationId cit - addField "citationPrefix" . Stringify $ citationPrefix cit - addField "citationSuffix" . Stringify $ citationSuffix cit + addField "citationPrefix" . Stringify @e $ citationPrefix cit + addField "citationSuffix" . Stringify @e $ citationSuffix cit addField "citationMode" $ show (citationMode cit) addField "citationNoteNum" $ citationNoteNum cit addField "citationHash" $ citationHash cit @@ -76,10 +81,11 @@ instance (Pushable a, Pushable b) => Pushable (KeyValue a b) where Lua.newtable Lua.push k Lua.push v - Lua.rawset (Lua.nthFromTop 3) + Lua.rawset (Lua.nth 3) -- | Convert Pandoc to custom markup. -writeCustom :: FilePath -> WriterOptions -> Pandoc -> PandocIO Text +writeCustom :: (PandocMonad m, MonadIO m) + => FilePath -> WriterOptions -> Pandoc -> m Text writeCustom luaFile opts doc@(Pandoc meta _) = do let globals = [ PANDOC_DOCUMENT doc , PANDOC_SCRIPT_FILE luaFile @@ -90,7 +96,7 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do -- check for error in lua script (later we'll change the return type -- to handle this more gracefully): when (stat /= Lua.OK) - Lua.throwTopMessage + Lua.throwErrorAsException rendered <- docToCustom opts doc context <- metaToContext opts (fmap (literal . pack) . blockListToCustom) @@ -105,126 +111,132 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do Just tpl -> render Nothing $ renderTemplate tpl $ setField "body" body context -docToCustom :: WriterOptions -> Pandoc -> Lua String +docToCustom :: forall e. PeekError e + => WriterOptions -> Pandoc -> LuaE e String docToCustom opts (Pandoc (Meta metamap) blocks) = do body <- blockListToCustom blocks - Lua.callFunc "Doc" body (fmap Stringify metamap) (writerVariables opts) + invoke @e "Doc" body (fmap (Stringify @e) metamap) (writerVariables opts) -- | Convert Pandoc block element to Custom. -blockToCustom :: Block -- ^ Block element - -> Lua String +blockToCustom :: forall e. PeekError e + => Block -- ^ Block element + -> LuaE e String blockToCustom Null = return "" -blockToCustom (Plain inlines) = Lua.callFunc "Plain" (Stringify inlines) +blockToCustom (Plain inlines) = invoke @e "Plain" (Stringify @e inlines) blockToCustom (Para [Image attr txt (src,tit)]) = - Lua.callFunc "CaptionedImage" src tit (Stringify txt) (attrToMap attr) + invoke @e "CaptionedImage" src tit (Stringify @e txt) (attrToMap attr) -blockToCustom (Para inlines) = Lua.callFunc "Para" (Stringify inlines) +blockToCustom (Para inlines) = invoke @e "Para" (Stringify @e inlines) blockToCustom (LineBlock linesList) = - Lua.callFunc "LineBlock" (map Stringify linesList) + invoke @e "LineBlock" (map (Stringify @e) linesList) blockToCustom (RawBlock format str) = - Lua.callFunc "RawBlock" (Stringify format) str + invoke @e "RawBlock" (Stringify @e format) str -blockToCustom HorizontalRule = Lua.callFunc "HorizontalRule" +blockToCustom HorizontalRule = invoke @e "HorizontalRule" blockToCustom (Header level attr inlines) = - Lua.callFunc "Header" level (Stringify inlines) (attrToMap attr) + invoke @e "Header" level (Stringify @e inlines) (attrToMap attr) blockToCustom (CodeBlock attr str) = - Lua.callFunc "CodeBlock" str (attrToMap attr) + invoke @e "CodeBlock" str (attrToMap attr) blockToCustom (BlockQuote blocks) = - Lua.callFunc "BlockQuote" (Stringify blocks) + invoke @e "BlockQuote" (Stringify @e blocks) blockToCustom (Table _ blkCapt specs thead tbody tfoot) = let (capt, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot aligns' = map show aligns - capt' = Stringify capt - headers' = map Stringify headers - rows' = map (map Stringify) rows - in Lua.callFunc "Table" capt' aligns' widths headers' rows' + capt' = Stringify @e capt + headers' = map (Stringify @e) headers + rows' = map (map (Stringify @e)) rows + in invoke @e "Table" capt' aligns' widths headers' rows' blockToCustom (BulletList items) = - Lua.callFunc "BulletList" (map Stringify items) + invoke @e "BulletList" (map (Stringify @e) items) blockToCustom (OrderedList (num,sty,delim) items) = - Lua.callFunc "OrderedList" (map Stringify items) num (show sty) (show delim) + invoke @e "OrderedList" (map (Stringify @e) items) num (show sty) (show delim) blockToCustom (DefinitionList items) = - Lua.callFunc "DefinitionList" - (map (KeyValue . (Stringify *** map Stringify)) items) + invoke @e "DefinitionList" + (map (KeyValue . (Stringify @e *** map (Stringify @e))) items) blockToCustom (Div attr items) = - Lua.callFunc "Div" (Stringify items) (attrToMap attr) + invoke @e "Div" (Stringify @e items) (attrToMap attr) -- | Convert list of Pandoc block elements to Custom. -blockListToCustom :: [Block] -- ^ List of block elements - -> Lua String +blockListToCustom :: forall e. PeekError e + => [Block] -- ^ List of block elements + -> LuaE e String blockListToCustom xs = do - blocksep <- Lua.callFunc "Blocksep" + blocksep <- invoke @e "Blocksep" bs <- mapM blockToCustom xs return $ mconcat $ intersperse blocksep bs -- | Convert list of Pandoc inline elements to Custom. -inlineListToCustom :: [Inline] -> Lua String +inlineListToCustom :: forall e. PeekError e => [Inline] -> LuaE e String inlineListToCustom lst = do - xs <- mapM inlineToCustom lst + xs <- mapM (inlineToCustom @e) lst return $ mconcat xs -- | Convert Pandoc inline element to Custom. -inlineToCustom :: Inline -> Lua String +inlineToCustom :: forall e. PeekError e => Inline -> LuaE e String -inlineToCustom (Str str) = Lua.callFunc "Str" str +inlineToCustom (Str str) = invoke @e "Str" str -inlineToCustom Space = Lua.callFunc "Space" +inlineToCustom Space = invoke @e "Space" -inlineToCustom SoftBreak = Lua.callFunc "SoftBreak" +inlineToCustom SoftBreak = invoke @e "SoftBreak" -inlineToCustom (Emph lst) = Lua.callFunc "Emph" (Stringify lst) +inlineToCustom (Emph lst) = invoke @e "Emph" (Stringify @e lst) -inlineToCustom (Underline lst) = Lua.callFunc "Underline" (Stringify lst) +inlineToCustom (Underline lst) = invoke @e "Underline" (Stringify @e lst) -inlineToCustom (Strong lst) = Lua.callFunc "Strong" (Stringify lst) +inlineToCustom (Strong lst) = invoke @e "Strong" (Stringify @e lst) -inlineToCustom (Strikeout lst) = Lua.callFunc "Strikeout" (Stringify lst) +inlineToCustom (Strikeout lst) = invoke @e "Strikeout" (Stringify @e lst) -inlineToCustom (Superscript lst) = Lua.callFunc "Superscript" (Stringify lst) +inlineToCustom (Superscript lst) = invoke @e "Superscript" (Stringify @e lst) -inlineToCustom (Subscript lst) = Lua.callFunc "Subscript" (Stringify lst) +inlineToCustom (Subscript lst) = invoke @e "Subscript" (Stringify @e lst) -inlineToCustom (SmallCaps lst) = Lua.callFunc "SmallCaps" (Stringify lst) +inlineToCustom (SmallCaps lst) = invoke @e "SmallCaps" (Stringify @e lst) -inlineToCustom (Quoted SingleQuote lst) = Lua.callFunc "SingleQuoted" (Stringify lst) +inlineToCustom (Quoted SingleQuote lst) = + invoke @e "SingleQuoted" (Stringify @e lst) -inlineToCustom (Quoted DoubleQuote lst) = Lua.callFunc "DoubleQuoted" (Stringify lst) +inlineToCustom (Quoted DoubleQuote lst) = + invoke @e "DoubleQuoted" (Stringify @e lst) -inlineToCustom (Cite cs lst) = Lua.callFunc "Cite" (Stringify lst) (map Stringify cs) +inlineToCustom (Cite cs lst) = + invoke @e "Cite" (Stringify @e lst) (map (Stringify @e) cs) inlineToCustom (Code attr str) = - Lua.callFunc "Code" str (attrToMap attr) + invoke @e "Code" str (attrToMap attr) inlineToCustom (Math DisplayMath str) = - Lua.callFunc "DisplayMath" str + invoke @e "DisplayMath" str inlineToCustom (Math InlineMath str) = - Lua.callFunc "InlineMath" str + invoke @e "InlineMath" str inlineToCustom (RawInline format str) = - Lua.callFunc "RawInline" (Stringify format) str + invoke @e "RawInline" (Stringify @e format) str -inlineToCustom LineBreak = Lua.callFunc "LineBreak" +inlineToCustom LineBreak = invoke @e "LineBreak" inlineToCustom (Link attr txt (src,tit)) = - Lua.callFunc "Link" (Stringify txt) src tit (attrToMap attr) + invoke @e "Link" (Stringify @e txt) src tit (attrToMap attr) inlineToCustom (Image attr alt (src,tit)) = - Lua.callFunc "Image" (Stringify alt) src tit (attrToMap attr) + invoke @e "Image" (Stringify @e alt) src tit (attrToMap attr) -inlineToCustom (Note contents) = Lua.callFunc "Note" (Stringify contents) +inlineToCustom (Note contents) = invoke @e "Note" (Stringify @e contents) inlineToCustom (Span attr items) = - Lua.callFunc "Span" (Stringify items) (attrToMap attr) + invoke @e "Span" (Stringify @e items) (attrToMap attr) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 33a6f5f0c..c9e49517f 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,6 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} -{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Docbook Copyright : Copyright (C) 2006-2021 John MacFarlane @@ -188,7 +187,7 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs)) -- standalone documents will include them in the template. then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] else [] - + -- Populate miscAttr with Header.Attr.attributes, filtering out non-valid DocBook section attributes, id, and xml:id miscAttr = filter (isSectionAttr version) attrs attribs = nsAttr <> idAttr <> miscAttr @@ -233,7 +232,7 @@ blockToDocbook _ h@Header{} = do return empty blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure -blockToDocbook opts (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just _)]) = do +blockToDocbook opts (SimpleFigure attr txt (src, _)) = do alt <- inlinesToDocbook opts txt let capt = if null txt then empty diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index a3c4b6be1..ce7133f33 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -36,7 +36,9 @@ import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting import Text.Collate.Lang (renderLang) -import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang, translateTerm) +import Text.Pandoc.Class (PandocMonad, report, toLang, translateTerm, + getMediaBag) +import Text.Pandoc.MediaBag (lookupMedia, MediaItem(..)) import qualified Text.Pandoc.Translations as Term import qualified Text.Pandoc.Class.PandocMonad as P import Data.Time @@ -175,6 +177,7 @@ writeDocx opts doc = do let initialSt = defaultWriterState { stStyleMaps = styleMaps , stTocTitle = tocTitle + , stCurId = 20 } let isRTLmeta = case lookupMeta "dir" meta of @@ -783,8 +786,6 @@ rStyleM styleName = do return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] () getUniqueId :: (PandocMonad m) => WS m Text --- the + 20 is to ensure that there are no clashes with the rIds --- already in word/document.xml.rel getUniqueId = do n <- gets stCurId modify $ \st -> st{stCurId = n + 1} @@ -853,11 +854,13 @@ blockToOpenXML' opts (Plain lst) = do then withParaProp prop block else block -- title beginning with fig: indicates that the image is a figure -blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit)]) = do +blockToOpenXML' opts (SimpleFigure attr@(imgident, _, _) alt (src, tit)) = do setFirstPara fignum <- gets stNextFigureNum unless (null alt) $ modify $ \st -> st{ stNextFigureNum = fignum + 1 } - let figid = "fig" <> tshow fignum + let refid = if T.null imgident + then "ref_fig" <> tshow fignum + else "ref_" <> imgident figname <- translateTerm Term.Figure prop <- pStyleM $ if null alt @@ -869,14 +872,16 @@ blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit then return [] else withParaPropM (pStyleM "Image Caption") $ blockToOpenXML opts - (Para $ Span (figid,[],[]) - [Str (figname <> "\160"), - RawInline (Format "openxml") - ("<w:fldSimple w:instr=\"SEQ Figure" - <> " \\* ARABIC \"><w:r><w:t>" - <> tshow fignum - <> "</w:t></w:r></w:fldSimple>"), - Str ":", Space] : alt) + $ Para + $ if isEnabled Ext_native_numbering opts + then Span (refid,[],[]) + [Str (figname <> "\160"), + RawInline (Format "openxml") + ("<w:fldSimple w:instr=\"SEQ Figure" + <> " \\* ARABIC \"><w:r><w:t>" + <> tshow fignum + <> "</w:t></w:r></w:fldSimple>")] : Str ": " : alt + else alt return $ Elem (mknode "w:p" [] (map Elem paraProps ++ contents)) : captionNode @@ -922,7 +927,8 @@ blockToOpenXML' _ HorizontalRule = do ("o:hralign","center"), ("o:hrstd","t"),("o:hr","t")] () ] blockToOpenXML' opts (Table attr caption colspecs thead tbodies tfoot) = - tableToOpenXML (blocksToOpenXML opts) + tableToOpenXML opts + (blocksToOpenXML opts) (Grid.toTable attr caption colspecs thead tbodies tfoot) blockToOpenXML' opts el | BulletList lst <- el = addOpenXMLList BulletMarker lst @@ -1230,7 +1236,42 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do imgs <- gets stImages let stImage = M.lookup (T.unpack src) imgs - generateImgElt (ident, _, _, img) = + generateImgElt (ident, _fp, mt, img) = do + docprid <- getUniqueId + nvpicprid <- getUniqueId + (blipAttrs, blipContents) <- + case T.takeWhile (/=';') <$> mt of + Just "image/svg+xml" -> do + -- get fallback png + mediabag <- getMediaBag + mbFallback <- + case lookupMedia (T.unpack (src <> ".png")) mediabag of + Just item -> do + id' <- T.unpack . ("rId" <>) <$> getUniqueId + let fp' = "media/" <> id' <> ".png" + let imgdata = (id', + fp', + Just (mediaMimeType item), + BL.toStrict $ mediaContents item) + modify $ \st -> st { stImages = + M.insert fp' imgdata $ stImages st } + return $ Just id' + Nothing -> return Nothing + let extLst = mknode "a:extLst" [] + [ mknode "a:ext" + [("uri","{28A0092B-C50C-407E-A947-70E740481C1C}")] + [ mknode "a14:useLocalDpi" + [("xmlns:a14","http://schemas.microsoft.com/office/drawing/2010/main"), + ("val","0")] () ] + , mknode "a:ext" + [("uri","{96DAC541-7B7A-43D3-8B79-37D633B846F1}")] + [ mknode "asvg:svgBlip" + [("xmlns:asvg", "http://schemas.microsoft.com/office/drawing/2016/SVG/main"), + ("r:embed",T.pack ident)] () ] + ] + return (maybe [] (\id'' -> [("r:embed", T.pack id'')]) mbFallback, + [extLst]) + _ -> return ([("r:embed", T.pack ident)], []) let (xpt,ypt) = desiredSizeInPoints opts attr (either (const def) id (imageSize opts img)) @@ -1242,10 +1283,12 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do ,("noChangeAspect","1")] () nvPicPr = mknode "pic:nvPicPr" [] [ mknode "pic:cNvPr" - [("descr",src),("id","0"),("name","Picture")] () + [("descr",src) + ,("id", nvpicprid) + ,("name","Picture")] () , cNvPicPr ] blipFill = mknode "pic:blipFill" [] - [ mknode "a:blip" [("r:embed",T.pack ident)] () + [ mknode "a:blip" blipAttrs blipContents , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] @@ -1279,16 +1322,15 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do , mknode "wp:docPr" [ ("descr", stringify alt) , ("title", title) - , ("id","1") + , ("id", docprid) , ("name","Picture") ] () , graphic ] - in - imgElt + return [Elem imgElt] wrapBookmark imgident =<< case stImage of - Just imgData -> return [Elem $ generateImgElt imgData] + Just imgData -> generateImgElt imgData Nothing -> ( do --try (img, mt) <- P.fetchItem src ident <- ("rId" <>) <$> getUniqueId @@ -1317,7 +1359,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do else do -- insert mime type to use in constructing [Content_Types].xml modify $ \st -> st { stImages = M.insert (T.unpack src) imgData $ stImages st } - return [Elem $ generateImgElt imgData] + generateImgElt imgData ) `catchError` ( \e -> do report $ CouldNotFetchResource src $ T.pack (show e) diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index 7a84c5278..4dc4ad6a2 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -20,6 +20,8 @@ import Text.Pandoc.Definition import Text.Pandoc.Class.PandocMonad (PandocMonad, translateTerm) import Text.Pandoc.Writers.Docx.Types import Text.Pandoc.Shared +import Text.Pandoc.Options (WriterOptions, isEnabled) +import Text.Pandoc.Extensions (Extension(Ext_native_numbering)) import Text.Printf (printf) import Text.Pandoc.Writers.GridTable hiding (Table) import Text.Pandoc.Writers.OOXML @@ -29,10 +31,11 @@ import qualified Text.Pandoc.Translations as Term import qualified Text.Pandoc.Writers.GridTable as Grid tableToOpenXML :: PandocMonad m - => ([Block] -> WS m [Content]) + => WriterOptions + -> ([Block] -> WS m [Content]) -> Grid.Table -> WS m [Content] -tableToOpenXML blocksToOpenXML gridTable = do +tableToOpenXML opts blocksToOpenXML gridTable = do setFirstPara let (Grid.Table (ident,_,_) caption colspecs _rowheads thead tbodies tfoot) = gridTable @@ -50,7 +53,9 @@ tableToOpenXML blocksToOpenXML gridTable = do then return [] else withParaPropM (pStyleM "Table Caption") $ blocksToOpenXML - $ addLabel tableid tablename tablenum captionBlocks + $ if isEnabled Ext_native_numbering opts + then addLabel tableid tablename tablenum captionBlocks + else captionBlocks -- We set "in table" after processing the caption, because we don't -- want the "Table Caption" style to be overwritten with "Compact". modify $ \s -> s { stInTable = True } @@ -93,8 +98,8 @@ tableToOpenXML blocksToOpenXML gridTable = do addLabel :: Text -> Text -> Int -> [Block] -> [Block] addLabel tableid tablename tablenum bs = case bs of - (Para ils : rest) -> Para (label : Space : ils) : rest - (Plain ils : rest) -> Plain (label : Space : ils) : rest + (Para ils : rest) -> Para (label : Str ": " : ils) : rest + (Plain ils : rest) -> Plain (label : Str ": " : ils) : rest _ -> Para [label] : bs where label = Span (tableid,[],[]) @@ -103,8 +108,7 @@ addLabel tableid tablename tablenum bs = ("<w:fldSimple w:instr=\"SEQ Table" <> " \\* ARABIC \"><w:r><w:t>" <> tshow tablenum - <> "</w:t></w:r></w:fldSimple>"), - Str ":"] + <> "</w:t></w:r></w:fldSimple>")] -- | Parts of a table data RowType = HeadRow | BodyRow | FootRow diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 602c70ebe..c77f20ec1 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -109,9 +109,7 @@ blockToDokuWiki opts (Plain inlines) = -- title beginning with fig: indicates that the image is a figure -- dokuwiki doesn't support captions - so combine together alt and caption into alt -blockToDokuWiki opts (Para [Image attr txt (src,tgt)]) - | Just tit <- T.stripPrefix "fig:" tgt - = do +blockToDokuWiki opts (SimpleFigure attr txt (src, tit)) = do capt <- if null txt then return "" else (" " <>) `fmap` inlineListToDokuWiki opts txt diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 508fb6a98..d1417ff48 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -32,7 +32,6 @@ import qualified Data.Set as Set import qualified Data.Text as T import Data.Text (Text) import qualified Data.Text.Lazy as TL -import Network.HTTP (urlEncode) import System.FilePath (takeExtension, takeFileName, makeRelative) import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags) import Text.Pandoc.Builder (fromList, setMeta) @@ -45,6 +44,7 @@ import Text.Pandoc.Error import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType) +import Text.Pandoc.Network.HTTP (urlEncode) import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), ObfuscationMethod (NoObfuscation), WrapOption (..), WriterOptions (..)) @@ -79,7 +79,7 @@ data EPUBMetadata = EPUBMetadata{ , epubLanguage :: Text , epubCreator :: [Creator] , epubContributor :: [Creator] - , epubSubject :: [Text] + , epubSubject :: [Subject] , epubDescription :: Maybe Text , epubType :: Maybe Text , epubFormat :: Maybe Text @@ -121,6 +121,12 @@ data Title = Title{ data ProgressionDirection = LTR | RTL deriving Show +data Subject = Subject{ + subjectText :: Text + , subjectAuthority :: Maybe Text + , subjectTerm :: Maybe Text + } deriving Show + dcName :: Text -> QName dcName n = QName n Nothing (Just "dc") @@ -232,7 +238,11 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md , creatorRole = getAttr "role" , creatorFileAs = getAttr "file-as" } : epubContributor md } - | name == "subject" = md{ epubSubject = strContent e : epubSubject md } + | name == "subject" = md{ epubSubject = + Subject { subjectText = strContent e + , subjectAuthority = getAttr "authority" + , subjectTerm = getAttr "term" + } : epubSubject md } | name == "description" = md { epubDescription = Just $ strContent e } | name == "type" = md { epubType = Just $ strContent e } | name == "format" = md { epubFormat = Just $ strContent e } @@ -313,12 +323,13 @@ getDate s meta = getList s meta handleMetaValue handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv , dateEvent = Nothing } -simpleList :: T.Text -> Meta -> [Text] -simpleList s meta = - case lookupMeta s meta of - Just (MetaList xs) -> map metaValueToString xs - Just x -> [metaValueToString x] - Nothing -> [] +getSubject :: T.Text -> Meta -> [Subject] +getSubject s meta = getList s meta handleMetaValue + where handleMetaValue (MetaMap m) = + Subject{ subjectText = maybe "" metaValueToString $ M.lookup "text" m + , subjectAuthority = metaValueToString <$> M.lookup "authority" m + , subjectTerm = metaValueToString <$> M.lookup "term" m } + handleMetaValue mv = Subject (metaValueToString mv) Nothing Nothing metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata metadataFromMeta opts meta = EPUBMetadata{ @@ -352,7 +363,7 @@ metadataFromMeta opts meta = EPUBMetadata{ lookupMeta "language" meta `mplus` lookupMeta "lang" meta creators = getCreator "creator" meta contributors = getCreator "contributor" meta - subjects = simpleList "subject" meta + subjects = getSubject "subject" meta description = metaValueToString <$> lookupMeta "description" meta epubtype = metaValueToString <$> lookupMeta "type" meta format = metaValueToString <$> lookupMeta "format" meta @@ -659,7 +670,7 @@ pandocToEPUB version opts doc = do "contributors", "other-credits", "errata", "revision-history", "titlepage", "halftitlepage", "seriespage", - "foreword", "preface", + "foreword", "preface", "frontispiece", "seriespage", "titlepage"] backMatterTypes = ["appendix", "colophon", "bibliography", "index"] @@ -974,7 +985,7 @@ metadataElement version md currentTime = epubCreator md contributorNodes = withIds "epub-contributor" (toCreatorNode "contributor") $ epubContributor md - subjectNodes = map (dcTag "subject") $ epubSubject md + subjectNodes = withIds "subject" toSubjectNode $ epubSubject md descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md typeNodes = maybe [] (dcTag' "type") $ epubType md formatNodes = maybe [] (dcTag' "format") $ epubFormat md @@ -1046,6 +1057,16 @@ metadataElement version md currentTime = (("id",id') : maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ dateText date] + toSubjectNode id' subject + | version == EPUB2 = [dcNode "subject" ! + [("id",id')] $ subjectText subject] + | otherwise = (dcNode "subject" ! [("id",id')] $ subjectText subject) + : maybe [] (\x -> (unode "meta" ! + [("refines", "#" <> id'),("property","authority")] $ x) : + maybe [] (\y -> [unode "meta" ! + [("refines", "#" <> id'),("property","term")] $ y]) + (subjectTerm subject)) + (subjectAuthority subject) schemeToOnix :: Text -> Text schemeToOnix "ISBN-10" = "02" schemeToOnix "GTIN-13" = "03" @@ -1137,7 +1158,7 @@ transformInline _opts (Image attr@(_,_,kvs) lab (src,tit)) return $ Image attr lab ("../" <> newsrc, tit) transformInline opts x@(Math t m) | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef (T.unpack url <> urlEncode (T.unpack m)) + newsrc <- modifyMediaRef (T.unpack (url <> urlEncode m)) let mathclass = if t == DisplayMath then "display" else "inline" return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] ("../" <> newsrc, "")] diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 3b5d04427..ce3fe25a9 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -29,7 +29,7 @@ import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE -import Network.HTTP (urlEncode) +import Text.Pandoc.Network.HTTP (urlEncode) import Text.Pandoc.XML.Light as X import Text.Pandoc.Class.PandocMonad (PandocMonad, report) @@ -299,9 +299,8 @@ blockToXml :: PandocMonad m => Block -> FBM m [Content] blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2 blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula -- title beginning with fig: indicates that the image is a figure -blockToXml (Para [Image atr alt (src,tgt)]) - | Just tit <- T.stripPrefix "fig:" tgt - = insertImage NormalImage (Image atr alt (src,tit)) +blockToXml (SimpleFigure atr alt (src, tit)) = + insertImage NormalImage (Image atr alt (src,tit)) blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . map (el "p" . el "code") . T.lines $ s @@ -451,7 +450,7 @@ insertMath immode formula = do case htmlMath of WebTeX url -> do let alt = [Code nullAttr formula] - let imgurl = url <> T.pack (urlEncode $ T.unpack formula) + let imgurl = url <> urlEncode formula let img = Image nullAttr alt (imgurl, "") insertImage immode img _ -> return [el "code" formula] diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 6f91d1965..8c5548196 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -28,7 +28,6 @@ module Text.Pandoc.Writers.HTML ( writeRevealJs, tagWithAttributes ) where -import Control.Monad.Identity (runIdentity) import Control.Monad.State.Strict import Data.Char (ord) import Data.List (intercalate, intersperse, partition, delete, (\\), foldl') @@ -38,10 +37,9 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL -import Network.HTTP (urlEncode) import Network.URI (URI (..), parseURIReference) import Numeric (showHex) -import Text.DocLayout (render, literal) +import Text.DocLayout (render, literal, Doc) import Text.Blaze.Internal (MarkupM (Empty), customLeaf, customParent) import Text.DocTemplates (FromContext (lookupContext), Context (..)) import Text.Blaze.Html hiding (contents) @@ -52,11 +50,12 @@ import Text.Pandoc.ImageSize import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.Slides -import Text.Pandoc.Templates (Template, compileTemplate, renderTemplate) +import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import qualified Text.Pandoc.Writers.AnnotatedTable as Ann +import Text.Pandoc.Network.HTTP (urlEncode) import Text.Pandoc.XML (escapeStringForXML, fromEntities, toEntities, html5Attributes, html4Attributes, rdfaAttributes) import qualified Text.Blaze.XHtml5 as H5 @@ -71,13 +70,16 @@ import Text.Pandoc.Class.PandocPure (runPure) import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.MIME (mediaCategory) +import Text.Pandoc.Writers.Blaze (layoutMarkup) import Text.TeXMath import Text.XML.Light (elChildren, unode, unqual) import qualified Text.XML.Light as XML import Text.XML.Light.Output +import Data.String (fromString) data WriterState = WriterState { stNotes :: [Html] -- ^ List of notes + , stEmittedNotes :: Int -- ^ How many notes we've already pushed out to the HTML , stMath :: Bool -- ^ Math is used in document , stQuotes :: Bool -- ^ <q> tag is used , stHighlighting :: Bool -- ^ Syntax highlighting is used @@ -89,10 +91,11 @@ data WriterState = WriterState , stCodeBlockNum :: Int -- ^ Number of code block , stCsl :: Bool -- ^ Has CSL references , stCslEntrySpacing :: Maybe Int -- ^ CSL entry spacing + , stBlockLevel :: Int -- ^ Current block depth, excluding section divs } defaultWriterState :: WriterState -defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, +defaultWriterState = WriterState {stNotes= [], stEmittedNotes = 0, stMath = False, stQuotes = False, stHighlighting = False, stHtml5 = False, stEPUBVersion = Nothing, @@ -101,7 +104,8 @@ defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, stInSection = False, stCodeBlockNum = 0, stCsl = False, - stCslEntrySpacing = Nothing} + stCslEntrySpacing = Nothing, + stBlockLevel = 0} -- Helpers to render HTML with the appropriate function. @@ -128,10 +132,8 @@ needsVariationSelector '↔' = True needsVariationSelector _ = False -- | Hard linebreak. -nl :: WriterOptions -> Html -nl opts = if writerWrapText opts == WrapNone - then mempty - else preEscapedString "\n" +nl :: Html +nl = preEscapedString "\n" -- | Convert Pandoc document to Html 5 string. writeHtml5String :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -157,7 +159,8 @@ writeHtmlStringForEPUB :: PandocMonad m -> m Text writeHtmlStringForEPUB version o = writeHtmlString' defaultWriterState{ stHtml5 = version == EPUB3, - stEPUBVersion = Just version } o + stEPUBVersion = Just version } + o{ writerWrapText = WrapNone } -- | Convert Pandoc document to Reveal JS HTML slide show. writeRevealJs :: PandocMonad m @@ -204,20 +207,23 @@ writeHtmlString' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Text writeHtmlString' st opts d = do (body, context) <- evalStateT (pandocToHtml opts d) st - let defaultTemplate = fmap (const tocTemplate) (getField "table-of-contents" context :: Maybe Text) - let template = msum [ writerTemplate opts - , defaultTemplate ] + let colwidth = case writerWrapText opts of + WrapAuto -> Just (writerColumns opts) + _ -> Nothing (if writerPreferAscii opts then toEntities else id) <$> - case template of - Nothing -> return $ renderHtml' body + case writerTemplate opts of + Nothing -> return $ + case colwidth of + Nothing -> renderHtml' body -- optimization, skip layout + Just cols -> render (Just cols) $ layoutMarkup body Just tpl -> do -- warn if empty lang when (isNothing (getField "lang" context :: Maybe Text)) $ report NoLangSpecified -- check for empty pagetitle - context' <- + (context' :: Context Text) <- case getField "pagetitle" context of Just (s :: Text) | not (T.null s) -> return context _ -> do @@ -228,9 +234,9 @@ writeHtmlString' st opts d = do Just [] -> "Untitled" Just (x:_) -> takeBaseName $ T.unpack x report $ NoTitleElement fallback - return $ resetField "pagetitle" fallback context - return $ render Nothing $ renderTemplate tpl - (defField "body" (renderHtml' body) context') + return $ resetField "pagetitle" (literal fallback) context + return $ render colwidth $ renderTemplate tpl + (defField "body" (layoutMarkup body) context') writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html writeHtml' st opts d = @@ -243,13 +249,6 @@ writeHtml' st opts d = (body, _) <- evalStateT (pandocToHtml opts d) st return body -wantTOC :: Meta -> Maybe Bool -wantTOC = fmap (== MetaBool True) . lookupMeta "tableOfContents" - -tocTemplate :: Template Text -tocTemplate = either error id . runIdentity . compileTemplate "" $ - "<div class=\"toc\"><h1></h1>$table-of-contents$</div>$body$" - -- result is (title, authors, date, toc, body, new variables) pandocToHtml :: PandocMonad m => WriterOptions @@ -259,13 +258,13 @@ pandocToHtml opts (Pandoc meta blocks) = do let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts modify $ \st -> st{ stSlideLevel = slideLevel } metadata <- metaToContext opts - (fmap (literal . renderHtml') . blockListToHtml opts) - (fmap (literal . renderHtml') . inlineListToHtml opts) + (fmap layoutMarkup . blockListToHtml opts) + (fmap layoutMarkup . inlineListToHtml opts) meta let stringifyHTML = escapeStringForXML . stringify - let authsMeta = map stringifyHTML $ docAuthors meta + let authsMeta = map (literal . stringifyHTML) $ docAuthors meta let dateMeta = stringifyHTML $ docDate meta - let descriptionMeta = escapeStringForXML $ + let descriptionMeta = literal $ escapeStringForXML $ lookupMetaString "description" meta slideVariant <- gets stSlideVariant let sects = adjustNumbers opts $ @@ -273,15 +272,22 @@ pandocToHtml opts (Pandoc meta blocks) = do if slideVariant == NoSlides then blocks else prepSlides slideLevel blocks - let withTOC = fromMaybe (writerTableOfContents opts) (wantTOC meta) - toc <- if withTOC && slideVariant /= S5Slides - then fmap renderHtml' <$> tableOfContents opts sects + toc <- if writerTableOfContents opts && slideVariant /= S5Slides + then fmap layoutMarkup <$> tableOfContents opts sects else return Nothing blocks' <- blockListToHtml opts sects + notes <- do + -- make the st private just to be safe, since we modify it right afterwards + st <- get + if null (stNotes st) + then return mempty + else do + notes <- footnoteSection EndOfDocument (stEmittedNotes st + 1) (reverse (stNotes st)) + modify (\st' -> st'{ stNotes = mempty, stEmittedNotes = stEmittedNotes st' + length (stNotes st') }) + return notes st <- get - notes <- footnoteSection opts (reverse (stNotes st)) let thebody = blocks' >> notes - let math = case writerHTMLMathMethod opts of + let math = layoutMarkup $ case writerHTMLMathMethod opts of MathJax url | slideVariant /= RevealJsSlides -> -- mathjax is handled via a special plugin in revealjs @@ -295,10 +301,10 @@ pandocToHtml opts (Pandoc meta blocks) = do KaTeX url -> do H.script ! A.src (toValue $ url <> "katex.min.js") $ mempty - nl opts + nl let katexFlushLeft = case lookupContext "classoption" metadata of - Just clsops | "fleqn" `elem` (clsops :: [Text]) -> "true" + Just clsops | "fleqn" `elem` (clsops :: [Doc Text]) -> "true" _ -> "false" H.script $ text $ T.unlines [ "document.addEventListener(\"DOMContentLoaded\", function () {" @@ -315,7 +321,7 @@ pandocToHtml opts (Pandoc meta blocks) = do , " });" , "}}});" ] - nl opts + nl H.link ! A.rel "stylesheet" ! A.href (toValue $ url <> "katex.min.css") @@ -324,15 +330,16 @@ pandocToHtml opts (Pandoc meta blocks) = do Just s | not (stHtml5 st) -> H.script ! A.type_ "text/javascript" $ preEscapedString - ("/*<![CDATA[*/\n" ++ T.unpack s ++ + ("/*<![CDATA[*/\n" <> T.unpack s <> "/*]]>*/\n") | otherwise -> mempty Nothing -> mempty let mCss :: Maybe [Text] = lookupContext "css" metadata - let context = (if stHighlighting st + let context :: Context Text + context = (if stHighlighting st then case writerHighlightStyle opts of Just sty -> defField "highlighting-css" - (T.pack $ styleToCss sty) + (literal $ T.pack $ styleToCss sty) Nothing -> id else id) . (if stCsl st @@ -342,15 +349,15 @@ pandocToHtml opts (Pandoc meta blocks) = do Just 0 -> id Just n -> defField "csl-entry-spacing" - (tshow n <> "em")) + (literal $ tshow n <> "em")) else id) . (if stMath st - then defField "math" (renderHtml' math) + then defField "math" math else id) . (case writerHTMLMathMethod opts of MathJax u -> defField "mathjax" True . defField "mathjaxurl" - (T.takeWhile (/='?') u) + (literal $ T.takeWhile (/='?') u) _ -> defField "mathjax" False) . (case writerHTMLMathMethod opts of PlainMath -> defField "displaymath-css" True @@ -361,13 +368,14 @@ pandocToHtml opts (Pandoc meta blocks) = do -- template can't distinguish False/undefined defField "controls" True . defField "controlsTutorial" True . - defField "controlsLayout" ("bottom-right" :: Text) . - defField "controlsBackArrows" ("faded" :: Text) . + defField "controlsLayout" + ("bottom-right" :: Doc Text) . + defField "controlsBackArrows" ("faded" :: Doc Text) . defField "progress" True . defField "slideNumber" False . - defField "showSlideNumber" ("all" :: Text) . + defField "showSlideNumber" ("all" :: Doc Text) . defField "hashOneBasedIndex" False . - defField "hash" False . + defField "hash" True . defField "respondToHashChanges" True . defField "history" False . defField "keyboard" True . @@ -377,7 +385,7 @@ pandocToHtml opts (Pandoc meta blocks) = do defField "touch" True . defField "loop" False . defField "rtl" False . - defField "navigationMode" ("default" :: Text) . + defField "navigationMode" ("default" :: Doc Text) . defField "shuffle" False . defField "fragments" True . defField "fragmentInURL" True . @@ -385,22 +393,22 @@ pandocToHtml opts (Pandoc meta blocks) = do defField "help" True . defField "pause" True . defField "showNotes" False . - defField "autoPlayMedia" ("null" :: Text) . - defField "preloadIframes" ("null" :: Text) . - defField "autoSlide" ("0" :: Text) . + defField "autoPlayMedia" ("null" :: Doc Text) . + defField "preloadIframes" ("null" :: Doc Text) . + defField "autoSlide" ("0" :: Doc Text) . defField "autoSlideStoppable" True . - defField "autoSlideMethod" ("null" :: Text) . - defField "defaultTiming" ("null" :: Text) . + defField "autoSlideMethod" ("null" :: Doc Text) . + defField "defaultTiming" ("null" :: Doc Text) . defField "mouseWheel" False . - defField "display" ("block" :: Text) . + defField "display" ("block" :: Doc Text) . defField "hideInactiveCursor" True . - defField "hideCursorTime" ("5000" :: Text) . + defField "hideCursorTime" ("5000" :: Doc Text) . defField "previewLinks" False . - defField "transition" ("slide" :: Text) . - defField "transitionSpeed" ("default" :: Text) . - defField "backgroundTransition" ("fade" :: Text) . - defField "viewDistance" ("3" :: Text) . - defField "mobileViewDistance" ("2" :: Text) + defField "transition" ("slide" :: Doc Text) . + defField "transitionSpeed" ("default" :: Doc Text) . + defField "backgroundTransition" ("fade" :: Doc Text) . + defField "viewDistance" ("3" :: Doc Text) . + defField "mobileViewDistance" ("2" :: Doc Text) else id) . defField "document-css" (isNothing mCss && slideVariant == NoSlides) . defField "quotes" (stQuotes st) . @@ -410,18 +418,18 @@ pandocToHtml opts (Pandoc meta blocks) = do maybe id (defField "toc") toc . maybe id (defField "table-of-contents") toc . defField "author-meta" authsMeta . - maybe id (defField "date-meta") + maybe id (defField "date-meta" . literal) (normalizeDate dateMeta) . defField "description-meta" descriptionMeta . defField "pagetitle" - (stringifyHTML . docTitle $ meta) . - defField "idprefix" (writerIdentifierPrefix opts) . + (literal . stringifyHTML . docTitle $ meta) . + defField "idprefix" (literal $ writerIdentifierPrefix opts) . -- these should maybe be set in pandoc.hs defField "slidy-url" - ("https://www.w3.org/Talks/Tools/Slidy2" :: Text) . - defField "slideous-url" ("slideous" :: Text) . - defField "revealjs-url" ("https://unpkg.com/reveal.js@^4/" :: Text) $ - defField "s5-url" ("s5/default" :: Text) . + ("https://www.w3.org/Talks/Tools/Slidy2" :: Doc Text) . + defField "slideous-url" ("slideous" :: Doc Text) . + defField "revealjs-url" ("https://unpkg.com/reveal.js@^4/" :: Doc Text) $ + defField "s5-url" ("s5/default" :: Doc Text) . defField "html5" (stHtml5 st) $ metadata return (thebody, context) @@ -449,15 +457,15 @@ toList listop opts items = do unordList :: PandocMonad m => WriterOptions -> [Html] -> StateT WriterState m Html -unordList opts = toList H.ul opts . toListItems opts +unordList opts = toList H.ul opts . toListItems ordList :: PandocMonad m => WriterOptions -> [Html] -> StateT WriterState m Html -ordList opts = toList H.ol opts . toListItems opts +ordList opts = toList H.ol opts . toListItems defList :: PandocMonad m => WriterOptions -> [Html] -> StateT WriterState m Html -defList opts items = toList H.dl opts (items ++ [nl opts]) +defList opts items = toList H.dl opts (items ++ [nl]) isTaskListItem :: [Block] -> Bool isTaskListItem (Plain (Str "☐":Space:_):_) = True @@ -479,7 +487,7 @@ listItemToHtml opts bls let checkbox = if checked then checkbox' ! A.checked "" else checkbox' - checkbox' = H.input ! A.type_ "checkbox" ! A.disabled "" >> nl opts + checkbox' = H.input ! A.type_ "checkbox" ! A.disabled "" >> nl isContents <- inlineListToHtml opts is bsContents <- blockListToHtml opts bs return $ constr (checkbox >> isContents) >> bsContents @@ -502,28 +510,45 @@ tableOfContents opts sects = do -- | Convert list of Note blocks to a footnote <div>. -- Assumes notes are sorted. -footnoteSection :: PandocMonad m - => WriterOptions -> [Html] -> StateT WriterState m Html -footnoteSection opts notes = do +footnoteSection :: + PandocMonad m => ReferenceLocation -> Int -> [Html] -> StateT WriterState m Html +footnoteSection refLocation startCounter notes = do html5 <- gets stHtml5 slideVariant <- gets stSlideVariant - let hrtag = if html5 then H5.hr else H.hr + let hrtag = if refLocation /= EndOfBlock + then (if html5 then H5.hr else H.hr) <> nl + else mempty + let additionalClassName = case refLocation of + EndOfBlock -> "footnotes-end-of-block" + EndOfDocument -> "footnotes-end-of-document" + EndOfSection -> "footnotes-end-of-section" + let className = "footnotes " <> additionalClassName epubVersion <- gets stEPUBVersion let container x | html5 , epubVersion == Just EPUB3 - = H5.section ! A.class_ "footnotes" + = H5.section ! A.class_ className ! customAttribute "epub:type" "footnotes" $ x - | html5 = H5.section ! A.class_ "footnotes" + | html5 = H5.section ! A.class_ className ! customAttribute "role" "doc-endnotes" $ x | slideVariant /= NoSlides = H.div ! A.class_ "footnotes slide" $ x - | otherwise = H.div ! A.class_ "footnotes" $ x + | otherwise = H.div ! A.class_ className $ x return $ if null notes then mempty - else nl opts >> container (nl opts >> hrtag >> nl opts >> - H.ol (mconcat notes >> nl opts) >> nl opts) + else do + nl + container $ do + nl + hrtag + -- Keep the previous output exactly the same if we don't + -- have multiple notes sections + if startCounter == 1 + then H.ol $ mconcat notes >> nl + else H.ol ! A.start (fromString (show startCounter)) $ + mconcat notes >> nl + nl -- | Parse a mailto link; return Just (name, domain) or Nothing. parseMailto :: Text -> Maybe (Text, Text) @@ -618,6 +643,7 @@ toAttrs kvs = do return (keys, attrs) else return (Set.insert k keys, addAttr html5 mbEpubVersion k v attrs) addAttr html5 mbEpubVersion x y + | T.null x = id -- see #7546 | html5 = if x `Set.member` (html5Attributes <> rdfaAttributes) || T.any (== ':') x -- e.g. epub: namespace @@ -689,12 +715,11 @@ figure opts attr@(_, _, attrList) txt (s,tit) = do img <- inlineToHtml opts (Image attr alt (s,tit)) capt <- if null txt then return mempty - else tocapt `fmap` inlineListToHtml opts txt + else (nl <>) . tocapt <$> inlineListToHtml opts txt + let inner = mconcat [nl, img, capt, nl] return $ if html5 - then H5.figure $ mconcat - [nl opts, img, capt, nl opts] - else H.div ! A.class_ "figure" $ mconcat - [nl opts, img, nl opts, capt, nl opts] + then H5.figure inner + else H.div ! A.class_ "figure" $ inner adjustNumbers :: WriterOptions -> [Block] -> [Block] @@ -714,11 +739,10 @@ adjustNumbers opts doc = fixnum x = x showSecNum = T.intercalate "." . map tshow --- | Convert Pandoc block element to HTML. -blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html -blockToHtml _ Null = return mempty -blockToHtml opts (Plain lst) = inlineListToHtml opts lst -blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)]) +blockToHtmlInner :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html +blockToHtmlInner _ Null = return mempty +blockToHtmlInner opts (Plain lst) = inlineListToHtml opts lst +blockToHtmlInner opts (Para [Image attr@(_,classes,_) txt (src,tit)]) | "stretch" `elem` classes = do slideVariant <- gets stSlideVariant case slideVariant of @@ -728,20 +752,20 @@ blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)]) inlineToHtml opts (Image attr txt (src, tit)) _ -> figure opts attr txt (src, tit) -- title beginning with fig: indicates that the image is a figure -blockToHtml opts (Para [Image attr txt (s,T.stripPrefix "fig:" -> Just tit)]) = - figure opts attr txt (s,tit) -blockToHtml opts (Para lst) = do +blockToHtmlInner opts (SimpleFigure attr caption (src, title)) = + figure opts attr caption (src, title) +blockToHtmlInner opts (Para lst) = do contents <- inlineListToHtml opts lst case contents of Empty _ | not (isEnabled Ext_empty_paragraphs opts) -> return mempty _ -> return $ H.p contents -blockToHtml opts (LineBlock lns) = +blockToHtmlInner opts (LineBlock lns) = if writerWrapText opts == WrapNone then blockToHtml opts $ linesToPara lns else do htmlLines <- inlineListToHtml opts $ intercalate [LineBreak] lns return $ H.div ! A.class_ "line-block" $ htmlLines -blockToHtml opts (Div (ident, "section":dclasses, dkvs) +blockToHtmlInner opts (Div (ident, "section":dclasses, dkvs) (Header level hattr@(hident,hclasses,hkvs) ils : xs)) = do slideVariant <- gets stSlideVariant @@ -796,33 +820,33 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs) if titleSlide then do t <- addAttrs opts attr $ - secttag $ nl opts <> header' <> nl opts <> titleContents <> nl opts + secttag $ nl <> header' <> nl <> titleContents <> nl -- ensure 2D nesting for revealjs, but only for one level; -- revealjs doesn't like more than one level of nesting return $ if slideVariant == RevealJsSlides && not inSection && not (null innerSecs) - then H5.section (nl opts <> t <> nl opts <> innerContents) - else t <> nl opts <> if null innerSecs + then H5.section (nl <> t <> nl <> innerContents) + else t <> nl <> if null innerSecs then mempty - else innerContents <> nl opts + else innerContents <> nl else if writerSectionDivs opts || slide || (hident /= ident && not (T.null hident || T.null ident)) || (hclasses /= dclasses) || (hkvs /= dkvs) then addAttrs opts attr $ secttag - $ nl opts <> header' <> nl opts <> + $ nl <> header' <> nl <> if null innerSecs then mempty - else innerContents <> nl opts + else innerContents <> nl else do let attr' = (ident, classes' \\ hclasses, dkvs \\ hkvs) t <- addAttrs opts attr' header' return $ t <> if null innerSecs then mempty - else nl opts <> innerContents -blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do + else nl <> innerContents +blockToHtmlInner opts (Div attr@(ident, classes, kvs') bs) = do html5 <- gets stHtml5 slideVariant <- gets stSlideVariant let isCslBibBody = ident == "refs" || "csl-bib-body" `elem` classes @@ -859,7 +883,7 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do -- off widths! see #4028 mconcat <$> mapM (blockToHtml opts) bs' else blockListToHtml opts' bs' - let contents' = nl opts >> contents >> nl opts + let contents' = nl >> contents >> nl let (divtag, classes'') = if html5 && "section" `elem` classes' then (H5.section, filter (/= "section") classes') else (H.div, classes') @@ -876,7 +900,7 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do _ -> return mempty else addAttrs opts (ident, classes'', kvs) $ divtag contents' -blockToHtml opts (RawBlock f str) = do +blockToHtmlInner opts (RawBlock f str) = do ishtml <- isRawHtml f if ishtml then return $ preEscapedText str @@ -887,10 +911,10 @@ blockToHtml opts (RawBlock f str) = do else do report $ BlockNotRendered (RawBlock f str) return mempty -blockToHtml _ HorizontalRule = do +blockToHtmlInner _ HorizontalRule = do html5 <- gets stHtml5 return $ if html5 then H5.hr else H.hr -blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do +blockToHtmlInner opts (CodeBlock (id',classes,keyvals) rawCode) = do id'' <- if T.null id' then do modify $ \st -> st{ stCodeBlockNum = stCodeBlockNum st + 1 } @@ -922,7 +946,7 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do -- we set writerIdentifierPrefix to "" since id'' already -- includes it: addAttrs opts{writerIdentifierPrefix = ""} (id'',[],keyvals) h -blockToHtml opts (BlockQuote blocks) = do +blockToHtmlInner opts (BlockQuote blocks) = do -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; -- otherwise incremental @@ -940,11 +964,11 @@ blockToHtml opts (BlockQuote blocks) = do (DefinitionList lst) _ -> do contents <- blockListToHtml opts blocks return $ H.blockquote - $ nl opts >> contents >> nl opts + $ nl >> contents >> nl else do contents <- blockListToHtml opts blocks - return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level (ident,classes,kvs) lst) = do + return $ H.blockquote $ nl >> contents >> nl +blockToHtmlInner opts (Header level (ident,classes,kvs) lst) = do contents <- inlineListToHtml opts lst let secnum = fromMaybe mempty $ lookup "number" kvs let contents' = if writerNumberSections opts && not (T.null secnum) @@ -967,12 +991,12 @@ blockToHtml opts (Header level (ident,classes,kvs) lst) = do 5 -> H.h5 contents' 6 -> H.h6 contents' _ -> H.p ! A.class_ "heading" $ contents' -blockToHtml opts (BulletList lst) = do +blockToHtmlInner opts (BulletList lst) = do contents <- mapM (listItemToHtml opts) lst let isTaskList = not (null lst) && all isTaskListItem lst (if isTaskList then (! A.class_ "task-list") else id) <$> unordList opts contents -blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do +blockToHtmlInner opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (listItemToHtml opts) lst html5 <- gets stHtml5 let numstyle' = case numstyle of @@ -995,17 +1019,47 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do else []) l <- ordList opts contents return $ foldl' (!) l attribs -blockToHtml opts (DefinitionList lst) = do +blockToHtmlInner opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- liftM H.dt $ inlineListToHtml opts term - defs' <- mapM (liftM (\x -> H.dd (x >> nl opts)) . + defs' <- mapM (liftM (\x -> H.dd (nl >> x >> nl)) . blockListToHtml opts) defs - return $ mconcat $ nl opts : term' : nl opts : - intersperse (nl opts) defs') lst + return $ mconcat $ nl : term' : nl : + intersperse (nl) defs') lst defList opts contents -blockToHtml opts (Table attr caption colspecs thead tbody tfoot) = +blockToHtmlInner opts (Table attr caption colspecs thead tbody tfoot) = tableToHtml opts (Ann.toTable attr caption colspecs thead tbody tfoot) +-- | Convert Pandoc block element to HTML. All the legwork is done by +-- 'blockToHtmlInner', this just takes care of emitting the notes after +-- the block if necessary. +blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html +blockToHtml opts block = do + -- Ignore inserted section divs -- they are not blocks as they came from + -- the document itself (at least not when coming from markdown) + let isSection = case block of + Div (_, classes, _) _ | "section" `elem` classes -> True + _ -> False + let increaseLevel = not isSection + when increaseLevel $ + modify (\st -> st{ stBlockLevel = stBlockLevel st + 1 }) + doc <- blockToHtmlInner opts block + st <- get + let emitNotes = + (writerReferenceLocation opts == EndOfBlock && stBlockLevel st == 1) || + (writerReferenceLocation opts == EndOfSection && isSection) + res <- if emitNotes + then do + notes <- if null (stNotes st) + then return mempty + else footnoteSection (writerReferenceLocation opts) (stEmittedNotes st + 1) (reverse (stNotes st)) + modify (\st' -> st'{ stNotes = mempty, stEmittedNotes = stEmittedNotes st' + length (stNotes st') }) + return (doc <> notes) + else return doc + when increaseLevel $ + modify (\st' -> st'{ stBlockLevel = stBlockLevel st' - 1 }) + return res + tableToHtml :: PandocMonad m => WriterOptions -> Ann.Table @@ -1017,10 +1071,10 @@ tableToHtml opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do cs <- blockListToHtml opts longCapt return $ do H.caption cs - nl opts - coltags <- colSpecListToHtml opts colspecs + nl + coltags <- colSpecListToHtml colspecs head' <- tableHeadToHtml opts thead - bodies <- intersperse (nl opts) <$> mapM (tableBodyToHtml opts) tbodies + bodies <- intersperse (nl) <$> mapM (tableBodyToHtml opts) tbodies foot' <- tableFootToHtml opts tfoot let (ident,classes,kvs) = attr -- When widths of columns are < 100%, we need to set width for the whole @@ -1037,13 +1091,13 @@ tableToHtml opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do <> "%;"):kvs) _ -> attr addAttrs opts attr' $ H.table $ do - nl opts + nl captionDoc coltags head' mconcat bodies foot' - nl opts + nl tableBodyToHtml :: PandocMonad m => WriterOptions @@ -1090,7 +1144,7 @@ tablePartToHtml opts tblpart attr rows = tablePartElement <- addAttrs opts attr $ tag' contents return $ do tablePartElement - nl opts + nl where isEmptyRow (Ann.HeaderRow _attr _rownum cells) = all isEmptyCell cells isEmptyCell (Ann.Cell _colspecs _colnum cell) = @@ -1131,14 +1185,13 @@ rowListToHtml :: PandocMonad m -> [TableRow] -> StateT WriterState m Html rowListToHtml opts rows = - (\x -> nl opts *> mconcat x) <$> + (\x -> nl *> mconcat x) <$> mapM (tableRowToHtml opts) rows colSpecListToHtml :: PandocMonad m - => WriterOptions - -> [ColSpec] + => [ColSpec] -> StateT WriterState m Html -colSpecListToHtml opts colspecs = do +colSpecListToHtml colspecs = do html5 <- gets stHtml5 let hasDefaultWidth (_, ColWidthDefault) = True hasDefaultWidth _ = False @@ -1152,16 +1205,16 @@ colSpecListToHtml opts colspecs = do ColWidth w -> if html5 then A.style (toValue $ "width: " <> percent w) else A.width (toValue $ percent w) - nl opts + nl return $ if all hasDefaultWidth colspecs then mempty else do H.colgroup $ do - nl opts + nl mapM_ (col . snd) colspecs - nl opts + nl tableRowToHtml :: PandocMonad m => WriterOptions @@ -1180,12 +1233,12 @@ tableRowToHtml opts (TableRow tblpart attr rownum rowhead rowbody) = do headcells <- mapM (cellToHtml opts HeaderCell) rowhead bodycells <- mapM (cellToHtml opts celltype) rowbody rowHtml <- addAttrs opts attr' $ H.tr $ do - nl opts + nl mconcat headcells mconcat bodycells return $ do rowHtml - nl opts + nl alignmentToString :: Alignment -> Maybe Text alignmentToString = \case @@ -1243,18 +1296,18 @@ tableCellToHtml opts ctype colAlign (Cell attr align rowspan colspan item) = do : otherAttribs return $ do tag' ! attribs $ contents - nl opts + nl -toListItems :: WriterOptions -> [Html] -> [Html] -toListItems opts items = map (toListItem opts) items ++ [nl opts] +toListItems :: [Html] -> [Html] +toListItems items = map toListItem items ++ [nl] -toListItem :: WriterOptions -> Html -> Html -toListItem opts item = nl opts *> H.li item +toListItem :: Html -> Html +toListItem item = nl *> H.li item blockListToHtml :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Html blockListToHtml opts lst = - mconcat . intersperse (nl opts) . filter nonempty + mconcat . intersperse (nl) . filter nonempty <$> mapM (blockToHtml opts) lst where nonempty (Empty _) = False nonempty _ = True @@ -1286,9 +1339,9 @@ inlineToHtml opts inline = do (Str str) -> return $ strToHtml str Space -> return $ strToHtml " " SoftBreak -> return $ case writerWrapText opts of - WrapNone -> preEscapedText " " - WrapAuto -> preEscapedText " " - WrapPreserve -> preEscapedText "\n" + WrapNone -> " " + WrapAuto -> " " + WrapPreserve -> nl LineBreak -> return $ do if html5 then H5.br else H.br strToHtml "\n" @@ -1389,7 +1442,7 @@ inlineToHtml opts inline = do InlineMath -> "\\textstyle " DisplayMath -> "\\displaystyle " return $ imtag ! A.style "vertical-align:middle" - ! A.src (toValue $ url <> T.pack (urlEncode (T.unpack $ s <> str))) + ! A.src (toValue . (url <>) . urlEncode $ s <> str) ! A.alt (toValue str) ! A.title (toValue str) ! A.class_ mathClass @@ -1424,13 +1477,17 @@ inlineToHtml opts inline = do ishtml <- isRawHtml f if ishtml then return $ preEscapedText str - else if (f == Format "latex" || f == Format "tex") && - allowsMathEnvironments (writerHTMLMathMethod opts) && - isMathEnvironment str - then inlineToHtml opts $ Math DisplayMath str - else do - report $ InlineNotRendered inline - return mempty + else do + let istex = f == Format "latex" || f == Format "tex" + let mm = writerHTMLMathMethod opts + case istex of + True + | allowsMathEnvironments mm && isMathEnvironment str + -> inlineToHtml opts $ Math DisplayMath str + | allowsRef mm && isRef str + -> inlineToHtml opts $ Math InlineMath str + _ -> do report $ InlineNotRendered inline + return mempty (Link attr txt (s,_)) | "mailto:" `T.isPrefixOf` s -> do linkText <- inlineListToHtml opts txt obfuscateLink opts attr linkText s @@ -1480,7 +1537,8 @@ inlineToHtml opts inline = do -- note: null title included, as in Markdown.pl (Note contents) -> do notes <- gets stNotes - let number = length notes + 1 + emittedNotes <- gets stEmittedNotes + let number = emittedNotes + length notes + 1 let ref = tshow number htmlContents <- blockListToNote opts ref contents epubVersion <- gets stEPUBVersion @@ -1548,7 +1606,7 @@ blockListToNote opts ref blocks = do _ | html5 -> noteItem ! customAttribute "role" "doc-endnote" _ -> noteItem - return $ nl opts >> noteItem' + return $ nl >> noteItem' inDiv :: PandocMonad m=> Text -> Html -> StateT WriterState m Html inDiv cls x = do @@ -1557,6 +1615,9 @@ inDiv cls x = do (if html5 then H5.div else H.div) x ! A.class_ (toValue cls) +isRef :: Text -> Bool +isRef t = "\\ref{" `T.isPrefixOf` t || "\\eqref{" `T.isPrefixOf` t + isMathEnvironment :: Text -> Bool isMathEnvironment s = "\\begin{" `T.isPrefixOf` s && envName `elem` mathmlenvs @@ -1591,10 +1652,15 @@ isMathEnvironment s = "\\begin{" `T.isPrefixOf` s && allowsMathEnvironments :: HTMLMathMethod -> Bool allowsMathEnvironments (MathJax _) = True +allowsMathEnvironments (KaTeX _) = True allowsMathEnvironments MathML = True allowsMathEnvironments (WebTeX _) = True allowsMathEnvironments _ = False +allowsRef :: HTMLMathMethod -> Bool +allowsRef (MathJax _) = True +allowsRef _ = False + -- | List of intrinsic event attributes allowed on all elements in HTML4. intrinsicEventsHTML4 :: [Text] intrinsicEventsHTML4 = diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 75e14714b..dfd89bc54 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -98,8 +98,7 @@ blockToHaddock opts (Plain inlines) = do contents <- inlineListToHaddock opts inlines return $ contents <> cr -- title beginning with fig: indicates figure -blockToHaddock opts (Para [Image attr alt (src,tgt)]) - | Just tit <- T.stripPrefix "fig:" tgt +blockToHaddock opts (SimpleFigure attr alt (src, tit)) = blockToHaddock opts (Para [Image attr alt (src,tit)]) blockToHaddock opts (Para inlines) = -- TODO: if it contains linebreaks, we need to use a @...@ block diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index c254fbc58..ea6009fd1 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -1,7 +1,6 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ICML @@ -309,9 +308,8 @@ blocksToICML opts style lst = do -- | Convert a Pandoc block element to ICML. blockToICML :: PandocMonad m => WriterOptions -> Style -> Block -> WS m (Doc Text) blockToICML opts style (Plain lst) = parStyle opts style "" lst --- title beginning with fig: indicates that the image is a figure -blockToICML opts style (Para img@[Image _ txt (_,Text.stripPrefix "fig:" -> Just _)]) = do - figure <- parStyle opts (figureName:style) "" img +blockToICML opts style (SimpleFigure attr txt (src, tit)) = do + figure <- parStyle opts (figureName:style) "" [Image attr txt (src, tit)] caption <- parStyle opts (imgCaptionName:style) "" txt return $ intersperseBrs [figure, caption] blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) "" lst diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs index 2613851c5..47c6e6966 100644 --- a/src/Text/Pandoc/Writers/Ipynb.hs +++ b/src/Text/Pandoc/Writers/Ipynb.hs @@ -37,6 +37,8 @@ import qualified Data.ByteString.Lazy as BL import Data.Aeson.Encode.Pretty (Config(..), defConfig, encodePretty', keyOrder, Indent(Spaces)) import Text.DocLayout (literal) +import Text.Pandoc.UUID (getRandomUUID) +import Data.Char (isAscii, isAlphaNum) writeIpynb :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeIpynb opts d = do @@ -49,7 +51,7 @@ writeIpynb opts d = do "cell_type", "output_type", "execution_count", "metadata", "outputs", "source", - "data", "name", "text" ] } + "data", "name", "text" ] <> compare } $ notebook pandocToNotebook :: PandocMonad m @@ -79,7 +81,7 @@ pandocToNotebook opts (Pandoc meta blocks) = do let metadata = case fromJSON metadata' of Error _ -> mempty -- TODO warning here? shouldn't happen Success x -> x - cells <- extractCells opts blocks + cells <- extractCells nbformat opts blocks return $ Notebook{ notebookMetadata = metadata , notebookFormat = nbformat @@ -97,23 +99,26 @@ addAttachment (Image attr lab (src,tit)) return $ Image attr lab ("attachment:" <> src, tit) addAttachment x = return x -extractCells :: PandocMonad m => WriterOptions -> [Block] -> m [Ipynb.Cell a] -extractCells _ [] = return [] -extractCells opts (Div (_id,classes,kvs) xs : bs) +extractCells :: PandocMonad m + => (Int, Int) -> WriterOptions -> [Block] -> m [Ipynb.Cell a] +extractCells _ _ [] = return [] +extractCells nbformat opts (Div (ident,classes,kvs) xs : bs) | "cell" `elem` classes , "markdown" `elem` classes = do let meta = pairsToJSONMeta kvs (newdoc, attachments) <- runStateT (walkM addAttachment (Pandoc nullMeta xs)) mempty source <- writeMarkdown opts{ writerTemplate = Nothing } newdoc + uuid <- uuidFrom nbformat ident (Ipynb.Cell{ cellType = Markdown + , cellId = uuid , cellSource = Source $ breakLines $ T.stripEnd source , cellMetadata = meta , cellAttachments = if M.null attachments then Nothing - else Just attachments } :) - <$> extractCells opts bs + else Just $ MimeAttachments attachments } :) + <$> extractCells nbformat opts bs | "cell" `elem` classes , "code" `elem` classes = do let (codeContent, rest) = @@ -123,14 +128,16 @@ extractCells opts (Div (_id,classes,kvs) xs : bs) let meta = pairsToJSONMeta kvs outputs <- catMaybes <$> mapM blockToOutput rest let exeCount = lookup "execution_count" kvs >>= safeRead + uuid <- uuidFrom nbformat ident (Ipynb.Cell{ cellType = Ipynb.Code { codeExecutionCount = exeCount , codeOutputs = outputs } + , cellId = uuid , cellSource = Source $ breakLines codeContent , cellMetadata = meta - , cellAttachments = Nothing } :) <$> extractCells opts bs + , cellAttachments = Nothing } :) <$> extractCells nbformat opts bs | "cell" `elem` classes , "raw" `elem` classes = case consolidateAdjacentRawBlocks xs of @@ -138,38 +145,66 @@ extractCells opts (Div (_id,classes,kvs) xs : bs) let format' = case T.toLower f of "html" -> "text/html" + "html4" -> "text/html" + "html5" -> "text/html" + "s5" -> "text/html" + "slidy" -> "text/html" + "slideous" -> "text/html" + "dzslides" -> "text/html" "revealjs" -> "text/html" "latex" -> "text/latex" "markdown" -> "text/markdown" - "rst" -> "text/x-rst" + "rst" -> "text/restructuredtext" + "asciidoc" -> "text/asciidoc" _ -> f + uuid <- uuidFrom nbformat ident (Ipynb.Cell{ cellType = Raw + , cellId = uuid , cellSource = Source $ breakLines raw , cellMetadata = if format' == "ipynb" -- means no format given then mempty - else M.insert "format" + else JSONMeta $ M.insert "raw_mimetype" (Aeson.String format') mempty - , cellAttachments = Nothing } :) <$> extractCells opts bs - _ -> extractCells opts bs -extractCells opts (CodeBlock (_id,classes,kvs) raw : bs) + , cellAttachments = Nothing } :) <$> extractCells nbformat opts bs + _ -> extractCells nbformat opts bs +extractCells nbformat opts (CodeBlock (ident,classes,kvs) raw : bs) | "code" `elem` classes = do let meta = pairsToJSONMeta kvs let exeCount = lookup "execution_count" kvs >>= safeRead + uuid <- uuidFrom nbformat ident (Ipynb.Cell{ cellType = Ipynb.Code { codeExecutionCount = exeCount , codeOutputs = [] } + , cellId = uuid , cellSource = Source $ breakLines raw , cellMetadata = meta - , cellAttachments = Nothing } :) <$> extractCells opts bs -extractCells opts (b:bs) = do + , cellAttachments = Nothing } :) <$> extractCells nbformat opts bs +extractCells nbformat opts (b:bs) = do let isCodeOrDiv (CodeBlock (_,cl,_) _) = "code" `elem` cl isCodeOrDiv (Div (_,cl,_) _) = "cell" `elem` cl isCodeOrDiv _ = False let (mds, rest) = break isCodeOrDiv bs - extractCells opts (Div ("",["cell","markdown"],[]) (b:mds) : rest) + extractCells nbformat opts + (Div ("",["cell","markdown"],[]) (b:mds) : rest) + +-- Return Nothing if nbformat < 4.5. +-- Otherwise construct a UUID, using the existing identifier +-- if it is a valid UUID, otherwise constructing a new one. +uuidFrom :: PandocMonad m => (Int, Int) -> Text -> m (Maybe Text) +uuidFrom nbformat ident = + if nbformat >= (4,5) + then + if isValidUUID ident + then return $ Just ident + else Just . T.pack . drop 9 . show <$> getRandomUUID + else return Nothing + where + isValidUUID t = not (T.null t) && T.length t <= 64 && + T.all isValidUUIDChar t + isValidUUIDChar c = isAscii c && (isAlphaNum c || c == '-' || c == '_') blockToOutput :: PandocMonad m => Block -> m (Maybe (Output a)) blockToOutput (Div (_,["output","stream",sname],_) (CodeBlock _ t:_)) = @@ -218,11 +253,13 @@ extractData bs = do return (M.insert "text/html" (TextualData raw) mmap, meta) go (mmap, meta) (RawBlock (Format "latex") raw) = return (M.insert "text/latex" (TextualData raw) mmap, meta) + go (mmap, meta) (RawBlock (Format "markdown") raw) = + return (M.insert "text/markdown" (TextualData raw) mmap, meta) go (mmap, meta) (Div _ bs') = foldM go (mmap, meta) bs' go (mmap, meta) b = (mmap, meta) <$ report (BlockNotRendered b) pairsToJSONMeta :: [(Text, Text)] -> JSONMeta -pairsToJSONMeta kvs = +pairsToJSONMeta kvs = JSONMeta $ M.fromList [(k, case Aeson.decode (UTF8.fromTextLazy $ TL.fromStrict v) of Just val -> val Nothing -> String v) diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 9db8723d1..799fe29fa 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -291,9 +291,7 @@ blockToJATS opts (Header _ _ title) = do return $ inTagsSimple "title" title' -- No Plain, everything needs to be in a block-level tag blockToJATS opts (Plain lst) = blockToJATS opts (Para lst) --- title beginning with fig: indicates that the image is a figure -blockToJATS opts (Para [Image (ident,_,kvs) txt - (src,T.stripPrefix "fig:" -> Just tit)]) = do +blockToJATS opts (SimpleFigure (ident, _, kvs) txt (src, tit)) = do alt <- inlinesToJATS opts txt let (maintype, subtype) = imageMimeType src kvs let capt = if null txt @@ -553,6 +551,7 @@ inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do return $ selfClosingTag "inline-graphic" attr isParaOrList :: Block -> Bool +isParaOrList SimpleFigure{} = False -- implicit figures are not paragraphs isParaOrList Para{} = True isParaOrList Plain{} = True isParaOrList BulletList{} = True diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs index 5b19fd034..b00875a7c 100644 --- a/src/Text/Pandoc/Writers/JATS/References.hs +++ b/src/Text/Pandoc/Writers/JATS/References.hs @@ -70,6 +70,7 @@ referenceToJATS _opts ref = do , "pages" `varInTag` "page-range" , "ISBN" `varInTag` "isbn" , "ISSN" `varInTag` "issn" + , "URL" `varInTag` "uri" , varInTagWith "doi" "pub-id" [("pub-id-type", "doi")] , varInTagWith "pmid" "pub-id" [("pub-id-type", "pmid")] ] diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 063e347fb..f8847aa08 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -21,15 +21,13 @@ module Text.Pandoc.Writers.LaTeX ( ) where import Control.Monad.State.Strict import Data.Char (isDigit) -import Data.List (intersperse, nubBy, (\\)) +import Data.List (intersperse, (\\)) import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing) -import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T import Network.URI (unEscapeString) -import Text.DocTemplates (FromContext(lookupContext), renderTemplate, - Val(..), Context(..)) -import Text.Collate.Lang (Lang (..), renderLang) +import Text.DocTemplates (FromContext(lookupContext), renderTemplate) +import Text.Collate.Lang (renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, @@ -46,7 +44,7 @@ import Text.Pandoc.Writers.LaTeX.Table (tableToLaTeX) import Text.Pandoc.Writers.LaTeX.Citation (citationsToNatbib, citationsToBiblatex) import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState) -import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossia, toBabel) +import Text.Pandoc.Writers.LaTeX.Lang (toBabel) import Text.Pandoc.Writers.LaTeX.Util (stringToLaTeX, StringContext(..), toLabel, inCmd, wrapDiv, hypertarget, labelFor, @@ -132,12 +130,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do ,("tmargin","margin-top") ,("bmargin","margin-bottom") ] - let toPolyObj :: Lang -> Val Text - toPolyObj lang = MapVal $ Context $ - M.fromList [ ("name" , SimpleVal $ literal name) - , ("options" , SimpleVal $ literal opts) ] - where - (name, opts) = toPolyglossia lang mblang <- toLang $ case getLang options meta of Just l -> Just l Nothing | null docLangs -> Nothing @@ -216,36 +208,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do (literal $ toBabel l)) mblang $ defField "babel-otherlangs" (map (literal . toBabel) docLangs) - $ defField "babel-newcommands" (vcat $ - map (\(poly, babel) -> literal $ - -- \textspanish and \textgalician are already used by babel - -- save them as \oritext... and let babel use that - if poly `elem` ["spanish", "galician"] - then "\\let\\oritext" <> poly <> "\\text" <> poly <> "\n" <> - "\\AddBabelHook{" <> poly <> "}{beforeextras}" <> - "{\\renewcommand{\\text" <> poly <> "}{\\oritext" - <> poly <> "}}\n" <> - "\\AddBabelHook{" <> poly <> "}{afterextras}" <> - "{\\renewcommand{\\text" <> poly <> "}[2][]{\\foreignlanguage{" - <> poly <> "}{##2}}}" - else (if poly == "latin" -- see #4161 - then "\\providecommand{\\textlatin}{}\n\\renewcommand" - else "\\newcommand") <> "{\\text" <> poly <> - "}[2][]{\\foreignlanguage{" <> babel <> "}{#2}}\n" <> - "\\newenvironment{" <> poly <> - "}[2][]{\\begin{otherlanguage}{" <> - babel <> "}}{\\end{otherlanguage}}" - ) - -- eliminate duplicates that have same polyglossia name - $ nubBy (\a b -> fst a == fst b) - -- find polyglossia and babel names of languages used in the document - $ map (\l -> (fst $ toPolyglossia l, toBabel l)) docLangs - ) - $ maybe id (defField "polyglossia-lang" . toPolyObj) mblang - $ defField "polyglossia-otherlangs" - (ListVal (map toPolyObj docLangs :: [Val Text])) - $ - defField "latex-dir-rtl" + $ defField "latex-dir-rtl" ((render Nothing <$> getField "dir" context) == Just ("rtl" :: Text)) context return $ render colwidth $ @@ -383,10 +346,7 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do wrapNotes <$> wrapDiv (identifier,classes,kvs) result blockToLaTeX (Plain lst) = inlineListToLaTeX lst --- title beginning with fig: indicates that the image is a figure -blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,tgt)]) - | Just tit <- T.stripPrefix "fig:" tgt - = do +blockToLaTeX (SimpleFigure attr@(ident, _, _) txt (src, tit)) = do (capt, captForLof, footnotes) <- getCaption inlineListToLaTeX True txt lab <- labelFor ident let caption = "\\caption" <> captForLof <> braces capt <> lab @@ -429,6 +389,7 @@ blockToLaTeX (BlockQuote lst) = do blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do opts <- gets stOptions lab <- labelFor identifier + inNote <- stInNote <$> get linkAnchor' <- hypertarget True identifier lab let linkAnchor = if isEmpty linkAnchor' then empty @@ -438,8 +399,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do return $ flush (linkAnchor $$ "\\begin{code}" $$ literal str $$ "\\end{code}") $$ cr let rawCodeBlock = do - st <- get - env <- if stInNote st + env <- if inNote then modify (\s -> s{ stVerbInNote = True }) >> return "Verbatim" else return "verbatim" @@ -475,14 +435,13 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do "\\end{lstlisting}") $$ cr let highlightedCodeBlock = case highlight (writerSyntaxMap opts) - formatLaTeXBlock ("",classes,keyvalAttr) str of + formatLaTeXBlock ("",classes ++ ["default"],keyvalAttr) str of Left msg -> do unless (T.null msg) $ report $ CouldNotHighlight msg rawCodeBlock Right h -> do - st <- get - when (stInNote st) $ modify (\s -> s{ stVerbInNote = True }) + when inNote $ modify (\s -> s{ stVerbInNote = True }) modify (\s -> s{ stHighlighting = True }) return (flush $ linkAnchor $$ text (T.unpack h)) case () of @@ -491,6 +450,12 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do | writerListings opts -> listingsCodeBlock | not (null classes) && isJust (writerHighlightStyle opts) -> highlightedCodeBlock + -- we don't want to use \begin{verbatim} if our code + -- contains \end{verbatim}: + | inNote + , "\\end{Verbatim}" `T.isInfixOf` str -> highlightedCodeBlock + | not inNote + , "\\end{verbatim}" `T.isInfixOf` str -> highlightedCodeBlock | otherwise -> rawCodeBlock blockToLaTeX b@(RawBlock f x) = do beamer <- gets stBeamer @@ -766,9 +731,8 @@ inlineToLaTeX (Span (id',classes,kvs) ils) = do kvToCmd _ = Nothing langCmds = case lang of - Just lng -> let (l, o) = toPolyglossia lng - ops = if T.null o then "" else "[" <> o <> "]" - in ["text" <> l <> ops] + Just lng -> let l = toBabel lng + in ["foreignlanguage{" <> l <> "}"] Nothing -> [] let cmds = mapMaybe classToCmd classes ++ mapMaybe kvToCmd kvs ++ langCmds contents <- inlineListToLaTeX ils @@ -786,7 +750,9 @@ inlineToLaTeX (Span (id',classes,kvs) ils) = do then braces contents else foldr inCmd contents cmds) inlineToLaTeX (Emph lst) = inCmd "emph" <$> inlineListToLaTeX lst -inlineToLaTeX (Underline lst) = inCmd "underline" <$> inlineListToLaTeX lst +inlineToLaTeX (Underline lst) = do + modify $ \st -> st{ stStrikeout = True } -- this gives us the ulem package + inCmd "uline" <$> inlineListToLaTeX lst inlineToLaTeX (Strong lst) = inCmd "textbf" <$> inlineListToLaTeX lst inlineToLaTeX (Strikeout lst) = do -- we need to protect VERB in an mbox or we get an error diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs index 0ba68b74e..3fdbdc5af 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -10,61 +10,12 @@ Portability : portable -} module Text.Pandoc.Writers.LaTeX.Lang - ( toPolyglossiaEnv, - toPolyglossia, - toBabel + ( toBabel ) where import Data.Text (Text) import Text.Collate.Lang (Lang(..)) --- In environments \Arabic instead of \arabic is used -toPolyglossiaEnv :: Lang -> (Text, Text) -toPolyglossiaEnv l = - case toPolyglossia l of - ("arabic", o) -> ("Arabic", o) - x -> x - --- Takes a list of the constituents of a BCP47 language code and --- converts it to a Polyglossia (language, options) tuple --- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf -toPolyglossia :: Lang -> (Text, Text) -toPolyglossia (Lang "ar" _ (Just "DZ") _ _ _) = ("arabic", "locale=algeria") -toPolyglossia (Lang "ar" _ (Just "IQ") _ _ _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ (Just "JO") _ _ _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ (Just "LB") _ _ _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ (Just "LY") _ _ _) = ("arabic", "locale=libya") -toPolyglossia (Lang "ar" _ (Just "MA") _ _ _) = ("arabic", "locale=morocco") -toPolyglossia (Lang "ar" _ (Just "MR") _ _ _) = ("arabic", "locale=mauritania") -toPolyglossia (Lang "ar" _ (Just "PS") _ _ _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ (Just "SY") _ _ _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ (Just "TN") _ _ _) = ("arabic", "locale=tunisia") -toPolyglossia (Lang "de" _ _ vars _ _) - | "1901" `elem` vars = ("german", "spelling=old") -toPolyglossia (Lang "de" _ (Just "AT") vars _ _) - | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") -toPolyglossia (Lang "de" _ (Just "AT") _ _ _) = ("german", "variant=austrian") -toPolyglossia (Lang "de" _ (Just "CH") vars _ _) - | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") -toPolyglossia (Lang "de" _ (Just "CH") _ _ _) = ("german", "variant=swiss") -toPolyglossia (Lang "de" _ _ _ _ _) = ("german", "") -toPolyglossia (Lang "dsb" _ _ _ _ _) = ("lsorbian", "") -toPolyglossia (Lang "el" _ _ vars _ _) - | "polyton" `elem` vars = ("greek", "variant=poly") -toPolyglossia (Lang "en" _ (Just "AU") _ _ _) = ("english", "variant=australian") -toPolyglossia (Lang "en" _ (Just "CA") _ _ _) = ("english", "variant=canadian") -toPolyglossia (Lang "en" _ (Just "GB") _ _ _) = ("english", "variant=british") -toPolyglossia (Lang "en" _ (Just "NZ") _ _ _) = ("english", "variant=newzealand") -toPolyglossia (Lang "en" _ (Just "UK") _ _ _) = ("english", "variant=british") -toPolyglossia (Lang "en" _ (Just "US") _ _ _) = ("english", "variant=american") -toPolyglossia (Lang "grc" _ _ _ _ _) = ("greek", "variant=ancient") -toPolyglossia (Lang "hsb" _ _ _ _ _) = ("usorbian", "") -toPolyglossia (Lang "la" _ _ vars _ _) - | "x-classic" `elem` vars = ("latin", "variant=classic") -toPolyglossia (Lang "pt" _ (Just "BR") _ _ _) = ("portuguese", "variant=brazilian") -toPolyglossia (Lang "sl" _ _ _ _ _) = ("slovenian", "") -toPolyglossia x = (commonFromBcp47 x, "") - -- Takes a list of the constituents of a BCP47 language code and -- converts it to a Babel language string. -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf @@ -92,7 +43,7 @@ toBabel (Lang "en" _ (Just "US") _ _ _) = "american" toBabel (Lang "fr" _ (Just "CA") _ _ _) = "canadien" toBabel (Lang "fra" _ _ vars _ _) | "aca" `elem` vars = "acadian" -toBabel (Lang "grc" _ _ _ _ _) = "polutonikogreek" +toBabel (Lang "grc" _ _ _ _ _) = "ancientgreek" toBabel (Lang "hsb" _ _ _ _ _) = "uppersorbian" toBabel (Lang "la" _ _ vars _ _) | "x-classic" `elem` vars = "classiclatin" diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs index 27a8a0257..9471c171c 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -102,7 +102,7 @@ colDescriptors (Ann.Table _attr _caption specs thead tbodies tfoot) = toColDescriptor :: Int -> Alignment -> Double -> Text toColDescriptor numcols align width = T.pack $ printf - ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}" + ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.4f}}" (T.unpack (alignCommand align)) ((numcols - 1) * 2) width diff --git a/src/Text/Pandoc/Writers/LaTeX/Util.hs b/src/Text/Pandoc/Writers/LaTeX/Util.hs index c34338121..916ca1a99 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Util.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Util.hs @@ -26,7 +26,7 @@ import Control.Monad (when) import Text.Pandoc.Class (PandocMonad, toLang) import Text.Pandoc.Options (WriterOptions(..), isEnabled) import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState(..)) -import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv) +import Text.Pandoc.Writers.LaTeX.Lang (toBabel) import Text.Pandoc.Highlighting (toListingsLanguage) import Text.DocLayout import Text.Pandoc.Definition @@ -124,7 +124,7 @@ stringToLaTeX context zs = do '\160' -> emits "~" '\x200B' -> emits "\\hspace{0pt}" -- zero-width space '\x202F' -> emits "\\," - '\x2026' -> emitcseq "\\ldots" + '\x2026' | ligatures -> emitcseq "\\ldots" '\x2018' | ligatures -> emitquote "`" '\x2019' | ligatures -> emitquote "'" '\x201C' | ligatures -> emitquote "``" @@ -238,13 +238,11 @@ wrapDiv (_,classes,kvs) t = do Just "ltr" -> align "LTR" _ -> id wrapLang txt = case lang of - Just lng -> let (l, o) = toPolyglossiaEnv lng - ops = if T.null o - then "" - else brackets $ literal o - in inCmd "begin" (literal l) <> ops + Just lng -> let l = toBabel lng + in inCmd "begin" "otherlanguage" + <> (braces (literal l)) $$ blankline <> txt <> blankline - $$ inCmd "end" (literal l) + $$ inCmd "end" "otherlanguage" Nothing -> txt return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 87b2d8d21..8a34bf47f 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -109,11 +109,10 @@ blockToMan :: PandocMonad m blockToMan _ Null = return empty blockToMan opts (Div _ bs) = blockListToMan opts bs blockToMan opts (Plain inlines) = - liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines + splitSentences <$> inlineListToMan opts inlines blockToMan opts (Para inlines) = do - contents <- liftM vcat $ mapM (inlineListToMan opts) $ - splitSentences inlines - return $ text ".PP" $$ contents + contents <- inlineListToMan opts inlines + return $ text ".PP" $$ splitSentences contents blockToMan opts (LineBlock lns) = blockToMan opts $ linesToPara lns blockToMan _ b@(RawBlock f str) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index fda2bbcef..bb68d9fee 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -2,7 +2,6 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} -{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Markdown Copyright : Copyright (C) 2006-2021 John MacFarlane @@ -19,6 +18,7 @@ Markdown: <https://daringfireball.net/projects/markdown/> module Text.Pandoc.Writers.Markdown ( writeMarkdown, writeCommonMark, + writeMarkua, writePlain) where import Control.Monad.Reader import Control.Monad.State.Strict @@ -43,7 +43,10 @@ import Text.Pandoc.Templates (renderTemplate) import Text.DocTemplates (Val(..), Context(..), FromContext(..)) import Text.Pandoc.Walk import Text.Pandoc.Writers.HTML (writeHtml5String) -import Text.Pandoc.Writers.Markdown.Inline (inlineListToMarkdown, linkAttributes, attrsToMarkdown) +import Text.Pandoc.Writers.Markdown.Inline (inlineListToMarkdown, + linkAttributes, + attrsToMarkdown, + attrsToMarkua) import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..), WriterState(..), WriterEnv(..), @@ -78,6 +81,26 @@ writeCommonMark opts document = enableExtension Ext_intraword_underscores $ writerExtensions opts } +-- | Convert Pandoc to Markua. +writeMarkua :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeMarkua opts document = + evalMD (pandocToMarkdown opts' document) def{ envVariant = Markua } def + where + opts' = opts{ writerExtensions = + enableExtension Ext_hard_line_breaks $ + enableExtension Ext_pipe_tables $ + -- required for fancy list enumerators + enableExtension Ext_fancy_lists $ + enableExtension Ext_startnum $ + enableExtension Ext_strikeout $ + enableExtension Ext_subscript $ + enableExtension Ext_superscript $ + enableExtension Ext_definition_lists $ + enableExtension Ext_smart $ + enableExtension Ext_footnotes + mempty } + + pandocTitleBlock :: Doc Text -> [Doc Text] -> Doc Text -> Doc Text pandocTitleBlock tit auths dat = hang 2 (text "% ") tit <> cr <> @@ -141,10 +164,20 @@ valToYaml (SimpleVal x) | otherwise = if hasNewlines x then hang 0 ("|" <> cr) x - else if isNothing $ foldM needsDoubleQuotes True x - then "\"" <> fmap escapeInDoubleQuotes x <> "\"" - else x + else case x of + Text _ t | isSpecialString t -> + "\"" <> fmap escapeInDoubleQuotes x <> "\"" + _ | isNothing (foldM needsDoubleQuotes True x) -> + "\"" <> fmap escapeInDoubleQuotes x <> "\"" + | otherwise -> x where + isSpecialString t = Set.member t specialStrings + specialStrings = Set.fromList + ["y", "Y", "yes", "Yes", "YES", "n", "N", + "no", "No", "NO", "true", "True", "TRUE", + "false", "False", "FALSE", "on", "On", "ON", + "off", "Off", "OFF", "null", "Null", + "NULL", "~", "*"] needsDoubleQuotes isFirst t = if T.any isBadAnywhere t || (isFirst && T.any isYamlPunct (T.take 1 t)) @@ -318,8 +351,15 @@ blockToMarkdown' opts (Div attrs ils) = do contents <- blockListToMarkdown opts ils variant <- asks envVariant return $ - case () of - _ | isEnabled Ext_fenced_divs opts && + case () of + _ | variant == Markua -> + case () of + () | "blurb" `elem` classes' -> prefixed "B> " contents <> blankline + | "aside" `elem` classes' -> prefixed "A> " contents <> blankline + -- | necessary to enable option to create a bibliography + | (take 3 (T.unpack id')) == "ref" -> contents <> blankline + | otherwise -> contents <> blankline + | isEnabled Ext_fenced_divs opts && attrs /= nullAttr -> let attrsToMd = if variant == Commonmark then attrsToMarkdown @@ -365,14 +405,13 @@ blockToMarkdown' opts (Plain inlines) = do _ -> inlines contents <- inlineListToMarkdown opts inlines' return $ contents <> cr --- title beginning with fig: indicates figure -blockToMarkdown' opts (Para [Image attr alt (src,tgt@(T.stripPrefix "fig:" -> Just tit))]) +blockToMarkdown' opts (SimpleFigure attr alt (src, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) && attr /= nullAttr = -- use raw HTML (<> blankline) . literal . T.strip <$> writeHtml5String opts{ writerTemplate = Nothing } - (Pandoc nullMeta [Para [Image attr alt (src,tgt)]]) + (Pandoc nullMeta [SimpleFigure attr alt (src, tit)]) | otherwise = blockToMarkdown opts (Para [Image attr alt (src,tit)]) blockToMarkdown' opts (Para inlines) = (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) @@ -391,7 +430,8 @@ blockToMarkdown' opts b@(RawBlock f str) = do (literal "```" <> literal "\n") let renderEmpty = mempty <$ report (BlockNotRendered b) case variant of - PlainText -> renderEmpty + PlainText + | f == "plain" -> return $ literal str <> literal "\n" Commonmark | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"] -> return $ literal str <> literal "\n" @@ -399,6 +439,7 @@ blockToMarkdown' opts b@(RawBlock f str) = do | f `elem` ["markdown", "markdown_github", "markdown_phpextra", "markdown_mmd", "markdown_strict"] -> return $ literal str <> literal "\n" + Markua -> renderEmpty _ | isEnabled Ext_raw_attribute opts -> rawAttribBlock | f `elem` ["html", "html5", "html4"] , isEnabled Ext_markdown_attribute opts @@ -410,17 +451,19 @@ blockToMarkdown' opts b@(RawBlock f str) = do , isEnabled Ext_raw_tex opts -> return $ literal str <> literal "\n" _ -> renderEmpty -blockToMarkdown' opts HorizontalRule = - return $ blankline <> literal (T.replicate (writerColumns opts) "-") <> blankline +blockToMarkdown' opts HorizontalRule = do + variant <- asks envVariant + let indicator = case variant of + Markua -> "* * *" + _ -> T.replicate (writerColumns opts) "-" + return $ blankline <> literal indicator <> blankline blockToMarkdown' opts (Header level attr inlines) = do - -- first, if we're putting references at the end of a section, we -- put them here. blkLevel <- asks envBlockLevel refs <- if writerReferenceLocation opts == EndOfSection && blkLevel == 1 then notesAndRefs opts else return empty - variant <- asks envVariant -- we calculate the id that would be used by auto_identifiers -- so we know whether to print an explicit identifier @@ -433,7 +476,8 @@ blockToMarkdown' opts (Header level attr inlines) = do && id' == autoId -> empty (id',_,_) | isEnabled Ext_mmd_header_identifiers opts -> space <> brackets (literal id') - _ | isEnabled Ext_header_attributes opts || + _ | variant == Markua -> attrsToMarkua attr + | isEnabled Ext_header_attributes opts || isEnabled Ext_attributes opts -> space <> attrsToMarkdown attr | otherwise -> empty @@ -467,6 +511,8 @@ blockToMarkdown' opts (Header level attr inlines) = do -- ghc interprets '#' characters in column 1 as linenum specifiers. _ | variant == PlainText || isEnabled Ext_literate_haskell opts -> contents <> blankline + _ | variant == Markua -> attr' <> cr <> literal (T.replicate level "#") + <> space <> contents <> blankline _ -> literal (T.replicate level "#") <> space <> contents <> attr' <> blankline return $ refs <> hdr @@ -483,9 +529,11 @@ blockToMarkdown' opts (CodeBlock attribs str) = do backticks <> attrs <> cr <> literal str <> cr <> backticks <> blankline | isEnabled Ext_fenced_code_blocks opts -> tildes <> attrs <> cr <> literal str <> cr <> tildes <> blankline - _ -> nest (writerTabStop opts) (literal str) <> blankline + _ | variant == Markua -> blankline <> attrsToMarkua attribs <> cr <> backticks <> cr <> + literal str <> cr <> backticks <> cr <> blankline + | otherwise -> nest (writerTabStop opts) (literal str) <> blankline where - endlineLen c = maybe 3 ((+1) . maximum) $ nonEmpty $ + endlineLen c = maybe 3 ((+1) . maximum) $ nonEmpty [T.length ln | ln <- map trim (T.lines str) , T.pack [c,c,c] `T.isPrefixOf` ln @@ -572,19 +620,29 @@ blockToMarkdown' opts t@(Table _ blkCapt specs thead tbody tfoot) = do return $ nst (tbl $$ caption'') $$ blankline blockToMarkdown' opts (BulletList items) = do contents <- inList $ mapM (bulletListItemToMarkdown opts) items - return $ (if isTightList items then vcat else vsep) contents <> blankline + return $ (if isTightList items then vcat else vsep) + contents <> blankline blockToMarkdown' opts (OrderedList (start,sty,delim) items) = do variant <- asks envVariant let start' = if variant == Commonmark || isEnabled Ext_startnum opts then start else 1 let sty' = if isEnabled Ext_fancy_lists opts then sty else DefaultStyle - let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim + let delim' | isEnabled Ext_fancy_lists opts = + case variant of + -- Markua supports 'fancy' enumerators, but no TwoParens + Markua -> if delim == TwoParens then OneParen else delim + _ -> delim + | variant == Commonmark && --commonmark only supports one paren + (delim == OneParen || delim == TwoParens) = OneParen + | otherwise = DefaultDelim let attribs = (start', sty', delim') let markers = orderedListMarkers attribs - let markers' = map (\m -> if T.length m < 3 - then m <> T.replicate (3 - T.length m) " " - else m) markers + let markers' = case variant of + Markua -> markers + _ -> map (\m -> if T.length m < 3 + then m <> T.replicate (3 - T.length m) " " + else m) markers contents <- inList $ zipWithM (orderedListItemToMarkdown opts) markers' items return $ (if isTightList items then vcat else vsep) contents <> blankline @@ -698,10 +756,13 @@ itemEndsWithTightList bs = -- | Convert bullet list item (list of blocks) to markdown. bulletListItemToMarkdown :: PandocMonad m => WriterOptions -> [Block] -> MD m (Doc Text) bulletListItemToMarkdown opts bs = do + variant <- asks envVariant let exts = writerExtensions opts contents <- blockListToMarkdown opts $ taskListItemToAscii exts bs let sps = T.replicate (writerTabStop opts - 2) " " - let start = literal $ "- " <> sps + let start = case variant of + Markua -> literal "* " + _ -> literal $ "- " <> sps -- remove trailing blank line if item ends with a tight list let contents' = if itemEndsWithTightList bs then chomp contents <> cr @@ -711,19 +772,22 @@ bulletListItemToMarkdown opts bs = do -- | Convert ordered list item (a list of blocks) to markdown. orderedListItemToMarkdown :: PandocMonad m => WriterOptions -- ^ options - -> Text -- ^ list item marker + -> Text -- ^ list item marker -> [Block] -- ^ list item (list of blocks) -> MD m (Doc Text) orderedListItemToMarkdown opts marker bs = do let exts = writerExtensions opts contents <- blockListToMarkdown opts $ taskListItemToAscii exts bs + variant <- asks envVariant let sps = case writerTabStop opts - T.length marker of n | n > 0 -> literal $ T.replicate n " " _ -> literal " " let ind = if isEnabled Ext_four_space_rule opts then writerTabStop opts else max (writerTabStop opts) (T.length marker + 1) - let start = literal marker <> sps + let start = case variant of + Markua -> literal marker <> " " + _ -> literal marker <> sps -- remove trailing blank line if item ends with a tight list let contents' = if itemEndsWithTightList bs then chomp contents <> cr @@ -742,7 +806,10 @@ definitionListItemToMarkdown opts (label, defs) = do then do let tabStop = writerTabStop opts variant <- asks envVariant - let leader = if variant == PlainText then " " else ": " + let leader = case variant of + PlainText -> " " + Markua -> ":" + _ -> ": " let sps = case writerTabStop opts - 3 of n | n > 0 -> literal $ T.replicate n " " _ -> literal " " @@ -813,6 +880,7 @@ blockListToMarkdown opts blocks = do isListBlock _ = False commentSep | variant == PlainText = Null + | variant == Markua = Null | isEnabled Ext_raw_html opts = RawBlock "html" "<!-- -->\n" | otherwise = RawBlock "markdown" " \n" mconcat <$> mapM (blockToMarkdown opts) (fixBlocks blocks) diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs index cd5f5b896..0bf70e80e 100644 --- a/src/Text/Pandoc/Writers/Markdown/Inline.hs +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -13,7 +13,8 @@ module Text.Pandoc.Writers.Markdown.Inline ( inlineListToMarkdown, linkAttributes, - attrsToMarkdown + attrsToMarkdown, + attrsToMarkua ) where import Control.Monad.Reader import Control.Monad.State.Strict @@ -24,7 +25,6 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T -import Network.HTTP (urlEncode) import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Logging @@ -32,6 +32,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.Network.HTTP (urlEncode) import Text.Pandoc.Writers.Shared import Text.Pandoc.Walk import Text.Pandoc.Writers.HTML (writeHtml5String) @@ -44,32 +45,35 @@ import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..), -- | Escape special characters for Markdown. escapeText :: WriterOptions -> Text -> Text -escapeText opts = T.pack . go . T.unpack +escapeText opts = T.pack . go' . T.unpack where startsWithSpace (' ':_) = True startsWithSpace ('\t':_) = True startsWithSpace [] = True startsWithSpace _ = False + go' ('#':cs) + | isEnabled Ext_space_in_atx_header opts + = if startsWithSpace (dropWhile (=='#') cs) + then '\\':'#':go cs + else '#':go cs + | otherwise = '\\':'#':go cs + go' ('@':cs) + | isEnabled Ext_citations opts = + case cs of + (d:_) + | isAlphaNum d || d == '_' || d == '{' + -> '\\':'@':go cs + _ -> '@':go cs + go' cs = go cs go [] = [] go (c:cs) = case c of - '<' | isEnabled Ext_all_symbols_escapable opts -> - '\\' : '<' : go cs - | otherwise -> "<" ++ go cs - '>' | isEnabled Ext_all_symbols_escapable opts -> - '\\' : '>' : go cs - | otherwise -> ">" ++ go cs - '@' | isEnabled Ext_citations opts -> - case cs of - (d:_) - | isAlphaNum d || d == '_' || d == '{' - -> '\\':'@':go cs - _ -> '@':go cs - '#' | isEnabled Ext_space_in_atx_header opts - , startsWithSpace cs - -> '\\':'#':go cs _ | c `elem` ['\\','`','*','_','[',']'] -> '\\':c:go cs + '>' | isEnabled Ext_all_symbols_escapable opts -> '\\':'>':go cs + | otherwise -> ">" ++ go cs + '<' | isEnabled Ext_all_symbols_escapable opts -> '\\':'<':go cs + | otherwise -> "<" ++ go cs '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs '~' | isEnabled Ext_subscript opts || @@ -90,10 +94,13 @@ escapeText opts = T.pack . go . T.unpack | isEnabled Ext_intraword_underscores opts , isAlphaNum c , isAlphaNum x -> c : '_' : x : go xs - '#':xs -> c : '#' : go xs - '>':xs -> c : '>' : go xs _ -> c : go cs +-- Escape the escape character, as well as formatting pairs +escapeMarkuaString :: Text -> Text +escapeMarkuaString s = foldr (uncurry T.replace) s [("--","~-~-"), + ("**","~*~*"),("//","~/~/"),("^^","~^~^"),(",,","~,~,")] + attrsToMarkdown :: Attr -> Doc Text attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] where attribId = case attribs of @@ -115,9 +122,56 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] escAttrChar '\\' = literal "\\\\" escAttrChar c = literal $ T.singleton c +attrsToMarkua:: Attr -> Doc Text +attrsToMarkua attributes + | null list = empty + | otherwise = braces $ intercalateDocText list + where attrId = case attributes of + ("",_,_) -> [] + (i,_,_) -> [literal $ "id: " <> i] + -- all non explicit (key,value) attributes besides id are getting + -- a default class key to be Markua conform + attrClasses = case attributes of + (_,[],_) -> [] + (_,classes,_) -> map (escAttr . ("class: " <>)) + classes + attrKeyValues = case attributes of + (_,_,[]) -> [] + (_,_,keyvalues) -> map ((\(k,v) -> escAttr k + <> ": " <> escAttr v) . + preprocessKeyValues) keyvalues + escAttr = mconcat . map escAttrChar . T.unpack + escAttrChar '"' = literal "\"" + escAttrChar c = literal $ T.singleton c + + list = concat [attrId, attrClasses, attrKeyValues] + + -- if attribute key is alt, caption, title then content + -- gets wrapped inside quotes + -- attribute gets removed + preprocessKeyValues :: (Text, Text) -> (Text, Text) + preprocessKeyValues (key,value) + | key == "alt" || + key == "caption" || + key == "title" = (key, inquotes value) + | otherwise = (key,value) + intercalateDocText :: [Doc Text] -> Doc Text + intercalateDocText [] = empty + intercalateDocText [x] = x + intercalateDocText (x:xs) = x <> ", " <> (intercalateDocText xs) + +-- | Add a (key, value) pair to Pandoc attr type +addKeyValueToAttr :: Attr -> (Text,Text) -> Attr +addKeyValueToAttr (ident,classes,kvs) (key,value) + | not (T.null key) && not (T.null value) = (ident, + classes, + (key,value): kvs) + | otherwise = (ident,classes,kvs) + linkAttributes :: WriterOptions -> Attr -> Doc Text linkAttributes opts attr = - if (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) && attr /= nullAttr + if (isEnabled Ext_link_attributes opts || + isEnabled Ext_attributes opts) && attr /= nullAttr then attrsToMarkdown attr else empty @@ -190,11 +244,13 @@ getReference attr label target = do (stKeys s) }) return lab' + + -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m (Doc Text) -inlineListToMarkdown opts lst = do - inlist <- asks envInList - go (if inlist then avoidBadWrapsInList lst else lst) +inlineListToMarkdown opts ils = do + inlist <- asks envInList + avoidBadWraps inlist <$> go ils where go [] = return empty go (x@Math{}:y@(Str t):zs) | T.all isDigit (T.take 1 t) -- starts with digit -- see #7058 @@ -235,26 +291,25 @@ inlineListToMarkdown opts lst = do fmap (iMark <>) (go is) thead = fmap fst . T.uncons -isSp :: Inline -> Bool -isSp Space = True -isSp SoftBreak = True -isSp _ = False +-- Remove breaking spaces that might cause bad wraps. +avoidBadWraps :: Bool -> Doc Text -> Doc Text +avoidBadWraps inListItem = go . toList + where + go [] = mempty + go (BreakingSpace : Text len t : BreakingSpace : xs) + = case T.uncons t of + Just (c,t') + | c == '>' + || ((c == '-' || c == '*' || c == '+') && T.null t') + || (inListItem && isOrderedListMarker t) + || (t == "1." || t == "1)") + -> Text (len + 1) (" " <> t) <> go (BreakingSpace : xs) + _ -> BreakingSpace <> Text len t <> go (BreakingSpace : xs) + go (x:xs) = x <> go xs -avoidBadWrapsInList :: [Inline] -> [Inline] -avoidBadWrapsInList [] = [] -avoidBadWrapsInList (s:Str (T.uncons -> Just ('>',cs)):xs) | isSp s = - Str (" >" <> cs) : avoidBadWrapsInList xs -avoidBadWrapsInList [s, Str (T.uncons -> Just (c, cs))] - | T.null cs && isSp s && c `elem` ['-','*','+'] = [Str $ T.pack [' ', c]] -avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):Space:xs) - | T.null cs && isSp s && c `elem` ['-','*','+'] = - Str (T.pack [' ', c]) : Space : avoidBadWrapsInList xs -avoidBadWrapsInList (s:Str cs:Space:xs) - | isSp s && isOrderedListMarker cs = - Str (" " <> cs) : Space : avoidBadWrapsInList xs -avoidBadWrapsInList [s, Str cs] - | isSp s && isOrderedListMarker cs = [Str $ " " <> cs] -avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs + toList (Concat (Concat a b) c) = toList (Concat a (Concat b c)) + toList (Concat a b) = a : toList b + toList x = [x] isOrderedListMarker :: Text -> Bool isOrderedListMarker xs = not (T.null xs) && (T.last xs `elem` ['.',')']) && @@ -281,6 +336,7 @@ inlineToMarkdown opts (Span attrs ils) = do _ -> id $ case variant of PlainText -> contents + Markua -> "`" <> contents <> "`" <> attrsToMarkua attrs _ | attrs == nullAttr -> contents | isEnabled Ext_bracketed_spans opts -> let attrs' = if attrs /= nullAttr @@ -307,7 +363,7 @@ inlineToMarkdown opts (Underline lst) = do case variant of PlainText -> return contents _ | isEnabled Ext_bracketed_spans opts -> - return $ "[" <> contents <> "]" <> "{.ul}" + return $ "[" <> contents <> "]" <> "{.underline}" | isEnabled Ext_native_spans opts -> return $ tagWithAttrs "span" ("", ["underline"], []) <> contents @@ -394,60 +450,75 @@ inlineToMarkdown opts (Quoted DoubleQuote lst) = do then "“" <> contents <> "”" else "“" <> contents <> "”" inlineToMarkdown opts (Code attr str) = do + variant <- asks envVariant let tickGroups = filter (T.any (== '`')) $ T.group str let longest = maybe 0 maximum $ nonEmpty $ map T.length tickGroups let marker = T.replicate (longest + 1) "`" let spacer = if longest == 0 then "" else " " let attrsEnabled = isEnabled Ext_inline_code_attributes opts || isEnabled Ext_attributes opts - let attrs = if attrsEnabled && attr /= nullAttr - then attrsToMarkdown attr - else empty - variant <- asks envVariant + let attrs = case variant of + Markua -> attrsToMarkua attr + _ -> if attrsEnabled && attr /= nullAttr + then attrsToMarkdown attr + else empty case variant of PlainText -> return $ literal str _ -> return $ literal (marker <> spacer <> str <> spacer <> marker) <> attrs inlineToMarkdown opts (Str str) = do variant <- asks envVariant - let str' = (if writerPreferAscii opts - then toHtml5Entities - else id) . - (if isEnabled Ext_smart opts - then unsmartify opts - else id) . - (if variant == PlainText - then id - else escapeText opts) $ str + let str' = case variant of + Markua -> escapeMarkuaString str + _ -> (if writerPreferAscii opts + then toHtml5Entities + else id) . + (if isEnabled Ext_smart opts + then unsmartify opts + else id) . + (if variant == PlainText + then id + else escapeText opts) $ str return $ literal str' -inlineToMarkdown opts (Math InlineMath str) = - case writerHTMLMathMethod opts of - WebTeX url -> inlineToMarkdown opts - (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str)) - _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$" <> literal str <> "$" - | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\(" <> literal str <> "\\)" - | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\(" <> literal str <> "\\\\)" - | otherwise -> do - variant <- asks envVariant - texMathToInlines InlineMath str >>= - inlineListToMarkdown opts . - (if variant == PlainText then makeMathPlainer else id) -inlineToMarkdown opts (Math DisplayMath str) = - case writerHTMLMathMethod opts of - WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` - inlineToMarkdown opts (Image nullAttr [Str str] - (url <> T.pack (urlEncode $ T.unpack str), str)) - _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$$" <> literal str <> "$$" - | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\[" <> literal str <> "\\]" - | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\[" <> literal str <> "\\\\]" - | otherwise -> (\x -> cr <> x <> cr) `fmap` - (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) +inlineToMarkdown opts (Math InlineMath str) = do + variant <- asks envVariant + case () of + _ | variant == Markua -> return $ "`" <> literal str <> "`" <> "$" + | otherwise -> case writerHTMLMathMethod opts of + WebTeX url -> inlineToMarkdown opts + (Image nullAttr [Str str] (url <> urlEncode str, str)) + _ | isEnabled Ext_tex_math_dollars opts -> + return $ "$" <> literal str <> "$" + | isEnabled Ext_tex_math_single_backslash opts -> + return $ "\\(" <> literal str <> "\\)" + | isEnabled Ext_tex_math_double_backslash opts -> + return $ "\\\\(" <> literal str <> "\\\\)" + | otherwise -> + texMathToInlines InlineMath str >>= + inlineListToMarkdown opts . + (if variant == PlainText then makeMathPlainer else id) + +inlineToMarkdown opts (Math DisplayMath str) = do + variant <- asks envVariant + case () of + _ | variant == Markua -> do + let attributes = attrsToMarkua (addKeyValueToAttr ("",[],[]) + ("format", "latex")) + return $ blankline <> attributes <> cr <> literal "```" <> cr + <> literal str <> cr <> literal "```" <> blankline + | otherwise -> case writerHTMLMathMethod opts of + WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` + inlineToMarkdown opts (Image nullAttr [Str str] + (url <> urlEncode str, str)) + _ | isEnabled Ext_tex_math_dollars opts -> + return $ "$$" <> literal str <> "$$" + | isEnabled Ext_tex_math_single_backslash opts -> + return $ "\\[" <> literal str <> "\\]" + | isEnabled Ext_tex_math_double_backslash opts -> + return $ "\\\\[" <> literal str <> "\\\\]" + | otherwise -> (\x -> cr <> x <> cr) `fmap` + (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) + inlineToMarkdown opts il@(RawInline f str) = do let tickGroups = filter (T.any (== '`')) $ T.group str let numticks = 1 + maybe 0 maximum (nonEmpty (map T.length tickGroups)) @@ -458,7 +529,8 @@ inlineToMarkdown opts il@(RawInline f str) = do literal (T.replicate numticks "`") <> literal "{=" <> literal fmt <> literal "}" let renderEmpty = mempty <$ report (InlineNotRendered il) case variant of - PlainText -> renderEmpty + PlainText + | f == "plain" -> return $ literal str Commonmark | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"] -> return $ literal str @@ -466,6 +538,7 @@ inlineToMarkdown opts il@(RawInline f str) = do | f `elem` ["markdown", "markdown_github", "markdown_phpextra", "markdown_mmd", "markdown_strict"] -> return $ literal str + Markua -> renderEmpty _ | isEnabled Ext_raw_attribute opts -> rawAttribInline | f `elem` ["html", "html5", "html4"] , isEnabled Ext_raw_html opts @@ -502,7 +575,11 @@ inlineToMarkdown opts (Cite (c:cs) lst) then do suffs <- inlineListToMarkdown opts $ citationSuffix c rest <- mapM convertOne cs - let inbr = suffs <+> joincits rest + let inbr = suffs <> + (if not (null (citationSuffix c)) && not (null rest) + then text ";" + else mempty) + <+> joincits rest br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' return $ literal ("@" <> maybeInBraces (citationId c)) <+> br else do @@ -524,12 +601,14 @@ inlineToMarkdown opts (Cite (c:cs) lst) sdoc <- inlineListToMarkdown opts sinlines let k' = literal (modekey m <> "@" <> maybeInBraces k) r = case sinlines of - Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc - _ -> k' <+> sdoc + Str (T.uncons -> Just (y,_)):_ + | y `elem` (",;]@" :: String) -> k' <> sdoc + Space:_ -> k' <> sdoc + _ -> k' <+> sdoc return $ pdoc <+> r modekey SuppressAuthor = "-" modekey _ = "" -inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do +inlineToMarkdown opts lnk@(Link attr@(ident,classes,kvs) txt (src, tit)) = do variant <- asks envVariant linktext <- inlineListToMarkdown opts txt let linktitle = if T.null tit @@ -537,6 +616,9 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do else literal $ " \"" <> tit <> "\"" let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) let useAuto = isURI src && + T.null ident && + null kvs && + (null classes || classes == ["uri"] || classes == ["email"]) && case txt of [Str s] | escapeURI s == srcSuffix -> True _ -> False @@ -551,6 +633,11 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do PlainText | useAuto -> return $ literal srcSuffix | otherwise -> return linktext + Markua + | T.null tit -> return $ result <> attrsToMarkua attr + | otherwise -> return $ result <> attrsToMarkua attributes + where result = "[" <> linktext <> "](" <> (literal src) <> ")" + attributes = addKeyValueToAttr attr ("title", tit) _ | useAuto -> return $ "<" <> literal srcSuffix <> ">" | useRefLinks -> let first = "[" <> linktext <> "]" @@ -582,9 +669,16 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit)) then [Str ""] else alternate linkPart <- inlineToMarkdown opts (Link attr txt (source, tit)) + alt <- inlineListToMarkdown opts alternate + let attributes | variant == Markua = attrsToMarkua $ + addKeyValueToAttr (addKeyValueToAttr attr ("title", tit)) + ("alt", render (Just (writerColumns opts)) alt) + | otherwise = empty return $ case variant of - PlainText -> "[" <> linkPart <> "]" - _ -> "!" <> linkPart + PlainText -> "[" <> linkPart <> "]" + Markua -> cr <> attributes <> cr <> literal "![](" <> + literal source <> ")" <> cr + _ -> "!" <> linkPart inlineToMarkdown opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get diff --git a/src/Text/Pandoc/Writers/Markdown/Types.hs b/src/Text/Pandoc/Writers/Markdown/Types.hs index a1d0d14e4..060446811 100644 --- a/src/Text/Pandoc/Writers/Markdown/Types.hs +++ b/src/Text/Pandoc/Writers/Markdown/Types.hs @@ -45,7 +45,8 @@ data WriterEnv = WriterEnv { envInList :: Bool } data MarkdownVariant = - PlainText + Markua + | PlainText | Commonmark | Markdown deriving (Show, Eq) diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 5029be69f..f047baf1c 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -1,5 +1,4 @@ {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.MediaWiki Copyright : Copyright (C) 2008-2021 John MacFarlane @@ -91,8 +90,7 @@ blockToMediaWiki (Div attrs bs) = do blockToMediaWiki (Plain inlines) = inlineListToMediaWiki inlines --- title beginning with fig: indicates that the image is a figure -blockToMediaWiki (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do +blockToMediaWiki (SimpleFigure attr txt (src, tit)) = do capt <- inlineListToMediaWiki txt img <- imageToMediaWiki attr let opt = if T.null tit @@ -130,10 +128,15 @@ blockToMediaWiki b@(RawBlock f str) blockToMediaWiki HorizontalRule = return "\n-----\n" -blockToMediaWiki (Header level _ inlines) = do +blockToMediaWiki (Header level (ident,_,_) inlines) = do + let autoId = T.replace " " "_" $ stringify inlines contents <- inlineListToMediaWiki inlines let eqs = T.replicate level "=" - return $ eqs <> " " <> contents <> " " <> eqs <> "\n" + return $ + (if T.null ident || autoId == ident + then "" + else "<span id=\"" <> ident <> "\"></span>\n") + <> eqs <> " " <> contents <> " " <> eqs <> "\n" blockToMediaWiki (CodeBlock (_,classes,keyvals) str) = do let at = Set.fromList classes `Set.intersection` highlightingLangs diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 97c23f24d..53763a609 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -21,7 +21,7 @@ TODO: module Text.Pandoc.Writers.Ms ( writeMs ) where import Control.Monad.State.Strict -import Data.Char (isLower, isUpper, ord) +import Data.Char (isAscii, isLower, isUpper, ord) import Data.List (intercalate, intersperse) import Data.List.NonEmpty (nonEmpty) import qualified Data.Map as Map @@ -46,6 +46,8 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Writers.Roff import Text.Printf (printf) import Text.TeXMath (writeEqn) +import qualified Data.Text.Encoding as TE +import qualified Data.ByteString as B -- | Convert Pandoc to Ms. writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -88,6 +90,21 @@ escapeStr :: WriterOptions -> Text -> Text escapeStr opts = escapeString (if writerPreferAscii opts then AsciiOnly else AllowUTF8) +-- In PDFs we need to escape parentheses and backslash. +-- In PDF we need to encode as UTF-16 BE. +escapePDFString :: Text -> Text +escapePDFString t + | T.all isAscii t = + T.replace "(" "\\(" . T.replace ")" "\\)" . T.replace "\\" "\\\\" $ t + | otherwise = ("\\376\\377" <>) . -- add bom + mconcat . map encodeChar . T.unpack $ t + where + encodeChar c = + if isAscii c && c /= '\\' && c /= '(' && c /= ')' + then "\\000" <> T.singleton c + else mconcat . map toOctal . B.unpack . TE.encodeUtf16BE $ T.singleton c + toOctal n = "\\" <> T.pack (printf "%03o" n) + escapeUri :: Text -> Text escapeUri = T.pack . escapeURIString (\c -> c /= '@' && isAllowedInURI c) . T.unpack @@ -143,7 +160,7 @@ blockToMs opts (Div (ident,cls,kvs) bs) = do setFirstPara return $ anchor $$ res blockToMs opts (Plain inlines) = - liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines + splitSentences <$> inlineListToMs' opts inlines blockToMs opts (Para [Image attr alt (src,_tit)]) | let ext = takeExtension (T.unpack src) in (ext == ".ps" || ext == ".eps") = do let (mbW,mbH) = (inPoints opts <$> dimension Width attr, @@ -156,7 +173,7 @@ blockToMs opts (Para [Image attr alt (src,_tit)]) space <> doubleQuotes (literal (tshow (floor hp :: Int))) _ -> empty - capt <- inlineListToMs' opts alt + capt <- splitSentences <$> inlineListToMs' opts alt return $ nowrap (literal ".PSPIC -C " <> doubleQuotes (literal (escapeStr opts src)) <> sizeAttrs) $$ @@ -166,9 +183,9 @@ blockToMs opts (Para [Image attr alt (src,_tit)]) blockToMs opts (Para inlines) = do firstPara <- gets stFirstPara resetFirstPara - contents <- liftM vcat $ mapM (inlineListToMs' opts) $ - splitSentences inlines - return $ literal (if firstPara then ".LP" else ".PP") $$ contents + contents <- inlineListToMs' opts inlines + return $ literal (if firstPara then ".LP" else ".PP") $$ + splitSentences contents blockToMs _ b@(RawBlock f str) | f == Format "ms" = return $ literal str | otherwise = do @@ -196,7 +213,7 @@ blockToMs opts (Header level (ident,classes,_) inlines) = do (if T.null secnum then "" else " ") <> - escapeStr opts (stringify inlines)) + escapePDFString (stringify inlines)) let backlink = nowrap (literal ".pdfhref L -D " <> doubleQuotes (literal (toAscii ident)) <> space <> literal "\\") <> cr <> literal " -- " diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index 9c2ce805d..264b9c498 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -12,82 +12,20 @@ Conversion of a 'Pandoc' document to a string representation. -} module Text.Pandoc.Writers.Native ( writeNative ) where -import Data.List (intersperse) import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition -import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) -import Text.DocLayout - -prettyList :: [Doc Text] -> Doc Text -prettyList ds = - "[" <> - mconcat (intersperse (cr <> ",") $ map (nest 1) ds) <> "]" - --- | Prettyprint Pandoc block element. -prettyBlock :: Block -> Doc Text -prettyBlock (LineBlock lines') = - "LineBlock" $$ prettyList (map (text . show) lines') -prettyBlock (BlockQuote blocks) = - "BlockQuote" $$ prettyList (map prettyBlock blocks) -prettyBlock (OrderedList attribs blockLists) = - "OrderedList" <> space <> text (show attribs) $$ - prettyList (map (prettyList . map prettyBlock) blockLists) -prettyBlock (BulletList blockLists) = - "BulletList" $$ - prettyList (map (prettyList . map prettyBlock) blockLists) -prettyBlock (DefinitionList items) = "DefinitionList" $$ - prettyList (map deflistitem items) - where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <> - nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")" -prettyBlock (Table attr blkCapt specs thead tbody tfoot) = - mconcat [ "Table " - , text (show attr) - , " " - , prettyCaption blkCapt ] $$ - prettyList (map (text . show) specs) $$ - prettyHead thead $$ - prettyBodies tbody $$ - prettyFoot tfoot - where prettyRows = prettyList . map prettyRow - prettyRow (Row a body) = - text ("Row " <> show a) $$ prettyList (map prettyCell body) - prettyCell (Cell a ma h w b) = - mconcat [ "Cell " - , text (show a) - , " " - , text (show ma) - , " (" - , text (show h) - , ") (" - , text (show w) - , ")" ] $$ - prettyList (map prettyBlock b) - prettyCaption (Caption mshort body) = - "(Caption " <> text (showsPrec 11 mshort "") $$ prettyList (map prettyBlock body) <> ")" - prettyHead (TableHead thattr body) - = "(TableHead " <> text (show thattr) $$ prettyRows body <> ")" - prettyBody (TableBody tbattr rhc hd bd) - = mconcat [ "(TableBody " - , text (show tbattr) - , " (" - , text (show rhc) - , ")" ] $$ prettyRows hd $$ prettyRows bd <> ")" - prettyBodies = prettyList . map prettyBody - prettyFoot (TableFoot tfattr body) - = "(TableFoot " <> text (show tfattr) $$ prettyRows body <> ")" -prettyBlock (Div attr blocks) = - text ("Div " <> show attr) $$ prettyList (map prettyBlock blocks) -prettyBlock block = text $ show block +import Text.Pandoc.Options (WriterOptions (..)) +import Text.Show.Pretty (ppDoc) +import Text.PrettyPrint (renderStyle, Style(..), style, char) -- | Prettyprint Pandoc document. writeNative :: PandocMonad m => WriterOptions -> Pandoc -> m Text -writeNative opts (Pandoc meta blocks) = return $ - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - withHead = case writerTemplate opts of - Just _ -> \bs -> text ("Pandoc (" ++ show meta ++ ")") $$ - bs $$ cr - Nothing -> id - in render colwidth $ withHead $ prettyList $ map prettyBlock blocks +writeNative opts (Pandoc meta blocks) = do + let style' = style{ lineLength = writerColumns opts, + ribbonsPerLine = 1.2 } + return $ T.pack $ renderStyle style' $ + case writerTemplate opts of + Just _ -> ppDoc (Pandoc meta blocks) <> char '\n' + Nothing -> ppDoc blocks diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 5f3224c2f..8af64969b 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -2,7 +2,6 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} -{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.OpenDocument Copyright : Copyright (C) 2008-2020 Andrea Rossato and John MacFarlane @@ -193,7 +192,7 @@ formulaStyle mt = inTags False "style:style" ,("style:vertical-rel", "text")] else [("style:vertical-pos", "middle") - ,("style:vertical-rel", "paragraph-content") + ,("style:vertical-rel", "text") ,("style:horizontal-pos", "center") ,("style:horizontal-rel", "paragraph-content") ,("style:wrap", "none")] @@ -377,7 +376,7 @@ blockToOpenDocument o = \case Plain b -> if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b - Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] -> figure attr c s t + SimpleFigure attr c (s, t) -> figure attr c s t Para b -> if null b && not (isEnabled Ext_empty_paragraphs o) then return empty diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index d404f1c8d..d2a383212 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -22,6 +22,7 @@ import Data.List (intersect, intersperse, partition, transpose) import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Map as M import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Logging @@ -29,6 +30,7 @@ import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) +import Text.Pandoc.Citeproc.Locator (parseLocator, LocatorMap(..), LocatorInfo(..)) import Text.Pandoc.Writers.Shared data WriterState = @@ -103,11 +105,14 @@ blockToOrg :: PandocMonad m => Block -- ^ Block element -> Org m (Doc Text) blockToOrg Null = return empty -blockToOrg (Div attr bs) = divToOrg attr bs +blockToOrg (Div attr@(ident,_,_) bs) = do + opts <- gets stOptions + -- Strip off bibliography if citations enabled + if ident == "refs" && isEnabled Ext_citations opts + then return mempty + else divToOrg attr bs blockToOrg (Plain inlines) = inlineListToOrg inlines --- title beginning with fig: indicates that the image is a figure -blockToOrg (Para [Image attr txt (src,tgt)]) - | Just tit <- T.stripPrefix "fig:" tgt = do +blockToOrg (SimpleFigure attr txt (src, tit)) = do capt <- if null txt then return empty else ("#+caption: " <>) `fmap` inlineListToOrg txt @@ -154,7 +159,7 @@ blockToOrg (CodeBlock (_,classes,kvs) str) = do let (beg, end) = case at of [] -> ("#+begin_example" <> numberlines, "#+end_example") (x:_) -> ("#+begin_src " <> x <> numberlines, "#+end_src") - return $ literal beg $$ nest 2 (literal str) $$ text end $$ blankline + return $ literal beg $$ literal str $$ text end $$ blankline blockToOrg (BlockQuote blocks) = do contents <- blockListToOrg blocks return $ blankline $$ "#+begin_quote" $$ @@ -398,7 +403,35 @@ inlineToOrg (Quoted SingleQuote lst) = do inlineToOrg (Quoted DoubleQuote lst) = do contents <- inlineListToOrg lst return $ "\"" <> contents <> "\"" -inlineToOrg (Cite _ lst) = inlineListToOrg lst +inlineToOrg (Cite cs lst) = do + opts <- gets stOptions + if isEnabled Ext_citations opts + then do + let renderCiteItem c = do + citePref <- inlineListToOrg (citationPrefix c) + let (locinfo, suffix) = parseLocator locmap (citationSuffix c) + citeSuff <- inlineListToOrg suffix + let locator = case locinfo of + Just info -> literal $ + T.replace "\160" " " $ + T.replace "{" "" $ + T.replace "}" "" $ locatorRaw info + Nothing -> mempty + return $ hsep [ citePref + , ("@" <> literal (citationId c)) + , locator + , citeSuff ] + citeItems <- mconcat . intersperse "; " <$> mapM renderCiteItem cs + let sty = case cs of + (d:_) + | citationMode d == AuthorInText + -> literal "/t" + [d] + | citationMode d == SuppressAuthor + -> literal "/na" + _ -> mempty + return $ "[cite" <> sty <> ":" <> citeItems <> "]" + else inlineListToOrg lst inlineToOrg (Code _ str) = return $ "=" <> literal str <> "=" inlineToOrg (Str str) = return . literal $ escapeString str inlineToOrg (Math t str) = do @@ -461,20 +494,109 @@ pandocLangToOrg cs = "c" -> "C" "commonlisp" -> "lisp" "r" -> "R" - "bash" -> "shell" - "lillypond" -> "ly" + "bash" -> "sh" _ -> cs -- | List of language identifiers recognized by org-mode. +-- See <https://orgmode.org/manual/Languages.html>. orgLangIdentifiers :: [Text] orgLangIdentifiers = - [ "abc", "asymptote", "awk", "axiom", "C", "cpp", "calc", "clojure","comint" - , "coq", "css", "D", "ditaa", "dot", "ebnf", "elixir", "eukleides", "fomus" - , "forth", "F90", "gnuplot", "Translate", "groovy", "haskell" , "browser" - , "request", "io", "ipython", "J", "java", "js", "julia", "kotlin", "latex" - , "ledger", "ly", "lisp", "Flavored", "makefile", "mathematica", "mathomatic" - , "matlab", "max", "mongo", "mscgen", "cypher", "Caml", "octave" , "org", "oz" - , "perl", "picolisp", "plantuml", "processing", "prolog", "python" , "R" - , "rec", "ruby", "sass", "scala", "scheme", "screen", "sed", "shell", "shen" - , "sql", "sqlite", "stan", "ML", "stata", "tcl", "typescript", "vala" - ] + [ "asymptote" + , "lisp" + , "awk" + , "lua" + , "C" + , "matlab" + , "C++" + , "mscgen" + , "clojure" + , "ocaml" + , "css" + , "octave" + , "D" + , "org" + , "ditaa" + , "oz" + , "calc" + , "perl" + , "emacs-lisp" + , "plantuml" + , "eshell" + , "processing" + , "fortran" + , "python" + , "gnuplot" + , "R" + , "screen" + , "ruby" + , "dot" + , "sass" + , "haskell" + , "scheme" + , "java" + , "sed" + , "js" + , "sh" + , "latex" + , "sql" + , "ledger" + , "sqlite" + , "lilypond" + , "vala" ] + +-- taken from oc-csl.el in the org source tree: +locmap :: LocatorMap +locmap = LocatorMap $ M.fromList + [ ("bk." , "book") + , ("bks." , "book") + , ("book" , "book") + , ("chap." , "chapter") + , ("chaps." , "chapter") + , ("chapter" , "chapter") + , ("col." , "column") + , ("cols." , "column") + , ("column" , "column") + , ("figure" , "figure") + , ("fig." , "figure") + , ("figs." , "figure") + , ("folio" , "folio") + , ("fol." , "folio") + , ("fols." , "folio") + , ("number" , "number") + , ("no." , "number") + , ("nos." , "number") + , ("line" , "line") + , ("l." , "line") + , ("ll." , "line") + , ("note" , "note") + , ("n." , "note") + , ("nn." , "note") + , ("opus" , "opus") + , ("op." , "opus") + , ("opp." , "opus") + , ("page" , "page") + , ("p" , "page") + , ("p." , "page") + , ("pp." , "page") + , ("paragraph" , "paragraph") + , ("para." , "paragraph") + , ("paras." , "paragraph") + , ("¶" , "paragraph") + , ("¶¶" , "paragraph") + , ("part" , "part") + , ("pt." , "part") + , ("pts." , "part") + , ("§" , "section") + , ("§§" , "section") + , ("section" , "section") + , ("sec." , "section") + , ("secs." , "section") + , ("sub verbo" , "sub verbo") + , ("s.v." , "sub verbo") + , ("s.vv." , "sub verbo") + , ("verse" , "verse") + , ("v." , "verse") + , ("vv." , "verse") + , ("volume" , "volume") + , ("vol." , "volume") + , ("vols." , "volume") ] diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 157810216..e799297de 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -1,5 +1,10 @@ +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} {- | Module : Text.Pandoc.Writers.Powerpoint.Output Copyright : Copyright (C) 2017-2020 Jesse Rosenthal @@ -21,14 +26,21 @@ import Control.Monad.Reader import Control.Monad.State import Codec.Archive.Zip import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse) +import Data.Bifunctor (bimap) +import Data.CaseInsensitive (CI) +import qualified Data.CaseInsensitive as CI import Data.Default +import Data.Foldable (toList) +import Data.List.NonEmpty (nonEmpty, NonEmpty ((:|))) +import Data.Ratio ((%), Ratio) import Data.Text (Text) import qualified Data.Text as T -import qualified Data.Text.Read +import Data.Text.Read (decimal) import Data.Time (formatTime, defaultTimeLocale) import Data.Time.Clock (UTCTime) import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime) -import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension) +import Data.Traversable (for) +import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension, takeFileName) import Text.Pandoc.XML.Light as XML import Text.Pandoc.Definition import qualified Text.Pandoc.UTF8 as UTF8 @@ -48,11 +60,11 @@ import System.FilePath.Glob import Text.DocTemplates (FromContext(lookupContext), Context) import Text.DocLayout (literal) import Text.TeXMath +import Text.Pandoc.Logging (LogMessage(PowerpointTemplateWarning)) import Text.Pandoc.Writers.Math (convertMath) import Text.Pandoc.Writers.Powerpoint.Presentation import Text.Pandoc.Shared (tshow, stringify) import Skylighting (fromColor) -import Data.List.NonEmpty (nonEmpty) -- |The 'EMU' type is used to specify sizes in English Metric Units. type EMU = Integer @@ -105,11 +117,7 @@ data WriterEnv = WriterEnv { envRefArchive :: Archive , envInList :: Bool , envInNoteSlide :: Bool , envCurSlideId :: Int - -- the difference between the number at - -- the end of the slide file name and - -- the rId number - , envSlideIdOffset :: Int - , envContentType :: ContentType + , envPlaceholder :: Placeholder , envSlideIdMap :: M.Map SlideId Int -- maps the slide number to the -- corresponding notes id number. If there @@ -117,6 +125,8 @@ data WriterEnv = WriterEnv { envRefArchive :: Archive -- no entry in the map for it. , envSpeakerNotesIdMap :: M.Map Int Int , envInSpeakerNotes :: Bool + , envSlideLayouts :: Maybe SlideLayouts + , envOtherStyleIndents :: Maybe Indents } deriving (Show) @@ -131,17 +141,82 @@ instance Default WriterEnv where , envInList = False , envInNoteSlide = False , envCurSlideId = 1 - , envSlideIdOffset = 1 - , envContentType = NormalContent + , envPlaceholder = Placeholder ObjType 0 , envSlideIdMap = mempty , envSpeakerNotesIdMap = mempty , envInSpeakerNotes = False + , envSlideLayouts = Nothing + , envOtherStyleIndents = Nothing } -data ContentType = NormalContent - | TwoColumnLeftContent - | TwoColumnRightContent - deriving (Show, Eq) +type SlideLayouts = SlideLayoutsOf SlideLayout + +data SlideLayoutsOf a = SlideLayouts + { metadata :: a + , title :: a + , content :: a + , twoColumn :: a + , comparison :: a + , contentWithCaption :: a + , blank :: a + } deriving (Show, Eq, Functor, Foldable, Traversable) + +data SlideLayout = SlideLayout + { slElement :: Element + , slInReferenceDoc :: Bool + -- ^ True if the layout is in the provided reference doc, False if it's in + -- the default reference doc. + , slPath :: FilePath + , slEntry :: Entry + } deriving (Show) + +getSlideLayouts :: PandocMonad m => P m SlideLayouts +getSlideLayouts = asks envSlideLayouts >>= maybe (throwError e) pure + where + e = PandocSomeError ("Slide layouts aren't defined, even though they should " + <> "always be. This is a bug in pandoc.") + +-- | A placeholder within a layout, identified by type and index. +-- +-- E.g., @Placeholder ObjType 2@ is the third placeholder of type 'ObjType' in +-- the layout. +data Placeholder = Placeholder + { placeholderType :: PHType + , index :: Int + } deriving (Show, Eq) + +-- | Paragraph indentation info. +data Indents = Indents + { level1 :: Maybe LevelIndents + , level2 :: Maybe LevelIndents + , level3 :: Maybe LevelIndents + , level4 :: Maybe LevelIndents + , level5 :: Maybe LevelIndents + , level6 :: Maybe LevelIndents + , level7 :: Maybe LevelIndents + , level8 :: Maybe LevelIndents + , level9 :: Maybe LevelIndents + } deriving (Show, Eq) + +levelIndent :: Indents -> Int -> Maybe LevelIndents +levelIndent is index = getter is + where + getter = case index of + 0 -> level1 + 1 -> level2 + 2 -> level3 + 3 -> level4 + 4 -> level5 + 5 -> level6 + 6 -> level7 + 7 -> level8 + 8 -> level9 + _ -> const Nothing + +data LevelIndents = LevelIndents + { marL :: EMU + , indent :: EMU + } deriving (Show, Eq) data MediaInfo = MediaInfo { mInfoFilePath :: FilePath , mInfoLocalId :: Int @@ -155,12 +230,14 @@ data WriterState = WriterState { stLinkIds :: M.Map Int (M.Map Int LinkTarget) -- (FP, Local ID, Global ID, Maybe Mime) , stMediaIds :: M.Map Int [MediaInfo] , stMediaGlobalIds :: M.Map FilePath Int + , stFooterInfo :: Maybe FooterInfo } deriving (Show, Eq) instance Default WriterState where def = WriterState { stLinkIds = mempty , stMediaIds = mempty , stMediaGlobalIds = mempty + , stFooterInfo = Nothing } type P m = ReaderT WriterEnv (StateT WriterState m) @@ -199,11 +276,12 @@ alwaysInheritedPatterns = , "ppt/slideLayouts/_rels/slideLayout*.xml.rels" , "ppt/slideMasters/slideMaster1.xml" , "ppt/slideMasters/_rels/slideMaster1.xml.rels" - , "ppt/theme/theme1.xml" - , "ppt/theme/_rels/theme1.xml.rels" + , "ppt/theme/theme*.xml" + , "ppt/theme/_rels/theme*.xml.rels" , "ppt/presProps.xml" , "ppt/tableStyles.xml" , "ppt/media/image*" + , "ppt/fonts/*" ] -- We only look for these under special conditions @@ -212,8 +290,6 @@ contingentInheritedPatterns pres = [] <> if presHasSpeakerNotes pres then map compile [ "ppt/notesMasters/notesMaster*.xml" , "ppt/notesMasters/_rels/notesMaster*.xml.rels" - , "ppt/theme/theme2.xml" - , "ppt/theme/_rels/theme2.xml.rels" ] else [] @@ -264,7 +340,32 @@ presentationToArchiveP p@(Presentation docProps slides) = do T.unlines (map (T.pack . (" " <>)) missingFiles) ) - newArch' <- foldM copyFileToArchive emptyArchive filePaths + newArch <- foldM copyFileToArchive emptyArchive filePaths + + -- Add any layouts taken from the default archive, + -- overwriting any already added. + slideLayouts <- getSlideLayouts + let f layout = + if not (slInReferenceDoc layout) + then addEntryToArchive (slEntry layout) + else id + let newArch' = foldr f newArch slideLayouts + + master <- getMaster + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + presentationElement <- parseXml refArchive distArchive "ppt/presentation.xml" + modify (\s -> + s {stFooterInfo = + getFooterInfo (dcDate docProps) slideLayouts master presentationElement + }) + + -- Update the master to make sure it includes any layouts we've just added + masterRels <- getMasterRels + let (updatedMasterElem, updatedMasterRelElem) = updateMasterElems slideLayouts master masterRels + updatedMasterEntry <- elemToEntry "ppt/slideMasters/slideMaster1.xml" updatedMasterElem + updatedMasterRelEntry <- elemToEntry "ppt/slideMasters/_rels/slideMaster1.xml.rels" updatedMasterRelElem + -- we make a modified ppt/viewProps.xml out of the presentation viewProps viewPropsEntry <- makeViewPropsEntry -- we make a docProps/core.xml entry out of the presentation docprops @@ -274,10 +375,9 @@ presentationToArchiveP p@(Presentation docProps slides) = do -- we make this ourself in case there's something unexpected in the -- one in the reference doc. relsEntry <- topLevelRelsEntry - -- presentation entry and rels. We have to do the rels first to make - -- sure we know the correct offset for the rIds. - presEntry <- presentationToPresEntry p - presRelsEntry <- presentationToRelsEntry p + -- presentation entry and rels. + (presentationRIdUpdateData, presRelsEntry) <- presentationToRelsEntry p + presEntry <- presentationToPresEntry presentationRIdUpdateData p slideEntries <- mapM slideToEntry slides slideRelEntries <- mapM slideToSlideRelEntry slides spkNotesEntries <- catMaybes <$> mapM slideToSpeakerNotesEntry slides @@ -293,9 +393,169 @@ presentationToArchiveP p@(Presentation docProps slides) = do spkNotesEntries <> spkNotesRelEntries <> mediaEntries <> + [updatedMasterEntry, updatedMasterRelEntry] <> [contentTypesEntry, docPropsEntry, docCustomPropsEntry, relsEntry, presEntry, presRelsEntry, viewPropsEntry] +updateMasterElems :: SlideLayouts -> Element -> Element -> (Element, Element) +updateMasterElems layouts master masterRels = (updatedMaster, updatedMasterRels) + where + updatedMaster = master { elContent = updateSldLayoutIdLst <$> elContent master } + (updatedRelationshipIds, updatedMasterRels) = addLayoutRels masterRels + + updateSldLayoutIdLst :: Content -> Content + updateSldLayoutIdLst (Elem e) = case elName e of + (QName "sldLayoutIdLst" _ _) -> let + mkChild relationshipId (lastId, children) = let + thisId = lastId + 1 + newChild = Element + { elName = QName "sldLayoutId" Nothing (Just "p") + , elAttribs = + [ Attr (QName "id" Nothing Nothing) (T.pack (show thisId)) + , Attr (QName "id" Nothing (Just "r")) relationshipId + ] + , elContent = [] + , elLine = Nothing + } + in (thisId, Elem newChild : children) + newChildren = snd (foldr mkChild (maxIdNumber' e, []) updatedRelationshipIds) + in Elem e { elContent = elContent e <> newChildren } + _ -> Elem e + updateSldLayoutIdLst c = c + + addLayoutRels :: + Element -> + ([Text], Element) + addLayoutRels e = let + layoutsToAdd = filter (\l -> not (slInReferenceDoc l) && isNew e l) + (toList layouts) + newRelationships = snd (foldr mkRelationship (maxIdNumber e, []) layoutsToAdd) + newRelationshipIds = + mapMaybe (findElemAttr (QName "Id" Nothing Nothing)) newRelationships + mkRelationship layout (lastId, relationships) = let + thisId = lastId + 1 + slideLayoutPath = "../slideLayouts/" <> T.pack (takeFileName (slPath layout)) + newRelationship = Element + { elName = QName "Relationship" Nothing Nothing + , elAttribs = + [ Attr (QName "Id" Nothing Nothing) ("rId" <> T.pack (show thisId)) + , Attr (QName "Type" Nothing Nothing) "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout" + , Attr (QName "Target" Nothing Nothing) slideLayoutPath + ] + , elContent = [] + , elLine = Nothing + } + in (thisId, Elem newRelationship : relationships) + in (newRelationshipIds, e {elContent = elContent e <> newRelationships}) + + -- | Whether the layout needs to be added to the Relationships element. + isNew :: Element -> SlideLayout -> Bool + isNew relationships SlideLayout{..} = let + toDetails = fmap (takeFileName . T.unpack) + . findElemAttr (QName "Target" Nothing Nothing) + in takeFileName slPath `notElem` mapMaybe toDetails (elContent relationships) + + findElemAttr :: QName -> Content -> Maybe Text + findElemAttr attr (Elem e) = findAttr attr e + findElemAttr _ _ = Nothing + + maxIdNumber :: Element -> Integer + maxIdNumber relationships = maximum (0 : idNumbers) + where + idNumbers = mapMaybe (readTextAsInteger . T.drop 3) idAttributes + idAttributes = mapMaybe getIdAttribute (elContent relationships) + getIdAttribute (Elem e) = findAttr (QName "Id" Nothing Nothing) e + getIdAttribute _ = Nothing + + maxIdNumber' :: Element -> Integer + maxIdNumber' sldLayouts = maximum (0 : idNumbers) + where + idNumbers = mapMaybe readTextAsInteger idAttributes + idAttributes = mapMaybe getIdAttribute (elContent sldLayouts) + getIdAttribute (Elem e) = findAttr (QName "id" Nothing Nothing) e + getIdAttribute _ = Nothing + +data FooterInfo = FooterInfo + { fiDate :: SlideLayoutsOf (Maybe Element) + , fiFooter :: SlideLayoutsOf (Maybe Element) + , fiSlideNumber :: SlideLayoutsOf (Maybe Element) + , fiShowOnFirstSlide :: Bool + } deriving (Show, Eq) + +getFooterInfo :: Maybe Text -> SlideLayouts -> Element -> Element -> Maybe FooterInfo +getFooterInfo date layouts master presentation = do + let ns = elemToNameSpaces master + hf <- findChild (elemName ns "p" "hf") master + let fiDate = let + f layoutDate = + case date of + Nothing -> layoutDate + Just d -> + if dateIsAutomatic (elemToNameSpaces layoutDate) layoutDate + then layoutDate + else replaceDate d layoutDate + in fmap f . getShape "dt" hf . slElement <$> layouts + fiFooter = getShape "ftr" hf . slElement <$> layouts + fiSlideNumber = getShape "sldNum" hf . slElement <$> layouts + fiShowOnFirstSlide = + fromMaybe True + (getBooleanAttribute "showSpecialPlsOnTitleSld" presentation) + pure FooterInfo{..} + where + getShape t hf layout = + if fromMaybe True (getBooleanAttribute t hf) + then do + let ns = elemToNameSpaces layout + cSld <- findChild (elemName ns "p" "cSld") layout + spTree <- findChild (elemName ns "p" "spTree") cSld + let containsPlaceholder sp = fromMaybe False $ do + nvSpPr <- findChild (elemName ns "p" "nvSpPr") sp + nvPr <- findChild (elemName ns "p" "nvPr") nvSpPr + ph <- findChild (elemName ns "p" "ph") nvPr + placeholderType <- findAttr (QName "type" Nothing Nothing) ph + pure (placeholderType == t) + listToMaybe (filterChildren containsPlaceholder spTree) + else Nothing + + dateIsAutomatic :: NameSpaces -> Element -> Bool + dateIsAutomatic ns shape = isJust $ do + txBody <- findChild (elemName ns "p" "txBody") shape + p <- findChild (elemName ns "a" "p") txBody + findChild (elemName ns "a" "fld") p + + replaceDate :: Text -> Element -> Element + replaceDate newDate e = + e { elContent = + case (elName e) of + QName "t" _ (Just "a") -> + [ Text (CData { cdVerbatim = CDataText + , cdData = newDate + , cdLine = Nothing + }) + ] + _ -> ifElem (replaceDate newDate) <$> elContent e + } + + ifElem :: (Element -> Element) -> (Content -> Content) + ifElem f (Elem e) = Elem (f e) + ifElem _ c = c + + getBooleanAttribute t e = + (`elem` ["1", "true"]) <$> + (findAttr (QName t Nothing Nothing) e) + +footerElements :: + PandocMonad m => + (forall a. SlideLayoutsOf a -> a) -> + P m [Content] +footerElements layout = do + footerInfo <- gets stFooterInfo + pure + $ Elem <$> + (toList (footerInfo >>= layout . fiDate) + <> toList (footerInfo >>= layout . fiFooter) + <> toList (footerInfo >>= layout . fiSlideNumber)) + makeSlideIdMap :: Presentation -> M.Map SlideId Int makeSlideIdMap (Presentation _ slides) = M.fromList $ map slideId slides `zip` [1..] @@ -304,9 +564,9 @@ makeSpeakerNotesMap :: Presentation -> M.Map Int Int makeSpeakerNotesMap (Presentation _ slides) = M.fromList $ mapMaybe f (slides `zip` [1..]) `zip` [1..] - where f (Slide _ _ notes, n) = if notes == mempty - then Nothing - else Just n + where f (Slide _ _ notes _, n) = if notes == mempty + then Nothing + else Just n presentationToArchive :: PandocMonad m => WriterOptions -> Meta -> Presentation -> m Archive @@ -318,6 +578,71 @@ presentationToArchive opts meta pres = do Nothing -> toArchive . BL.fromStrict <$> P.readDataFile "reference.pptx" + let (referenceLayouts, defaultReferenceLayouts) = + (getLayoutsFromArchive refArchive, getLayoutsFromArchive distArchive) + let layoutTitles = SlideLayouts { metadata = "Title Slide" :: Text + , title = "Section Header" + , content = "Title and Content" + , twoColumn = "Two Content" + , comparison = "Comparison" + , contentWithCaption = "Content with Caption" + , blank = "Blank" + } + layouts <- for layoutTitles $ \layoutTitle -> do + let layout = M.lookup (CI.mk layoutTitle) referenceLayouts + let defaultLayout = M.lookup (CI.mk layoutTitle) defaultReferenceLayouts + case (layout, defaultLayout) of + (Nothing, Nothing) -> + throwError (PandocSomeError ("Couldn't find layout named \"" + <> layoutTitle <> "\" in the provided " + <> "reference doc or in the default " + <> "reference doc included with pandoc.")) + (Nothing, Just ((element, path, entry) :| _)) -> do + P.report (PowerpointTemplateWarning + ("Couldn't find layout named \"" + <> layoutTitle <> "\" in provided " + <> "reference doc. Falling back to " + <> "the default included with pandoc.")) + pure SlideLayout { slElement = element + , slPath = path + , slEntry = entry + , slInReferenceDoc = False + } + (Just ((element, path, entry) :| _), _ ) -> + pure SlideLayout { slElement = element + , slPath = path + , slEntry = entry + , slInReferenceDoc = True + } + + master <- getMaster' refArchive distArchive + + let otherStyleIndents = do + let ns = elemToNameSpaces master + txStyles <- findChild (elemName ns "p" "txStyles") master + otherStyle <- findChild (elemName ns "p" "otherStyle") txStyles + let makeLevelIndents name = do + e <- findChild (elemName ns "a" name) otherStyle + pure LevelIndents + { indent = fromMaybe (-342900) + (findAttr (QName "indent" Nothing Nothing) e + >>= readTextAsInteger) + , marL = fromMaybe 347663 + (findAttr (QName "marL" Nothing Nothing) e + >>= readTextAsInteger) + } + pure Indents + { level1 = makeLevelIndents "lvl1pPr" + , level2 = makeLevelIndents "lvl2pPr" + , level3 = makeLevelIndents "lvl3pPr" + , level4 = makeLevelIndents "lvl4pPr" + , level5 = makeLevelIndents "lvl5pPr" + , level6 = makeLevelIndents "lvl6pPr" + , level7 = makeLevelIndents "lvl7pPr" + , level8 = makeLevelIndents "lvl8pPr" + , level9 = makeLevelIndents "lvl9pPr" + } + utctime <- P.getTimestamp presSize <- case getPresentationSize refArchive distArchive of @@ -341,6 +666,8 @@ presentationToArchive opts meta pres = do , envPresentationSize = presSize , envSlideIdMap = makeSlideIdMap pres , envSpeakerNotesIdMap = makeSpeakerNotesMap pres + , envSlideLayouts = Just layouts + , envOtherStyleIndents = otherStyleIndents } let st = def { stMediaGlobalIds = initialGlobalIds refArchive distArchive @@ -348,7 +675,30 @@ presentationToArchive opts meta pres = do runP env st $ presentationToArchiveP pres - +-- | Get all slide layouts from an archive, as a map where the layout's name +-- gives the map key. +-- +-- For each layout, the map contains its XML representation, its path within +-- the archive, and the archive entry. +getLayoutsFromArchive :: Archive -> M.Map (CI Text) (NonEmpty (Element, FilePath, Entry)) +getLayoutsFromArchive archive = + M.fromListWith (<>) ((\t@(e, _, _) -> (CI.mk (name e), pure t)) <$> layouts) + where + layouts :: [(Element, FilePath, Entry)] + layouts = mapMaybe findElementByPath paths + parseXml' entry = case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of + Left _ -> Nothing + Right element -> Just element + findElementByPath :: FilePath -> Maybe (Element, FilePath, Entry) + findElementByPath path = do + entry <- findEntryByPath path archive + element <- parseXml' entry + pure (element, path, entry) + paths = filter (match (compile "ppt/slideLayouts/slideLayout*.xml")) (filesInArchive archive) + name element = fromMaybe "Untitled layout" $ do + let ns = elemToNameSpaces element + cSld <- findChild (elemName ns "p" "cSld") element + findAttr (QName "name" Nothing Nothing) cSld -------------------------------------------------- @@ -365,38 +715,59 @@ curSlideHasSpeakerNotes = -------------------------------------------------- getLayout :: PandocMonad m => Layout -> P m Element -getLayout layout = do - let layoutpath = case layout of - MetadataSlide{} -> "ppt/slideLayouts/slideLayout1.xml" - TitleSlide{} -> "ppt/slideLayouts/slideLayout3.xml" - ContentSlide{} -> "ppt/slideLayouts/slideLayout2.xml" - TwoColumnSlide{} -> "ppt/slideLayouts/slideLayout4.xml" - refArchive <- asks envRefArchive - distArchive <- asks envDistArchive - parseXml refArchive distArchive layoutpath +getLayout layout = getElement <$> getSlideLayouts + where + getElement = + slElement . case layout of + MetadataSlide{} -> metadata + TitleSlide{} -> title + ContentSlide{} -> content + TwoColumnSlide{} -> twoColumn + ComparisonSlide{} -> comparison + ContentWithCaptionSlide{} -> contentWithCaption + BlankSlide{} -> blank shapeHasId :: NameSpaces -> T.Text -> Element -> Bool -shapeHasId ns ident element - | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element - , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr - , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr = - nm == ident - | otherwise = False +shapeHasId ns ident element = getShapeId ns element == Just ident -getContentShape :: PandocMonad m => NameSpaces -> Element -> P m Element +getShapeId :: NameSpaces -> Element -> Maybe Text +getShapeId ns element = do + nvSpPr <- findChild (elemName ns "p" "nvSpPr") element + cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr + findAttr (QName "id" Nothing Nothing) cNvPr + +type ShapeId = Integer + +getContentShape :: PandocMonad m => NameSpaces -> Element -> P m (Maybe ShapeId, Element) getContentShape ns spTreeElem | isElem ns "p" "spTree" spTreeElem = do - contentType <- asks envContentType - let contentShapes = getShapesByPlaceHolderType ns spTreeElem ObjType - case contentType of - NormalContent | (sp : _) <- contentShapes -> return sp - TwoColumnLeftContent | (sp : _) <- contentShapes -> return sp - TwoColumnRightContent | (_ : sp : _) <- contentShapes -> return sp - _ -> throwError $ PandocSomeError - "Could not find shape for Powerpoint content" + ph@Placeholder{index, placeholderType} <- asks envPlaceholder + case drop index (getShapesByPlaceHolderType ns spTreeElem placeholderType) of + sp : _ -> let + shapeId = getShapeId ns sp >>= readTextAsInteger + in return (shapeId, sp) + [] -> throwError $ PandocSomeError $ missingPlaceholderMessage ph getContentShape _ _ = throwError $ PandocSomeError "Attempted to find content on non shapeTree" +missingPlaceholderMessage :: Placeholder -> Text +missingPlaceholderMessage Placeholder{..} = + "Could not find a " <> ordinal + <> " placeholder of type " <> placeholderText + where + ordinal = T.pack (show index) <> + case (index `mod` 100, index `mod` 10) of + (11, _) -> "th" + (12, _) -> "th" + (13, _) -> "th" + (_, 1) -> "st" + (_, 2) -> "nd" + (_, 3) -> "rd" + _ -> "th" + placeholderText = case placeholderType of + ObjType -> "obj (or nothing)" + PHType t -> t + getShapeDimensions :: NameSpaces -> Element -> Maybe ((Integer, Integer), (Integer, Integer)) @@ -438,7 +809,7 @@ getContentShapeSize ns layout master | isElem ns "p" "sldLayout" layout , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do - sp <- getContentShape ns spTree + (_, sp) <- getContentShape ns spTree case getShapeDimensions ns sp of Just sz -> return sz Nothing -> do let mbSz = @@ -602,8 +973,18 @@ getMaster :: PandocMonad m => P m Element getMaster = do refArchive <- asks envRefArchive distArchive <- asks envDistArchive + getMaster' refArchive distArchive + +getMaster' :: PandocMonad m => Archive -> Archive -> m Element +getMaster' refArchive distArchive = parseXml refArchive distArchive "ppt/slideMasters/slideMaster1.xml" +getMasterRels :: PandocMonad m => P m Element +getMasterRels = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + parseXml refArchive distArchive "ppt/slideMasters/_rels/slideMaster1.xml.rels" + -- We want to get the header dimensions, so we can make sure that the -- image goes underneath it. We only use this in a content slide if it -- has a header. @@ -654,41 +1035,44 @@ captionHeight = 40 createCaption :: PandocMonad m => ((Integer, Integer), (Integer, Integer)) -> [ParaElem] - -> P m Element + -> P m (ShapeId, Element) createCaption contentShapeDimensions paraElements = do let para = Paragraph def{pPropAlign = Just AlgnCenter} paraElements elements <- mapM paragraphToElement [para] let ((x, y), (cx, cy)) = contentShapeDimensions let txBody = mknode "p:txBody" [] $ [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] <> elements - return $ - mknode "p:sp" [] [ mknode "p:nvSpPr" [] - [ mknode "p:cNvPr" [("id","1"), ("name","TextBox 3")] () - , mknode "p:cNvSpPr" [("txBox", "1")] () - , mknode "p:nvPr" [] () - ] - , mknode "p:spPr" [] - [ mknode "a:xfrm" [] - [ mknode "a:off" [("x", tshow $ 12700 * x), - ("y", tshow $ 12700 * (y + cy - captionHeight))] () - , mknode "a:ext" [("cx", tshow $ 12700 * cx), - ("cy", tshow $ 12700 * captionHeight)] () - ] - , mknode "a:prstGeom" [("prst", "rect")] - [ mknode "a:avLst" [] () - ] - , mknode "a:noFill" [] () - ] - , txBody - ] + return + ( 1 + , mknode "p:sp" [] [ mknode "p:nvSpPr" [] + [ mknode "p:cNvPr" [("id","1"), ("name","TextBox 3")] () + , mknode "p:cNvSpPr" [("txBox", "1")] () + , mknode "p:nvPr" [] () + ] + , mknode "p:spPr" [] + [ mknode "a:xfrm" [] + [ mknode "a:off" [("x", tshow $ 12700 * x), + ("y", tshow $ 12700 * (y + cy - captionHeight))] () + , mknode "a:ext" [("cx", tshow $ 12700 * cx), + ("cy", tshow $ 12700 * captionHeight)] () + ] + , mknode "a:prstGeom" [("prst", "rect")] + [ mknode "a:avLst" [] () + ] + , mknode "a:noFill" [] () + ] + , txBody + ] + ) makePicElements :: PandocMonad m => Element -> PicProps -> MediaInfo + -> Text -> [ParaElem] - -> P m [Element] -makePicElements layout picProps mInfo alt = do + -> P m [(ShapeId, Element)] +makePicElements layout picProps mInfo titleText alt = do opts <- asks envOpts (pageWidth, pageHeight) <- asks envPresentationSize -- hasHeader <- asks envSlideHasHeader @@ -721,7 +1105,11 @@ makePicElements layout picProps mInfo alt = do ,("noChangeAspect","1")] () -- cNvPr will contain the link information so we do that separately, -- and register the link if necessary. - let cNvPrAttr = [("descr", T.pack $ mInfoFilePath mInfo), + let description = (if T.null titleText + then "" + else titleText <> "\n\n") + <> T.pack (mInfoFilePath mInfo) + let cNvPrAttr = [("descr", description), ("id","0"), ("name","Picture 1")] cNvPr <- case picPropLink picProps of @@ -751,10 +1139,12 @@ makePicElements layout picProps mInfo alt = do let spPr = mknode "p:spPr" [("bwMode","auto")] [xfrm, prstGeom, mknode "a:noFill" [] (), ln] - let picShape = mknode "p:pic" [] - [ nvPicPr - , blipFill - , spPr ] + let picShape = ( 0 + , mknode "p:pic" [] + [ nvPicPr + , blipFill + , spPr ] + ) -- And now, maybe create the caption: if hasCaption @@ -762,6 +1152,12 @@ makePicElements layout picProps mInfo alt = do return [picShape, cap] else return [picShape] +consolidateRuns :: [ParaElem] -> [ParaElem] +consolidateRuns [] = [] +consolidateRuns (Run pr1 s1 : Run pr2 s2 : xs) + | pr1 == pr2 = consolidateRuns (Run pr1 (s1 <> s2) : xs) +consolidateRuns (x:xs) = x : consolidateRuns xs + paraElemToElements :: PandocMonad m => ParaElem -> P m [Content] paraElemToElements Break = return [Elem $ mknode "a:br" [] ()] @@ -867,15 +1263,32 @@ surroundWithMathAlternate element = paragraphToElement :: PandocMonad m => Paragraph -> P m Element paragraphToElement par = do + indents <- asks envOtherStyleIndents let - attrs = [("lvl", tshow $ pPropLevel $ paraProps par)] <> - (case pPropMarginLeft (paraProps par) of - Just px -> [("marL", tshow $ pixelsToEmu px)] - Nothing -> [] - ) <> - (case pPropIndent (paraProps par) of - Just px -> [("indent", tshow $ pixelsToEmu px)] - Nothing -> [] + lvl = pPropLevel (paraProps par) + attrs = [("lvl", tshow lvl)] <> + (case (pPropIndent (paraProps par), pPropMarginLeft (paraProps par)) of + (Just px1, Just px2) -> [ ("indent", tshow $ pixelsToEmu px1) + , ("marL", tshow $ pixelsToEmu px2) + ] + (Just px1, Nothing) -> [("indent", tshow $ pixelsToEmu px1)] + (Nothing, Just px2) -> [("marL", tshow $ pixelsToEmu px2)] + (Nothing, Nothing) -> fromMaybe [] $ do + indents' <- indents + thisLevel <- levelIndent indents' lvl + nextLevel <- levelIndent indents' (lvl + 1) + let (m, i) = + case pPropBullet (paraProps par) of + Nothing -> + (Just (marL thisLevel), Just 0) + Just (AutoNumbering _) -> + ( Just (marL nextLevel) + , Just (marL thisLevel - marL nextLevel) + ) + Just Bullet -> (Nothing, Nothing) + pure ( toList ((,) "indent" . tshow <$> i) + <> toList ((,) "marL" . tshow <$> m) + ) ) <> (case pPropAlign (paraProps par) of Just AlgnLeft -> [("algn", "l")] @@ -897,48 +1310,53 @@ paragraphToElement par = do [mknode "a:buAutoNum" (autoNumAttrs attrs') ()] Nothing -> [mknode "a:buNone" [] ()] ) - paras <- mapM paraElemToElements (paraElems par) - return $ mknode "a:p" [] $ - [Elem $ mknode "a:pPr" attrs props] <> concat paras + paras <- mconcat <$> mapM paraElemToElements (consolidateRuns (paraElems par)) + return $ mknode "a:p" [] $ [Elem $ mknode "a:pPr" attrs props] <> paras -shapeToElement :: PandocMonad m => Element -> Shape -> P m Element +shapeToElement :: PandocMonad m => Element -> Shape -> P m (Maybe ShapeId, Element) shapeToElement layout (TextBox paras) | ns <- elemToNameSpaces layout , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do - sp <- getContentShape ns spTree + (shapeId, sp) <- getContentShape ns spTree elements <- mapM paragraphToElement paras let txBody = mknode "p:txBody" [] $ [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] <> elements emptySpPr = mknode "p:spPr" [] () return + . (shapeId,) . surroundWithMathAlternate . replaceNamedChildren ns "p" "txBody" [txBody] . replaceNamedChildren ns "p" "spPr" [emptySpPr] $ sp -- GraphicFrame and Pic should never reach this. -shapeToElement _ _ = return $ mknode "p:sp" [] () +shapeToElement _ _ = return (Nothing, mknode "p:sp" [] ()) -shapeToElements :: PandocMonad m => Element -> Shape -> P m [Content] -shapeToElements layout (Pic picProps fp alt) = do +shapeToElements :: PandocMonad m => Element -> Shape -> P m [(Maybe ShapeId, Content)] +shapeToElements layout (Pic picProps fp titleText alt) = do mInfo <- registerMedia fp alt case mInfoExt mInfo of - Just _ -> map Elem <$> - makePicElements layout picProps mInfo alt + Just _ -> map (bimap Just Elem) <$> + makePicElements layout picProps mInfo titleText alt Nothing -> shapeToElements layout $ TextBox [Paragraph def alt] -shapeToElements layout (GraphicFrame tbls cptn) = map Elem <$> +shapeToElements layout (GraphicFrame tbls cptn) = map (bimap Just Elem) <$> graphicFrameToElements layout tbls cptn shapeToElements _ (RawOOXMLShape str) = return - [Text (CData CDataRaw str Nothing)] + [(Nothing, Text (CData CDataRaw str Nothing))] shapeToElements layout shp = do - element <- shapeToElement layout shp - return [Elem element] + (shapeId, element) <- shapeToElement layout shp + return [(shapeId, Elem element)] -shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Content] +shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [(Maybe ShapeId, Content)] shapesToElements layout shps = concat <$> mapM (shapeToElements layout) shps -graphicFrameToElements :: PandocMonad m => Element -> [Graphic] -> [ParaElem] -> P m [Element] +graphicFrameToElements :: + PandocMonad m => + Element -> + [Graphic] -> + [ParaElem] -> + P m [(ShapeId, Element)] graphicFrameToElements layout tbls caption = do -- get the sizing master <- getMaster @@ -952,21 +1370,23 @@ graphicFrameToElements layout tbls caption = do elements <- mapM (graphicToElement cx) tbls let graphicFrameElts = - mknode "p:graphicFrame" [] $ - [ mknode "p:nvGraphicFramePr" [] - [ mknode "p:cNvPr" [("id", "6"), ("name", "Content Placeholder 5")] () - , mknode "p:cNvGraphicFramePr" [] - [mknode "a:graphicFrameLocks" [("noGrp", "1")] ()] - , mknode "p:nvPr" [] - [mknode "p:ph" [("idx", "1")] ()] - ] - , mknode "p:xfrm" [] - [ mknode "a:off" [("x", tshow $ 12700 * x), - ("y", tshow $ 12700 * y)] () - , mknode "a:ext" [("cx", tshow $ 12700 * cx), - ("cy", tshow $ 12700 * cy)] () - ] - ] <> elements + ( 6 + , mknode "p:graphicFrame" [] $ + [ mknode "p:nvGraphicFramePr" [] + [ mknode "p:cNvPr" [("id", "6"), ("name", "Content Placeholder 5")] () + , mknode "p:cNvGraphicFramePr" [] + [mknode "a:graphicFrameLocks" [("noGrp", "1")] ()] + , mknode "p:nvPr" [] + [mknode "p:ph" [("idx", "1")] ()] + ] + , mknode "p:xfrm" [] + [ mknode "a:off" [("x", tshow $ 12700 * x), + ("y", tshow $ 12700 * y)] () + , mknode "a:ext" [("cx", tshow $ 12700 * cx), + ("cy", tshow $ 12700 * cy)] () + ] + ] <> elements + ) if not $ null caption then do capElt <- createCaption ((x, y), (cx, cytmp)) caption @@ -1088,124 +1508,433 @@ getShapeByPlaceHolderTypes ns spTreeElem (s:ss) = Just element -> Just element Nothing -> getShapeByPlaceHolderTypes ns spTreeElem ss -nonBodyTextToElement :: PandocMonad m => Element -> [PHType] -> [ParaElem] -> P m Element +nonBodyTextToElement :: + PandocMonad m => + Element -> + [PHType] -> + [ParaElem] -> + P m (Maybe ShapeId, Element) nonBodyTextToElement layout phTypes paraElements | ns <- elemToNameSpaces layout , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld - , Just sp <- getShapeByPlaceHolderTypes ns spTree phTypes = do + , Just sp <- getShapeByPlaceHolderTypes ns spTree phTypes + , Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") sp + , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr + , Just shapeId <- findAttr (nodename "id") cNvPr + , Right (shapeIdNum, _) <- decimal shapeId = do let hdrPara = Paragraph def paraElements element <- paragraphToElement hdrPara let txBody = mknode "p:txBody" [] $ [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] <> [element] - return $ replaceNamedChildren ns "p" "txBody" [txBody] sp + return (Just shapeIdNum, replaceNamedChildren ns "p" "txBody" [txBody] sp) -- XXX: TODO - | otherwise = return $ mknode "p:sp" [] () - -contentToElement :: PandocMonad m => Element -> [ParaElem] -> [Shape] -> P m Element + | otherwise = return (Nothing, mknode "p:sp" [] ()) + +data ContentShapeIds = ContentShapeIds + { contentHeaderId :: Maybe ShapeId + , contentContentIds :: [ShapeId] + } + +contentToElement :: + PandocMonad m => + Element -> + [ParaElem] -> + [Shape] -> + P m (Maybe ContentShapeIds, Element) contentToElement layout hdrShape shapes | ns <- elemToNameSpaces layout , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do - element <- nonBodyTextToElement layout [PHType "title"] hdrShape + (shapeId, element) <- nonBodyTextToElement layout [PHType "title"] hdrShape let hdrShapeElements = [Elem element | not (null hdrShape)] - contentElements <- local - (\env -> env {envContentType = NormalContent}) + contentHeaderId = if null hdrShape then Nothing else shapeId + content' <- local + (\env -> env {envPlaceholder = Placeholder ObjType 0}) (shapesToElements layout shapes) - return $ buildSpTree ns spTree (hdrShapeElements <> contentElements) -contentToElement _ _ _ = return $ mknode "p:sp" [] () - -twoColumnToElement :: PandocMonad m => Element -> [ParaElem] -> [Shape] -> [Shape] -> P m Element + let contentContentIds = mapMaybe fst content' + contentElements = snd <$> content' + footer <- footerElements content + return ( Just ContentShapeIds{..} + , buildSpTree ns spTree (hdrShapeElements <> contentElements <> footer) + ) +contentToElement _ _ _ = return (Nothing, mknode "p:sp" [] ()) + +data TwoColumnShapeIds = TwoColumnShapeIds + { twoColumnHeaderId :: Maybe ShapeId + , twoColumnLeftIds :: [ShapeId] + , twoColumnRightIds :: [ShapeId] + } + +twoColumnToElement :: + PandocMonad m => + Element -> + [ParaElem] -> + [Shape] -> + [Shape] -> + P m (Maybe TwoColumnShapeIds, Element) twoColumnToElement layout hdrShape shapesL shapesR | ns <- elemToNameSpaces layout , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do - element <- nonBodyTextToElement layout [PHType "title"] hdrShape + (headerId, element) <- nonBodyTextToElement layout [PHType "title"] hdrShape let hdrShapeElements = [Elem element | not (null hdrShape)] - contentElementsL <- local - (\env -> env {envContentType =TwoColumnLeftContent}) - (shapesToElements layout shapesL) - contentElementsR <- local - (\env -> env {envContentType =TwoColumnRightContent}) - (shapesToElements layout shapesR) + twoColumnHeaderId = if null hdrShape then Nothing else headerId + contentL <- local (\env -> env {envPlaceholder = Placeholder ObjType 0}) + (shapesToElements layout shapesL) + let twoColumnLeftIds = mapMaybe fst contentL + contentElementsL = snd <$> contentL + contentR <- local (\env -> env {envPlaceholder = Placeholder ObjType 1}) + (shapesToElements layout shapesR) + let (twoColumnRightIds) = (mapMaybe fst contentR) + contentElementsR = snd <$> contentR -- let contentElementsL' = map (setIdx ns "1") contentElementsL -- contentElementsR' = map (setIdx ns "2") contentElementsR - return $ buildSpTree ns spTree $ - hdrShapeElements <> contentElementsL <> contentElementsR -twoColumnToElement _ _ _ _= return $ mknode "p:sp" [] () - - -titleToElement :: PandocMonad m => Element -> [ParaElem] -> P m Element + footer <- footerElements twoColumn + return + $ (Just TwoColumnShapeIds{..}, ) + $ buildSpTree ns spTree + $ hdrShapeElements <> contentElementsL <> contentElementsR <> footer +twoColumnToElement _ _ _ _ = return (Nothing, mknode "p:sp" [] ()) + +data ComparisonShapeIds = ComparisonShapeIds + { comparisonHeaderId :: Maybe ShapeId + , comparisonLeftTextIds :: [ShapeId] + , comparisonLeftContentIds :: [ShapeId] + , comparisonRightTextIds :: [ShapeId] + , comparisonRightContentIds :: [ShapeId] + } + +comparisonToElement :: + PandocMonad m => + Element -> + [ParaElem] -> + ([Shape], [Shape]) -> + ([Shape], [Shape]) -> + P m (Maybe ComparisonShapeIds, Element) +comparisonToElement layout hdrShape (shapesL1, shapesL2) (shapesR1, shapesR2) + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + (headerShapeId, element) <- nonBodyTextToElement layout [PHType "title"] hdrShape + let hdrShapeElements = [Elem element | not (null hdrShape)] + comparisonHeaderId = if null hdrShape then Nothing else headerShapeId + contentL1 <- local (\env -> env {envPlaceholder = Placeholder (PHType "body") 0}) + (shapesToElements layout shapesL1) + let comparisonLeftTextIds = mapMaybe fst contentL1 + contentElementsL1 = snd <$> contentL1 + contentL2 <- local (\env -> env {envPlaceholder = Placeholder ObjType 0}) + (shapesToElements layout shapesL2) + let comparisonLeftContentIds = mapMaybe fst contentL2 + contentElementsL2 = snd <$> contentL2 + contentR1 <- local (\env -> env {envPlaceholder = Placeholder (PHType "body") 1}) + (shapesToElements layout shapesR1) + let comparisonRightTextIds = mapMaybe fst contentR1 + contentElementsR1 = snd <$> contentR1 + contentR2 <- local (\env -> env {envPlaceholder = Placeholder ObjType 1}) + (shapesToElements layout shapesR2) + let comparisonRightContentIds = mapMaybe fst contentR2 + contentElementsR2 = snd <$> contentR2 + footer <- footerElements comparison + return + $ (Just ComparisonShapeIds{..}, ) + $ buildSpTree ns spTree + $ mconcat [ hdrShapeElements + , contentElementsL1 + , contentElementsL2 + , contentElementsR1 + , contentElementsR2 + ] <> footer +comparisonToElement _ _ _ _= return (Nothing, mknode "p:sp" [] ()) + +data ContentWithCaptionShapeIds = ContentWithCaptionShapeIds + { contentWithCaptionHeaderId :: Maybe ShapeId + , contentWithCaptionCaptionIds :: [ShapeId] + , contentWithCaptionContentIds :: [ShapeId] + } + +contentWithCaptionToElement :: + PandocMonad m => + Element -> + [ParaElem] -> + [Shape] -> + [Shape] -> + P m (Maybe ContentWithCaptionShapeIds, Element) +contentWithCaptionToElement layout hdrShape textShapes contentShapes + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + (shapeId, element) <- nonBodyTextToElement layout [PHType "title"] hdrShape + let hdrShapeElements = [Elem element | not (null hdrShape)] + contentWithCaptionHeaderId = if null hdrShape then Nothing else shapeId + text <- local (\env -> env {envPlaceholder = Placeholder (PHType "body") 0}) + (shapesToElements layout textShapes) + let contentWithCaptionCaptionIds = mapMaybe fst text + textElements = snd <$> text + content <- local (\env -> env {envPlaceholder = Placeholder ObjType 0}) + (shapesToElements layout contentShapes) + let contentWithCaptionContentIds = mapMaybe fst content + contentElements = snd <$> content + footer <- footerElements contentWithCaption + return + $ (Just ContentWithCaptionShapeIds{..}, ) + $ buildSpTree ns spTree + $ mconcat [ hdrShapeElements + , textElements + , contentElements + ] <> footer +contentWithCaptionToElement _ _ _ _ = return (Nothing, mknode "p:sp" [] ()) + +blankToElement :: + PandocMonad m => + Element -> + P m Element +blankToElement layout + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = + buildSpTree ns spTree <$> footerElements blank +blankToElement _ = return $ mknode "p:sp" [] () + +newtype TitleShapeIds = TitleShapeIds + { titleHeaderId :: Maybe ShapeId + } + +titleToElement :: + PandocMonad m => + Element -> + [ParaElem] -> + P m (Maybe TitleShapeIds, Element) titleToElement layout titleElems | ns <- elemToNameSpaces layout , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do - element <- nonBodyTextToElement layout [PHType "title", PHType "ctrTitle"] titleElems + (shapeId, element) <- nonBodyTextToElement layout [PHType "title", PHType "ctrTitle"] titleElems let titleShapeElements = [Elem element | not (null titleElems)] - return $ buildSpTree ns spTree titleShapeElements -titleToElement _ _ = return $ mknode "p:sp" [] () - -metadataToElement :: PandocMonad m => Element -> [ParaElem] -> [ParaElem] -> [[ParaElem]] -> [ParaElem] -> P m Element + titleHeaderId = if null titleElems then Nothing else shapeId + footer <- footerElements title + return + $ (Just TitleShapeIds{..}, ) + $ buildSpTree ns spTree (titleShapeElements <> footer) +titleToElement _ _ = return (Nothing, mknode "p:sp" [] ()) + +data MetadataShapeIds = MetadataShapeIds + { metadataTitleId :: Maybe ShapeId + , metadataSubtitleId :: Maybe ShapeId + , metadataDateId :: Maybe ShapeId + } + +metadataToElement :: + PandocMonad m => + Element -> + [ParaElem] -> + [ParaElem] -> + [[ParaElem]] -> + [ParaElem] -> + P m (Maybe MetadataShapeIds, Element) metadataToElement layout titleElems subtitleElems authorsElems dateElems | ns <- elemToNameSpaces layout , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do - titleShapeElements <- if null titleElems - then return [] - else sequence [nonBodyTextToElement layout [PHType "ctrTitle"] titleElems] let combinedAuthorElems = intercalate [Break] authorsElems subtitleAndAuthorElems = intercalate [Break, Break] [subtitleElems, combinedAuthorElems] - subtitleShapeElements <- if null subtitleAndAuthorElems - then return [] - else sequence [nonBodyTextToElement layout [PHType "subTitle"] subtitleAndAuthorElems] - dateShapeElements <- if null dateElems - then return [] - else sequence [nonBodyTextToElement layout [PHType "dt"] dateElems] - return . buildSpTree ns spTree . map Elem $ - (titleShapeElements <> subtitleShapeElements <> dateShapeElements) -metadataToElement _ _ _ _ _ = return $ mknode "p:sp" [] () + (titleId, titleElement) <- nonBodyTextToElement layout [PHType "ctrTitle"] titleElems + (subtitleId, subtitleElement) <- nonBodyTextToElement layout [PHType "subTitle"] subtitleAndAuthorElems + (dateId, dateElement) <- nonBodyTextToElement layout [PHType "dt"] dateElems + let titleShapeElements = [titleElement | not (null titleElems)] + metadataTitleId = if null titleElems then Nothing else titleId + subtitleShapeElements = [subtitleElement | not (null subtitleAndAuthorElems)] + metadataSubtitleId = if null subtitleAndAuthorElems then Nothing else subtitleId + footerInfo <- gets stFooterInfo + footer <- (if maybe False fiShowOnFirstSlide footerInfo + then id + else const []) <$> footerElements metadata + let dateShapeElements = [dateElement + | not (null dateElems + || isJust (footerInfo >>= metadata . fiDate)) + ] + metadataDateId = if null dateElems then Nothing else dateId + return + $ (Just MetadataShapeIds{..}, ) + $ buildSpTree ns spTree + $ map Elem (titleShapeElements <> subtitleShapeElements <> dateShapeElements) + <> footer +metadataToElement _ _ _ _ _ = return (Nothing, mknode "p:sp" [] ()) slideToElement :: PandocMonad m => Slide -> P m Element -slideToElement (Slide _ l@(ContentSlide hdrElems shapes) _ )= do +slideToElement (Slide _ l@(ContentSlide hdrElems shapes) _ backgroundImage) = do + layout <- getLayout l + backgroundImageElement <- traverse backgroundImageToElement backgroundImage + (shapeIds, spTree) + <- local (\env -> if null hdrElems + then env + else env{envSlideHasHeader=True}) + (contentToElement layout hdrElems shapes) + let animations = case shapeIds of + Nothing -> [] + Just ContentShapeIds{..} -> + slideToIncrementalAnimations (zip contentContentIds shapes) + return $ mknode "p:sld" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), + ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), + ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] (mknode "p:cSld" [] (toList backgroundImageElement <> [spTree]) : animations) +slideToElement (Slide _ l@(TwoColumnSlide hdrElems shapesL shapesR) _ backgroundImage) = do layout <- getLayout l - spTree <- local (\env -> if null hdrElems + backgroundImageElement <- traverse backgroundImageToElement backgroundImage + (shapeIds, spTree) <- local (\env -> if null hdrElems then env else env{envSlideHasHeader=True}) $ - contentToElement layout hdrElems shapes + twoColumnToElement layout hdrElems shapesL shapesR + let animations = case shapeIds of + Nothing -> [] + Just TwoColumnShapeIds{..} -> + slideToIncrementalAnimations (zip twoColumnLeftIds shapesL + <> zip twoColumnRightIds shapesR) return $ mknode "p:sld" [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") - ] [mknode "p:cSld" [] [spTree]] -slideToElement (Slide _ l@(TwoColumnSlide hdrElems shapesL shapesR) _) = do + ] (mknode "p:cSld" [] (toList backgroundImageElement <> [spTree]) : animations) +slideToElement (Slide _ l@(ComparisonSlide hdrElems shapesL shapesR) _ backgroundImage) = do layout <- getLayout l - spTree <- local (\env -> if null hdrElems + backgroundImageElement <- traverse backgroundImageToElement backgroundImage + (shapeIds, spTree) <- local (\env -> if null hdrElems then env else env{envSlideHasHeader=True}) $ - twoColumnToElement layout hdrElems shapesL shapesR + comparisonToElement layout hdrElems shapesL shapesR + let animations = case shapeIds of + Nothing -> [] + Just ComparisonShapeIds{..} -> + slideToIncrementalAnimations + (zip comparisonLeftTextIds (fst shapesL) + <> zip comparisonLeftContentIds (snd shapesL) + <> zip comparisonRightTextIds (fst shapesR) + <> zip comparisonRightContentIds (snd shapesR)) + return $ mknode "p:sld" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), + ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), + ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] (mknode "p:cSld" [] (toList backgroundImageElement <> [spTree]) : animations) +slideToElement (Slide _ l@(TitleSlide hdrElems) _ backgroundImage) = do + layout <- getLayout l + backgroundImageElement <- traverse backgroundImageToElement backgroundImage + (_, spTree) <- titleToElement layout hdrElems return $ mknode "p:sld" [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") - ] [mknode "p:cSld" [] [spTree]] -slideToElement (Slide _ l@(TitleSlide hdrElems) _) = do + ] [mknode "p:cSld" [] (toList backgroundImageElement <> [spTree])] +slideToElement (Slide + _ + l@(MetadataSlide titleElems subtitleElems authorElems dateElems) + _ + backgroundImage) = do layout <- getLayout l - spTree <- titleToElement layout hdrElems + backgroundImageElement <- traverse backgroundImageToElement backgroundImage + (_, spTree) <- metadataToElement layout titleElems subtitleElems authorElems dateElems return $ mknode "p:sld" [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") - ] [mknode "p:cSld" [] [spTree]] -slideToElement (Slide _ l@(MetadataSlide titleElems subtitleElems authorElems dateElems) _) = do + ] [mknode "p:cSld" [] (toList backgroundImageElement <> [spTree])] +slideToElement (Slide + _ + l@(ContentWithCaptionSlide hdrElems captionShapes contentShapes) + _ + backgroundImage) = do layout <- getLayout l - spTree <- metadataToElement layout titleElems subtitleElems authorElems dateElems + backgroundImageElement <- traverse backgroundImageToElement backgroundImage + (shapeIds, spTree) <- contentWithCaptionToElement layout hdrElems captionShapes contentShapes + let animations = case shapeIds of + Nothing -> [] + Just ContentWithCaptionShapeIds{..} -> + slideToIncrementalAnimations + (zip contentWithCaptionCaptionIds captionShapes + <> zip contentWithCaptionContentIds contentShapes) return $ mknode "p:sld" [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") - ] [mknode "p:cSld" [] [spTree]] + ] (mknode "p:cSld" [] (toList backgroundImageElement <> [spTree]) : animations) +slideToElement (Slide _ BlankSlide _ backgroundImage) = do + layout <- getLayout BlankSlide + backgroundImageElement <- traverse backgroundImageToElement backgroundImage + spTree <- blankToElement layout + return $ mknode "p:sld" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), + ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), + ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] [mknode "p:cSld" [] (toList backgroundImageElement <> [spTree])] +backgroundImageToElement :: PandocMonad m => FilePath -> P m Element +backgroundImageToElement path = do + MediaInfo{mInfoLocalId, mInfoFilePath} <- registerMedia path [] + (imgBytes, _) <- P.fetchItem (T.pack mInfoFilePath) + opts <- asks envOpts + let imageDimensions = either (const Nothing) + (Just . sizeInPixels) + (imageSize opts imgBytes) + pageSize <- asks envPresentationSize + let fillRectAttributes = maybe [] (offsetAttributes pageSize) imageDimensions + let rId = "rId" <> T.pack (show mInfoLocalId) + return + $ mknode "p:bg" [] + $ mknode "p:bgPr" [] + [ mknode "a:blipFill" [("dpi", "0"), ("rotWithShape", "1")] + [ mknode "a:blip" [("r:embed", rId)] + $ mknode "a:lum" [] () + , mknode "a:srcRect" [] () + , mknode "a:stretch" [] + $ mknode "a:fillRect" fillRectAttributes () + ] + , mknode "a:effectsLst" [] () + ] + where + offsetAttributes :: (Integer, Integer) -> (Integer, Integer) -> [(Text, Text)] + offsetAttributes (pageWidth, pageHeight) (pictureWidth, pictureHeight) = let + widthRatio = pictureWidth % pageWidth + heightRatio = pictureHeight % pageHeight + getOffset :: Ratio Integer -> Text + getOffset proportion = let + percentageOffset = (proportion - 1) * (-100 % 2) + integerOffset = round percentageOffset * 1000 :: Integer + in T.pack (show integerOffset) + in case compare widthRatio heightRatio of + EQ -> [] + LT -> let + offset = getOffset ((pictureHeight % pageHeight) / widthRatio) + in [ ("t", offset) + , ("b", offset) + ] + GT -> let + offset = getOffset ((pictureWidth % pageWidth) / heightRatio) + in [ ("l", offset) + , ("r", offset) + ] + + +slideToIncrementalAnimations :: + [(ShapeId, Shape)] -> + [Element] +slideToIncrementalAnimations shapes = let + incrementals :: [(ShapeId, [Bool])] + incrementals = do + (shapeId, TextBox ps) <- shapes + pure . (shapeId,) $ do + Paragraph ParaProps{pPropIncremental} _ <- ps + pure pPropIncremental + toIndices :: [Bool] -> Maybe (NonEmpty (Integer, Integer)) + toIndices bs = do + let indexed = zip [0..] bs + ts <- nonEmpty (filter snd indexed) + pure (fmap (\(n, _) -> (n, n)) ts) + indices :: [(ShapeId, NonEmpty (Integer, Integer))] + indices = do + (shapeId, bs) <- incrementals + toList ((,) shapeId <$> toIndices bs) + in toList (incrementalAnimation <$> nonEmpty indices) -------------------------------------------------------------------- -- Notes: @@ -1316,8 +2045,8 @@ speakerNotesSlideNumber pgNum fieldId = ] slideToSpeakerNotesElement :: PandocMonad m => Slide -> P m (Maybe Element) -slideToSpeakerNotesElement (Slide _ _ (SpeakerNotes [])) = return Nothing -slideToSpeakerNotesElement slide@(Slide _ _ (SpeakerNotes paras)) = do +slideToSpeakerNotesElement (Slide _ _ (SpeakerNotes []) _) = return Nothing +slideToSpeakerNotesElement slide@(Slide _ _ (SpeakerNotes paras) _) = do master <- getNotesMaster fieldId <- getSlideNumberFieldId master num <- slideNum slide @@ -1373,11 +2102,14 @@ slideToFilePath slide = do idNum <- slideNum slide return $ "slide" <> show idNum <> ".xml" -slideToRelId :: PandocMonad m => Slide -> P m T.Text -slideToRelId slide = do +slideToRelId :: + PandocMonad m => + MinimumRId -> + Slide -> + P m T.Text +slideToRelId minSlideRId slide = do n <- slideNum slide - offset <- asks envSlideIdOffset - return $ "rId" <> tshow (n + offset) + return $ "rId" <> tshow (n + minSlideRId - 1) data Relationship = Relationship { relId :: Int @@ -1396,19 +2128,18 @@ elementToRel element return $ Relationship num type' (T.unpack target) | otherwise = Nothing -slideToPresRel :: PandocMonad m => Slide -> P m Relationship -slideToPresRel slide = do +slideToPresRel :: PandocMonad m => Int -> Slide -> P m Relationship +slideToPresRel minimumSlideRId slide = do idNum <- slideNum slide - n <- asks envSlideIdOffset - let rId = idNum + n + let rId = idNum + minimumSlideRId - 1 fp = "slides/" <> idNumToFilePath idNum return $ Relationship { relId = rId , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" , relTarget = fp } -getRels :: PandocMonad m => P m [Relationship] -getRels = do +getPresentationRels :: PandocMonad m => P m [Relationship] +getPresentationRels = do refArchive <- asks envRefArchive distArchive <- asks envDistArchive relsElem <- parseXml refArchive distArchive "ppt/_rels/presentation.xml.rels" @@ -1416,42 +2147,77 @@ getRels = do let relElems = findChildren (QName "Relationship" (Just globalNS) Nothing) relsElem return $ mapMaybe elementToRel relElems -presentationToRels :: PandocMonad m => Presentation -> P m [Relationship] +-- | Info required to update a presentation rId from the reference doc for the +-- output. +type PresentationRIdUpdateData = (ReferenceMinRIdAfterSlides, NewRIdBounds) + +-- | The minimum and maximum rIds for presentation relationships created from +-- the presentation content (as opposed to from the reference doc). +-- +-- Relationships taken from the reference doc should have their rId number +-- adjusted to make sure it sits outside this range. +type NewRIdBounds = (MinimumRId, MaximumRId) + +-- | The minimum presentation rId from the reference doc which comes after the +-- first slide rId (in the reference doc). +type ReferenceMinRIdAfterSlides = Int +type MinimumRId = Int +type MaximumRId = Int + +-- | Given a presentation rId from the reference doc, return the value it should +-- have in the output. +updatePresentationRId :: PresentationRIdUpdateData -> Int -> Int +updatePresentationRId (minOverlappingRId, (minNewId, maxNewId)) n + | n < minNewId = n + | otherwise = n - minOverlappingRId + maxNewId + 1 + +presentationToRels :: + PandocMonad m => + Presentation -> + P m (PresentationRIdUpdateData, [Relationship]) presentationToRels pres@(Presentation _ slides) = do - mySlideRels <- mapM slideToPresRel slides - let notesMasterRels = - [Relationship { relId = length mySlideRels + 2 - , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster" - , relTarget = "notesMasters/notesMaster1.xml" - } | presHasSpeakerNotes pres] - insertedRels = mySlideRels <> notesMasterRels - rels <- getRels - -- we remove the slide rels and the notesmaster (if it's - -- there). We'll put these back in ourselves, if necessary. - let relsWeKeep = filter + rels <- getPresentationRels + + -- We want to make room for the slides in the id space. We'll assume the slide + -- masters come first (this seems to be what PowerPoint does by default, and + -- is true of the reference doc), and we'll put the slides next. So we find + -- the starting rId for the slides by finding the maximum rId for the masters + -- and adding 1. + -- + -- Then: + -- 1. We look to see what the minimum rId which is greater than or equal to + -- the minimum slide rId is, in the rels we're keeping from the reference + -- doc (i.e. the minimum rId which might overlap with the slides). + -- 2. We increase this minimum overlapping rId to 1 higher than the last slide + -- rId (or the notesMaster rel, if we're including one), and increase all + -- rIds higher than this minimum by the same amount. + + let masterRels = filter (T.isSuffixOf "slideMaster" . relType) rels + slideStartId = maybe 1 ((+ 1) . maximum . fmap relId) (nonEmpty masterRels) + -- we remove the slide rels and the notesmaster (if it's + -- there). We'll put these back in ourselves, if necessary. + relsWeKeep = filter (\r -> relType r /= "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" && relType r /= "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster") rels - -- We want to make room for the slides in the id space. The slides - -- will start at Id2 (since Id1 is for the slide master). There are - -- two slides in the data file, but that might change in the future, - -- so we will do this: - -- - -- 1. We look to see what the minimum relWithoutSlide id (greater than 1) is. - -- 2. We add the difference between this and the number of slides to - -- all relWithoutSlide rels (unless they're 1) - -- 3. If we have a notesmaster slide, we make space for that as well. + minOverlappingRel = maybe 0 minimum + (nonEmpty (filter (slideStartId <=) + (relId <$> relsWeKeep))) - let minRelNotOne = maybe 0 minimum $ nonEmpty - $ filter (1 <) $ map relId relsWeKeep + mySlideRels <- mapM (slideToPresRel slideStartId) slides - modifyRelNum :: Int -> Int - modifyRelNum 1 = 1 - modifyRelNum n = n - minRelNotOne + 2 + length insertedRels + let notesMasterRels = + [Relationship { relId = slideStartId + length mySlideRels + , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster" + , relTarget = "notesMasters/notesMaster1.xml" + } | presHasSpeakerNotes pres] + insertedRels = mySlideRels <> notesMasterRels + newRIdBounds = (slideStartId, slideStartId + length insertedRels - 1) + updateRId = updatePresentationRId (minOverlappingRel, newRIdBounds) - relsWeKeep' = map (\r -> r{relId = modifyRelNum $ relId r}) relsWeKeep + relsWeKeep' = map (\r -> r{relId = updateRId $ relId r}) relsWeKeep - return $ insertedRels <> relsWeKeep' + return ((minOverlappingRel, newRIdBounds), insertedRels <> relsWeKeep') -- We make this ourselves, in case there's a thumbnail in the one from -- the template. @@ -1488,10 +2254,14 @@ relsToElement rels = mknode "Relationships" [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] (map relToElement rels) -presentationToRelsEntry :: PandocMonad m => Presentation -> P m Entry +presentationToRelsEntry :: + PandocMonad m => + Presentation -> + P m (PresentationRIdUpdateData, Entry) presentationToRelsEntry pres = do - rels <- presentationToRels pres - elemToEntry "ppt/_rels/presentation.xml.rels" $ relsToElement rels + (presentationRIdUpdateData, rels) <- presentationToRels pres + element <- elemToEntry "ppt/_rels/presentation.xml.rels" $ relsToElement rels + pure (presentationRIdUpdateData, element) elemToEntry :: PandocMonad m => FilePath -> Element -> P m Entry elemToEntry fp element = do @@ -1522,7 +2292,7 @@ slideToSpeakerNotesEntry slide = do _ -> return Nothing slideToSpeakerNotesRelElement :: PandocMonad m => Slide -> P m (Maybe Element) -slideToSpeakerNotesRelElement (Slide _ _ (SpeakerNotes [])) = return Nothing +slideToSpeakerNotesRelElement (Slide _ _ (SpeakerNotes []) _) = return Nothing slideToSpeakerNotesRelElement slide@Slide{} = do idNum <- slideNum slide return $ Just $ @@ -1606,11 +2376,16 @@ speakerNotesSlideRelElement slide = do slideToSlideRelElement :: PandocMonad m => Slide -> P m Element slideToSlideRelElement slide = do idNum <- slideNum slide - let target = case slide of - (Slide _ MetadataSlide{} _) -> "../slideLayouts/slideLayout1.xml" - (Slide _ TitleSlide{} _) -> "../slideLayouts/slideLayout3.xml" - (Slide _ ContentSlide{} _) -> "../slideLayouts/slideLayout2.xml" - (Slide _ TwoColumnSlide{} _) -> "../slideLayouts/slideLayout4.xml" + target <- flip fmap getSlideLayouts $ + T.pack . ("../slideLayouts/" <>) . takeFileName . + slPath . case slide of + (Slide _ MetadataSlide{} _ _) -> metadata + (Slide _ TitleSlide{} _ _) -> title + (Slide _ ContentSlide{} _ _) -> content + (Slide _ TwoColumnSlide{} _ _) -> twoColumn + (Slide _ ComparisonSlide{} _ _) -> comparison + (Slide _ ContentWithCaptionSlide{} _ _) -> contentWithCaption + (Slide _ BlankSlide _ _) -> blank speakerNotesRels <- maybeToList <$> speakerNotesSlideRelElement slide @@ -1632,24 +2407,37 @@ slideToSlideRelElement slide = do , ("Target", target)] () ] <> speakerNotesRels <> linkRels <> mediaRels) -slideToSldIdElement :: PandocMonad m => Slide -> P m Element -slideToSldIdElement slide = do +slideToSldIdElement :: + PandocMonad m => + MinimumRId -> + Slide -> + P m Element +slideToSldIdElement minimumSlideRId slide = do n <- slideNum slide let id' = tshow $ n + 255 - rId <- slideToRelId slide + rId <- slideToRelId minimumSlideRId slide return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] () -presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element -presentationToSldIdLst (Presentation _ slides) = do - ids <- mapM slideToSldIdElement slides +presentationToSldIdLst :: + PandocMonad m => + MinimumRId -> + Presentation -> + P m Element +presentationToSldIdLst minimumSlideRId (Presentation _ slides) = do + ids <- mapM (slideToSldIdElement minimumSlideRId) slides return $ mknode "p:sldIdLst" [] ids -presentationToPresentationElement :: PandocMonad m => Presentation -> P m Element -presentationToPresentationElement pres@(Presentation _ slds) = do +presentationToPresentationElement :: + PandocMonad m => + PresentationRIdUpdateData -> + Presentation -> + P m Element +presentationToPresentationElement presentationUpdateRIdData pres = do + let (_, (minSlideRId, maxSlideRId)) = presentationUpdateRIdData refArchive <- asks envRefArchive distArchive <- asks envDistArchive element <- parseXml refArchive distArchive "ppt/presentation.xml" - sldIdLst <- presentationToSldIdLst pres + sldIdLst <- presentationToSldIdLst minSlideRId pres let modifySldIdLst :: Content -> Content modifySldIdLst (Elem e) = case elName e of @@ -1657,11 +2445,11 @@ presentationToPresentationElement pres@(Presentation _ slds) = do _ -> Elem e modifySldIdLst ct = ct - notesMasterRId = length slds + 2 + notesMasterRId = maxSlideRId notesMasterElem = mknode "p:notesMasterIdLst" [] [ mknode - "p:NotesMasterId" + "p:notesMasterId" [("r:id", "rId" <> tshow notesMasterRId)] () ] @@ -1692,16 +2480,34 @@ presentationToPresentationElement pres@(Presentation _ slds) = do then concatMap insertNotesMaster' else id + updateRIds :: Content -> Content + updateRIds (Elem el) = + Elem (el { elAttribs = fmap updateRIdAttribute (elAttribs el) + , elContent = fmap updateRIds (elContent el) + }) + updateRIds content = content + + updateRIdAttribute :: XML.Attr -> XML.Attr + updateRIdAttribute attr = fromMaybe attr $ do + oldValue <- case attrKey attr of + QName "id" _ (Just "r") -> + T.stripPrefix "rId" (attrVal attr) + >>= fmap fromIntegral . readTextAsInteger + _ -> Nothing + let newValue = updatePresentationRId presentationUpdateRIdData oldValue + pure attr {attrVal = "rId" <> T.pack (show newValue)} + newContent = insertNotesMaster $ removeUnwantedMaster $ - map modifySldIdLst $ + (modifySldIdLst . updateRIds) <$> elContent element return $ element{elContent = newContent} -presentationToPresEntry :: PandocMonad m => Presentation -> P m Entry -presentationToPresEntry pres = presentationToPresentationElement pres >>= - elemToEntry "ppt/presentation.xml" +presentationToPresEntry :: PandocMonad m => PresentationRIdUpdateData -> Presentation -> P m Entry +presentationToPresEntry presentationRIdUpdateData pres = + presentationToPresentationElement presentationRIdUpdateData pres >>= + elemToEntry "ppt/presentation.xml" -- adapted from the Docx writer docPropsElement :: PandocMonad m => DocProps -> P m Element @@ -1920,3 +2726,102 @@ autoNumAttrs (startNum, numStyle, numDelim) = OneParen -> "ParenR" TwoParens -> "ParenBoth" _ -> "Period" + +-- | The XML required to insert an "appear" animation for each of the given +-- groups of paragraphs, identified by index. +incrementalAnimation :: + -- | (ShapeId, [(startParagraphIndex, endParagraphIndex)]) + NonEmpty (ShapeId, NonEmpty (Integer, Integer)) -> + Element +incrementalAnimation indices = mknode "p:timing" [] [tnLst, bldLst] + where + triples :: NonEmpty (ShapeId, Integer, Integer) + triples = do + (shapeId, paragraphIds) <- indices + (start, end) <- paragraphIds + pure (shapeId, start, end) + + tnLst = mknode "p:tnLst" [] + $ mknode "p:par" [] + $ mknode "p:cTn" [ ("id", "1") + , ("dur", "indefinite") + , ("restart", "never") + , ("nodeType", "tmRoot") + ] + $ mknode "p:childTnLst" [] + $ mknode "p:seq" [ ("concurrent", "1") + , ("nextAc", "seek") + ] + [ mknode "p:cTn" [ ("id", "2") + , ("dur", "indefinite") + , ("nodeType", "mainSeq") + ] + $ mknode "p:childTnLst" [] + $ zipWith makePar [3, 7 ..] (toList triples) + , mknode "p:prevCondLst" [] + $ mknode "p:cond" ([("evt", "onPrev"), ("delay", "0")]) + $ mknode "p:tgtEl" [] + $ mknode "p:sldTgt" [] () + , mknode "p:nextCondLst" [] + $ mknode "p:cond" ([("evt", "onNext"), ("delay", "0")]) + $ mknode "p:tgtEl" [] + $ mknode "p:sldTgt" [] () + ] + bldLst = mknode "p:bldLst" [] + [ mknode "p:bldP" [ ("spid", T.pack (show shapeId)) + , ("grpId", "0") + , ("uiExpand", "1") + , ("build", "p") + ] + () | (shapeId, _) <- toList indices + ] + + makePar :: Integer -> (ShapeId, Integer, Integer) -> Element + makePar nextId (shapeId, start, end) = + mknode "p:par" [] + $ mknode "p:cTn" [("id", T.pack (show nextId)), ("fill", "hold")] + [ mknode "p:stCondLst" [] + $ mknode "p:cond" [("delay", "indefinite")] () + , mknode "p:childTnLst" [] + $ mknode "p:par" [] + $ mknode "p:cTn" [ ("id", T.pack (show (nextId + 1))) + , ("fill", "hold") + ] + [ mknode "p:stCondLst" [] + $ mknode "p:cond" [("delay", "0")] () + , mknode "p:childTnLst" [] + $ mknode "p:par" [] + $ mknode "p:cTn" [ ("id", T.pack (show (nextId + 2))) + , ("presetID", "1") + , ("presetClass", "entr") + , ("presetSubtype", "0") + , ("fill", "hold") + , ("grpId", "0") + , ("nodeType", "clickEffect") + ] + [ mknode "p:stCondLst" [] + $ mknode "p:cond" [("delay", "0")] () + , mknode "p:childTnLst" [] + $ mknode "p:set" [] + [ mknode "p:cBhvr" [] + [ mknode "p:cTn" [ ("id", T.pack (show (nextId + 3))) + , ("dur", "1") + , ("fill", "hold") + ] + $ mknode "p:stCondLst" [] + $ mknode "p:cond" [("delay", "0")] () + , mknode "p:tgtEl" [] + $ mknode "p:spTgt" [("spid", T.pack (show shapeId))] + $ mknode "p:txEl" [] + $ mknode "p:pRg" [ ("st", T.pack (show start)) + , ("end", T.pack (show end))] + () + , mknode "p:attrNameLst" [] + $ mknode "p:attrName" [] ("style.visibility" :: Text) + ] + , mknode "p:to" [] + $ mknode "p:strVal" [("val", "visible")] () + ] + ] + ] + ] diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs index 9246a93e9..fd6b83120 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -1,7 +1,9 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- | Module : Text.Pandoc.Writers.Powerpoint.Presentation Copyright : Copyright (C) 2017-2020 Jesse Rosenthal @@ -53,7 +55,6 @@ import Text.Pandoc.Slides (getSlideLevel) import Text.Pandoc.Options import Text.Pandoc.Logging import Text.Pandoc.Walk -import Data.Time (UTCTime) import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element" import Text.Pandoc.Shared (tshow) import Text.Pandoc.Writers.Shared (lookupMetaInlines, lookupMetaBlocks @@ -61,11 +62,13 @@ import Text.Pandoc.Writers.Shared (lookupMetaInlines, lookupMetaBlocks , toLegacyTable) import qualified Data.Map as M import qualified Data.Set as S -import Data.Maybe (maybeToList, fromMaybe) +import Data.Maybe (maybeToList, fromMaybe, listToMaybe, isNothing) import Text.Pandoc.Highlighting import qualified Data.Text as T import Control.Applicative ((<|>)) import Skylighting +import Data.Bifunctor (bimap) +import Data.Char (isSpace) data WriterEnv = WriterEnv { envMetadata :: Meta , envRunProps :: RunProps @@ -77,6 +80,8 @@ data WriterEnv = WriterEnv { envMetadata :: Meta , envInNoteSlide :: Bool , envCurSlideId :: SlideId , envInSpeakerNotes :: Bool + , envInIncrementalDiv :: Maybe InIncrementalDiv + , envInListInBlockQuote :: Bool } deriving (Show) @@ -91,6 +96,8 @@ instance Default WriterEnv where , envInNoteSlide = False , envCurSlideId = SlideId "Default" , envInSpeakerNotes = False + , envInIncrementalDiv = Nothing + , envInListInBlockQuote = False } @@ -111,6 +118,23 @@ instance Default WriterState where , stSpeakerNotes = mempty } +data InIncrementalDiv + = InIncremental + -- ^ The current content is contained within an "incremental" div. + | InNonIncremental + -- ^ The current content is contained within a "nonincremental" div. + deriving (Show) + +listShouldBeIncremental :: Pres Bool +listShouldBeIncremental = do + incrementalOption <- asks (writerIncremental . envOpts) + inIncrementalDiv <- asks envInIncrementalDiv + inBlockQuote <- asks envInListInBlockQuote + let toBoolean = (\case InIncremental -> True + InNonIncremental -> False) + maybeInvert = if inBlockQuote then not else id + pure (maybeInvert (maybe incrementalOption toBoolean inIncrementalDiv)) + metadataSlideId :: SlideId metadataSlideId = SlideId "Metadata" @@ -168,7 +192,7 @@ data DocProps = DocProps { dcTitle :: Maybe T.Text , dcKeywords :: Maybe [T.Text] , dcDescription :: Maybe T.Text , cpCategory :: Maybe T.Text - , dcCreated :: Maybe UTCTime + , dcDate :: Maybe T.Text , customProperties :: Maybe [(T.Text, T.Text)] } deriving (Show, Eq) @@ -176,6 +200,7 @@ data DocProps = DocProps { dcTitle :: Maybe T.Text data Slide = Slide { slideId :: SlideId , slideLayout :: Layout , slideSpeakerNotes :: SpeakerNotes + , slideBackgroundImage :: Maybe FilePath } deriving (Show, Eq) newtype SlideId = SlideId T.Text @@ -195,9 +220,15 @@ data Layout = MetadataSlide [ParaElem] [ParaElem] [[ParaElem]] [ParaElem] -- heading content | TwoColumnSlide [ParaElem] [Shape] [Shape] -- heading left right + | ComparisonSlide [ParaElem] ([Shape], [Shape]) ([Shape], [Shape]) + -- heading left@(text, content) right@(text, content) + | ContentWithCaptionSlide [ParaElem] [Shape] [Shape] + -- heading text content + | BlankSlide deriving (Show, Eq) -data Shape = Pic PicProps FilePath [ParaElem] +data Shape = Pic PicProps FilePath T.Text [ParaElem] + -- title alt-text | GraphicFrame [Graphic] [ParaElem] | TextBox [Paragraph] | RawOOXMLShape T.Text @@ -218,7 +249,7 @@ data Graphic = Tbl TableProps [TableCell] [[TableCell]] data Paragraph = Paragraph { paraProps :: ParaProps - , paraElems :: [ParaElem] + , paraElems :: [ParaElem] } deriving (Show, Eq) data BulletType = Bullet @@ -235,6 +266,7 @@ data ParaProps = ParaProps { pPropMarginLeft :: Maybe Pixels , pPropAlign :: Maybe Algnment , pPropSpaceBefore :: Maybe Pixels , pPropIndent :: Maybe Pixels + , pPropIncremental :: Bool } deriving (Show, Eq) instance Default ParaProps where @@ -245,6 +277,7 @@ instance Default ParaProps where , pPropAlign = Nothing , pPropSpaceBefore = Nothing , pPropIndent = Just 0 + , pPropIncremental = False } newtype TeXString = TeXString {unTeXString :: T.Text} @@ -315,7 +348,7 @@ instance Default PicProps where -------------------------------------------------- inlinesToParElems :: [Inline] -> Pres [ParaElem] -inlinesToParElems ils = concatMapM inlineToParElems ils +inlinesToParElems = concatMapM inlineToParElems inlineToParElems :: Inline -> Pres [ParaElem] inlineToParElems (Str s) = do @@ -440,7 +473,8 @@ blockToParagraphs (CodeBlock attr str) = do -- (BlockQuote List) as a list to maintain compatibility with other -- formats. blockToParagraphs (BlockQuote (blk : blks)) | isListType blk = do - ps <- blockToParagraphs blk + ps <- local (\env -> env { envInListInBlockQuote = True }) + (blockToParagraphs blk) ps' <- blockToParagraphs $ BlockQuote blks return $ ps ++ ps' blockToParagraphs (BlockQuote blks) = @@ -465,25 +499,26 @@ blockToParagraphs (Header _ (ident, _, _) ils) = do return [Paragraph def{pPropSpaceBefore = Just 30} parElems] blockToParagraphs (BulletList blksLst) = do pProps <- asks envParaProps - let lvl = pPropLevel pProps + incremental <- listShouldBeIncremental local (\env -> env{ envInList = True - , envParaProps = pProps{ pPropLevel = lvl + 1 - , pPropBullet = Just Bullet + , envParaProps = pProps{ pPropBullet = Just Bullet , pPropMarginLeft = Nothing , pPropIndent = Nothing + , pPropIncremental = incremental }}) $ - concatMapM multiParBullet blksLst + concatMapM multiParList blksLst blockToParagraphs (OrderedList listAttr blksLst) = do pProps <- asks envParaProps - let lvl = pPropLevel pProps + incremental <- listShouldBeIncremental local (\env -> env{ envInList = True - , envParaProps = pProps{ pPropLevel = lvl + 1 - , pPropBullet = Just (AutoNumbering listAttr) + , envParaProps = pProps{ pPropBullet = Just (AutoNumbering listAttr) , pPropMarginLeft = Nothing , pPropIndent = Nothing + , pPropIncremental = incremental }}) $ - concatMapM multiParBullet blksLst + concatMapM multiParList blksLst blockToParagraphs (DefinitionList entries) = do + incremental <- listShouldBeIncremental let go :: ([Inline], [[Block]]) -> Pres [Paragraph] go (ils, blksLst) = do term <-blockToParagraphs $ Para [Strong ils] @@ -491,20 +526,35 @@ blockToParagraphs (DefinitionList entries) = do -- blockquote. We can extend this further later. definition <- concatMapM (blockToParagraphs . BlockQuote) blksLst return $ term ++ definition - concatMapM go entries -blockToParagraphs (Div _ blks) = concatMapM blockToParagraphs blks + local (\env -> env {envParaProps = + (envParaProps env) {pPropIncremental = incremental}}) + $ concatMapM go entries +blockToParagraphs (Div (_, classes, _) blks) = let + hasIncremental = "incremental" `elem` classes + hasNonIncremental = "nonincremental" `elem` classes + incremental = if | hasIncremental -> Just InIncremental + | hasNonIncremental -> Just InNonIncremental + | otherwise -> Nothing + addIncremental env = env { envInIncrementalDiv = incremental } + in local addIncremental (concatMapM blockToParagraphs blks) blockToParagraphs blk = do addLogMessage $ BlockNotRendered blk return [] --- Make sure the bullet env gets turned off after the first para. -multiParBullet :: [Block] -> Pres [Paragraph] -multiParBullet [] = return [] -multiParBullet (b:bs) = do +-- | Make sure the bullet env gets turned off after the first para. +multiParList :: [Block] -> Pres [Paragraph] +multiParList [] = return [] +multiParList (b:bs) = do pProps <- asks envParaProps p <- blockToParagraphs b - ps <- local (\env -> env{envParaProps = pProps{pPropBullet = Nothing}}) $ - concatMapM blockToParagraphs bs + let level = pPropLevel pProps + ps <- local (\env -> env + { envParaProps = pProps + { pPropBullet = Nothing + , pPropLevel = level + 1 + } + }) + $ concatMapM blockToParagraphs bs return $ p ++ ps cellToParagraphs :: Alignment -> SimpleCell -> Pres [Paragraph] @@ -525,21 +575,22 @@ rowToParagraphs algns tblCells = do mapM (uncurry cellToParagraphs) pairs withAttr :: Attr -> Shape -> Shape -withAttr attr (Pic picPr url caption) = +withAttr attr (Pic picPr url title caption) = let picPr' = picPr { picWidth = dimension Width attr , picHeight = dimension Height attr } in - Pic picPr' url caption + Pic picPr' url title caption withAttr _ sp = sp blockToShape :: Block -> Pres Shape blockToShape (Plain ils) = blockToShape (Para ils) -blockToShape (Para (il:_)) | Image attr ils (url, _) <- il = - withAttr attr . Pic def (T.unpack url) <$> inlinesToParElems ils +blockToShape (Para (il:_)) | Image attr ils (url, title) <- il = + withAttr attr . Pic def (T.unpack url) title <$> inlinesToParElems ils blockToShape (Para (il:_)) | Link _ (il':_) target <- il - , Image attr ils (url, _) <- il' = - withAttr attr . Pic def{picPropLink = Just $ ExternalTarget target} (T.unpack url) + , Image attr ils (url, title) <- il' = + withAttr attr . + Pic def{picPropLink = Just $ ExternalTarget target} (T.unpack url) title <$> inlinesToParElems ils blockToShape (Table _ blkCapt specs thead tbody tfoot) = do let (caption, algn, _, hdrCells, rows) = toLegacyTable blkCapt specs thead tbody tfoot @@ -582,7 +633,30 @@ isImage Image{} = True isImage (Link _ (Image{} : _) _) = True isImage _ = False -splitBlocks' :: [Block] -> [[Block]] -> [Block] -> Pres [[Block]] +plainOrPara :: Block -> Maybe [Inline] +plainOrPara (Plain ils) = Just ils +plainOrPara (Para ils) = Just ils +plainOrPara _ = Nothing + +notText :: Block -> Bool +notText block | startsWithImage block = True +notText Table{} = True +notText _ = False + +startsWithImage :: Block -> Bool +startsWithImage block = fromMaybe False $ do + inline <- plainOrPara block >>= listToMaybe + pure (isImage inline) + +-- | Group blocks into a number of "splits" +splitBlocks' :: + -- | Blocks so far in the current split + [Block] -> + -- | Splits so far + [[Block]] -> + -- | All remaining blocks + [Block] -> + Pres [[Block]] splitBlocks' cur acc [] = return $ acc ++ ([cur | not (null cur)]) splitBlocks' cur acc (HorizontalRule : blks) = splitBlocks' [] (acc ++ ([cur | not (null cur)])) blks @@ -602,25 +676,31 @@ splitBlocks' cur acc (Para (il:ils) : blks) | isImage il = do then span isNotesDiv blks else ([], blks) case cur of - [Header n _ _] | n == slideLevel -> + [Header n _ _] | n == slideLevel || slideLevel == 0 -> splitBlocks' [] (acc ++ [cur ++ [Para [il]] ++ nts]) (if null ils then blks' else Para ils : blks') _ -> splitBlocks' [] - (acc ++ ([cur | not (null cur)]) ++ [Para [il] : nts]) + (if any notText cur + then acc ++ ([cur | not (null cur)]) ++ [Para [il] : nts] + else acc ++ [cur ++ [Para [il]] ++ nts]) (if null ils then blks' else Para ils : blks') splitBlocks' cur acc (tbl@Table{} : blks) = do slideLevel <- asks envSlideLevel let (nts, blks') = span isNotesDiv blks case cur of - [Header n _ _] | n == slideLevel -> + [Header n _ _] | n == slideLevel || slideLevel == 0 -> splitBlocks' [] (acc ++ [cur ++ [tbl] ++ nts]) blks' - _ -> splitBlocks' [] (acc ++ ([cur | not (null cur)]) ++ [tbl : nts]) blks' + _ -> splitBlocks' [] + (if any notText cur + then acc ++ ([cur | not (null cur)]) ++ [tbl : nts] + else acc ++ ([cur ++ [tbl] ++ nts])) + blks' splitBlocks' cur acc (d@(Div (_, classes, _) _): blks) | "columns" `elem` classes = do slideLevel <- asks envSlideLevel let (nts, blks') = span isNotesDiv blks case cur of - [Header n _ _] | n == slideLevel -> + [Header n _ _] | n == slideLevel || slideLevel == 0 -> splitBlocks' [] (acc ++ [cur ++ [d] ++ nts]) blks' _ -> splitBlocks' [] (acc ++ ([cur | not (null cur)]) ++ [d : nts]) blks' splitBlocks' cur acc (blk : blks) = splitBlocks' (cur ++ [blk]) acc blks @@ -628,63 +708,96 @@ splitBlocks' cur acc (blk : blks) = splitBlocks' (cur ++ [blk]) acc blks splitBlocks :: [Block] -> Pres [[Block]] splitBlocks = splitBlocks' [] [] +-- | Assuming the slide title is already handled, convert these blocks to the +-- body content for the slide. +bodyBlocksToSlide :: Int -> [Block] -> SpeakerNotes -> Pres Slide +bodyBlocksToSlide _ (blk : blks) spkNotes + | Div (_, classes, _) divBlks <- blk + , "columns" `elem` classes + , Div (_, clsL, _) blksL : Div (_, clsR, _) blksR : remaining <- divBlks + , "column" `elem` clsL, "column" `elem` clsR = do + mapM_ (addLogMessage . BlockNotRendered) (blks ++ remaining) + let mkTwoColumn left right = do + blksL' <- join . take 1 <$> splitBlocks left + blksR' <- join . take 1 <$> splitBlocks right + shapesL <- blocksToShapes blksL' + shapesR <- blocksToShapes blksR' + sldId <- asks envCurSlideId + return $ Slide + sldId + (TwoColumnSlide [] shapesL shapesR) + spkNotes + Nothing + let mkComparison blksL1 blksL2 blksR1 blksR2 = do + shapesL1 <- blocksToShapes blksL1 + shapesL2 <- blocksToShapes blksL2 + shapesR1 <- blocksToShapes blksR1 + shapesR2 <- blocksToShapes blksR2 + sldId <- asks envCurSlideId + return $ Slide + sldId + (ComparisonSlide [] (shapesL1, shapesL2) (shapesR1, shapesR2)) + spkNotes + Nothing + let (blksL1, blksL2) = break notText blksL + (blksR1, blksR2) = break notText blksR + if (any null [blksL1, blksL2]) && (any null [blksR1, blksR2]) + then mkTwoColumn blksL blksR + else mkComparison blksL1 blksL2 blksR1 blksR2 +bodyBlocksToSlide _ (blk : blks) spkNotes = do + sldId <- asks envCurSlideId + inNoteSlide <- asks envInNoteSlide + let mkSlide s = + Slide sldId s spkNotes Nothing + if inNoteSlide + then mkSlide . ContentSlide [] <$> + forceFontSize noteSize (blocksToShapes (blk : blks)) + else let + contentOrBlankSlide = + if makesBlankSlide (blk : blks) + then pure (mkSlide BlankSlide) + else mkSlide . ContentSlide [] <$> blocksToShapes (blk : blks) + in case break notText (blk : blks) of + ([], _) -> contentOrBlankSlide + (_, []) -> contentOrBlankSlide + (textBlocks, contentBlocks) -> do + textShapes <- blocksToShapes textBlocks + contentShapes <- blocksToShapes contentBlocks + return (mkSlide (ContentWithCaptionSlide [] textShapes contentShapes)) +bodyBlocksToSlide _ [] spkNotes = do + sldId <- asks envCurSlideId + return $ + Slide + sldId + BlankSlide + spkNotes + Nothing + blocksToSlide' :: Int -> [Block] -> SpeakerNotes -> Pres Slide -blocksToSlide' lvl (Header n (ident, _, _) ils : blks) spkNotes +blocksToSlide' lvl (Header n (ident, _, attributes) ils : blks) spkNotes | n < lvl = do registerAnchorId ident sldId <- asks envCurSlideId hdr <- inlinesToParElems ils - return $ Slide sldId (TitleSlide hdr) spkNotes - | n == lvl = do + return $ Slide sldId (TitleSlide hdr) spkNotes backgroundImage + | n == lvl || lvl == 0 = do registerAnchorId ident hdr <- inlinesToParElems ils -- Now get the slide without the header, and then add the header -- in. - slide <- blocksToSlide' lvl blks spkNotes + slide <- bodyBlocksToSlide lvl blks spkNotes let layout = case slideLayout slide of ContentSlide _ cont -> ContentSlide hdr cont TwoColumnSlide _ contL contR -> TwoColumnSlide hdr contL contR + ComparisonSlide _ contL contR -> ComparisonSlide hdr contL contR + ContentWithCaptionSlide _ text content -> ContentWithCaptionSlide hdr text content + BlankSlide -> if all inlineIsBlank ils then BlankSlide else ContentSlide hdr [] layout' -> layout' - return $ slide{slideLayout = layout} -blocksToSlide' _ (blk : blks) spkNotes - | Div (_, classes, _) divBlks <- blk - , "columns" `elem` classes - , Div (_, clsL, _) blksL : Div (_, clsR, _) blksR : remaining <- divBlks - , "column" `elem` clsL, "column" `elem` clsR = do - mapM_ (addLogMessage . BlockNotRendered) (blks ++ remaining) - mbSplitBlksL <- splitBlocks blksL - mbSplitBlksR <- splitBlocks blksR - let blksL' = case mbSplitBlksL of - bs : _ -> bs - [] -> [] - let blksR' = case mbSplitBlksR of - bs : _ -> bs - [] -> [] - shapesL <- blocksToShapes blksL' - shapesR <- blocksToShapes blksR' - sldId <- asks envCurSlideId - return $ Slide - sldId - (TwoColumnSlide [] shapesL shapesR) - spkNotes -blocksToSlide' _ (blk : blks) spkNotes = do - inNoteSlide <- asks envInNoteSlide - shapes <- if inNoteSlide - then forceFontSize noteSize $ blocksToShapes (blk : blks) - else blocksToShapes (blk : blks) - sldId <- asks envCurSlideId - return $ - Slide - sldId - (ContentSlide [] shapes) - spkNotes -blocksToSlide' _ [] spkNotes = do - sldId <- asks envCurSlideId - return $ - Slide - sldId - (ContentSlide [] []) - spkNotes + return $ slide{slideLayout = layout, slideBackgroundImage = backgroundImage} + where + backgroundImage = T.unpack <$> (lookup "background-image" attributes + <|> lookup "data-background-image" attributes) +blocksToSlide' lvl blks spkNotes = bodyBlocksToSlide lvl blks spkNotes blockToSpeakerNotes :: Block -> Pres SpeakerNotes blockToSpeakerNotes (Div (_, ["notes"], _) blks) = @@ -764,12 +877,13 @@ getMetaSlide = do metadataSlideId (MetadataSlide title subtitle authors date) mempty + Nothing addSpeakerNotesToMetaSlide :: Slide -> [Block] -> Pres (Slide, [Block]) -addSpeakerNotesToMetaSlide (Slide sldId layout@MetadataSlide{} spkNotes) blks = +addSpeakerNotesToMetaSlide (Slide sldId layout@MetadataSlide{} spkNotes backgroundImage) blks = do let (ntsBlks, blks') = span isNotesDiv blks spkNotes' <- mconcat <$> mapM blockToSpeakerNotes ntsBlks - return (Slide sldId layout (spkNotes <> spkNotes'), blks') + return (Slide sldId layout (spkNotes <> spkNotes') backgroundImage, blks') addSpeakerNotesToMetaSlide sld blks = return (sld, blks) makeTOCSlide :: [Block] -> Pres Slide @@ -805,7 +919,7 @@ applyToParagraph f para = do return $ para {paraElems = paraElems'} applyToShape :: Monad m => (ParaElem -> m ParaElem) -> Shape -> m Shape -applyToShape f (Pic pPr fp pes) = Pic pPr fp <$> mapM f pes +applyToShape f (Pic pPr fp title pes) = Pic pPr fp title <$> mapM f pes applyToShape f (GraphicFrame gfx pes) = GraphicFrame gfx <$> mapM f pes applyToShape f (TextBox paras) = TextBox <$> mapM (applyToParagraph f) paras applyToShape _ (RawOOXMLShape str) = return $ RawOOXMLShape str @@ -827,6 +941,19 @@ applyToLayout f (TwoColumnSlide hdr contentL contentR) = do contentL' <- mapM (applyToShape f) contentL contentR' <- mapM (applyToShape f) contentR return $ TwoColumnSlide hdr' contentL' contentR' +applyToLayout f (ComparisonSlide hdr (contentL1, contentL2) (contentR1, contentR2)) = do + hdr' <- mapM f hdr + contentL1' <- mapM (applyToShape f) contentL1 + contentL2' <- mapM (applyToShape f) contentL2 + contentR1' <- mapM (applyToShape f) contentR1 + contentR2' <- mapM (applyToShape f) contentR2 + return $ ComparisonSlide hdr' (contentL1', contentL2') (contentR1', contentR2') +applyToLayout f (ContentWithCaptionSlide hdr textShapes contentShapes) = do + hdr' <- mapM f hdr + textShapes' <- mapM (applyToShape f) textShapes + contentShapes' <- mapM (applyToShape f) contentShapes + return $ ContentWithCaptionSlide hdr' textShapes' contentShapes' +applyToLayout _ BlankSlide = pure BlankSlide applyToSlide :: Monad m => (ParaElem -> m ParaElem) -> Slide -> m Slide applyToSlide f slide = do @@ -878,9 +1005,72 @@ emptyLayout layout = case layout of all emptyParaElem hdr && all emptyShape shapes1 && all emptyShape shapes2 + ComparisonSlide hdr (shapesL1, shapesL2) (shapesR1, shapesR2) -> + all emptyParaElem hdr && + all emptyShape shapesL1 && + all emptyShape shapesL2 && + all emptyShape shapesR1 && + all emptyShape shapesR2 + ContentWithCaptionSlide hdr textShapes contentShapes -> + all emptyParaElem hdr && + all emptyShape textShapes && + all emptyShape contentShapes + BlankSlide -> False + emptySlide :: Slide -> Bool -emptySlide (Slide _ layout notes) = (notes == mempty) && emptyLayout layout +emptySlide (Slide _ layout notes backgroundImage) + = (notes == mempty) + && emptyLayout layout + && isNothing backgroundImage + +makesBlankSlide :: [Block] -> Bool +makesBlankSlide = all blockIsBlank + +blockIsBlank :: Block -> Bool +blockIsBlank + = \case + Plain ins -> all inlineIsBlank ins + Para ins -> all inlineIsBlank ins + LineBlock inss -> all (all inlineIsBlank) inss + CodeBlock _ txt -> textIsBlank txt + RawBlock _ txt -> textIsBlank txt + BlockQuote bls -> all blockIsBlank bls + OrderedList _ blss -> all (all blockIsBlank) blss + BulletList blss -> all (all blockIsBlank) blss + DefinitionList ds -> all (uncurry (&&) . bimap (all inlineIsBlank) (all (all blockIsBlank))) ds + Header _ _ ils -> all inlineIsBlank ils + HorizontalRule -> True + Table{} -> False + Div _ bls -> all blockIsBlank bls + Null -> True + +textIsBlank :: T.Text -> Bool +textIsBlank = T.all isSpace + +inlineIsBlank :: Inline -> Bool +inlineIsBlank + = \case + (Str txt) -> textIsBlank txt + (Emph ins) -> all inlineIsBlank ins + (Underline ins) -> all inlineIsBlank ins + (Strong ins) -> all inlineIsBlank ins + (Strikeout ins) -> all inlineIsBlank ins + (Superscript ins) -> all inlineIsBlank ins + (Subscript ins) -> all inlineIsBlank ins + (SmallCaps ins) -> all inlineIsBlank ins + (Quoted _ ins) -> all inlineIsBlank ins + (Cite _ _) -> False + (Code _ txt) -> textIsBlank txt + Space -> True + SoftBreak -> True + LineBreak -> True + (Math _ txt) -> textIsBlank txt + (RawInline _ txt) -> textIsBlank txt + (Link _ ins (t1, t2)) -> all inlineIsBlank ins && textIsBlank t1 && textIsBlank t2 + (Image _ ins (t1, t2)) -> all inlineIsBlank ins && textIsBlank t1 && textIsBlank t2 + (Note bls) -> all blockIsBlank bls + (Span _ ins) -> all inlineIsBlank ins blocksToPresentationSlides :: [Block] -> Pres [Slide] blocksToPresentationSlides blks = do @@ -960,7 +1150,11 @@ metaToDocProps meta = , dcKeywords = keywords , dcDescription = description , cpCategory = Shared.stringify <$> lookupMeta "category" meta - , dcCreated = Nothing + , dcDate = + let t = Shared.stringify (docDate meta) + in if T.null t + then Nothing + else Just t , customProperties = customProperties' } diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 983ef412a..08733a792 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -219,28 +219,34 @@ blockToRST (Div (ident,classes,_kvs) bs) = do nest 3 contents $$ blankline blockToRST (Plain inlines) = inlineListToRST inlines -blockToRST (Para [Image attr txt (src, rawtit)]) = do +blockToRST (SimpleFigure attr txt (src, tit)) = do description <- inlineListToRST txt dims <- imageDimsToRST attr - -- title beginning with fig: indicates that the image is a figure - let (isfig, tit) = case T.stripPrefix "fig:" rawtit of - Nothing -> (False, rawtit) - Just tit' -> (True, tit') - let fig | isfig = "figure:: " <> literal src - | otherwise = "image:: " <> literal src - alt | isfig = ":alt: " <> if T.null tit then description else literal tit - | null txt = empty + let fig = "figure:: " <> literal src + alt = ":alt: " <> if T.null tit then description else literal tit + capt = description + (_,cls,_) = attr + classes = case cls of + [] -> empty + ["align-right"] -> ":align: right" + ["align-left"] -> ":align: left" + ["align-center"] -> ":align: center" + _ -> ":figclass: " <> literal (T.unwords cls) + return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline +blockToRST (Para [Image attr txt (src, _)]) = do + description <- inlineListToRST txt + dims <- imageDimsToRST attr + let fig = "image:: " <> literal src + alt | null txt = empty | otherwise = ":alt: " <> description - capt | isfig = description - | otherwise = empty + capt = empty (_,cls,_) = attr classes = case cls of [] -> empty ["align-right"] -> ":align: right" ["align-left"] -> ":align: left" ["align-center"] -> ":align: center" - _ | isfig -> ":figclass: " <> literal (T.unwords cls) - | otherwise -> ":class: " <> literal (T.unwords cls) + _ -> ":class: " <> literal (T.unwords cls) return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline blockToRST (Para inlines) | LineBreak `elem` inlines = @@ -270,7 +276,12 @@ blockToRST (Header level (name,classes,_) inlines) = do let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) let border = literal $ T.replicate (offset contents) $ T.singleton headerChar let anchor | T.null name || name == autoId = empty - | otherwise = ".. _" <> literal name <> ":" $$ blankline + | otherwise = ".. _" <> + (if T.any (==':') name || + T.take 1 name == "_" + then "`" <> literal name <> "`" + else literal name) <> + ":" $$ blankline return $ nowrap $ anchor $$ contents $$ border $$ blankline else do let rub = "rubric:: " <> contents @@ -402,7 +413,7 @@ blockListToRST' topLevel blocks = do toClose Header{} = False toClose LineBlock{} = False toClose HorizontalRule = False - toClose (Para [Image _ _ (_,t)]) = "fig:" `T.isPrefixOf` t + toClose SimpleFigure{} = True toClose Para{} = False toClose _ = True commentSep = RawBlock "rst" "..\n\n" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 3527949b4..eeef3eaf3 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -43,10 +43,11 @@ rtfEmbedImage opts x@(Image attr _ (src,_)) = catchError (do result <- P.fetchItem src case result of (imgdata, Just mime) - | mime == "image/jpeg" || mime == "image/png" -> do + | mime' <- T.takeWhile (/=';') mime + , mime' == "image/jpeg" || mime' == "image/png" -> do let bytes = map (T.pack . printf "%02x") $ B.unpack imgdata filetype <- - case mime of + case mime' of "image/jpeg" -> return "\\jpegblip" "image/png" -> return "\\pngblip" _ -> throwError $ @@ -64,7 +65,7 @@ rtfEmbedImage opts x@(Image attr _ (src,_)) = catchError -- twip = 1/1440in = 1/20pt where (xpx, ypx) = sizeInPixels sz (xpt, ypt) = desiredSizeInPoints opts attr sz - let raw = "{\\pict" <> filetype <> sizeSpec <> "\\bin " <> + let raw = "{\\pict" <> filetype <> sizeSpec <> " " <> T.concat bytes <> "}" if B.null imgdata then do @@ -259,7 +260,8 @@ blockToRTF indent _ HorizontalRule = return $ blockToRTF indent alignment (Header level _ lst) = do contents <- inlinesToRTF lst return $ rtfPar indent 0 alignment $ - "\\b \\fs" <> tshow (40 - (level * 4)) <> " " <> contents + "\\outlinelevel" <> tshow (level - 1) <> + " \\b \\fs" <> tshow (40 - (level * 4)) <> " " <> contents blockToRTF indent alignment (Table _ blkCapt specs thead tbody tfoot) = do let (caption, aligns, sizes, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot caption' <- inlinesToRTF caption diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 0b7c6bee0..b23fc1341 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -36,6 +36,7 @@ module Text.Pandoc.Writers.Shared ( , toTableOfContents , endsWithPlain , toLegacyTable + , splitSentences ) where import Safe (lastMay) @@ -49,6 +50,7 @@ import Data.List.NonEmpty (NonEmpty(..), nonEmpty) import Data.Text.Conversions (FromText(..)) import qualified Data.Map as M import qualified Data.Text as T +import Data.Text (Text) import qualified Text.Pandoc.Builder as Builder import Text.Pandoc.Definition import Text.Pandoc.Options @@ -119,13 +121,13 @@ metaValueToVal _ inlineWriter (MetaInlines is) = SimpleVal <$> inlineWriter is -- | Retrieve a field value from a template context. -getField :: FromContext a b => T.Text -> Context a -> Maybe b +getField :: FromContext a b => Text -> Context a -> Maybe b getField field (Context m) = M.lookup field m >>= fromVal -- | Set a field of a template context. If the field already has a value, -- convert it into a list with the new value appended to the old value(s). -- This is a utility function to be used in preparing template contexts. -setField :: ToContext a b => T.Text -> b -> Context a -> Context a +setField :: ToContext a b => Text -> b -> Context a -> Context a setField field val (Context m) = Context $ M.insertWith combine field (toVal val) m where @@ -135,21 +137,21 @@ setField field val (Context m) = -- | Reset a field of a template context. If the field already has a -- value, the new value replaces it. -- This is a utility function to be used in preparing template contexts. -resetField :: ToContext a b => T.Text -> b -> Context a -> Context a +resetField :: ToContext a b => Text -> b -> Context a -> Context a resetField field val (Context m) = Context (M.insert field (toVal val) m) -- | Set a field of a template context if it currently has no value. -- If it has a value, do nothing. -- This is a utility function to be used in preparing template contexts. -defField :: ToContext a b => T.Text -> b -> Context a -> Context a +defField :: ToContext a b => Text -> b -> Context a -> Context a defField field val (Context m) = Context (M.insertWith f field (toVal val) m) where f _newval oldval = oldval -- | Get the contents of the `lang` metadata field or variable. -getLang :: WriterOptions -> Meta -> Maybe T.Text +getLang :: WriterOptions -> Meta -> Maybe Text getLang opts meta = case lookupContext "lang" (writerVariables opts) of Just s -> Just s @@ -162,7 +164,7 @@ getLang opts meta = _ -> Nothing -- | Produce an HTML tag with the given pandoc attributes. -tagWithAttrs :: HasChars a => T.Text -> Attr -> Doc a +tagWithAttrs :: HasChars a => Text -> Attr -> Doc a tagWithAttrs tag (ident,classes,kvs) = hsep ["<" <> text (T.unpack tag) ,if T.null ident @@ -213,7 +215,7 @@ fixDisplayMath x = x -- | Converts a Unicode character into the ASCII sequence used to -- represent the character in "smart" Markdown. -unsmartify :: WriterOptions -> T.Text -> T.Text +unsmartify :: WriterOptions -> Text -> Text unsmartify opts = T.concatMap $ \c -> case c of '\8217' -> "'" '\8230' -> "..." @@ -345,7 +347,7 @@ gridTable opts blocksToDoc headless aligns widths headers rows = do -- | Retrieve the metadata value for a given @key@ -- and convert to Bool. -lookupMetaBool :: T.Text -> Meta -> Bool +lookupMetaBool :: Text -> Meta -> Bool lookupMetaBool key meta = case lookupMeta key meta of Just (MetaBlocks _) -> True @@ -356,7 +358,7 @@ lookupMetaBool key meta = -- | Retrieve the metadata value for a given @key@ -- and extract blocks. -lookupMetaBlocks :: T.Text -> Meta -> [Block] +lookupMetaBlocks :: Text -> Meta -> [Block] lookupMetaBlocks key meta = case lookupMeta key meta of Just (MetaBlocks bs) -> bs @@ -366,7 +368,7 @@ lookupMetaBlocks key meta = -- | Retrieve the metadata value for a given @key@ -- and extract inlines. -lookupMetaInlines :: T.Text -> Meta -> [Inline] +lookupMetaInlines :: Text -> Meta -> [Inline] lookupMetaInlines key meta = case lookupMeta key meta of Just (MetaString s) -> [Str s] @@ -377,7 +379,7 @@ lookupMetaInlines key meta = -- | Retrieve the metadata value for a given @key@ -- and convert to String. -lookupMetaString :: T.Text -> Meta -> T.Text +lookupMetaString :: Text -> Meta -> Text lookupMetaString key meta = case lookupMeta key meta of Just (MetaString s) -> s @@ -506,7 +508,7 @@ toLegacyTable (Caption _ cbody) specs thead tbodies tfoot = let (h, w, cBody) = getComponents c cRowPieces = cBody : replicate (w - 1) mempty cPendingPieces = replicate w $ replicate (h - 1) mempty - pendingPieces' = dropWhile null pendingPieces + pendingPieces' = drop w pendingPieces (pendingPieces'', rowPieces) = placeCutCells pendingPieces' cells' in (cPendingPieces <> pendingPieces'', cRowPieces <> rowPieces) | otherwise = ([], []) @@ -519,3 +521,27 @@ toLegacyTable (Caption _ cbody) specs thead tbodies tfoot getComponents (Cell _ _ (RowSpan h) (ColSpan w) body) = (h, w, body) + +splitSentences :: Doc Text -> Doc Text +splitSentences = go . toList + where + go [] = mempty + go (Text len t : BreakingSpace : xs) = + if isSentenceEnding t + then Text len t <> NewLine <> go xs + else Text len t <> BreakingSpace <> go xs + go (x:xs) = x <> go xs + + toList (Concat (Concat a b) c) = toList (Concat a (Concat b c)) + toList (Concat a b) = a : toList b + toList x = [x] + + isSentenceEnding t = + case T.unsnoc t of + Just (t',c) + | c == '.' || c == '!' || c == '?' -> True + | c == ')' || c == ']' || c == '"' || c == '\x201D' -> + case T.unsnoc t' of + Just (_,d) -> d == '.' || d == '!' || d == '?' + _ -> False + _ -> False diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 6a33b4283..3c5591b3a 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -123,8 +123,7 @@ blockToTexinfo (Plain lst) = inlineListToTexinfo lst -- title beginning with fig: indicates that the image is a figure -blockToTexinfo (Para [Image attr txt (src,tgt)]) - | Just tit <- T.stripPrefix "fig:" tgt = do +blockToTexinfo (SimpleFigure attr txt (src, tit)) = do capt <- if null txt then return empty else (\c -> text "@caption" <> braces c) `fmap` diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 03d030477..7f0d668e5 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -1,5 +1,4 @@ {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Textile Copyright : Copyright (C) 2010-2021 John MacFarlane @@ -111,8 +110,7 @@ blockToTextile opts (Div attr bs) = do blockToTextile opts (Plain inlines) = inlineListToTextile opts inlines --- title beginning with fig: indicates that the image is a figure -blockToTextile opts (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do +blockToTextile opts (SimpleFigure attr txt (src, tit)) = do capt <- blockToTextile opts (Para txt) im <- inlineToTextile opts (Image attr txt (src,tit)) return $ im <> "\n" <> capt diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index df914f590..5722b6d2e 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -1,5 +1,4 @@ {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ZimWiki Copyright : © 2008-2021 John MacFarlane, @@ -86,9 +85,8 @@ blockToZimWiki opts (Div _attrs bs) = do blockToZimWiki opts (Plain inlines) = inlineListToZimWiki opts inlines --- title beginning with fig: indicates that the image is a figure -- ZimWiki doesn't support captions - so combine together alt and caption into alt -blockToZimWiki opts (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do +blockToZimWiki opts (SimpleFigure attr txt (src, tit)) = do capt <- if null txt then return "" else (" " <>) `fmap` inlineListToZimWiki opts txt |