diff options
Diffstat (limited to 'src')
98 files changed, 2671 insertions, 2712 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 345ef3b18..8ee1adf13 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -2,7 +2,7 @@ {-# LANGUAGE GADTs #-} {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index c38ebdd84..19066e8b7 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -1,8 +1,9 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.App - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> @@ -40,45 +41,50 @@ module Text.Pandoc.App ( import Control.Applicative ((<|>)) import qualified Control.Exception as E import Control.Monad +import Control.Monad.Except (throwError) import Control.Monad.Trans -import Data.Aeson (eitherDecode', encode) +import Data.Monoid +import Data.Aeson (FromJSON (..), ToJSON (..), defaultOptions, eitherDecode', + encode, genericToEncoding) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import Data.Char (toLower, toUpper) -import qualified Data.Set as Set import Data.Foldable (foldrM) import Data.List (intercalate, isPrefixOf, isSuffixOf, sort) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) +import qualified Data.Set as Set +import Data.Text (Text) import qualified Data.Text as T import Data.Yaml (decode) import qualified Data.Yaml as Yaml -import Network.URI (URI (..), isURI, parseURI) +import GHC.Generics +import Network.URI (URI (..), parseURI) import Paths_pandoc (getDataDir) import Skylighting (Style, Syntax (..), defaultSyntaxMap, parseTheme) -import Skylighting.Parser (missingIncludes, parseSyntaxDefinition, - addSyntaxDefinition) +import Skylighting.Parser (addSyntaxDefinition, missingIncludes, + parseSyntaxDefinition) import System.Console.GetOpt import System.Directory (Permissions (..), doesFileExist, findExecutable, getAppUserDataDirectory, getPermissions) import System.Environment (getArgs, getEnvironment, getProgName) import System.Exit (ExitCode (..), exitSuccess) import System.FilePath -import System.IO (stdout) +import System.IO (nativeNewline, stdout) +import qualified System.IO as IO (Newline (..)) import System.IO.Error (isDoesNotExistError) import Text.Pandoc import Text.Pandoc.Builder (setMeta) -import Text.Pandoc.Class (PandocIO, getLog, withMediaBag) +import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getLog, + setResourcePath, withMediaBag) import Text.Pandoc.Highlighting (highlightingStyles) -import Text.Pandoc.Lua ( runLuaFilter ) -import Text.Pandoc.MediaBag (MediaBag, extractMediaBag, mediaDirectory) +import Text.Pandoc.Lua (runLuaFilter) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Process (pipeProcess) -import Text.Pandoc.SelfContained (makeSelfContained, makeDataURI) -import Text.Pandoc.Shared (headerShift, openURL, readDataFile, +import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) +import Text.Pandoc.Shared (headerShift, isURI, openURL, readDataFile, readDataFileUTF8, safeRead, tabFilter) import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (toEntities) import Text.Printf #ifndef _WINDOWS @@ -86,6 +92,12 @@ import System.Posix.IO (stdOutput) import System.Posix.Terminal (queryTerminal) #endif +data LineEnding = LF | CRLF | Native deriving (Show, Generic) + +instance ToJSON LineEnding where + toEncoding = genericToEncoding defaultOptions +instance FromJSON LineEnding + parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt parseOptions options' defaults = do rawArgs <- map UTF8.decodeArg <$> getArgs @@ -171,7 +183,7 @@ convertWithOpts opts = do -- disabling the custom writer for now writer <- if ".lua" `isSuffixOf` format -- note: use non-lowercased version writerName - then return (StringWriter + then return (TextWriter (\o d -> liftIO $ writeCustom writerName o d) :: Writer PandocIO) else case getWriter writerName of @@ -233,10 +245,9 @@ convertWithOpts opts = do withList f (x:xs) vars = f x vars >>= withList f xs variables <- - return (("outputfile", optOutputFile opts) : optVariables opts) - >>= + withList (addStringAsVariable "sourcefile") - (reverse $ optInputFiles opts) + (reverse $ optInputFiles opts) (("outputfile", optOutputFile opts) : optVariables opts) -- we reverse this list because, unlike -- the other option lists here, it is -- not reversed when parsed from CLI arguments. @@ -372,8 +383,8 @@ convertWithOpts opts = do then 0 else optTabStop opts) - readSources :: (Functor m, MonadIO m) => [FilePath] -> m String - readSources srcs = convertTabs . intercalate "\n" <$> + readSources :: [FilePath] -> PandocIO Text + readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$> mapM readSource srcs let runIO' :: PandocIO a -> IO a @@ -391,42 +402,47 @@ convertWithOpts opts = do E.throwIO PandocFailOnWarningError return res - let sourceToDoc :: [FilePath] -> PandocIO (Pandoc, MediaBag) + let sourceToDoc :: [FilePath] -> PandocIO Pandoc sourceToDoc sources' = case reader of - StringReader r - | optFileScope opts || readerName == "json" -> do - pairs <- mapM - (readSource >=> withMediaBag . r readerOpts) sources - return (mconcat (map fst pairs), mconcat (map snd pairs)) + TextReader r + | optFileScope opts || readerName == "json" -> + mconcat <$> mapM (readSource >=> r readerOpts) sources | otherwise -> - readSources sources' >>= withMediaBag . r readerOpts - ByteStringReader r -> do - pairs <- mapM (readFile' >=> - withMediaBag . r readerOpts) sources - return (mconcat (map fst pairs), mconcat (map snd pairs)) + readSources sources' >>= r readerOpts + ByteStringReader r -> + mconcat <$> mapM (readFile' >=> r readerOpts) sources metadata <- if format == "jats" && - lookup "csl" (optMetadata opts) == Nothing && - lookup "citation-style" (optMetadata opts) == Nothing + isNothing (lookup "csl" (optMetadata opts)) && + isNothing (lookup "citation-style" (optMetadata opts)) then do jatsCSL <- readDataFile datadir "jats.csl" let jatsEncoded = makeDataURI ("application/xml", jatsCSL) return $ ("csl", jatsEncoded) : optMetadata opts else return $ optMetadata opts + let eol = case optEol opts of + CRLF -> IO.CRLF + LF -> IO.LF + Native -> nativeNewline + runIO' $ do - (doc, media) <- sourceToDoc sources - doc' <- (maybe return (extractMedia media) (optExtractMedia opts) >=> - return . flip (foldr addMetadata) metadata >=> - applyTransforms transforms >=> - applyLuaFilters datadir (optLuaFilters opts) [format] >=> - applyFilters datadir filters' [format]) doc + setResourcePath (optResourcePath opts) + (doc, media) <- withMediaBag $ sourceToDoc sources >>= + ( (if isJust (optExtractMedia opts) + then fillMediaBag (writerSourceURL writerOptions) + else return) + >=> maybe return extractMedia (optExtractMedia opts) + >=> return . flip (foldr addMetadata) metadata + >=> applyTransforms transforms + >=> applyLuaFilters datadir (optLuaFilters opts) [format] + >=> applyFilters datadir filters' [format] + ) case writer of - -- StringWriter f -> f writerOptions doc' >>= writerFn outputFile - ByteStringWriter f -> f writerOptions doc' >>= writeFnBinary outputFile - StringWriter f + ByteStringWriter f -> f writerOptions doc >>= writeFnBinary outputFile + TextWriter f | pdfOutput -> do -- make sure writer is latex, beamer, context, html5 or ms unless (laTeXOutput || conTeXtOutput || html5Output || @@ -445,7 +461,7 @@ convertWithOpts opts = do when (isNothing mbPdfProg) $ liftIO $ E.throwIO $ PandocPDFProgramNotFoundError pdfprog - res <- makePDF pdfprog f writerOptions verbosity media doc' + res <- makePDF pdfprog f writerOptions verbosity media doc case res of Right pdf -> writeFnBinary outputFile pdf Left err' -> liftIO $ @@ -453,18 +469,23 @@ convertWithOpts opts = do | otherwise -> do let htmlFormat = format `elem` ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"] - selfcontain = if optSelfContained opts && htmlFormat - then makeSelfContained writerOptions - else return handleEntities = if (htmlFormat || format == "docbook4" || format == "docbook5" || format == "docbook") && optAscii opts then toEntities else id - output <- f writerOptions doc' - selfcontain (output ++ ['\n' | not standalone]) >>= - writerFn outputFile . handleEntities + addNl = if standalone + then id + else (<> T.singleton '\n') + output <- (addNl . handleEntities) <$> f writerOptions doc + writerFn eol outputFile =<< + if optSelfContained opts && htmlFormat + -- TODO not maximally efficient; change type + -- of makeSelfContained so it works w/ Text + then T.pack <$> makeSelfContained writerOptions + (T.unpack output) + else return output type Transform = Pandoc -> Pandoc @@ -568,7 +589,13 @@ data Opt = Opt , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body , optIncludeInHeader :: [FilePath] -- ^ Files to include in header - } + , optResourcePath :: [FilePath] -- ^ Path to search for images etc + , optEol :: LineEnding -- ^ Style of line-endings to use + } deriving (Generic, Show) + +instance ToJSON Opt where + toEncoding = genericToEncoding defaultOptions +instance FromJSON Opt -- | Defaults for command-line options. defaultOpts :: Opt @@ -636,6 +663,8 @@ defaultOpts = Opt , optIncludeBeforeBody = [] , optIncludeAfterBody = [] , optIncludeInHeader = [] + , optResourcePath = ["."] + , optEol = Native } addMetadata :: (String, String) -> Pandoc -> Pandoc @@ -728,19 +757,6 @@ defaultWriterName x = -- Transformations of a Pandoc document post-parsing: -extractMedia :: MonadIO m => MediaBag -> FilePath -> Pandoc -> m Pandoc -extractMedia media dir d = - case [fp | (fp, _, _) <- mediaDirectory media] of - [] -> return d - fps -> do - extractMediaBag True dir media - return $ walk (adjustImagePath dir fps) d - -adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline -adjustImagePath dir paths (Image attr lab (src, tit)) - | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) -adjustImagePath _ _ x = x - applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc applyTransforms transforms d = return $ foldr ($) d transforms @@ -773,17 +789,23 @@ applyFilters mbDatadir filters args d = do expandedFilters <- mapM (expandFilterPath mbDatadir) filters foldrM ($) d $ map (flip externalFilter args) expandedFilters -readSource :: MonadIO m => FilePath -> m String -readSource "-" = liftIO UTF8.getContents +readSource :: FilePath -> PandocIO Text +readSource "-" = liftIO (UTF8.toText <$> BS.getContents) readSource src = case parseURI src of Just u | uriScheme u `elem` ["http:","https:"] -> readURI src | uriScheme u == "file:" -> - liftIO $ UTF8.readFile (uriPath u) - _ -> liftIO $ UTF8.readFile src - -readURI :: MonadIO m => FilePath -> m String -readURI src = liftIO $ (UTF8.toString . fst) <$> openURL src + liftIO $ UTF8.toText <$> + BS.readFile (uriPath u) + _ -> liftIO $ UTF8.toText <$> + BS.readFile src + +readURI :: FilePath -> PandocIO Text +readURI src = do + res <- liftIO $ openURL src + case res of + Left e -> throwError $ PandocHttpError src e + Right (contents, _) -> return $ UTF8.toText contents readFile' :: MonadIO m => FilePath -> m B.ByteString readFile' "-" = liftIO B.getContents @@ -793,9 +815,10 @@ writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m () writeFnBinary "-" = liftIO . B.putStr writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f) -writerFn :: MonadIO m => FilePath -> String -> m () -writerFn "-" = liftIO . UTF8.putStr -writerFn f = liftIO . UTF8.writeFile f +writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m () +-- TODO this implementation isn't maximally efficient: +writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack +writerFn eol f = liftIO . UTF8.writeFileWith eol f . T.unpack lookupHighlightStyle :: Maybe String -> IO (Maybe Style) lookupHighlightStyle Nothing = return Nothing @@ -968,6 +991,19 @@ options = "NUMBER") "" -- "Dpi (default 96)" + , Option "" ["eol"] + (ReqArg + (\arg opt -> + case toLower <$> arg of + "crlf" -> return opt { optEol = CRLF } + "lf" -> return opt { optEol = LF } + "native" -> return opt { optEol = Native } + -- mac-syntax (cr) is not supported in ghc-base. + _ -> E.throwIO $ PandocOptionError + "--eol must be crlf, lf, or native") + "crlf|lf|native") + "" -- "EOL (default OS-dependent)" + , Option "" ["wrap"] (ReqArg (\arg opt -> @@ -1046,6 +1082,14 @@ options = "FILE") "" -- "File to include after document body" + , Option "" ["resource-path"] + (ReqArg + (\arg opt -> return opt { optResourcePath = + splitSearchPath arg }) + "SEARCHPATH") + "" -- "Paths to search for images and other resources" + + , Option "" ["self-contained"] (NoArg (\opt -> return opt { optSelfContained = True, @@ -1388,8 +1432,8 @@ options = map ("--" ++) longs let allopts = unwords (concatMap optnames options) UTF8.hPutStrLn stdout $ printf tpl allopts - (unwords readers'names) - (unwords writers'names) + (unwords readersNames) + (unwords writersNames) (unwords $ map fst highlightingStyles) ddir exitSuccess )) @@ -1398,14 +1442,14 @@ options = , Option "" ["list-input-formats"] (NoArg (\_ -> do - mapM_ (UTF8.hPutStrLn stdout) readers'names + mapM_ (UTF8.hPutStrLn stdout) readersNames exitSuccess )) "" , Option "" ["list-output-formats"] (NoArg (\_ -> do - mapM_ (UTF8.hPutStrLn stdout) writers'names + mapM_ (UTF8.hPutStrLn stdout) writersNames exitSuccess )) "" @@ -1509,14 +1553,15 @@ uppercaseFirstLetter :: String -> String uppercaseFirstLetter (c:cs) = toUpper c : cs uppercaseFirstLetter [] = [] -readers'names :: [String] -readers'names = sort (map fst (readers :: [(String, Reader PandocIO)])) +readersNames :: [String] +readersNames = sort (map fst (readers :: [(String, Reader PandocIO)])) -writers'names :: [String] -writers'names = sort (map fst (writers :: [(String, Writer PandocIO)])) +writersNames :: [String] +writersNames = sort (map fst (writers :: [(String, Writer PandocIO)])) splitField :: String -> (String, String) splitField s = case break (`elem` ":=") s of (k,_:v) -> (k,v) (k,[]) -> (k,"true") + diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs index 411a112b2..7125e5bcd 100644 --- a/src/Text/Pandoc/Asciify.hs +++ b/src/Text/Pandoc/Asciify.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Asciify - Copyright : Copyright (C) 2013-2016 John MacFarlane + Copyright : Copyright (C) 2013-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs index 3e2fd6309..41be1ea13 100644 --- a/src/Text/Pandoc/CSS.hs +++ b/src/Text/Pandoc/CSS.hs @@ -11,7 +11,7 @@ import Text.Parsec.String ruleParser :: Parser (String, String) ruleParser = do p <- many1 (noneOf ":") <* char ':' - v <- many1 (noneOf ":;") <* (optional $ char ';') <* spaces + v <- many1 (noneOf ":;") <* optional (char ';') <* spaces return (trim p, trim v) styleAttrParser :: Parser [(String, String)] diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 1afa64c10..14a0b8044 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -1,6 +1,8 @@ -{-# LANGUAGE DeriveFunctor, DeriveDataTypeable, TypeSynonymInstances, -FlexibleInstances, GeneralizedNewtypeDeriving, FlexibleContexts, -StandaloneDeriving #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleContexts #-} {- Copyright (C) 2016 Jesse Rosenthal <jrosenthal@jhu.edu> @@ -61,6 +63,8 @@ module Text.Pandoc.Class ( PandocMonad(..) , runIOorExplode , runPure , withMediaBag + , fillMediaBag + , extractMedia ) where import Prelude hiding (readFile) @@ -76,8 +80,11 @@ import Text.Pandoc.Compat.Time (UTCTime) import Text.Pandoc.Logging import Text.Parsec (ParsecT) import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime) -import Text.Pandoc.MIME (MimeType, getMimeType) +import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) +import Text.Pandoc.Definition import Data.Char (toLower) +import Data.Digest.Pure.SHA (sha1, showDigest) +import Data.Maybe (fromMaybe) import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds , posixSecondsToUTCTime , POSIXTime ) @@ -86,13 +93,16 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, parseURI, URI(..) ) import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) -import Text.Pandoc.MediaBag (MediaBag, lookupMedia) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import Text.Pandoc.Walk (walkM, walk) import qualified Text.Pandoc.MediaBag as MB import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified System.Environment as IO (lookupEnv) import System.FilePath.Glob (match, compile) -import System.FilePath ((</>), takeExtension, dropExtension, isRelative) +import System.Directory (createDirectoryIfMissing) +import System.FilePath ((</>), (<.>), takeDirectory, + takeExtension, dropExtension, isRelative, normalise) import qualified System.FilePath.Glob as IO (glob) import qualified System.Directory as IO (getModificationTime) import Control.Monad as M (fail) @@ -145,7 +155,7 @@ report :: PandocMonad m => LogMessage -> m () report msg = do verbosity <- getsCommonState stVerbosity let level = messageVerbosity msg - when (level <= verbosity) $ do + when (level <= verbosity) $ logOutput msg unless (level == DEBUG) $ modifyCommonState $ \st -> st{ stLog = msg : stLog st } @@ -213,7 +223,7 @@ runIO :: PandocIO a -> IO (Either PandocError a) runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma withMediaBag :: PandocMonad m => m a -> m (a, MediaBag) -withMediaBag ma = ((,)) <$> ma <*> getMediaBag +withMediaBag ma = (,) <$> ma <*> getMediaBag runIOorExplode :: PandocIO a -> IO a runIOorExplode ma = runIO ma >>= handleError @@ -239,10 +249,13 @@ instance PandocMonad PandocIO where getCurrentTime = liftIO IO.getCurrentTime getCurrentTimeZone = liftIO IO.getCurrentTimeZone newStdGen = liftIO IO.newStdGen - newUniqueHash = hashUnique <$> (liftIO IO.newUnique) + newUniqueHash = hashUnique <$> liftIO IO.newUnique openURL u = do report $ Fetching u - liftIOError IO.openURL u + res <- liftIO (IO.openURL u) + case res of + Right r -> return r + Left e -> throwError $ PandocHttpError u e readFileLazy s = liftIOError BL.readFile s readFileStrict s = liftIOError B.readFile s readDataFile mfp fname = liftIOError (IO.readDataFile mfp) fname @@ -252,7 +265,7 @@ instance PandocMonad PandocIO where putCommonState x = PandocIO $ lift $ put x logOutput msg = liftIO $ do UTF8.hPutStr stderr $ "[" ++ - (map toLower $ show (messageVerbosity msg)) ++ "] " + map toLower (show (messageVerbosity msg)) ++ "] " alertIndent $ lines $ showLogMessage msg alertIndent :: [String] -> IO () @@ -283,14 +296,14 @@ fetchItem :: PandocMonad m fetchItem sourceURL s = do mediabag <- getMediaBag case lookupMedia s mediabag of - Just (mime, bs) -> return $ (BL.toStrict bs, Just mime) + Just (mime, bs) -> return (BL.toStrict bs, Just mime) Nothing -> downloadOrRead sourceURL s downloadOrRead :: PandocMonad m => Maybe String -> String -> m (B.ByteString, Maybe MimeType) -downloadOrRead sourceURL s = do +downloadOrRead sourceURL s = case (sourceURL >>= parseURIReference' . ensureEscaped, ensureEscaped s) of (Just u, s') -> -- try fetching from relative path at source @@ -330,12 +343,73 @@ downloadOrRead sourceURL s = do convertSlash x = x withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a -withPaths [] _ fp = throwError $ PandocIOError fp - (userError "file not found in resource path") +withPaths [] _ fp = throwError $ PandocResourceNotFound fp withPaths (p:ps) action fp = catchError (action (p </> fp)) (\_ -> withPaths ps action fp) +-- | Traverse tree, filling media bag for any images that +-- aren't already in the media bag. +fillMediaBag :: PandocMonad m => Maybe String -> Pandoc -> m Pandoc +fillMediaBag sourceURL d = walkM handleImage d + where handleImage :: PandocMonad m => Inline -> m Inline + handleImage (Image attr lab (src, tit)) = catchError + (do mediabag <- getMediaBag + case lookupMedia src mediabag of + Just (_, _) -> return $ Image attr lab (src, tit) + Nothing -> do + (bs, mt) <- downloadOrRead sourceURL src + let ext = fromMaybe (takeExtension src) + (mt >>= extensionFromMimeType) + let bs' = BL.fromChunks [bs] + let basename = showDigest $ sha1 bs' + let fname = basename <.> ext + insertMedia fname mt bs' + return $ Image attr lab (fname, tit)) + (\e -> + case e of + PandocResourceNotFound _ -> do + report $ CouldNotFetchResource src + "replacing image with description" + -- emit alt text + return $ Span ("",["image"],[]) lab + PandocHttpError u er -> do + report $ CouldNotFetchResource u + (show er ++ "\rReplacing image with description.") + -- emit alt text + return $ Span ("",["image"],[]) lab + _ -> throwError e) + handleImage x = return x + +-- | Extract media from the mediabag into a directory. +extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc +extractMedia dir d = do + media <- getMediaBag + case [fp | (fp, _, _) <- mediaDirectory media] of + [] -> return d + fps -> do + mapM_ (writeMedia dir media) fps + return $ walk (adjustImagePath dir fps) d + +writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO () +writeMedia dir mediabag subpath = do + -- we join and split to convert a/b/c to a\b\c on Windows; + -- in zip containers all paths use / + let fullpath = dir </> normalise subpath + let mbcontents = lookupMedia subpath mediabag + case mbcontents of + Nothing -> throwError $ PandocResourceNotFound subpath + Just (_, bs) -> do + report $ Extracting fullpath + liftIO $ do + createDirectoryIfMissing True $ takeDirectory fullpath + BL.writeFile fullpath bs + +adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline +adjustImagePath dir paths (Image attr lab (src, tit)) + | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) +adjustImagePath _ _ x = x + data PureState = PureState { stStdGen :: StdGen , stWord8Store :: [Word8] -- should be -- inifinite, @@ -373,7 +447,7 @@ instance Default PureState where getPureState :: PandocPure PureState -getPureState = PandocPure $ lift $ lift $ get +getPureState = PandocPure $ lift $ lift get getsPureState :: (PureState -> a) -> PandocPure a getsPureState f = f <$> getPureState @@ -433,30 +507,27 @@ instance PandocMonad PandocPure where modifyPureState $ \st -> st { stUniqStore = us } return u _ -> M.fail "uniq store ran out of elements" - openURL u = throwError $ PandocIOError u $ - userError "Cannot open URL in PandocPure" + openURL u = throwError $ PandocResourceNotFound u readFileLazy fp = do fps <- getsPureState stFiles case infoFileContents <$> getFileInfo fp fps of Just bs -> return (BL.fromStrict bs) - Nothing -> throwError $ PandocIOError fp - (userError "File not found in PureState") + Nothing -> throwError $ PandocResourceNotFound fp readFileStrict fp = do fps <- getsPureState stFiles case infoFileContents <$> getFileInfo fp fps of Just bs -> return bs - Nothing -> throwError $ PandocIOError fp - (userError "File not found in PureState") - readDataFile Nothing "reference.docx" = do + Nothing -> throwError $ PandocResourceNotFound fp + readDataFile Nothing "reference.docx" = (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx - readDataFile Nothing "reference.odt" = do + readDataFile Nothing "reference.odt" = (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT readDataFile Nothing fname = do let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname readFileStrict fname' readDataFile (Just userDir) fname = do userDirFiles <- getsPureState stUserDataDir - case infoFileContents <$> (getFileInfo (userDir </> fname) userDirFiles) of + case infoFileContents <$> getFileInfo (userDir </> fname) userDirFiles of Just bs -> return bs Nothing -> readDataFile Nothing fname @@ -466,12 +537,12 @@ instance PandocMonad PandocPure where getModificationTime fp = do fps <- getsPureState stFiles - case infoFileMTime <$> (getFileInfo fp fps) of + case infoFileMTime <$> getFileInfo fp fps of Just tm -> return tm Nothing -> throwError $ PandocIOError fp (userError "Can't get modification time") - getCommonState = PandocPure $ lift $ get + getCommonState = PandocPure $ lift get putCommonState x = PandocPure $ lift $ put x logOutput _msg = return () @@ -555,4 +626,3 @@ instance PandocMonad m => PandocMonad (StateT st m) where getCommonState = lift getCommonState putCommonState = lift . putCommonState logOutput = lift . logOutput - diff --git a/src/Text/Pandoc/Compat/Time.hs b/src/Text/Pandoc/Compat/Time.hs index b1cde82a4..1de197801 100644 --- a/src/Text/Pandoc/Compat/Time.hs +++ b/src/Text/Pandoc/Compat/Time.hs @@ -27,4 +27,4 @@ where import Data.Time import System.Locale ( defaultTimeLocale ) -#endif +#endif
\ No newline at end of file diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 135cb3945..3cf381168 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -1,7 +1,7 @@ {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.Error - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -42,10 +42,12 @@ import Text.Parsec.Pos hiding (Line) import qualified Text.Pandoc.UTF8 as UTF8 import System.Exit (exitWith, ExitCode(..)) import System.IO (stderr) +import Network.HTTP.Client (HttpException) type Input = String data PandocError = PandocIOError String IOError + | PandocHttpError String HttpException | PandocShouldNeverHappenError String | PandocSomeError String | PandocParseError String @@ -58,6 +60,7 @@ data PandocError = PandocIOError String IOError | PandocPDFError String | PandocFilterError String String | PandocCouldNotFindDataFileError String + | PandocResourceNotFound String | PandocAppError String deriving (Show, Typeable, Generic) @@ -69,6 +72,8 @@ handleError (Right r) = return r handleError (Left e) = case e of PandocIOError _ err' -> ioError err' + PandocHttpError u err' -> err 61 $ + "Could not fetch " ++ u ++ "\n" ++ show err' PandocShouldNeverHappenError s -> err 62 s PandocSomeError s -> err 63 s PandocParseError s -> err 64 s @@ -78,7 +83,7 @@ handleError (Left e) = errColumn = sourceColumn errPos ls = lines input ++ [""] errorInFile = if length ls > errLine - 1 - then concat ["\n", (ls !! (errLine - 1)) + then concat ["\n", ls !! (errLine - 1) ,"\n", replicate (errColumn - 1) ' ' ,"^"] else "" @@ -94,6 +99,8 @@ handleError (Left e) = filtername ++ ":\n" ++ msg PandocCouldNotFindDataFileError fn -> err 97 $ "Could not find data file " ++ fn + PandocResourceNotFound fn -> err 99 $ + "File " ++ fn ++ " not found in resource path" PandocAppError s -> err 1 s err :: Int -> String -> IO a diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 24f7d56ec..58e8c414d 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -137,6 +137,7 @@ data Extension = | Ext_shortcut_reference_links -- ^ Shortcut reference links | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes | Ext_old_dashes -- ^ -- = em, - before number = en + | Ext_spaced_reference_links -- ^ Allow space between two parts of ref link deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) -- | Extensions to be used with pandoc-flavored markdown. @@ -187,7 +188,7 @@ pandocExtensions = extensionsFromList , Ext_smart ] --- | Extensions to be used with github-flavored markdown. +-- | Extensions to be used with plain text output. plainExtensions :: Extensions plainExtensions = extensionsFromList [ Ext_table_captions @@ -220,6 +221,7 @@ phpMarkdownExtraExtensions = extensionsFromList , Ext_link_attributes , Ext_abbreviations , Ext_shortcut_reference_links + , Ext_spaced_reference_links ] -- | Extensions to be used with github-flavored markdown. @@ -271,6 +273,8 @@ multimarkdownExtensions = extensionsFromList -- not to include these: , Ext_superscript , Ext_subscript + , Ext_backtick_code_blocks + , Ext_spaced_reference_links ] -- | Language extensions to be used with strict markdown. @@ -278,6 +282,7 @@ strictExtensions :: Extensions strictExtensions = extensionsFromList [ Ext_raw_html , Ext_shortcut_reference_links + , Ext_spaced_reference_links ] -- | Default extensions from format-describing string. diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index f249f96ad..0754aae4c 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Highlighting - Copyright : Copyright (C) 2008-2016 John MacFarlane + Copyright : Copyright (C) 2008-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -91,7 +91,7 @@ highlight syntaxmap formatter (_, classes, keyvals) rawCode = , traceOutput = False } classes' = map T.pack classes rawCode' = T.pack rawCode - in case msum (map (\l -> lookupSyntax l syntaxmap) classes') of + in case msum (map ((`lookupSyntax` syntaxmap)) classes') of Nothing | numberLines fmtOpts -> Right $ formatter fmtOpts{ codeClasses = [], @@ -100,9 +100,9 @@ highlight syntaxmap formatter (_, classes, keyvals) rawCode = $ T.lines rawCode' | otherwise -> Left "" Just syntax -> - (formatter fmtOpts{ codeClasses = + formatter fmtOpts{ codeClasses = [T.toLower (sShortname syntax)], - containerClasses = classes' }) <$> + containerClasses = classes' } <$> tokenize tokenizeOpts syntax rawCode' -- Functions for correlating latex listings package's language names diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 8b2d577a9..61ff006cf 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-} {-# OPTIONS_GHC -fno-warn-type-defaults #-} {- - Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> + Copyright (C) 2011-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ {- | Module : Text.Pandoc.ImageSize -Copyright : Copyright (C) 2011-2016 John MacFarlane +Copyright : Copyright (C) 2011-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -43,6 +43,7 @@ module Text.Pandoc.ImageSize ( ImageType(..) , inInch , inPixel , inPoints + , inEm , numUnit , showInInch , showInPixel @@ -80,12 +81,14 @@ data Dimension = Pixel Integer | Centimeter Double | Inch Double | Percent Double + | Em Double instance Show Dimension where show (Pixel a) = show a ++ "px" show (Centimeter a) = showFl a ++ "cm" show (Inch a) = showFl a ++ "in" show (Percent a) = show a ++ "%" + show (Em a) = showFl a ++ "em" data ImageSize = ImageSize{ pxX :: Integer @@ -97,7 +100,13 @@ instance Default ImageSize where def = ImageSize 300 200 72 72 showFl :: (RealFloat a) => a -> String -showFl a = showFFloat (Just 5) a "" +showFl a = removeExtra0s $ showFFloat (Just 5) a "" + +removeExtra0s :: String -> String +removeExtra0s s = + case dropWhile (=='0') $ reverse s of + '.':xs -> reverse xs + xs -> reverse xs imageType :: ByteString -> Maybe ImageType imageType img = case B.take 4 img of @@ -111,12 +120,12 @@ imageType img = case B.take 4 img of | findSvgTag img -> return Svg "%!PS" - | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" + | B.take 4 (B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" -> return Eps _ -> mzero findSvgTag :: ByteString -> Bool -findSvgTag img = B.null $ snd (B.breakSubstring img "<svg") +findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img imageSize :: WriterOptions -> ByteString -> Either String ImageSize imageSize opts img = @@ -159,7 +168,7 @@ desiredSizeInPoints opts attr s = (Nothing, Nothing) -> sizeInPoints s where ratio = fromIntegral (pxX s) / fromIntegral (pxY s) - getDim dir = case (dimension dir attr) of + getDim dir = case dimension dir attr of Just (Percent _) -> Nothing Just dim -> Just $ inPoints opts dim Nothing -> Nothing @@ -167,13 +176,17 @@ desiredSizeInPoints opts attr s = inPoints :: WriterOptions -> Dimension -> Double inPoints opts dim = 72 * inInch opts dim +inEm :: WriterOptions -> Dimension -> Double +inEm opts dim = (64/11) * inInch opts dim + inInch :: WriterOptions -> Dimension -> Double inInch opts dim = case dim of - (Pixel a) -> fromIntegral a / (fromIntegral $ writerDpi opts) + (Pixel a) -> fromIntegral a / fromIntegral (writerDpi opts) (Centimeter a) -> a * 0.3937007874 (Inch a) -> a (Percent _) -> 0 + (Em a) -> a * (11/64) inPixel :: WriterOptions -> Dimension -> Integer inPixel opts dim = @@ -181,7 +194,8 @@ inPixel opts dim = (Pixel a) -> a (Centimeter a) -> floor $ dpi * a * 0.3937007874 :: Integer (Inch a) -> floor $ dpi * a :: Integer - _ -> 0 + (Percent _) -> 0 + (Em a) -> floor $ dpi * a * (11/64) :: Integer where dpi = fromIntegral $ writerDpi opts @@ -213,6 +227,7 @@ scaleDimension factor dim = Centimeter x -> Centimeter (factor * x) Inch x -> Inch (factor * x) Percent x -> Percent (factor * x) + Em x -> Em (factor * x) -- | Read a Dimension from an Attr attribute. -- `dimension Width attr` might return `Just (Pixel 3)` or for example `Just (Centimeter 2.0)`, etc. @@ -236,6 +251,7 @@ lengthToDim s = numUnit s >>= uncurry toDim toDim a "" = Just $ Pixel (floor a::Integer) toDim a "pt" = Just $ Inch (a / 72) toDim a "pc" = Just $ Inch (a / 6) + toDim a "em" = Just $ Em a toDim _ _ = Nothing epsSize :: ByteString -> Maybe ImageSize @@ -245,7 +261,7 @@ epsSize img = do case ls' of [] -> mzero (x:_) -> case B.words x of - (_:_:_:ux:uy:[]) -> do + [_, _, _, ux, uy] -> do ux' <- safeRead $ B.unpack ux uy' <- safeRead $ B.unpack uy return ImageSize{ @@ -263,27 +279,26 @@ pngSize img = do let (i, rest') = B.splitAt 4 $ B.drop 4 rest guard $ i == "MHDR" || i == "IHDR" let (sizes, rest'') = B.splitAt 8 rest' - (x,y) <- case map fromIntegral $ unpack $ sizes of + (x,y) <- case map fromIntegral $unpack sizes of ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return - ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4, - (shift h1 24) + (shift h2 16) + (shift h3 8) + h4) + (shift w1 24 + shift w2 16 + shift w3 8 + w4, + shift h1 24 + shift h2 16 + shift h3 8 + h4) _ -> Nothing -- "PNG parse error" let (dpix, dpiy) = findpHYs rest'' - return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } + return ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } findpHYs :: ByteString -> (Integer, Integer) -findpHYs x = - if B.null x || "IDAT" `B.isPrefixOf` x - then (72,72) -- default, no pHYs - else if "pHYs" `B.isPrefixOf` x - then let [x1,x2,x3,x4,y1,y2,y3,y4,u] = map fromIntegral - $ unpack $ B.take 9 $ B.drop 4 x - factor = if u == 1 -- dots per meter - then \z -> z * 254 `div` 10000 - else const 72 - in ( factor $ (shift x1 24) + (shift x2 16) + (shift x3 8) + x4, - factor $ (shift y1 24) + (shift y2 16) + (shift y3 8) + y4 ) - else findpHYs $ B.drop 1 x -- read another byte +findpHYs x + | B.null x || "IDAT" `B.isPrefixOf` x = (72,72) + | "pHYs" `B.isPrefixOf` x = + let [x1,x2,x3,x4,y1,y2,y3,y4,u] = + map fromIntegral $ unpack $ B.take 9 $ B.drop 4 x + factor = if u == 1 -- dots per meter + then \z -> z * 254 `div` 10000 + else const 72 + in ( factor $ (shift x1 24) + (shift x2 16) + (shift x3 8) + x4, + factor $ (shift y1 24) + (shift y2 16) + (shift y3 8) + y4 ) + | otherwise = findpHYs $ B.drop 1 x -- read another byte gifSize :: ByteString -> Maybe ImageSize gifSize img = do @@ -327,16 +342,16 @@ jpegSize img = jfifSize :: ByteString -> Either String ImageSize jfifSize rest = let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral - $ unpack $ B.take 5 $ B.drop 9 $ rest + $ unpack $ B.take 5 $B.drop 9 rest factor = case dpiDensity of 1 -> id - 2 -> \x -> (x * 254 `div` 10) + 2 -> \x -> x * 254 `div` 10 _ -> const 72 dpix = factor (shift dpix1 8 + dpix2) dpiy = factor (shift dpiy1 8 + dpiy2) in case findJfifSize rest of Left msg -> Left msg - Right (w,h) -> Right $ ImageSize { pxX = w + Right (w,h) ->Right ImageSize { pxX = w , pxY = h , dpiX = dpix , dpiY = dpiy } @@ -370,7 +385,7 @@ runGet' p bl = exifSize :: ByteString -> Either String ImageSize -exifSize bs = runGet' header $ bl +exifSize bs =runGet' header bl where bl = BL.fromChunks [bs] header = runExceptT $ exifHeader bl -- NOTE: It would be nicer to do @@ -440,14 +455,13 @@ exifHeader hdr = do Left msg -> throwError msg Right x -> return x return (tag, payload) - entries <- sequence $ replicate (fromIntegral numentries) ifdEntry + entries <- replicateM (fromIntegral numentries) ifdEntry subentries <- case lookup ExifOffset entries of Just (UnsignedLong offset') -> do pos <- lift bytesRead lift $ skip (fromIntegral offset' - (fromIntegral pos - 8)) numsubentries <- lift getWord16 - sequence $ - replicate (fromIntegral numsubentries) ifdEntry + replicateM (fromIntegral numsubentries) ifdEntry _ -> return [] let allentries = entries ++ subentries (wdth, hght) <- case (lookup ExifImageWidth allentries, @@ -458,13 +472,13 @@ exifHeader hdr = do -- we return a default width and height when -- the exif header doesn't contain these let resfactor = case lookup ResolutionUnit allentries of - Just (UnsignedShort 1) -> (100 / 254) + Just (UnsignedShort 1) -> 100 / 254 _ -> 1 let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) $ lookup XResolution allentries let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) $ lookup YResolution allentries - return $ ImageSize{ + return ImageSize{ pxX = wdth , pxY = hght , dpiX = xres @@ -588,3 +602,4 @@ tagTypeTable = M.fromList , (0xa300, FileSource) , (0xa301, SceneType) ] + diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 59b010034..da8c775f6 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.Logging - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -39,6 +39,7 @@ module Text.Pandoc.Logging ( , messageVerbosity ) where +import Control.Monad (mzero) import Data.Aeson import Data.Aeson.Encode.Pretty (Config (..), defConfig, encodePretty', keyOrder) @@ -56,12 +57,22 @@ data Verbosity = ERROR | WARNING | INFO | DEBUG instance ToJSON Verbosity where toJSON x = toJSON (show x) +instance FromJSON Verbosity where + parseJSON (String t) = + case t of + "ERROR" -> return ERROR + "WARNING" -> return WARNING + "INFO" -> return INFO + "DEBUG" -> return DEBUG + _ -> mzero + parseJSON _ = mzero data LogMessage = SkippedContent String SourcePos | CouldNotParseYamlMetadata String SourcePos | DuplicateLinkReference String SourcePos | DuplicateNoteReference String SourcePos + | NoteDefinedButNotUsed String SourcePos | DuplicateIdentifier String SourcePos | ReferenceNotFound String SourcePos | CircularReference String SourcePos @@ -78,6 +89,7 @@ data LogMessage = | CouldNotConvertTeXMath String String | CouldNotParseCSS String | Fetching String + | Extracting String | NoTitleElement String | NoLangSpecified | CouldNotHighlight String @@ -103,6 +115,11 @@ instance ToJSON LogMessage where "source" .= Text.pack (sourceName pos), "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] + NoteDefinedButNotUsed s pos -> + ["key" .= Text.pack s, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] DuplicateNoteReference s pos -> ["contents" .= Text.pack s, "source" .= Text.pack (sourceName pos), @@ -162,6 +179,8 @@ instance ToJSON LogMessage where ["message" .= Text.pack msg] Fetching fp -> ["path" .= Text.pack fp] + Extracting fp -> + ["path" .= Text.pack fp] NoTitleElement fallback -> ["fallback" .= Text.pack fallback] NoLangSpecified -> [] @@ -193,6 +212,9 @@ showLogMessage msg = "Duplicate link reference '" ++ s ++ "' at " ++ showPos pos DuplicateNoteReference s pos -> "Duplicate note reference '" ++ s ++ "' at " ++ showPos pos + NoteDefinedButNotUsed s pos -> + "Note with key '" ++ s ++ "' defined at " ++ showPos pos ++ + " but not used." DuplicateIdentifier s pos -> "Duplicate identifier '" ++ s ++ "' at " ++ showPos pos ReferenceNotFound s pos -> @@ -229,6 +251,8 @@ showLogMessage msg = "Could not parse CSS" ++ if null m then "" else (':':'\n':m) Fetching fp -> "Fetching " ++ fp ++ "..." + Extracting fp -> + "Extracting " ++ fp ++ "..." NoTitleElement fallback -> "This document format requires a nonempty <title> element.\n" ++ "Please specify either 'title' or 'pagetitle' in the metadata.\n" ++ @@ -242,10 +266,11 @@ showLogMessage msg = messageVerbosity:: LogMessage -> Verbosity messageVerbosity msg = case msg of - SkippedContent{} -> INFO + SkippedContent{} -> WARNING CouldNotParseYamlMetadata{} -> WARNING DuplicateLinkReference{} -> WARNING DuplicateNoteReference{} -> WARNING + NoteDefinedButNotUsed{} -> WARNING DuplicateIdentifier{} -> WARNING ReferenceNotFound{} -> WARNING CircularReference{} -> WARNING @@ -262,6 +287,7 @@ messageVerbosity msg = CouldNotConvertTeXMath{} -> WARNING CouldNotParseCSS{} -> WARNING Fetching{} -> INFO + Extracting{} -> INFO NoTitleElement{} -> WARNING NoLangSpecified -> INFO CouldNotHighlight{} -> WARNING diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index f4a22b92a..f74c0e425 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -15,8 +15,8 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {- | Module : Text.Pandoc.Lua @@ -30,12 +30,12 @@ Pandoc lua utils. -} module Text.Pandoc.Lua ( runLuaFilter, pushPandocModule ) where -import Control.Monad ( (>=>), when ) -import Control.Monad.Trans ( MonadIO(..) ) -import Data.Map ( Map ) -import Scripting.Lua ( LuaState, StackValue(..) ) +import Control.Monad (unless, when, (>=>)) +import Control.Monad.Trans (MonadIO (..)) +import Data.Map (Map) +import Scripting.Lua (LuaState, StackValue (..)) import Text.Pandoc.Definition -import Text.Pandoc.Lua.PandocModule ( pushPandocModule ) +import Text.Pandoc.Lua.PandocModule (pushPandocModule) import Text.Pandoc.Lua.StackInstances () import Text.Pandoc.Walk @@ -80,7 +80,7 @@ pushGlobalFilter lua = *> Lua.rawseti lua (-2) 1 runAll :: [LuaFilter] -> Pandoc -> IO Pandoc -runAll [] = return +runAll [] = return runAll (x:xs) = walkMWithLuaFilter x >=> runAll xs walkMWithLuaFilter :: LuaFilter -> Pandoc -> IO Pandoc @@ -225,7 +225,7 @@ instance StackValue LuaFilterFunction where push lua v = pushFilterFunction lua v peek lua i = do isFn <- Lua.isfunction lua i - when (not isFn) (error $ "Not a function at index " ++ (show i)) + unless isFn (error $ "Not a function at index " ++ (show i)) Lua.pushvalue lua i push lua ("PANDOC_FILTER_FUNCTIONS"::String) Lua.rawget lua Lua.registryindex diff --git a/src/Text/Pandoc/Lua/Compat.hs b/src/Text/Pandoc/Lua/Compat.hs index 998d8d032..3fc81a15c 100644 --- a/src/Text/Pandoc/Lua/Compat.hs +++ b/src/Text/Pandoc/Lua/Compat.hs @@ -28,13 +28,13 @@ Compatibility helpers for hslua -} module Text.Pandoc.Lua.Compat ( loadstring ) where -import Scripting.Lua ( LuaState ) +import Scripting.Lua (LuaState) import qualified Scripting.Lua as Lua -- | Interpret string as lua code and load into the lua environment. loadstring :: LuaState -> String -> String -> IO Int #if MIN_VERSION_hslua(0,5,0) -loadstring lua script _ = Lua.loadstring lua script +loadstring lua script _ = Lua.loadstring lua script #else loadstring lua script cn = Lua.loadstring lua script cn #endif diff --git a/src/Text/Pandoc/Lua/PandocModule.hs b/src/Text/Pandoc/Lua/PandocModule.hs index 15f19f024..27c19d4f0 100644 --- a/src/Text/Pandoc/Lua/PandocModule.hs +++ b/src/Text/Pandoc/Lua/PandocModule.hs @@ -27,25 +27,24 @@ Pandoc module for lua. -} module Text.Pandoc.Lua.PandocModule ( pushPandocModule ) where -import Data.ByteString.Char8 ( unpack ) -import Data.Default ( Default(..) ) -import Scripting.Lua ( LuaState, call, push, pushhsfunction, rawset) -import Text.Pandoc.Class hiding ( readDataFile ) -import Text.Pandoc.Definition ( Pandoc ) -import Text.Pandoc.Lua.Compat ( loadstring ) +import Control.Monad (unless) +import Data.ByteString.Char8 (unpack) +import Data.Default (Default (..)) +import Data.Text (pack) +import Scripting.Lua (LuaState, call, push, pushhsfunction, rawset) +import Text.Pandoc.Class hiding (readDataFile) +import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Lua.Compat (loadstring) import Text.Pandoc.Lua.StackInstances () -import Text.Pandoc.Readers ( Reader(..), getReader ) -import Text.Pandoc.Shared ( readDataFile ) +import Text.Pandoc.Readers (Reader (..), getReader) +import Text.Pandoc.Shared (readDataFile) -- | Push the "pandoc" on the lua stack. pushPandocModule :: LuaState -> IO () pushPandocModule lua = do script <- pandocModuleScript status <- loadstring lua script "pandoc.lua" - if (status /= 0) - then return () - else do - call lua 0 1 + unless (status /= 0) $ call lua 0 1 push lua "__read" pushhsfunction lua read_doc rawset lua (-3) @@ -57,13 +56,13 @@ pandocModuleScript = unpack <$> readDataFile Nothing "pandoc.lua" read_doc :: String -> String -> IO (Either String Pandoc) read_doc formatSpec content = do case getReader formatSpec of - Left s -> return $ Left s + Left s -> return $ Left s Right reader -> case reader of - StringReader r -> do - res <- runIO $ r def content + TextReader r -> do + res <- runIO $ r def (pack content) case res of - Left s -> return . Left $ show s + Left s -> return . Left $ show s Right pd -> return $ Right pd _ -> return $ Left "Only string formats are supported at the moment." diff --git a/src/Text/Pandoc/Lua/SharedInstances.hs b/src/Text/Pandoc/Lua/SharedInstances.hs index 3d2d29ebf..a5d4ba1e9 100644 --- a/src/Text/Pandoc/Lua/SharedInstances.hs +++ b/src/Text/Pandoc/Lua/SharedInstances.hs @@ -1,5 +1,5 @@ {- -Copyright © 2012-2016 John MacFarlane <jgm@berkeley.edu> +Copyright © 2012-2017 John MacFarlane <jgm@berkeley.edu> 2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify @@ -16,9 +16,9 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -{-# LANGUAGE CPP #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE LambdaCase #-} #if !MIN_VERSION_base(4,8,0) {-# LANGUAGE OverlappingInstances #-} #endif @@ -36,8 +36,8 @@ Shared StackValue instances for pandoc and generic types. -} module Text.Pandoc.Lua.SharedInstances () where -import Scripting.Lua ( LTYPE(..), StackValue(..), newtable ) -import Text.Pandoc.Lua.Util ( addRawInt, addValue, getRawInt, keyValuePairs ) +import Scripting.Lua (LTYPE (..), StackValue (..), newtable) +import Text.Pandoc.Lua.Util (addRawInt, addValue, getRawInt, keyValuePairs) import qualified Data.Map as M import qualified Text.Pandoc.UTF8 as UTF8 @@ -112,5 +112,5 @@ instance (StackValue a, StackValue b) => StackValue (Either a b) where peek lua idx = peek lua idx >>= \case Just left -> return . Just $ Left left Nothing -> fmap Right <$> peek lua idx - valuetype (Left x) = valuetype x + valuetype (Left x) = valuetype x valuetype (Right x) = valuetype x diff --git a/src/Text/Pandoc/Lua/StackInstances.hs b/src/Text/Pandoc/Lua/StackInstances.hs index 03f6e06e2..d2e3f630a 100644 --- a/src/Text/Pandoc/Lua/StackInstances.hs +++ b/src/Text/Pandoc/Lua/StackInstances.hs @@ -1,5 +1,5 @@ {- -Copyright © 2012-2015 John MacFarlane <jgm@berkeley.edu> +Copyright © 2012-2017 John MacFarlane <jgm@berkeley.edu> 2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify @@ -17,11 +17,11 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE LambdaCase #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {- | Module : Text.Pandoc.Lua.StackInstances - Copyright : © 2012-2016 John MacFarlane + Copyright : © 2012-2017 John MacFarlane © 2017 Albert Krewinkel License : GNU GPL, version 2 or above @@ -32,13 +32,13 @@ StackValue instances for pandoc types. -} module Text.Pandoc.Lua.StackInstances () where -import Control.Applicative ( (<|>) ) -import Scripting.Lua - ( LTYPE(..), LuaState, StackValue(..), ltype, newtable, objlen ) +import Control.Applicative ((<|>)) +import Scripting.Lua (LTYPE (..), LuaState, StackValue (..), ltype, newtable, + objlen) import Text.Pandoc.Definition import Text.Pandoc.Lua.SharedInstances () -import Text.Pandoc.Lua.Util ( addValue, getTable, pushViaConstructor ) -import Text.Pandoc.Shared ( safeRead ) +import Text.Pandoc.Lua.Util (addValue, getTable, pushViaConstructor) +import Text.Pandoc.Shared (safeRead) instance StackValue Pandoc where push lua (Pandoc meta blocks) = do diff --git a/src/Text/Pandoc/Lua/Util.hs b/src/Text/Pandoc/Lua/Util.hs index f0b87c231..0a704d027 100644 --- a/src/Text/Pandoc/Lua/Util.hs +++ b/src/Text/Pandoc/Lua/Util.hs @@ -1,5 +1,5 @@ {- -Copyright © 2012-2016 John MacFarlane <jgm@berkeley.edu> +Copyright © 2012-2017 John MacFarlane <jgm@berkeley.edu> 2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify @@ -42,10 +42,8 @@ module Text.Pandoc.Lua.Util , pushViaConstructor ) where -import Scripting.Lua - ( LuaState, StackValue(..) - , call, getglobal2, gettable, next, pop, pushnil, rawgeti, rawseti, settable - ) +import Scripting.Lua (LuaState, StackValue (..), call, getglobal2, gettable, + next, pop, pushnil, rawgeti, rawseti, settable) -- | Adjust the stack index, assuming that @n@ new elements have been pushed on -- the stack. diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 2e4a97b71..162112634 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.MIME - Copyright : Copyright (C) 2011-2016 John MacFarlane + Copyright : Copyright (C) 2011-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index b865f97c2..d8d6da345 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -1,7 +1,7 @@ {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {- -Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2014-2015, 2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.MediaBag - Copyright : Copyright (C) 2014 John MacFarlane + Copyright : Copyright (C) 2014-2015, 2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -35,21 +35,15 @@ module Text.Pandoc.MediaBag ( lookupMedia, insertMedia, mediaDirectory, - extractMediaBag ) where -import Control.Monad (when) -import Control.Monad.Trans (MonadIO (..)) import qualified Data.ByteString.Lazy as BL import Data.Data (Data) import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Typeable (Typeable) -import System.Directory (createDirectoryIfMissing) import System.FilePath import qualified System.FilePath.Posix as Posix -import System.IO (stderr) import Text.Pandoc.MIME (MimeType, getMimeTypeDef) -import qualified Text.Pandoc.UTF8 as UTF8 -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' @@ -87,28 +81,3 @@ mediaDirectory :: MediaBag -> [(String, MimeType, Int)] mediaDirectory (MediaBag mediamap) = M.foldWithKey (\fp (mime,contents) -> (((Posix.joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap - --- | Extract contents of MediaBag to a given directory. Print informational --- messages if 'verbose' is true. --- TODO: eventually we may want to put this into PandocMonad --- In PandocPure, it could write to the fake file system... -extractMediaBag :: MonadIO m - => Bool - -> FilePath - -> MediaBag - -> m () -extractMediaBag verbose dir (MediaBag mediamap) = liftIO $ do - sequence_ $ M.foldWithKey - (\fp (_ ,contents) -> - ((writeMedia verbose dir (Posix.joinPath fp, contents)):)) [] mediamap - -writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO () -writeMedia verbose dir (subpath, bs) = do - -- we join and split to convert a/b/c to a\b\c on Windows; - -- in zip containers all paths use / - let fullpath = dir </> normalise subpath - createDirectoryIfMissing True $ takeDirectory fullpath - when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath - BL.writeFile fullpath bs - - diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 0b09f0497..c7211c86e 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -1,7 +1,7 @@ {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} {- -Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Options - Copyright : Copyright (C) 2012-2016 John MacFarlane + Copyright : Copyright (C) 2012-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -45,6 +45,8 @@ module Text.Pandoc.Options ( module Text.Pandoc.Extensions , def , isEnabled ) where +import Data.Aeson (ToJSON(..), FromJSON(..), + genericToEncoding, defaultOptions) import Data.Data (Data) import Data.Default import qualified Data.Set as Set @@ -104,17 +106,29 @@ data HTMLMathMethod = PlainMath | KaTeX String String -- url of stylesheet and katex.js deriving (Show, Read, Eq, Data, Typeable, Generic) +instance ToJSON HTMLMathMethod where + toEncoding = genericToEncoding defaultOptions +instance FromJSON HTMLMathMethod + data CiteMethod = Citeproc -- use citeproc to render them | Natbib -- output natbib cite commands | Biblatex -- output biblatex cite commands deriving (Show, Read, Eq, Data, Typeable, Generic) +instance ToJSON CiteMethod where + toEncoding = genericToEncoding defaultOptions +instance FromJSON CiteMethod + -- | Methods for obfuscating email addresses in HTML. data ObfuscationMethod = NoObfuscation | ReferenceObfuscation | JavascriptObfuscation deriving (Show, Read, Eq, Data, Typeable, Generic) +instance ToJSON ObfuscationMethod where + toEncoding = genericToEncoding defaultOptions +instance FromJSON ObfuscationMethod + -- | Varieties of HTML slide shows. data HTMLSlideVariant = S5Slides | SlidySlides @@ -124,18 +138,30 @@ data HTMLSlideVariant = S5Slides | NoSlides deriving (Show, Read, Eq, Data, Typeable, Generic) +instance ToJSON HTMLSlideVariant where + toEncoding = genericToEncoding defaultOptions +instance FromJSON HTMLSlideVariant + -- | Options for accepting or rejecting MS Word track-changes. data TrackChanges = AcceptChanges | RejectChanges | AllChanges deriving (Show, Read, Eq, Data, Typeable, Generic) +instance ToJSON TrackChanges where + toEncoding = genericToEncoding defaultOptions +instance FromJSON TrackChanges + -- | Options for wrapping text in the output. data WrapOption = WrapAuto -- ^ Automatically wrap to width | WrapNone -- ^ No non-semantic newlines | WrapPreserve -- ^ Preserve wrapping of input source deriving (Show, Read, Eq, Data, Typeable, Generic) +instance ToJSON WrapOption where + toEncoding = genericToEncoding defaultOptions +instance FromJSON WrapOption + -- | Options defining the type of top-level headers. data TopLevelDivision = TopLevelPart -- ^ Top-level headers become parts | TopLevelChapter -- ^ Top-level headers become chapters @@ -144,12 +170,20 @@ data TopLevelDivision = TopLevelPart -- ^ Top-level headers become parts -- heuristics deriving (Show, Read, Eq, Data, Typeable, Generic) +instance ToJSON TopLevelDivision where + toEncoding = genericToEncoding defaultOptions +instance FromJSON TopLevelDivision + -- | Locations for footnotes and references in markdown output data ReferenceLocation = EndOfBlock -- ^ End of block | EndOfSection -- ^ prior to next section header (or end of document) | EndOfDocument -- ^ at end of document deriving (Show, Read, Eq, Data, Typeable, Generic) +instance ToJSON ReferenceLocation where + toEncoding = genericToEncoding defaultOptions +instance FromJSON ReferenceLocation + -- | Options for writers data WriterOptions = WriterOptions { writerTemplate :: Maybe String -- ^ Template to use diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 696dbacf0..cd75d869d 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.PDF - Copyright : Copyright (C) 2012-2016 John MacFarlane + Copyright : Copyright (C) 2012-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,15 +34,15 @@ module Text.Pandoc.PDF ( makePDF ) where import qualified Codec.Picture as JP import qualified Control.Exception as E -import Control.Monad (unless, when, (<=<)) +import Control.Monad (unless, when) import Control.Monad.Trans (MonadIO (..)) +import qualified Data.Text as T +import Data.Text (Text) import qualified Data.ByteString as BS import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BC -import Data.Digest.Pure.SHA (sha1, showDigest) -import Data.List (isInfixOf) import Data.Maybe (fromMaybe) import Data.Monoid ((<>)) import System.Directory @@ -53,7 +53,7 @@ import System.IO (stdout) import System.IO.Temp (withTempDirectory, withTempFile) import Text.Pandoc.Definition import Text.Pandoc.MediaBag -import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) +import Text.Pandoc.MIME (getMimeType) import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..)) import Text.Pandoc.Process (pipeProcess) import Text.Pandoc.Shared (inDirectory, stringify, withTempDir) @@ -63,8 +63,9 @@ import Text.Pandoc.Writers.Shared (getField, metaToJSON) #ifdef _WINDOWS import Data.List (intercalate) #endif -import Text.Pandoc.Class (PandocIO, fetchItem, report, runIO, runIOorExplode, - setMediaBag, setVerbosity) +import Text.Pandoc.Class (PandocIO, report, runIO, runIOorExplode, + setMediaBag, setVerbosity, getResourcePath, + setResourcePath, fillMediaBag, extractMedia) import Text.Pandoc.Logging #ifdef _WINDOWS @@ -72,16 +73,15 @@ changePathSeparators :: FilePath -> FilePath changePathSeparators = intercalate "/" . splitDirectories #endif -makePDF :: MonadIO m - => String -- ^ pdf creator (pdflatex, lualatex, +makePDF :: String -- ^ pdf creator (pdflatex, lualatex, -- xelatex, context, wkhtmltopdf, pdfroff) - -> (WriterOptions -> Pandoc -> PandocIO String) -- ^ writer + -> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer -> WriterOptions -- ^ options -> Verbosity -- ^ verbosity level -> MediaBag -- ^ media -> Pandoc -- ^ document - -> m (Either ByteString ByteString) -makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = liftIO $ do + -> PandocIO (Either ByteString ByteString) +makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = do let mathArgs = case writerHTMLMathMethod opts of -- with MathJax, wait til all math is rendered: MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });", @@ -102,23 +102,20 @@ makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = liftIO $ do ,("margin-left", fromMaybe (Just "1.25in") (getField "margin-left" meta')) ] - source <- runIOorExplode $ do - setVerbosity verbosity - writer opts doc - html2pdf verbosity args source -makePDF "pdfroff" writer opts verbosity _mediabag doc = liftIO $ do - source <- runIOorExplode $ do - setVerbosity verbosity - writer opts doc + source <- writer opts doc + liftIO $ html2pdf verbosity args source +makePDF "pdfroff" writer opts verbosity _mediabag doc = do + source <- writer opts doc let args = ["-ms", "-mpdfmark", "-e", "-t", "-k", "-KUTF-8", "-i", "--no-toc-relocation"] - ms2pdf verbosity args source + liftIO $ ms2pdf verbosity args source makePDF program writer opts verbosity mediabag doc = do let withTemp = if takeBaseName program == "context" then withTempDirectory "." else withTempDir + resourcePath <- getResourcePath liftIO $ withTemp "tex2pdf." $ \tmpdir -> do - doc' <- handleImages verbosity opts mediabag tmpdir doc + doc' <- handleImages verbosity opts resourcePath mediabag tmpdir doc source <- runIOorExplode $ do setVerbosity verbosity writer opts doc' @@ -131,44 +128,19 @@ makePDF program writer opts verbosity mediabag doc = do handleImages :: Verbosity -> WriterOptions + -> [FilePath] -> MediaBag -> FilePath -- ^ temp dir to store images -> Pandoc -- ^ document -> IO Pandoc -handleImages verbosity opts mediabag tmpdir = - walkM (convertImages verbosity tmpdir) <=< - walkM (handleImage' verbosity opts mediabag tmpdir) - -handleImage' :: Verbosity - -> WriterOptions - -> MediaBag - -> FilePath - -> Inline - -> IO Inline -handleImage' verbosity opts mediabag tmpdir (Image attr ils (src,tit)) = do - exists <- doesFileExist src - if exists - then return $ Image attr ils (src,tit) - else do - res <- runIO $ do - setVerbosity verbosity - setMediaBag mediabag - fetchItem (writerSourceURL opts) src - case res of - Right (contents, Just mime) -> do - let ext = fromMaybe (takeExtension src) $ - extensionFromMimeType mime - let basename = showDigest $ sha1 $ BL.fromChunks [contents] - let fname = tmpdir </> basename <.> ext - BS.writeFile fname contents - return $ Image attr ils (fname,tit) - _ -> do - runIO $ do - setVerbosity verbosity - report $ CouldNotFetchResource src "skipping..." - -- return alt text - return $ Emph ils -handleImage' _ _ _ _ x = return x +handleImages verbosity opts resourcePath mediabag tmpdir doc = do + doc' <- runIOorExplode $ do + setVerbosity verbosity + setResourcePath resourcePath + setMediaBag mediabag + fillMediaBag (writerSourceURL opts) doc >>= + extractMedia tmpdir + walkM (convertImages verbosity tmpdir) doc' convertImages :: Verbosity -> FilePath -> Inline -> IO Inline convertImages verbosity tmpdir (Image attr ils (src, tit)) = do @@ -191,6 +163,7 @@ convertImage tmpdir fname = Just "image/png" -> doNothing Just "image/jpeg" -> doNothing Just "application/pdf" -> doNothing + Just "image/svg+xml" -> return $ Left "conversion from svg not supported" _ -> JP.readImage fname >>= \res -> case res of Left e -> return $ Left e @@ -206,10 +179,10 @@ tex2pdf' :: Verbosity -- ^ Verbosity level -> [String] -- ^ Arguments to the latex-engine -> FilePath -- ^ temp directory for output -> String -- ^ tex program - -> String -- ^ tex source + -> Text -- ^ tex source -> IO (Either ByteString ByteString) tex2pdf' verbosity args tmpDir program source = do - let numruns = if "\\tableofcontents" `isInfixOf` source + let numruns = if "\\tableofcontents" `T.isInfixOf` source then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks (exit, log', mbPdf) <- runTeXProgram verbosity program args 1 numruns tmpDir source @@ -251,11 +224,11 @@ extractConTeXtMsg log' = do -- contents of stdout, contents of produced PDF if any). Rerun -- a fixed number of times to resolve references. runTeXProgram :: Verbosity -> String -> [String] -> Int -> Int -> FilePath - -> String -> IO (ExitCode, ByteString, Maybe ByteString) + -> Text -> IO (ExitCode, ByteString, Maybe ByteString) runTeXProgram verbosity program args runNumber numRuns tmpDir source = do let file = tmpDir </> "input.tex" exists <- doesFileExist file - unless exists $ UTF8.writeFile file source + unless exists $ BS.writeFile file $ UTF8.fromText source #ifdef _WINDOWS -- note: we want / even on Windows, for TexLive let tmpDir' = changePathSeparators tmpDir @@ -304,7 +277,7 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do ms2pdf :: Verbosity -> [String] - -> String + -> Text -> IO (Either ByteString ByteString) ms2pdf verbosity args source = do env' <- getEnvironment @@ -316,10 +289,10 @@ ms2pdf verbosity args source = do mapM_ print env' putStr "\n" putStrLn $ "[makePDF] Contents:\n" - putStr source + putStr $ T.unpack source putStr "\n" (exit, out) <- pipeProcess (Just env') "pdfroff" args - (UTF8.fromStringLazy source) + (BL.fromStrict $ UTF8.fromText source) when (verbosity >= INFO) $ do B.hPutStr stdout out putStr "\n" @@ -329,12 +302,12 @@ ms2pdf verbosity args source = do html2pdf :: Verbosity -- ^ Verbosity level -> [String] -- ^ Args to wkhtmltopdf - -> String -- ^ HTML5 source + -> Text -- ^ HTML5 source -> IO (Either ByteString ByteString) html2pdf verbosity args source = do file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp - UTF8.writeFile file source + BS.writeFile file $ UTF8.fromText source let programArgs = args ++ [file, pdfFile] env' <- getEnvironment when (verbosity >= INFO) $ do @@ -369,11 +342,11 @@ html2pdf verbosity args source = do context2pdf :: Verbosity -- ^ Verbosity level -> FilePath -- ^ temp directory for output - -> String -- ^ ConTeXt source + -> Text -- ^ ConTeXt source -> IO (Either ByteString ByteString) context2pdf verbosity tmpDir source = inDirectory tmpDir $ do let file = "input.tex" - UTF8.writeFile file source + BS.writeFile file $ UTF8.fromText source #ifdef _WINDOWS -- note: we want / even on Windows, for TexLive let tmpDir' = changePathSeparators tmpDir diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index a6d3cd46a..cd51bff69 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -7,7 +7,7 @@ , IncoherentInstances #-} {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,7 +26,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Parsing - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -36,6 +36,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA A utility library with parsers used in pandoc readers. -} module Text.Pandoc.Parsing ( anyLine, + anyLineNewline, + indentWith, many1Till, notFollowedBy', oneOfStrings, @@ -48,6 +50,7 @@ module Text.Pandoc.Parsing ( anyLine, enclosed, stringAnyCase, parseFromString, + parseFromString', lineClump, charsInBalanced, romanNumeral, @@ -66,6 +69,7 @@ module Text.Pandoc.Parsing ( anyLine, tableWith, widthsFromIndices, gridTableWith, + gridTableWith', readWith, readWithM, testStringWith, @@ -82,6 +86,7 @@ module Text.Pandoc.Parsing ( anyLine, HasMacros (..), HasLogMessages (..), HasLastStrPosition (..), + HasIncludeFiles (..), defaultParserState, HeaderType (..), ParserContext (..), @@ -119,6 +124,7 @@ module Text.Pandoc.Parsing ( anyLine, (<+?>), extractIdClass, insertIncludedFile, + insertIncludedFileF, -- * Re-exports from Text.Pandoc.Parsec Stream, runParser, @@ -252,12 +258,28 @@ anyLine = do return this _ -> mzero +-- | Parse any line, include the final newline in the output +anyLineNewline :: Stream [Char] m Char => ParserT [Char] st m [Char] +anyLineNewline = (++ "\n") <$> anyLine + +-- | Parse indent by specified number of spaces (or equiv. tabs) +indentWith :: Stream [Char] m Char + => HasReaderOptions st + => Int -> ParserT [Char] st m [Char] +indentWith num = do + tabStop <- getOption readerTabStop + if (num < tabStop) + then count num (char ' ') + else choice [ try (count num (char ' ')) + , try (char '\t' >> indentWith (num - tabStop)) ] + -- | Like @manyTill@, but reads at least one item. -many1Till :: Stream s m t +many1Till :: (Show end, Stream s m t) => ParserT s st m a -> ParserT s st m end -> ParserT s st m [a] many1Till p end = do + notFollowedBy' end first <- p rest <- manyTill p end return (first:rest) @@ -322,7 +344,7 @@ blanklines :: Stream s m Char => ParserT s st m [Char] blanklines = many1 blankline -- | Parses material enclosed between start and end parsers. -enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser +enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser -> ParserT s st m end -- ^ end parser -> ParserT s st m a -- ^ content parser (to be used repeatedly) -> ParserT s st m [a] @@ -338,7 +360,10 @@ stringAnyCase (x:xs) = do return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a +parseFromString :: Monad m + => ParserT String st m a + -> String + -> ParserT String st m a parseFromString parser str = do oldPos <- getPosition oldInput <- getInput @@ -350,6 +375,18 @@ parseFromString parser str = do setPosition oldPos return result +-- | Like 'parseFromString' but specialized for 'ParserState'. +-- This resets 'stateLastStrPos', which is almost always what we want. +parseFromString' :: Monad m + => ParserT String ParserState m a + -> String + -> ParserT String ParserState m a +parseFromString' parser str = do + oldStrPos <- stateLastStrPos <$> getState + res <- parseFromString parser str + updateState $ \st -> st{ stateLastStrPos = oldStrPos } + return res + -- | Parse raw line block up to and including blank lines. lineClump :: Stream [Char] m Char => ParserT [Char] st m String lineClump = blanklines @@ -445,33 +482,8 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p)) --- Schemes from http://www.iana.org/assignments/uri-schemes.html plus --- the unofficial schemes coap, doi, javascript, isbn, pmid -schemes :: [String] -schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid", - "crid","data","dav","dict","dns","file","ftp","geo","go","gopher", - "h323","http","https","iax","icap","im","imap","info","ipp","iris", - "iris.beep","iris.xpc","iris.xpcs","iris.lwz","ldap","mailto","mid", - "msrp","msrps","mtqp","mupdate","news","nfs","ni","nih","nntp", - "opaquelocktoken","pop","pres","rtsp","service","session","shttp","sieve", - "sip","sips","sms","snmp","soap.beep","soap.beeps","tag","tel","telnet", - "tftp","thismessage","tn3270","tip","tv","urn","vemmi","ws","wss","xcon", - "xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r","z39.50s", - "adiumxtra","afp","afs","aim","apt","attachment","aw","beshare","bitcoin", - "bolo","callto","chrome","chrome-extension","com-eventbrite-attendee", - "content", "cvs","dlna-playsingle","dlna-playcontainer","dtn","dvb", - "ed2k","facetime","feed","finger","fish","gg","git","gizmoproject", - "gtalk","hcp","icon","ipn","irc","irc6","ircs","itms","jar","jms", - "keyparc","lastfm","ldaps","magnet","maps","market","message","mms", - "ms-help","msnim","mumble","mvn","notes","oid","palm","paparazzi", - "platform","proxy","psyc","query","res","resource","rmi","rsync", - "rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify", - "ssh","steam","svn","teamspeak","things","udp","unreal","ut2004", - "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri", - "ymsgr", "isbn", "pmid"] - uriScheme :: Stream s m Char => ParserT s st m String -uriScheme = oneOfStringsCI schemes +uriScheme = oneOfStringsCI (Set.toList schemes) -- | Parses a URI. Returns pair of original and URI-escaped version. uri :: Stream [Char] m Char => ParserT [Char] st m (String, String) @@ -762,21 +774,36 @@ lineBlockLines = try $ do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. -tableWith :: Stream s m Char - => ParserT s ParserState m ([Blocks], [Alignment], [Int]) - -> ([Int] -> ParserT s ParserState m [Blocks]) - -> ParserT s ParserState m sep - -> ParserT s ParserState m end - -> ParserT s ParserState m Blocks +tableWith :: (Stream s m Char, HasReaderOptions st, + Functor mf, Applicative mf, Monad mf) + => ParserT s st m (mf [Blocks], [Alignment], [Int]) + -> ([Int] -> ParserT s st m (mf [Blocks])) + -> ParserT s st m sep + -> ParserT s st m end + -> ParserT s st m (mf Blocks) tableWith headerParser rowParser lineParser footerParser = try $ do + (aligns, widths, heads, rows) <- tableWith' headerParser rowParser + lineParser footerParser + return $ B.table mempty (zip aligns widths) <$> heads <*> rows + +type TableComponents mf = ([Alignment], [Double], mf [Blocks], mf [[Blocks]]) + +tableWith' :: (Stream s m Char, HasReaderOptions st, + Functor mf, Applicative mf, Monad mf) + => ParserT s st m (mf [Blocks], [Alignment], [Int]) + -> ([Int] -> ParserT s st m (mf [Blocks])) + -> ParserT s st m sep + -> ParserT s st m end + -> ParserT s st m (TableComponents mf) +tableWith' headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser - lines' <- rowParser indices `sepEndBy1` lineParser + lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser footerParser numColumns <- getOption readerColumns let widths = if (indices == []) then replicate (length aligns) 0.0 else widthsFromIndices numColumns indices - return $ B.table mempty (zip aligns widths) heads lines' + return $ (aligns, widths, heads, lines') -- Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal @@ -809,25 +836,44 @@ widthsFromIndices numColumns' indices = -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTableWith :: Stream [Char] m Char - => ParserT [Char] ParserState m Blocks -- ^ Block list parser - -> Bool -- ^ Headerless table - -> ParserT [Char] ParserState m Blocks +gridTableWith :: (Stream [Char] m Char, HasReaderOptions st, + Functor mf, Applicative mf, Monad mf) + => ParserT [Char] st m (mf Blocks) -- ^ Block list parser + -> Bool -- ^ Headerless table + -> ParserT [Char] st m (mf Blocks) gridTableWith blocks headless = tableWith (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter +gridTableWith' :: (Stream [Char] m Char, HasReaderOptions st, + Functor mf, Applicative mf, Monad mf) + => ParserT [Char] st m (mf Blocks) -- ^ Block list parser + -> Bool -- ^ Headerless table + -> ParserT [Char] st m (TableComponents mf) +gridTableWith' blocks headless = + tableWith' (gridTableHeader headless blocks) (gridTableRow blocks) + (gridTableSep '-') gridTableFooter + gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ trimr line -gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int) +gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment) gridPart ch = do + leftColon <- option False (True <$ char ':') dashes <- many1 (char ch) + rightColon <- option False (True <$ char ':') char '+' - return (length dashes, length dashes + 1) - -gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)] + let lengthDashes = length dashes + (if leftColon then 1 else 0) + + (if rightColon then 1 else 0) + let alignment = case (leftColon, rightColon) of + (True, True) -> AlignCenter + (True, False) -> AlignLeft + (False, True) -> AlignRight + (False, False) -> AlignDefault + return ((lengthDashes, lengthDashes + 1), alignment) + +gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: String -> String @@ -835,14 +881,14 @@ removeFinalBar = reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse -- | Separator between rows of grid table. -gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char +gridTableSep :: Stream s m Char => Char -> ParserT s st m Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. -gridTableHeader :: Stream [Char] m Char +gridTableHeader :: (Stream [Char] m Char, Functor mf, Applicative mf, Monad mf) => Bool -- ^ Headerless table - -> ParserT [Char] ParserState m Blocks - -> ParserT [Char] ParserState m ([Blocks], [Alignment], [Int]) + -> ParserT [Char] st m (mf Blocks) + -> ParserT [Char] st m (mf [Blocks], [Alignment], [Int]) gridTableHeader headless blocks = try $ do optional blanklines dashes <- gridDashedLines '-' @@ -851,36 +897,40 @@ gridTableHeader headless blocks = try $ do else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) - if headless - then return () - else gridTableSep '=' >> return () - let lines' = map snd dashes + underDashes <- if headless + then return dashes + else gridDashedLines '=' + guard $ length dashes == length underDashes + let lines' = map (snd . fst) underDashes let indices = scanl (+) 0 lines' - let aligns = replicate (length lines') AlignDefault - -- RST does not have a notion of alignments + let aligns = map snd underDashes let rawHeads = if headless - then replicate (length dashes) "" - else map (intercalate " ") $ transpose + then replicate (length underDashes) "" + else map (unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent - heads <- mapM (parseFromString blocks) $ map trim rawHeads + heads <- fmap sequence $ mapM (parseFromString blocks . trim) rawHeads return (heads, aligns, indices) -gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String] +gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [String] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline return (gridTableSplitLine indices line) -- | Parse row of grid table. -gridTableRow :: Stream [Char] m Char - => ParserT [Char] ParserState m Blocks +gridTableRow :: (Stream [Char] m Char, Functor mf, Applicative mf, Monad mf) + => ParserT [Char] st m (mf Blocks) -> [Int] - -> ParserT [Char] ParserState m [Blocks] + -> ParserT [Char] st m (mf [Blocks]) gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines - mapM (liftM compactifyCell . parseFromString blocks) cols + compactifyCell bs = case compactify [bs] of + [] -> mempty + x:_ -> x + cells <- sequence <$> mapM (parseFromString blocks) cols + return $ fmap (map compactifyCell) cells removeOneLeadingSpace :: [String] -> [String] removeOneLeadingSpace xs = @@ -890,11 +940,8 @@ removeOneLeadingSpace xs = where startsWithSpace "" = True startsWithSpace (y:_) = y == ' ' -compactifyCell :: Blocks -> Blocks -compactifyCell bs = head $ compactify [bs] - -- | Parse footer for a grid table. -gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char] +gridTableFooter :: Stream s m Char => ParserT s st m [Char] gridTableFooter = blanklines --- @@ -937,6 +984,7 @@ data ParserState = ParserState stateSubstitutions :: SubstTable, -- ^ List of substitution references stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) + stateNoteRefs :: Set.Set String, -- ^ List of note references used stateMeta :: Meta, -- ^ Document metadata stateMeta' :: F Meta, -- ^ Document metadata stateCitations :: M.Map String String, -- ^ RST-style citations @@ -972,6 +1020,9 @@ class HasReaderOptions st where -- default getOption f = (f . extractReaderOptions) <$> getState +instance HasReaderOptions ParserState where + extractReaderOptions = stateOptions + class HasQuoteContext st m where getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a @@ -987,9 +1038,6 @@ instance Monad m => HasQuoteContext ParserState m where setState newState { stateQuoteContext = oldQuoteContext } return result -instance HasReaderOptions ParserState where - extractReaderOptions = stateOptions - class HasHeaderMap st where extractHeaderMap :: st -> M.Map Inlines String updateHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) -> @@ -1031,6 +1079,16 @@ instance HasLogMessages ParserState where addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st } getLogMessages st = reverse $ stateLogMessages st +class HasIncludeFiles st where + getIncludeFiles :: st -> [String] + addIncludeFile :: String -> st -> st + dropLatestIncludeFile :: st -> st + +instance HasIncludeFiles ParserState where + getIncludeFiles = stateContainers + addIncludeFile f s = s{ stateContainers = f : stateContainers s } + dropLatestIncludeFile s = s { stateContainers = drop 1 $ stateContainers s } + defaultParserState :: ParserState defaultParserState = ParserState { stateOptions = def, @@ -1043,7 +1101,8 @@ defaultParserState = stateHeaderKeys = M.empty, stateSubstitutions = M.empty, stateNotes = [], - stateNotes' = [], + stateNotes' = M.empty, + stateNoteRefs = Set.empty, stateMeta = nullMeta, stateMeta' = return nullMeta, stateCitations = M.empty, @@ -1110,7 +1169,8 @@ data QuoteContext type NoteTable = [(String, String)] -type NoteTable' = [(String, F Blocks)] -- used in markdown reader +type NoteTable' = M.Map String (SourcePos, F Blocks) +-- used in markdown reader newtype Key = Key String deriving (Show, Read, Eq, Ord) @@ -1322,17 +1382,18 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs') Nothing -> cls kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs -insertIncludedFile :: PandocMonad m - => ParserT String ParserState m Blocks - -> [FilePath] -> FilePath - -> ParserT String ParserState m Blocks -insertIncludedFile blocks dirs f = do +insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st, + Functor mf, Applicative mf, Monad mf) + => ParserT String st m (mf Blocks) + -> [FilePath] -> FilePath + -> ParserT String st m (mf Blocks) +insertIncludedFile' blocks dirs f = do oldPos <- getPosition oldInput <- getInput - containers <- stateContainers <$> getState + containers <- getIncludeFiles <$> getState when (f `elem` containers) $ throwError $ PandocParseError $ "Include file loop at " ++ show oldPos - updateState $ \s -> s{ stateContainers = f : stateContainers s } + updateState $ addIncludeFile f mbcontents <- readFileFromDirs dirs f contents <- case mbcontents of Just s -> return s @@ -1344,5 +1405,22 @@ insertIncludedFile blocks dirs f = do bs <- blocks setInput oldInput setPosition oldPos - updateState $ \s -> s{ stateContainers = tail $ stateContainers s } + updateState dropLatestIncludeFile return bs + +-- | Parse content of include file as blocks. Circular includes result in an +-- @PandocParseError@. +insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) + => ParserT String st m Blocks + -> [FilePath] -> FilePath + -> ParserT String st m Blocks +insertIncludedFile blocks dirs f = + runIdentity <$> insertIncludedFile' (Identity <$> blocks) dirs f + +-- | Parse content of include file as future blocks. Circular includes result in +-- an @PandocParseError@. +insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st) + => ParserT String st m (Future st Blocks) + -> [FilePath] -> FilePath + -> ParserT String st m (Future st Blocks) +insertIncludedFileF = insertIncludedFile' diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 32e60843c..d78a2f1d9 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -1,7 +1,7 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {- -Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA {- | Module : Text.Pandoc.Pretty - Copyright : Copyright (C) 2010-2016 John MacFarlane + Copyright : Copyright (C) 2010-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -108,10 +108,10 @@ data D = Text Int String | CarriageReturn | NewLine | BlankLines Int -- number of blank lines - deriving (Show) + deriving (Show, Eq) newtype Doc = Doc { unDoc :: Seq D } - deriving (Monoid, Show) + deriving (Monoid, Show, Eq) instance IsString Doc where fromString = text diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs index 1014f37dd..b2a0c17f1 100644 --- a/src/Text/Pandoc/Process.hs +++ b/src/Text/Pandoc/Process.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Process - Copyright : Copyright (C) 2013-2016 John MacFarlane + Copyright : Copyright (C) 2013-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index e2d40336c..004fefe25 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -93,36 +93,37 @@ import Text.Pandoc.Shared (mapLeft) import Text.Parsec.Error import qualified Text.Pandoc.UTF8 as UTF8 import qualified Data.ByteString.Lazy as BL +import Data.Text (Text) -data Reader m = StringReader (ReaderOptions -> String -> m Pandoc) +data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc) | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc) -- | Association list of formats and readers. readers :: PandocMonad m => [(String, Reader m)] -readers = [ ("native" , StringReader readNative) - ,("json" , StringReader $ \o s -> +readers = [ ("native" , TextReader readNative) + ,("json" , TextReader $ \o s -> case readJSON o s of Right doc -> return doc Left _ -> throwError $ PandocParseError "JSON parse error") - ,("markdown" , StringReader readMarkdown) - ,("markdown_strict" , StringReader readMarkdown) - ,("markdown_phpextra" , StringReader readMarkdown) - ,("markdown_github" , StringReader readMarkdown) - ,("markdown_mmd", StringReader readMarkdown) - ,("commonmark" , StringReader readCommonMark) - ,("rst" , StringReader readRST) - ,("mediawiki" , StringReader readMediaWiki) - ,("docbook" , StringReader readDocBook) - ,("opml" , StringReader readOPML) - ,("org" , StringReader readOrg) - ,("textile" , StringReader readTextile) -- TODO : textile+lhs - ,("html" , StringReader readHtml) - ,("latex" , StringReader readLaTeX) - ,("haddock" , StringReader readHaddock) - ,("twiki" , StringReader readTWiki) + ,("markdown" , TextReader readMarkdown) + ,("markdown_strict" , TextReader readMarkdown) + ,("markdown_phpextra" , TextReader readMarkdown) + ,("markdown_github" , TextReader readMarkdown) + ,("markdown_mmd", TextReader readMarkdown) + ,("commonmark" , TextReader readCommonMark) + ,("rst" , TextReader readRST) + ,("mediawiki" , TextReader readMediaWiki) + ,("docbook" , TextReader readDocBook) + ,("opml" , TextReader readOPML) + ,("org" , TextReader readOrg) + ,("textile" , TextReader readTextile) -- TODO : textile+lhs + ,("html" , TextReader readHtml) + ,("latex" , TextReader readLaTeX) + ,("haddock" , TextReader readHaddock) + ,("twiki" , TextReader readTWiki) ,("docx" , ByteStringReader readDocx) ,("odt" , ByteStringReader readOdt) - ,("t2t" , StringReader readTxt2Tags) + ,("t2t" , TextReader readTxt2Tags) ,("epub" , ByteStringReader readEPUB) ] @@ -134,7 +135,7 @@ getReader s = Right (readerName, setExts) -> case lookup readerName readers of Nothing -> Left $ "Unknown reader: " ++ readerName - Just (StringReader r) -> Right $ StringReader $ \o -> + Just (TextReader r) -> Right $ TextReader $ \o -> r o{ readerExtensions = setExts $ getDefaultExtensions readerName } Just (ByteStringReader r) -> Right $ ByteStringReader $ \o -> @@ -142,5 +143,6 @@ getReader s = getDefaultExtensions readerName } -- | Read pandoc document from JSON format. -readJSON :: ReaderOptions -> String -> Either PandocError Pandoc -readJSON _ = mapLeft PandocParseError . eitherDecode' . UTF8.fromStringLazy +readJSON :: ReaderOptions -> Text -> Either PandocError Pandoc +readJSON _ = + mapLeft PandocParseError . eitherDecode' . BL.fromStrict . UTF8.fromText diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index e98ee066e..3c62f8db5 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -34,15 +34,15 @@ where import CMark import Data.List (groupBy) -import Data.Text (pack, unpack) +import Data.Text (Text, unpack) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options -- | Parse a CommonMark formatted string into a 'Pandoc' structure. -readCommonMark :: PandocMonad m => ReaderOptions -> String -> m Pandoc +readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = return $ - nodeToPandoc $ commonmarkToNode opts' $ pack s + nodeToPandoc $ commonmarkToNode opts' s where opts' = if extensionEnabled Ext_smart (readerExtensions opts) then [optNormalize, optSmart] else [optNormalize] diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index bef256a93..bd3c7c356 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -16,6 +16,8 @@ import Text.TeXMath (readMathML, writeTeX) import Data.Default import Data.Foldable (asum) import Text.Pandoc.Class (PandocMonad) +import Data.Text (Text) +import qualified Data.Text as T {- @@ -522,11 +524,11 @@ instance Default DBState where , dbContent = [] } -readDocBook :: PandocMonad m => ReaderOptions -> String -> m Pandoc +readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions $ inp + let tree = normalizeTree . parseXML . handleInstructions $ T.unpack inp (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree - return $ Pandoc (dbMeta st') (toList . mconcat $ bs) + return $ Pandoc (dbMeta st') (toList . mconcat $ bs) -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 683277993..2757314ab 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -3,7 +3,7 @@ {-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2017 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal + Copyright : Copyright (C) 2014-2017 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index 94b4d919a..8be2e1894 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2017 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx.Lists - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal + Copyright : Copyright (C) 2014-2017 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 0f23555f4..e6736100f 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ViewPatterns #-} {- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2017 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx.Parse - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal + Copyright : Copyright (C) 2014-2017 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index db58e9654..c0d8029dc 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -13,6 +13,8 @@ import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM) import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BL (ByteString) +import qualified Data.Text.Lazy.Encoding as TL +import qualified Data.Text.Lazy as TL import Data.List (isInfixOf, isPrefixOf) import qualified Data.Map as M (Map, elems, fromList, lookup) import Data.Maybe (fromMaybe, mapMaybe) @@ -73,7 +75,7 @@ archiveToEPUB os archive = do mimeToReader "application/xhtml+xml" (unEscapeString -> root) (unEscapeString -> path) = do fname <- findEntryByPathE (root </> path) archive - html <- readHtml os' . UTF8.toStringLazy $ fromEntry fname + html <- readHtml os' . TL.toStrict . TL.decodeUtf8 $ fromEntry fname return $ fixInternalReferences path html mimeToReader s _ (unEscapeString -> path) | s `elem` imageMimes = return $ imageToPandoc path diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 14b051539..94f933c4d 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,7 +1,7 @@ {-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, -ViewPatterns#-} +ViewPatterns, OverloadedStrings #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.HTML - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,6 +34,7 @@ module Text.Pandoc.Readers.HTML ( readHtml , htmlInBalanced , isInlineTag , isBlockTag + , NamedTag(..) , isTextTag , isCommentTag ) where @@ -43,7 +44,7 @@ import Text.HTML.TagSoup.Match import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) -import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField +import Text.Pandoc.Shared ( extractSpaces, addMetaField , escapeURI, safeRead ) import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled, Extension (Ext_epub_html_exts, @@ -53,12 +54,14 @@ import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import qualified Data.Map as M import Data.Maybe ( fromMaybe, isJust) -import Data.List ( intercalate, isInfixOf, isPrefixOf ) +import Data.List ( intercalate, isPrefixOf ) import Data.Char ( isDigit, isLetter, isAlphaNum ) import Control.Monad ( guard, mzero, void, unless ) import Control.Arrow ((***)) import Control.Applicative ( (<|>) ) import Data.Monoid (First (..)) +import Data.Text (Text) +import qualified Data.Text as T import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (ask, asks, local, ReaderT, runReaderT, lift) @@ -74,11 +77,12 @@ import Control.Monad.Except (throwError) -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assumes @'\n'@ line endings) + -> Text -- ^ String to parse (assumes @'\n'@ line endings) -> m Pandoc readHtml opts inp = do let tags = stripPrefixes . canonicalizeTags $ - parseTagsOptions parseOptions{ optTagPosition = True } inp + parseTagsOptions parseOptions{ optTagPosition = True } + inp parseDoc = do blocks <- (fixPlains False) . mconcat <$> manyTill block eof meta <- stateMeta . parserState <$> getState @@ -128,7 +132,7 @@ setInPlain = local (\s -> s {inPlain = True}) type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m) -type TagParser m = HTMLParser m [Tag String] +type TagParser m = HTMLParser m [Tag Text] pBody :: PandocMonad m => TagParser m Blocks pBody = pInTags "body" block @@ -138,12 +142,12 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag where pTitle = pInTags "title" inline >>= setTitle . trimInlines setTitle t = mempty <$ (updateState $ B.setMeta "title" t) pMetaTag = do - mt <- pSatisfy (~== TagOpen "meta" []) - let name = fromAttrib "name" mt + mt <- pSatisfy (matchTagOpen "meta" []) + let name = T.unpack $ fromAttrib "name" mt if null name then return mempty else do - let content = fromAttrib "content" mt + let content = T.unpack $ fromAttrib "content" mt updateState $ \s -> let ps = parserState s in s{ parserState = ps{ @@ -151,9 +155,9 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag (stateMeta ps) } } return mempty pBaseTag = do - bt <- pSatisfy (~== TagOpen "base" []) + bt <- pSatisfy (matchTagOpen "base" []) updateState $ \st -> st{ baseHref = - parseURIReference $ fromAttrib "href" bt } + parseURIReference $ T.unpack $ fromAttrib "href" bt } return mempty block :: PandocMonad m => TagParser m Blocks @@ -193,29 +197,31 @@ eSwitch :: (PandocMonad m, Monoid a) -> TagParser m a eSwitch constructor parser = try $ do guardEnabled Ext_epub_html_exts - pSatisfy (~== TagOpen "switch" []) + pSatisfy (matchTagOpen "switch" []) cases <- getFirst . mconcat <$> manyTill (First <$> (eCase <* skipMany pBlank) ) - (lookAhead $ try $ pSatisfy (~== TagOpen "default" [])) + (lookAhead $ try $ pSatisfy (matchTagOpen "default" [])) skipMany pBlank fallback <- pInTags "default" (skipMany pBlank *> parser <* skipMany pBlank) skipMany pBlank - pSatisfy (~== TagClose "switch") + pSatisfy (matchTagClose "switch") return $ maybe fallback constructor cases eCase :: PandocMonad m => TagParser m (Maybe Inlines) eCase = do skipMany pBlank - TagOpen _ attr <- lookAhead $ pSatisfy $ (~== TagOpen "case" []) + TagOpen _ attr' <- lookAhead $ pSatisfy $ (matchTagOpen "case" []) + let attr = toStringAttr attr' case (flip lookup namespaces) =<< lookup "required-namespace" attr of Just p -> Just <$> (pInTags "case" (skipMany pBlank *> p <* skipMany pBlank)) - Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (~== TagClose "case")) + Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (matchTagClose "case")) eFootnote :: PandocMonad m => TagParser m () eFootnote = try $ do let notes = ["footnote", "rearnote"] guardEnabled Ext_epub_html_exts - (TagOpen tag attr) <- lookAhead $ pAnyTag + (TagOpen tag attr') <- lookAhead $ pAnyTag + let attr = toStringAttr attr' guard (maybe False (flip elem notes) (lookup "type" attr)) let ident = fromMaybe "" (lookup "id" attr) content <- pInTags tag block @@ -227,7 +233,8 @@ addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : (noteTable s) eNoteref :: PandocMonad m => TagParser m Inlines eNoteref = try $ do guardEnabled Ext_epub_html_exts - TagOpen tag attr <- lookAhead $ pAnyTag + TagOpen tag attr' <- lookAhead $ pAnyTag + let attr = toStringAttr attr' guard (maybe False (== "noteref") (lookup "type" attr)) let ident = maybe "" (dropWhile (== '#')) (lookup "href" attr) guard (not (null ident)) @@ -247,10 +254,10 @@ pList = pBulletList <|> pOrderedList <|> pDefinitionList pBulletList :: PandocMonad m => TagParser m Blocks pBulletList = try $ do - pSatisfy (~== TagOpen "ul" []) + pSatisfy (matchTagOpen "ul" []) let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && - not (t ~== TagClose "ul")) + not (matchTagClose "ul" t)) -- note: if they have an <ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... skipMany nonItem @@ -259,7 +266,8 @@ pBulletList = try $ do pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks pListItem nonItem = do - TagOpen _ attr <- lookAhead $ pSatisfy (~== TagOpen "li" []) + TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" []) + let attr = toStringAttr attr' let addId ident bs = case B.toList bs of (Plain ils:xs) -> B.fromList (Plain [Span (ident, [], []) ils] : xs) @@ -285,7 +293,8 @@ parseTypeAttr _ = DefaultStyle pOrderedList :: PandocMonad m => TagParser m Blocks pOrderedList = try $ do - TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) + TagOpen _ attribs' <- pSatisfy (matchTagOpen "ol" []) + let attribs = toStringAttr attribs' let (start, style) = (sta', sty') where sta = fromMaybe "1" $ lookup "start" attribs @@ -307,7 +316,7 @@ pOrderedList = try $ do ] let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && - not (t ~== TagClose "ol")) + not (matchTagClose "ol" t)) -- note: if they have an <ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... skipMany nonItem @@ -316,14 +325,14 @@ pOrderedList = try $ do pDefinitionList :: PandocMonad m => TagParser m Blocks pDefinitionList = try $ do - pSatisfy (~== TagOpen "dl" []) + pSatisfy (matchTagOpen "dl" []) items <- manyTill pDefListItem (pCloses "dl") return $ B.definitionList items pDefListItem :: PandocMonad m => TagParser m (Inlines, [Blocks]) pDefListItem = try $ do - let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) && - not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl")) + let nonItem = pSatisfy (\t -> not (matchTagOpen "dt" [] t) && + not (matchTagOpen "dd" [] t) && not (matchTagClose "dl" t)) terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline) defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block) skipMany nonItem @@ -346,12 +355,12 @@ fixPlains inList bs = if any isParaish bs' plainToPara x = x bs' = B.toList bs -pRawTag :: PandocMonad m => TagParser m String +pRawTag :: PandocMonad m => TagParser m Text pRawTag = do tag <- pAnyTag let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"] if tagOpen ignorable (const True) tag || tagClose ignorable tag - then return [] + then return mempty else return $ renderTags' [tag] pDiv :: PandocMonad m => TagParser m Blocks @@ -360,7 +369,8 @@ pDiv = try $ do let isDivLike "div" = True isDivLike "section" = True isDivLike _ = False - TagOpen tag attr <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True) + TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True) + let attr = toStringAttr attr' contents <- pInTags tag block let (ident, classes, kvs) = mkAttr attr let classes' = if tag == "section" @@ -370,7 +380,7 @@ pDiv = try $ do pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock = do - raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag + raw <- T.unpack <$> (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag) exts <- getOption readerExtensions if extensionEnabled Ext_raw_html exts && not (null raw) then return $ B.rawBlock "html" raw @@ -385,33 +395,35 @@ ignore raw = do logMessage $ SkippedContent raw pos return mempty -pHtmlBlock :: PandocMonad m => String -> TagParser m String +pHtmlBlock :: PandocMonad m => Text -> TagParser m Text pHtmlBlock t = try $ do - open <- pSatisfy (~== TagOpen t []) - contents <- manyTill pAnyTag (pSatisfy (~== TagClose t)) - return $ renderTags' $ [open] ++ contents ++ [TagClose t] + open <- pSatisfy (matchTagOpen t []) + contents <- manyTill pAnyTag (pSatisfy (matchTagClose t)) + return $ renderTags' $ [open] <> contents <> [TagClose t] -- Sets chapter context eSection :: PandocMonad m => TagParser m Blocks eSection = try $ do - let matchChapter as = maybe False (isInfixOf "chapter") (lookup "type" as) + let matchChapter as = maybe False (T.isInfixOf "chapter") (lookup "type" as) let sectTag = tagOpen (`elem` sectioningContent) matchChapter TagOpen tag _ <- lookAhead $ pSatisfy sectTag setInChapter (pInTags tag block) -headerLevel :: PandocMonad m => String -> TagParser m Int +headerLevel :: PandocMonad m => Text -> TagParser m Int headerLevel tagtype = do - let level = read (drop 1 tagtype) - (try $ do - guardEnabled Ext_epub_html_exts - asks inChapter >>= guard - return (level - 1)) - <|> - return level + case safeRead (T.unpack (T.drop 1 tagtype)) of + Just level -> + (try $ do + guardEnabled Ext_epub_html_exts + asks inChapter >>= guard + return (level - 1)) + <|> + return level + Nothing -> fail "Could not retrieve header level" eTitlePage :: PandocMonad m => TagParser m () eTitlePage = try $ do - let isTitlePage as = maybe False (isInfixOf "titlepage") (lookup "type" as) + let isTitlePage as = maybe False (T.isInfixOf "titlepage") (lookup "type" as) let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section") isTitlePage TagOpen tag _ <- lookAhead $ pSatisfy groupTag @@ -419,19 +431,21 @@ eTitlePage = try $ do pHeader :: PandocMonad m => TagParser m Blocks pHeader = try $ do - TagOpen tagtype attr <- pSatisfy $ + TagOpen tagtype attr' <- pSatisfy $ tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"]) (const True) - let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")] + let attr = toStringAttr attr' + let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text) + [("class","title")] level <- headerLevel tagtype contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] - attr' <- registerHeader (ident, classes, keyvals) contents + attr'' <- registerHeader (ident, classes, keyvals) contents return $ if bodyTitle then mempty -- skip a representation of the title in the body - else B.headerWith attr' level contents + else B.headerWith attr'' level contents pHrule :: PandocMonad m => TagParser m Blocks pHrule = do @@ -440,7 +454,7 @@ pHrule = do pTable :: PandocMonad m => TagParser m Blocks pTable = try $ do - TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) + TagOpen _ _ <- pSatisfy (matchTagOpen "table" []) skipMany pBlank caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank widths' <- (mconcat <$> many1 pColgroup) <|> many pCol @@ -454,8 +468,8 @@ pTable = try $ do else return head'' rowsLs <- many pTBody rows' <- pOptInTag "tfoot" $ many pTr - TagClose _ <- pSatisfy (~== TagClose "table") - let rows'' = (concat rowsLs) ++ rows' + TagClose _ <- pSatisfy (matchTagClose "table") + let rows'' = (concat rowsLs) <> rows' -- fail on empty table guard $ not $ null head' && null rows'' let isSinglePlain x = case B.toList x of @@ -466,7 +480,7 @@ pTable = try $ do let cols = length $ if null head' then head rows'' else head' -- add empty cells to short rows let addEmpties r = case cols - length r of - n | n > 0 -> r ++ replicate n mempty + n | n > 0 -> r <> replicate n mempty | otherwise -> r let rows = map addEmpties rows'' let aligns = replicate cols AlignDefault @@ -479,15 +493,16 @@ pTable = try $ do pCol :: PandocMonad m => TagParser m Double pCol = try $ do - TagOpen _ attribs <- pSatisfy (~== TagOpen "col" []) + TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" []) + let attribs = toStringAttr attribs' skipMany pBlank - optional $ pSatisfy (~== TagClose "col") + optional $ pSatisfy (matchTagClose "col") skipMany pBlank return $ case lookup "width" attribs of Nothing -> case lookup "style" attribs of Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs -> fromMaybe 0.0 $ safeRead ('0':'.':filter - (`notElem` " \t\r\n%'\";") xs) + (`notElem` (" \t\r\n%'\";" :: [Char])) xs) _ -> 0.0 Just x | not (null x) && last x == '%' -> fromMaybe 0.0 $ safeRead ('0':'.':init x) @@ -495,18 +510,18 @@ pCol = try $ do pColgroup :: PandocMonad m => TagParser m [Double] pColgroup = try $ do - pSatisfy (~== TagOpen "colgroup" []) + pSatisfy (matchTagOpen "colgroup" []) skipMany pBlank manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank -noColOrRowSpans :: Tag String -> Bool +noColOrRowSpans :: Tag Text -> Bool noColOrRowSpans t = isNullOrOne "colspan" && isNullOrOne "rowspan" where isNullOrOne x = case fromAttrib x t of "" -> True "1" -> True _ -> False -pCell :: PandocMonad m => String -> TagParser m [Blocks] +pCell :: PandocMonad m => Text -> TagParser m [Blocks] pCell celltype = try $ do skipMany pBlank res <- pInTags' celltype noColOrRowSpans block @@ -532,7 +547,8 @@ pPara = do pCodeBlock :: PandocMonad m => TagParser m Blocks pCodeBlock = try $ do - TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) + TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" []) + let attr = toStringAttr attr' contents <- manyTill pAnyTag (pCloses "pre" <|> eof) let rawText = concatMap tagToString contents -- drop leading newline if any @@ -545,8 +561,8 @@ pCodeBlock = try $ do _ -> result' return $ B.codeBlockWith (mkAttr attr) result -tagToString :: Tag String -> String -tagToString (TagText s) = s +tagToString :: Tag Text -> String +tagToString (TagText s) = T.unpack s tagToString (TagOpen "br" _) = "\n" tagToString _ = "" @@ -575,20 +591,20 @@ pLocation = do (TagPosition r c) <- pSat isTagPosition setPosition $ newPos "input" r c -pSat :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String) +pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSat f = do pos <- getPosition token show (const pos) (\x -> if f x then Just x else Nothing) -pSatisfy :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String) +pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSatisfy f = try $ optional pLocation >> pSat f -pAnyTag :: PandocMonad m => TagParser m (Tag String) +pAnyTag :: PandocMonad m => TagParser m (Tag Text) pAnyTag = pSatisfy (const True) pSelfClosing :: PandocMonad m - => (String -> Bool) -> ([Attribute String] -> Bool) - -> TagParser m (Tag String) + => (Text -> Bool) -> ([Attribute Text] -> Bool) + -> TagParser m (Tag Text) pSelfClosing f g = do open <- pSatisfy (tagOpen f g) optional $ pSatisfy (tagClose f) @@ -626,7 +642,7 @@ pStrikeout = do pInlinesInTags "s" B.strikeout <|> pInlinesInTags "strike" B.strikeout <|> pInlinesInTags "del" B.strikeout <|> - try (do pSatisfy (~== TagOpen "span" [("class","strikeout")]) + try (do pSatisfy (matchTagOpen "span" [("class","strikeout")]) contents <- mconcat <$> manyTill inline (pCloses "span") return $ B.strikeout contents) @@ -637,17 +653,19 @@ pLineBreak = do -- Unlike fromAttrib from tagsoup, this distinguishes -- between a missing attribute and an attribute with empty content. -maybeFromAttrib :: String -> Tag String -> Maybe String -maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs +maybeFromAttrib :: String -> Tag Text -> Maybe String +maybeFromAttrib name (TagOpen _ attrs) = + T.unpack <$> lookup (T.pack name) attrs maybeFromAttrib _ _ = Nothing pLink :: PandocMonad m => TagParser m Inlines pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) - let title = fromAttrib "title" tag + let title = T.unpack $ fromAttrib "title" tag -- take id from id attribute if present, otherwise name - let uid = maybe (fromAttrib "name" tag) id $ maybeFromAttrib "id" tag - let cls = words $ fromAttrib "class" tag + let uid = maybe (T.unpack $ fromAttrib "name" tag) id $ + maybeFromAttrib "id" tag + let cls = words $ T.unpack $ fromAttrib "class" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") -- check for href; if href, then a link, otherwise a span case maybeFromAttrib "href" tag of @@ -665,30 +683,33 @@ pImage :: PandocMonad m => TagParser m Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") mbBaseHref <- baseHref <$> getState - let url' = fromAttrib "src" tag + let url' = T.unpack $ fromAttrib "src" tag let url = case (parseURIReference url', mbBaseHref) of (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) _ -> url' - let title = fromAttrib "title" tag - let alt = fromAttrib "alt" tag - let uid = fromAttrib "id" tag - let cls = words $ fromAttrib "class" tag + let title = T.unpack $ fromAttrib "title" tag + let alt = T.unpack $ fromAttrib "alt" tag + let uid = T.unpack $ fromAttrib "id" tag + let cls = words $ T.unpack $ fromAttrib "class" tag let getAtt k = case fromAttrib k tag of "" -> [] - v -> [(k, v)] + v -> [(T.unpack k, T.unpack v)] let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"] return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) pCode :: PandocMonad m => TagParser m Inlines pCode = try $ do - (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) + (TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) + let attr = toStringAttr attr' result <- manyTill pAnyTag (pCloses open) - return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result + return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ T.unpack $ + innerText result pSpan :: PandocMonad m => TagParser m Inlines pSpan = try $ do guardEnabled Ext_native_spans - TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) + TagOpen _ attr' <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) + let attr = toStringAttr attr' contents <- pInTags "span" inline let isSmallCaps = fontVariant == "small-caps" || "smallcaps" `elem` classes where styleAttr = fromMaybe "" $ lookup "style" attr @@ -706,7 +727,7 @@ pRawHtmlInline = do then pSatisfy (not . isBlockTag) else pSatisfy isInlineTag exts <- getOption readerExtensions - let raw = renderTags' [result] + let raw = T.unpack $ renderTags' [result] if extensionEnabled Ext_raw_html exts then return $ B.rawInline "html" raw else ignore raw @@ -714,32 +735,38 @@ pRawHtmlInline = do mathMLToTeXMath :: String -> Either String String mathMLToTeXMath s = writeTeX <$> readMathML s +toStringAttr :: [(Text, Text)] -> [(String, String)] +toStringAttr = map go + where go (x,y) = (T.unpack x, T.unpack y) + pMath :: PandocMonad m => Bool -> TagParser m Inlines pMath inCase = try $ do - open@(TagOpen _ attr) <- pSatisfy $ tagOpen (=="math") (const True) + open@(TagOpen _ attr') <- pSatisfy $ tagOpen (=="math") (const True) -- we'll assume math tags are MathML unless specially marked -- otherwise... + let attr = toStringAttr attr' unless inCase $ guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr)) - contents <- manyTill pAnyTag (pSatisfy (~== TagClose "math")) - case mathMLToTeXMath (renderTags $ [open] ++ contents ++ [TagClose "math"]) of + contents <- manyTill pAnyTag (pSatisfy (matchTagClose "math")) + case mathMLToTeXMath (T.unpack $ renderTags $ + [open] <> contents <> [TagClose "math"]) of Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $ - innerText contents + T.unpack $ innerText contents Right [] -> return mempty Right x -> return $ case lookup "display" attr of Just "block" -> B.displayMath x _ -> B.math x -pInlinesInTags :: PandocMonad m => String -> (Inlines -> Inlines) +pInlinesInTags :: PandocMonad m => Text -> (Inlines -> Inlines) -> TagParser m Inlines pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline -pInTags :: (PandocMonad m, Monoid a) => String -> TagParser m a -> TagParser m a +pInTags :: (PandocMonad m, Monoid a) => Text -> TagParser m a -> TagParser m a pInTags tagtype parser = pInTags' tagtype (const True) parser pInTags' :: (PandocMonad m, Monoid a) - => String - -> (Tag String -> Bool) + => Text + -> (Tag Text -> Bool) -> TagParser m a -> TagParser m a pInTags' tagtype tagtest parser = try $ do @@ -748,18 +775,18 @@ pInTags' tagtype tagtest parser = try $ do -- parses p, preceeded by an optional opening tag -- and followed by an optional closing tags -pOptInTag :: PandocMonad m => String -> TagParser m a -> TagParser m a +pOptInTag :: PandocMonad m => Text -> TagParser m a -> TagParser m a pOptInTag tagtype p = try $ do skipMany pBlank - optional $ pSatisfy (~== TagOpen tagtype []) + optional $ pSatisfy (matchTagOpen tagtype []) skipMany pBlank x <- p skipMany pBlank - optional $ pSatisfy (~== TagClose tagtype) + optional $ pSatisfy (matchTagClose tagtype) skipMany pBlank return x -pCloses :: PandocMonad m => String -> TagParser m () +pCloses :: PandocMonad m => Text -> TagParser m () pCloses tagtype = try $ do t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag case t of @@ -780,15 +807,15 @@ pTagText = try $ do parsed <- lift $ lift $ flip runReaderT qu $ runParserT (many pTagContents) st "text" str case parsed of - Left _ -> throwError $ PandocParseError $ "Could not parse `" ++ str ++ "'" + Left _ -> throwError $ PandocParseError $ "Could not parse `" <> T.unpack str <> "'" Right result -> return $ mconcat result pBlank :: PandocMonad m => TagParser m () pBlank = try $ do (TagText str) <- pSatisfy isTagText - guard $ all isSpace str + guard $ T.all isSpace str -type InlinesParser m = HTMLParser m String +type InlinesParser m = HTMLParser m Text pTagContents :: PandocMonad m => InlinesParser m Inlines pTagContents = @@ -869,80 +896,89 @@ pSpace = many1 (satisfy isSpace) >>= \xs -> -- Constants -- -eitherBlockOrInline :: [String] -eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed", - "del", "ins", - "progress", "map", "area", "noscript", "script", - "object", "svg", "video", "source"] - -{- -inlineHtmlTags :: [[Char]] -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", - "br", "cite", "code", "dfn", "em", "font", "i", "img", - "input", "kbd", "label", "q", "s", "samp", "select", - "small", "span", "strike", "strong", "sub", "sup", - "textarea", "tt", "u", "var"] --} - -blockHtmlTags :: [String] -blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside", - "blockquote", "body", "button", "canvas", - "caption", "center", "col", "colgroup", "dd", "dir", "div", - "dl", "dt", "fieldset", "figcaption", "figure", - "footer", "form", "h1", "h2", "h3", "h4", - "h5", "h6", "head", "header", "hgroup", "hr", "html", - "isindex", "menu", "noframes", "ol", "output", "p", "pre", - "section", "table", "tbody", "textarea", - "thead", "tfoot", "ul", "dd", - "dt", "frameset", "li", "tbody", "td", "tfoot", - "th", "thead", "tr", "script", "style"] +eitherBlockOrInline :: Set.Set Text +eitherBlockOrInline = Set.fromList + ["audio", "applet", "button", "iframe", "embed", + "del", "ins", "progress", "map", "area", "noscript", "script", + "object", "svg", "video", "source"] + +blockHtmlTags :: Set.Set Text +blockHtmlTags = Set.fromList + ["?xml", "!DOCTYPE", "address", "article", "aside", + "blockquote", "body", "canvas", + "caption", "center", "col", "colgroup", "dd", "details", + "dir", "div", "dl", "dt", "fieldset", "figcaption", "figure", + "footer", "form", "h1", "h2", "h3", "h4", + "h5", "h6", "head", "header", "hgroup", "hr", "html", + "isindex", "menu", "noframes", "ol", "output", "p", "pre", + "section", "table", "tbody", "textarea", + "thead", "tfoot", "ul", "dd", + "dt", "frameset", "li", "tbody", "td", "tfoot", + "th", "thead", "tr", "script", "style"] -- We want to allow raw docbook in markdown documents, so we -- include docbook block tags here too. -blockDocBookTags :: [String] -blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist", - "orderedlist", "segmentedlist", "simplelist", - "variablelist", "caution", "important", "note", "tip", - "warning", "address", "literallayout", "programlisting", - "programlistingco", "screen", "screenco", "screenshot", - "synopsis", "example", "informalexample", "figure", - "informalfigure", "table", "informaltable", "para", - "simpara", "formalpara", "equation", "informalequation", - "figure", "screenshot", "mediaobject", "qandaset", - "procedure", "task", "cmdsynopsis", "funcsynopsis", - "classsynopsis", "blockquote", "epigraph", "msgset", - "sidebar", "title"] - -epubTags :: [String] -epubTags = ["case", "switch", "default"] - -blockTags :: [String] -blockTags = blockHtmlTags ++ blockDocBookTags ++ epubTags - -isInlineTag :: Tag String -> Bool -isInlineTag t = tagOpen isInlineTagName (const True) t || - tagClose isInlineTagName t || - tagComment (const True) t - where isInlineTagName x = x `notElem` blockTags - -isBlockTag :: Tag String -> Bool -isBlockTag t = tagOpen isBlockTagName (const True) t || - tagClose isBlockTagName t || - tagComment (const True) t - where isBlockTagName ('?':_) = True - isBlockTagName ('!':_) = True - isBlockTagName x = x `elem` blockTags - || x `elem` eitherBlockOrInline - -isTextTag :: Tag String -> Bool +blockDocBookTags :: Set.Set Text +blockDocBookTags = Set.fromList + ["calloutlist", "bibliolist", "glosslist", "itemizedlist", + "orderedlist", "segmentedlist", "simplelist", + "variablelist", "caution", "important", "note", "tip", + "warning", "address", "literallayout", "programlisting", + "programlistingco", "screen", "screenco", "screenshot", + "synopsis", "example", "informalexample", "figure", + "informalfigure", "table", "informaltable", "para", + "simpara", "formalpara", "equation", "informalequation", + "figure", "screenshot", "mediaobject", "qandaset", + "procedure", "task", "cmdsynopsis", "funcsynopsis", + "classsynopsis", "blockquote", "epigraph", "msgset", + "sidebar", "title"] + +epubTags :: Set.Set Text +epubTags = Set.fromList ["case", "switch", "default"] + +blockTags :: Set.Set Text +blockTags = Set.unions [blockHtmlTags, blockDocBookTags, epubTags] + +class NamedTag a where + getTagName :: a -> Maybe Text + +instance NamedTag (Tag Text) where + getTagName (TagOpen t _) = Just t + getTagName (TagClose t) = Just t + getTagName _ = Nothing + +instance NamedTag (Tag String) where + getTagName (TagOpen t _) = Just (T.pack t) + getTagName (TagClose t) = Just (T.pack t) + getTagName _ = Nothing + +isInlineTag :: NamedTag (Tag a) => Tag a -> Bool +isInlineTag t = isInlineTagName || isCommentTag t + where isInlineTagName = case getTagName t of + Just x -> x + `Set.notMember` blockTags + Nothing -> False + +isBlockTag :: NamedTag (Tag a) => Tag a -> Bool +isBlockTag t = isBlockTagName || isTagComment t + where isBlockTagName = + case getTagName t of + Just x + | "?" `T.isPrefixOf` x -> True + | "!" `T.isPrefixOf` x -> True + | otherwise -> x `Set.member` blockTags + || x `Set.member` eitherBlockOrInline + Nothing -> False + +isTextTag :: Tag a -> Bool isTextTag = tagText (const True) -isCommentTag :: Tag String -> Bool +isCommentTag :: Tag a -> Bool isCommentTag = tagComment (const True) -- taken from HXT and extended -- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags -closes :: String -> String -> Bool +closes :: Text -> Text -> Bool _ `closes` "body" = False _ `closes` "html" = False "body" `closes` "head" = True @@ -975,8 +1011,9 @@ t `closes` t2 | t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] && t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" t1 `closes` t2 | - t1 `elem` blockTags && - t2 `notElem` (blockTags ++ eitherBlockOrInline) = True + t1 `Set.member` blockTags && + t2 `Set.notMember` blockTags && + t2 `Set.notMember` eitherBlockOrInline = True _ `closes` _ = False --- parsers for use in markdown, textile readers @@ -1003,8 +1040,11 @@ htmlInBalanced f = try $ do let cs = ec - sc lscontents <- unlines <$> count ls anyLine cscontents <- count cs anyChar - (_,closetag) <- htmlTag (~== TagClose tn) - return (lscontents ++ cscontents ++ closetag) + closetag <- do + x <- many (satisfy (/='>')) + char '>' + return (x <> ">") + return (lscontents <> cscontents <> closetag) _ -> mzero _ -> mzero @@ -1022,7 +1062,7 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts go n (t:ts') = (t :) <$> go n ts' go _ [] = mzero -hasTagWarning :: [Tag String] -> Bool +hasTagWarning :: [Tag a] -> Bool hasTagWarning (TagWarning _:_) = True hasTagWarning _ = False @@ -1050,47 +1090,48 @@ htmlTag f = try $ do -- basic sanity check, since the parser is very forgiving -- and finds tags in stuff like x<y) guard $ isName tagname + guard $ not $ null tagname -- <https://example.org> should NOT be a tag either. -- tagsoup will parse it as TagOpen "https:" [("example.org","")] guard $ last tagname /= ':' rendered <- manyTill anyChar (char '>') - return (next, rendered ++ ">") + return (next, rendered <> ">") case next of TagComment s | "<!--" `isPrefixOf` inp -> do count (length s + 4) anyChar skipMany (satisfy (/='>')) char '>' - return (next, "<!--" ++ s ++ "-->") + return (next, "<!--" <> s <> "-->") | otherwise -> fail "bogus comment mode, HTML5 parse error" TagOpen tagname attr -> do guard $ all (isName . fst) attr handleTag tagname - TagClose tagname -> handleTag tagname + TagClose tagname -> + handleTag tagname _ -> mzero mkAttr :: [(String, String)] -> Attr mkAttr attr = (attribsId, attribsClasses, attribsKV) where attribsId = fromMaybe "" $ lookup "id" attr - attribsClasses = (words $ fromMaybe "" $ lookup "class" attr) ++ epubTypes + attribsClasses = (words $ fromMaybe "" $ lookup "class" attr) <> epubTypes attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr -- Strip namespace prefixes -stripPrefixes :: [Tag String] -> [Tag String] +stripPrefixes :: [Tag Text] -> [Tag Text] stripPrefixes = map stripPrefix -stripPrefix :: Tag String -> Tag String +stripPrefix :: Tag Text -> Tag Text stripPrefix (TagOpen s as) = TagOpen (stripPrefix' s) (map (stripPrefix' *** id) as) stripPrefix (TagClose s) = TagClose (stripPrefix' s) stripPrefix x = x -stripPrefix' :: String -> String +stripPrefix' :: Text -> Text stripPrefix' s = - case span (/= ':') s of - (_, "") -> s - (_, (_:ts)) -> ts + if T.null t then s else T.drop 1 t + where (_, t) = T.span (/= ':') s isSpace :: Char -> Bool isSpace ' ' = True @@ -1133,19 +1174,32 @@ instance HasLastStrPosition HTMLState where setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)} getLastStrPos = getLastStrPos . parserState +-- For now we need a special verison here; the one in Shared has String type +renderTags' :: [Tag Text] -> Text +renderTags' = renderTagsOptions + renderOptions{ optMinimize = matchTags ["hr", "br", "img", + "meta", "link"] + , optRawTag = matchTags ["script", "style"] } + where matchTags = \tags -> flip elem tags . T.toLower + -- EPUB Specific -- -- -sectioningContent :: [String] +sectioningContent :: [Text] sectioningContent = ["article", "aside", "nav", "section"] -groupingContent :: [String] +groupingContent :: [Text] groupingContent = ["p", "hr", "pre", "blockquote", "ol" , "ul", "li", "dl", "dt", "dt", "dd" , "figure", "figcaption", "div", "main"] +matchTagClose :: Text -> (Tag Text -> Bool) +matchTagClose t = (~== TagClose t) + +matchTagOpen :: Text -> [(Text, Text)] -> (Tag Text -> Bool) +matchTagOpen t as = (~== TagOpen t as) {- @@ -1153,7 +1207,7 @@ types :: [(String, ([String], Int))] types = -- Document divisions map (\s -> (s, (["section", "body"], 0))) ["volume", "part", "chapter", "division"] - ++ -- Document section and components + <> -- Document section and components [ ("abstract", ([], 0))] -} diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 28caa528e..b22b71b96 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -16,6 +16,7 @@ module Text.Pandoc.Readers.Haddock import Control.Monad.Except (throwError) import Data.List (intersperse, stripPrefix) +import Data.Text (Text, unpack) import Data.Maybe (fromMaybe) import Data.Monoid ((<>)) import Documentation.Haddock.Parser @@ -32,9 +33,9 @@ import Text.Pandoc.Shared (splitBy, trim) -- | Parse Haddock markup and return a 'Pandoc' document. readHaddock :: PandocMonad m => ReaderOptions - -> String + -> Text -> m Pandoc -readHaddock opts s = case readHaddockEither opts s of +readHaddock opts s = case readHaddockEither opts (unpack s) of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 1d13f7107..17fb48548 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -39,6 +39,7 @@ import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) import Data.Char (chr, isAlphaNum, isLetter, ord) +import Data.Text (Text, unpack) import Data.List (intercalate, isPrefixOf) import qualified Data.Map as M import Data.Maybe (fromMaybe, maybeToList) @@ -46,7 +47,7 @@ import Safe (minimumDef) import System.FilePath (addExtension, replaceExtension, takeExtension) import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv, readFileFromDirs, - report, setResourcePath) + report, setResourcePath, getResourcePath) import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging @@ -59,10 +60,10 @@ import Text.Pandoc.Walk -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assumes @'\n'@ line endings) + -> Text -- ^ String to parse (assumes @'\n'@ line endings) -> m Pandoc readLaTeX opts ltx = do - parsed <- readWithM parseLaTeX def{ stateOptions = opts } ltx + parsed <- readWithM parseLaTeX def{ stateOptions = opts } (unpack ltx) case parsed of Right result -> return result Left e -> throwError e @@ -276,8 +277,6 @@ block = (mempty <$ comment) <|> blockCommand <|> paragraph <|> grouped block - <|> (mempty <$ char '&') -- loose & in table environment - blocks :: PandocMonad m => LP m Blocks blocks = mconcat <$> many block @@ -304,8 +303,8 @@ blockCommand = try $ do rawcommand <- getRawCommand name' transformed <- applyMacros' rawcommand guard $ transformed /= rawcommand - notFollowedBy $ parseFromString inlines transformed - parseFromString blocks transformed + notFollowedBy $ parseFromString' inlines transformed + parseFromString' blocks transformed lookupListDefault raw [name',name] blockCommands inBrackets :: Inlines -> Inlines @@ -432,7 +431,7 @@ coloredBlock stylename = do graphicsPath :: PandocMonad m => LP m Blocks graphicsPath = do ps <- bgroup *> (manyTill braced egroup) - setResourcePath (".":ps) + getResourcePath >>= setResourcePath . (++ ps) return mempty addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () @@ -490,16 +489,19 @@ inlineCommand = try $ do transformed <- applyMacros' rawcommand exts <- getOption readerExtensions if transformed /= rawcommand - then parseFromString inlines transformed + then parseFromString' inlines transformed else if extensionEnabled Ext_raw_tex exts then return $ rawInline "latex" rawcommand else ignore rawcommand (lookupListDefault raw [name',name] inlineCommands <* optional (try (string "{}"))) -unlessParseRaw :: PandocMonad m => LP m () -unlessParseRaw = getOption readerExtensions >>= - guard . not . extensionEnabled Ext_raw_tex +rawInlineOr :: PandocMonad m => String -> LP m Inlines -> LP m Inlines +rawInlineOr name' fallback = do + parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions + if parseRaw + then rawInline "latex" <$> getRawCommand name' + else fallback isBlockCommand :: String -> Bool isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks)) @@ -507,20 +509,20 @@ isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Bl inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines) inlineEnvironments = M.fromList - [ ("displaymath", mathEnv id Nothing "displaymath") - , ("math", math <$> verbEnv "math") - , ("equation", mathEnv id Nothing "equation") - , ("equation*", mathEnv id Nothing "equation*") - , ("gather", mathEnv id (Just "gathered") "gather") - , ("gather*", mathEnv id (Just "gathered") "gather*") - , ("multline", mathEnv id (Just "gathered") "multline") - , ("multline*", mathEnv id (Just "gathered") "multline*") - , ("eqnarray", mathEnv id (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*") - , ("align", mathEnv id (Just "aligned") "align") - , ("align*", mathEnv id (Just "aligned") "align*") - , ("alignat", mathEnv id (Just "aligned") "alignat") - , ("alignat*", mathEnv id (Just "aligned") "alignat*") + [ ("displaymath", mathEnvWith id Nothing "displaymath") + , ("math", math <$> mathEnv "math") + , ("equation", mathEnvWith id Nothing "equation") + , ("equation*", mathEnvWith id Nothing "equation*") + , ("gather", mathEnvWith id (Just "gathered") "gather") + , ("gather*", mathEnvWith id (Just "gathered") "gather*") + , ("multline", mathEnvWith id (Just "gathered") "multline") + , ("multline*", mathEnvWith id (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*") + , ("align", mathEnvWith id (Just "aligned") "align") + , ("align*", mathEnvWith id (Just "aligned") "align*") + , ("alignat", mathEnvWith id (Just "aligned") "alignat") + , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") ] inlineCommands :: PandocMonad m => M.Map String (LP m Inlines) @@ -547,11 +549,11 @@ inlineCommands = M.fromList $ , ("dots", lit "…") , ("mdots", lit "…") , ("sim", lit "~") - , ("label", unlessParseRaw >> (inBrackets <$> tok)) - , ("ref", unlessParseRaw >> (inBrackets <$> tok)) + , ("label", rawInlineOr "label" (inBrackets <$> tok)) + , ("ref", rawInlineOr "ref" (inBrackets <$> tok)) , ("textgreek", tok) , ("sep", lit ",") - , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty + , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) , ("ensuremath", mathInline braced) @@ -605,7 +607,7 @@ inlineCommands = M.fromList $ , ("u", option (str "u") $ try $ tok >>= accent breve) , ("i", lit "i") , ("\\", linebreak <$ (optional (bracketed inline) *> spaces')) - , (",", pure mempty) + , (",", lit "\8198") , ("@", pure mempty) , (" ", lit "\160") , ("ps", pure $ str "PS." <> space) @@ -698,6 +700,9 @@ inlineCommands = M.fromList $ -- LaTeX colors , ("textcolor", coloredInline "color") , ("colorbox", coloredInline "background-color") + -- fontawesome + , ("faCheck", lit "\10003") + , ("faClose", lit "\10007") ] ++ map ignoreInlines -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks: @@ -1045,7 +1050,7 @@ rawEnv name = do (bs, raw) <- withRaw $ env name blocks raw' <- applyMacros' $ beginCommand ++ raw if raw' /= beginCommand ++ raw - then parseFromString blocks raw' + then parseFromString' blocks raw' else if parseRaw then return $ rawBlock "latex" $ beginCommand ++ raw' else do @@ -1055,6 +1060,19 @@ rawEnv name = do report $ SkippedContent ("\\end{" ++ name ++ "}") pos2 return bs +rawVerbEnv :: PandocMonad m => String -> LP m Blocks +rawVerbEnv name = do + pos <- getPosition + (_, raw) <- withRaw $ verbEnv name + let raw' = "\\begin{tikzpicture}" ++ raw + exts <- getOption readerExtensions + let parseRaw = extensionEnabled Ext_raw_tex exts + if parseRaw + then return $ rawBlock "latex" raw' + else do + report $ SkippedContent raw' pos + return mempty + ---- maybeAddExtension :: String -> FilePath -> FilePath @@ -1119,7 +1137,7 @@ parseListingsOptions options = keyval :: PandocMonad m => LP m (String, String) keyval = try $ do key <- many1 alphaNum - val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\') + val <- option "" $ char '=' >> braced <|> (many1 (alphaNum <|> oneOf ".:-|\\")) skipMany spaceChar optional (char ',') skipMany spaceChar @@ -1130,7 +1148,7 @@ keyvals :: PandocMonad m => LP m [(String, String)] keyvals = try $ char '[' *> manyTill keyval (char ']') alltt :: PandocMonad m => String -> LP m Blocks -alltt t = walk strToCode <$> parseFromString blocks +alltt t = walk strToCode <$> parseFromString' blocks (substitute " " "\\ " $ substitute "%" "\\%" $ intercalate "\\\\\n" $ lines t) where strToCode (Str s) = Code nullAttr s @@ -1176,11 +1194,12 @@ environments = M.fromList , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) , ("center", env "center" blocks) , ("longtable", env "longtable" $ - resetCaption *> simpTable False >>= addTableCaption) + resetCaption *> simpTable "longtable" False >>= addTableCaption) , ("table", env "table" $ resetCaption *> skipopts *> blocks >>= addTableCaption) - , ("tabular*", env "tabular" $ simpTable True) - , ("tabular", env "tabular" $ simpTable False) + , ("tabular*", env "tabular" $ simpTable "tabular*" True) + , ("tabularx", env "tabularx" $ simpTable "tabularx" True) + , ("tabular", env "tabular" $ simpTable "tabular" False) , ("quote", blockQuote <$> env "quote" blocks) , ("quotation", blockQuote <$> env "quotation" blocks) , ("verse", blockQuote <$> env "verse" blocks) @@ -1210,19 +1229,20 @@ environments = M.fromList , ("obeylines", parseFromString (para . trimInlines . mconcat <$> many inline) =<< intercalate "\\\\\n" . lines <$> verbEnv "obeylines") - , ("displaymath", mathEnv para Nothing "displaymath") - , ("equation", mathEnv para Nothing "equation") - , ("equation*", mathEnv para Nothing "equation*") - , ("gather", mathEnv para (Just "gathered") "gather") - , ("gather*", mathEnv para (Just "gathered") "gather*") - , ("multline", mathEnv para (Just "gathered") "multline") - , ("multline*", mathEnv para (Just "gathered") "multline*") - , ("eqnarray", mathEnv para (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*") - , ("align", mathEnv para (Just "aligned") "align") - , ("align*", mathEnv para (Just "aligned") "align*") - , ("alignat", mathEnv para (Just "aligned") "alignat") - , ("alignat*", mathEnv para (Just "aligned") "alignat*") + , ("displaymath", mathEnvWith para Nothing "displaymath") + , ("equation", mathEnvWith para Nothing "equation") + , ("equation*", mathEnvWith para Nothing "equation*") + , ("gather", mathEnvWith para (Just "gathered") "gather") + , ("gather*", mathEnvWith para (Just "gathered") "gather*") + , ("multline", mathEnvWith para (Just "gathered") "multline") + , ("multline*", mathEnvWith para (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") + , ("align", mathEnvWith para (Just "aligned") "align") + , ("align*", mathEnvWith para (Just "aligned") "align*") + , ("alignat", mathEnvWith para (Just "aligned") "alignat") + , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") + , ("tikzpicture", rawVerbEnv "tikzpicture") ] figure :: PandocMonad m => LP m Blocks @@ -1287,19 +1307,32 @@ listenv name p = try $ do updateState $ \st -> st{ stateParserContext = oldCtx } return res -mathEnv :: PandocMonad m => (Inlines -> a) -> Maybe String -> String -> LP m a -mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name) +mathEnvWith :: PandocMonad m + => (Inlines -> a) -> Maybe String -> String -> LP m a +mathEnvWith f innerEnv name = f <$> mathDisplay (inner <$> mathEnv name) where inner x = case innerEnv of Nothing -> x Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ "\\end{" ++ y ++ "}" +mathEnv :: PandocMonad m => String -> LP m String +mathEnv name = do + skipopts + optional blankline + let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) + charMuncher = skipMany comment *> + (many1 (noneOf "\\%") <|> try (string "\\%") + <|> try (string "\\\\") <|> count 1 anyChar) + res <- concat <$> manyTill charMuncher endEnv + return $ stripTrailingNewlines res + verbEnv :: PandocMonad m => String -> LP m String verbEnv name = do skipopts optional blankline let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) - res <- manyTill anyChar endEnv + charMuncher = anyChar + res <- manyTill charMuncher endEnv return $ stripTrailingNewlines res fancyverbEnv :: PandocMonad m => String -> LP m Blocks @@ -1314,7 +1347,7 @@ fancyverbEnv name = do codeBlockWith attr <$> verbEnv name orderedList' :: PandocMonad m => LP m Blocks -orderedList' = do +orderedList' = try $ do optional sp (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ try $ char '[' *> anyOrderedListMarker <* char ']' @@ -1429,7 +1462,7 @@ complexNatbibCitation mode = try $ do -- tables -parseAligns :: PandocMonad m => LP m [(String, Alignment, String)] +parseAligns :: PandocMonad m => LP m [(Alignment, Double, (String, String))] parseAligns = try $ do bgroup let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) @@ -1437,18 +1470,36 @@ parseAligns = try $ do let cAlign = AlignCenter <$ char 'c' let lAlign = AlignLeft <$ char 'l' let rAlign = AlignRight <$ char 'r' - let parAlign = AlignLeft <$ (char 'p' >> braced) + let parAlign = AlignLeft <$ char 'p' + -- algins from tabularx + let xAlign = AlignLeft <$ char 'X' + let mAlign = AlignLeft <$ char 'm' + let bAlign = AlignLeft <$ char 'b' let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign + <|> xAlign <|> mAlign <|> bAlign let alignPrefix = char '>' >> braced let alignSuffix = char '<' >> braced + let colWidth = try $ do + char '{' + ds <- many1 (oneOf "0123456789.") + spaces + string "\\linewidth" + char '}' + case safeRead ds of + Just w -> return w + Nothing -> return 0.0 let alignSpec = do spaces pref <- option "" alignPrefix spaces - ch <- alignChar + al <- alignChar + width <- colWidth <|> option 0.0 (do s <- braced + pos <- getPosition + report $ SkippedContent s pos + return 0.0) spaces suff <- option "" alignSuffix - return (pref, ch, suff) + return (al, width, (pref, suff)) aligns' <- sepEndBy alignSpec maybeBar spaces egroup @@ -1478,24 +1529,26 @@ amp :: PandocMonad m => LP m () amp = () <$ try (spaces' *> char '&' <* spaces') parseTableRow :: PandocMonad m - => Int -- ^ number of columns - -> [String] -- ^ prefixes - -> [String] -- ^ suffixes + => String -- ^ table environment name + -> [(String, String)] -- ^ pref/suffixes -> LP m [Blocks] -parseTableRow cols prefixes suffixes = try $ do - let tableCellRaw = many (notFollowedBy - (amp <|> lbreak <|> - (() <$ try (string "\\end"))) >> anyChar) - let minipage = try $ controlSeq "begin" *> string "{minipage}" *> - env "minipage" - (skipopts *> spaces' *> optional braced *> spaces' *> blocks) - let tableCell = minipage <|> - ((plain . trimInlines . mconcat) <$> many inline) +parseTableRow envname prefsufs = try $ do + let cols = length prefsufs + let tableCellRaw = concat <$> many + (do notFollowedBy amp + notFollowedBy lbreak + notFollowedBy $ () <$ try (string ("\\end{" ++ envname ++ "}")) + many1 (noneOf "&%\n\r\\") + <|> try (string "\\&") + <|> count 1 anyChar) + let plainify bs = case toList bs of + [Para ils] -> plain (fromList ils) + _ -> bs rawcells <- sepBy1 tableCellRaw amp guard $ length rawcells == cols - let rawcells' = zipWith3 (\c p s -> p ++ trim c ++ s) - rawcells prefixes suffixes - cells' <- mapM (parseFromString tableCell) rawcells' + let rawcells' = zipWith (\c (p, s) -> p ++ trim c ++ s) rawcells prefsufs + let tableCell = plainify <$> blocks + cells' <- mapM (parseFromString' tableCell) rawcells' let numcells = length cells' guard $ numcells <= cols && numcells >= 1 guard $ cells' /= [mempty] @@ -1507,21 +1560,22 @@ parseTableRow cols prefixes suffixes = try $ do spaces' :: PandocMonad m => LP m () spaces' = spaces *> skipMany (comment *> spaces) -simpTable :: PandocMonad m => Bool -> LP m Blocks -simpTable hasWidthParameter = try $ do +simpTable :: PandocMonad m => String -> Bool -> LP m Blocks +simpTable envname hasWidthParameter = try $ do when hasWidthParameter $ () <$ (spaces' >> tok) skipopts - (prefixes, aligns, suffixes) <- unzip3 <$> parseAligns - let cols = length aligns + colspecs <- parseAligns + let (aligns, widths, prefsufs) = unzip3 colspecs + let cols = length colspecs optional $ controlSeq "caption" *> skipopts *> setCaption optional lbreak spaces' skipMany hline spaces' - header' <- option [] $ try (parseTableRow cols prefixes suffixes <* + header' <- option [] $ try (parseTableRow envname prefsufs <* lbreak <* many1 hline) spaces' - rows <- sepEndBy (parseTableRow cols prefixes suffixes) + rows <- sepEndBy (parseTableRow envname prefsufs) (lbreak <* optional (skipMany hline)) spaces' optional $ controlSeq "caption" *> skipopts *> setCaption @@ -1531,7 +1585,7 @@ simpTable hasWidthParameter = try $ do then replicate cols mempty else header' lookAhead $ controlSeq "end" -- make sure we're at end - return $ table mempty (zip aligns (repeat 0)) header'' rows + return $ table mempty (zip aligns widths) header'' rows removeDoubleQuotes :: String -> String removeDoubleQuotes ('"':xs) = diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 5515c735b..e1c481311 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -55,11 +55,9 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) -import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (tableWith) -import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, isCommentTag, isInlineTag, isTextTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) @@ -72,10 +70,11 @@ type MarkdownParser m = ParserT [Char] ParserState m -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readMarkdown opts s = do - parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") + parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } + (T.unpack s ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -155,9 +154,11 @@ litChar = escapedChar' inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines) inlinesInBalancedBrackets = do char '[' + pos <- getPosition (_, raw) <- withRaw $ charsInBalancedBrackets 1 guard $ not $ null raw - parseFromString (trimInlinesF . mconcat <$> many inline) (init raw) + parseFromString' (setPosition pos >> + trimInlinesF . mconcat <$> many inline) (init raw) charsInBalancedBrackets :: PandocMonad m => Int -> MarkdownParser m () charsInBalancedBrackets 0 = return () @@ -189,7 +190,7 @@ rawTitleBlockLine = do titleLine :: PandocMonad m => MarkdownParser m (F Inlines) titleLine = try $ do raw <- rawTitleBlockLine - res <- parseFromString (many inline) raw + res <- parseFromString' (many inline) raw return $ trimInlinesF $ mconcat res authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines]) @@ -200,12 +201,12 @@ authorsLine = try $ do (trimInlinesF . mconcat <$> many (try $ notFollowedBy sep >> inline)) sep - sequence <$> parseFromString pAuthors raw + sequence <$> parseFromString' pAuthors raw dateLine :: PandocMonad m => MarkdownParser m (F Inlines) dateLine = try $ do raw <- rawTitleBlockLine - res <- parseFromString (many inline) raw + res <- parseFromString' (many inline) raw return $ trimInlinesF $ mconcat res titleBlock :: PandocMonad m => MarkdownParser m () @@ -290,7 +291,7 @@ ignorable t = (T.pack "_") `T.isSuffixOf` t toMetaValue :: PandocMonad m => Text -> MarkdownParser m (F MetaValue) -toMetaValue x = toMeta <$> parseFromString parseBlocks (T.unpack x) +toMetaValue x = toMeta <$> parseFromString' parseBlocks (T.unpack x) where toMeta p = do p' <- p @@ -360,20 +361,20 @@ parseMarkdown = do optional titleBlock blocks <- parseBlocks st <- getState + -- check for notes with no corresponding note references + let notesUsed = stateNoteRefs st + let notesDefined = M.keys (stateNotes' st) + mapM_ (\n -> unless (n `Set.member` notesUsed) $ do + -- lookup to get sourcepos + case M.lookup n (stateNotes' st) of + Just (pos, _) -> report (NoteDefinedButNotUsed n pos) + Nothing -> error "The impossible happened.") notesDefined let doc = runF (do Pandoc _ bs <- B.doc <$> blocks meta <- stateMeta' st return $ Pandoc meta bs) st reportLogMessages (do guardEnabled Ext_east_asian_line_breaks - return $ bottomUp softBreakFilter doc) <|> return doc - -softBreakFilter :: [Inline] -> [Inline] -softBreakFilter (x:SoftBreak:y:zs) = - case (stringify x, stringify y) of - (xs@(_:_), (c:_)) - | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs - _ -> x:SoftBreak:y:zs -softBreakFilter xs = xs + return $ eastAsianLineBreakFilter doc) <|> return doc referenceKey :: PandocMonad m => MarkdownParser m (F Blocks) referenceKey = try $ do @@ -392,7 +393,9 @@ referenceKey = try $ do src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle attr <- option nullAttr $ try $ - guardEnabled Ext_link_attributes >> skipSpaces >> attributes + do guardEnabled Ext_link_attributes + skipSpaces >> optional newline >> skipSpaces + attributes addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes >> many (try $ spnl >> keyValAttr) blanklines @@ -402,8 +405,12 @@ referenceKey = try $ do let oldkeys = stateKeys st let key = toKey raw case M.lookup key oldkeys of - Just _ -> logMessage $ DuplicateLinkReference raw pos - Nothing -> return () + Just (t,a) | not (t == target && a == attr') -> + -- We don't warn on two duplicate keys if the targets are also + -- the same. This can happen naturally with --reference-location=block + -- or section. See #3701. + logMessage $ DuplicateLinkReference raw pos + _ -> return () updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty @@ -464,13 +471,12 @@ noteBlock = try $ do rest <- many $ try $ blanklines >> indentSpaces >> rawLines let raw = unlines (first:rest) ++ "\n" optional blanklines - parsed <- parseFromString parseBlocks raw - let newnote = (ref, parsed) + parsed <- parseFromString' parseBlocks raw oldnotes <- stateNotes' <$> getState - case lookup ref oldnotes of + case M.lookup ref oldnotes of Just _ -> logMessage $ DuplicateNoteReference ref pos Nothing -> return () - updateState $ \s -> s { stateNotes' = newnote : oldnotes } + updateState $ \s -> s { stateNotes' = M.insert ref (pos, parsed) oldnotes } return mempty -- @@ -614,7 +620,7 @@ hrule = try $ do -- indentedLine :: PandocMonad m => MarkdownParser m String -indentedLine = indentSpaces >> anyLine >>= return . (++ "\n") +indentedLine = indentSpaces >> anyLineNewline blockDelimiter :: PandocMonad m => (Char -> Bool) @@ -772,7 +778,7 @@ blockQuote :: PandocMonad m => MarkdownParser m (F Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" + contents <- parseFromString' parseBlocks $ (intercalate "\n" raw) ++ "\n\n" return $ B.blockQuote <$> contents -- @@ -868,8 +874,7 @@ listContinuationLine = try $ do notFollowedBy' listStart notFollowedByHtmlCloser optional indentSpaces - result <- anyLine - return $ result ++ "\n" + anyLineNewline listItem :: PandocMonad m => MarkdownParser m a @@ -885,7 +890,7 @@ listItem start = try $ do setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may contain various block elements: let raw = concat (first:continuations) - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw updateState (\st -> st {stateParserContext = oldContext}) return contents @@ -932,8 +937,8 @@ definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Bl definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact - term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine' - contents <- mapM (parseFromString parseBlocks . (++"\n")) raw + term <- parseFromString' (trimInlinesF . mconcat <$> many inline) rawLine' + contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw optional blanklines return $ liftM2 (,) term (sequence contents) @@ -941,7 +946,7 @@ defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String defRawBlock compact = try $ do hasBlank <- option False $ blankline >> return True defListMarker - firstline <- anyLine + firstline <- anyLineNewline let dline = try ( do notFollowedBy blankline notFollowedByHtmlCloser @@ -956,7 +961,7 @@ defRawBlock compact = try $ do ln <- indentSpaces >> notFollowedBy blankline >> anyLine lns <- many dline return $ trailing ++ unlines (ln:lns) - return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++ + return $ trimr (firstline ++ unlines rawlines ++ cont) ++ if hasBlank || not (null cont) then "\n\n" else "" definitionList :: PandocMonad m => MarkdownParser m (F Blocks) @@ -1088,13 +1093,19 @@ rawTeXBlock = do rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks) rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag + -- we don't want '<td> text' to be a code block: + skipMany spaceChar + indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0 -- try to find closing tag -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags oldInHtmlBlock <- stateInHtmlBlock <$> getState updateState $ \st -> st{ stateInHtmlBlock = Just tagtype } let closer = htmlTag (\x -> x ~== TagClose tagtype) - contents <- mconcat <$> many (notFollowedBy' closer >> block) + let block' = do notFollowedBy' closer + atMostSpaces indentlevel + block + contents <- mconcat <$> many block' result <- (closer >>= \(_, rawcloser) -> return ( return (B.rawBlock "html" $ stripMarkdownAttribute raw) <> @@ -1119,7 +1130,7 @@ lineBlock :: PandocMonad m => MarkdownParser m (F Blocks) lineBlock = try $ do guardEnabled Ext_line_blocks lines' <- lineBlockLines >>= - mapM (parseFromString (trimInlinesF . mconcat <$> many inline)) + mapM (parseFromString' (trimInlinesF . mconcat <$> many inline)) return $ B.lineBlock <$> sequence lines' -- @@ -1162,7 +1173,7 @@ simpleTableHeader headless = try $ do then replicate (length dashes) "" else rawHeads heads <- fmap sequence - $ mapM (parseFromString (mconcat <$> many plain)) + $ mapM (parseFromString' (mconcat <$> many plain)) $ map trim rawHeads' return (heads, aligns, indices) @@ -1208,7 +1219,7 @@ tableLine :: PandocMonad m => [Int] -> MarkdownParser m (F [Blocks]) tableLine indices = rawTableLine indices >>= - fmap sequence . mapM (parseFromString (mconcat <$> many plain)) + fmap sequence . mapM (parseFromString' (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow :: PandocMonad m @@ -1217,7 +1228,7 @@ multilineRow :: PandocMonad m multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines - fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols + fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. @@ -1275,7 +1286,7 @@ multilineTableHeader headless = try $ do then replicate (length dashes) "" else map (unlines . map trim) rawHeadsList heads <- fmap sequence $ - mapM (parseFromString (mconcat <$> many plain)) $ + mapM (parseFromString' (mconcat <$> many plain)) $ map trim rawHeads return (heads, aligns, indices) @@ -1285,89 +1296,7 @@ multilineTableHeader headless = try $ do -- ending with a footer (dashed line followed by blank line). gridTable :: PandocMonad m => Bool -- ^ Headerless table -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) -gridTable headless = - tableWith (gridTableHeader headless) gridTableRow - (gridTableSep '-') gridTableFooter - -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = map removeFinalBar $ tail $ - splitStringByIndices (init indices) $ trimr line - -gridPart :: PandocMonad m => Char -> ParserT [Char] st m ((Int, Int), Alignment) -gridPart ch = do - leftColon <- option False (True <$ char ':') - dashes <- many1 (char ch) - rightColon <- option False (True <$ char ':') - char '+' - let lengthDashes = length dashes + (if leftColon then 1 else 0) + - (if rightColon then 1 else 0) - let alignment = case (leftColon, rightColon) of - (True, True) -> AlignCenter - (True, False) -> AlignLeft - (False, True) -> AlignRight - (False, False) -> AlignDefault - return ((lengthDashes, lengthDashes + 1), alignment) - -gridDashedLines :: PandocMonad m => Char -> ParserT [Char] st m [((Int, Int), Alignment)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline - -removeFinalBar :: String -> String -removeFinalBar = - reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse - --- | Separator between rows of grid table. -gridTableSep :: PandocMonad m => Char -> MarkdownParser m Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - --- | Parse header for a grid table. -gridTableHeader :: PandocMonad m => Bool -- ^ Headerless table - -> MarkdownParser m (F [Blocks], [Alignment], [Int]) -gridTableHeader headless = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- if headless - then return [] - else many1 (try (char '|' >> anyLine)) - underDashes <- if headless - then return dashes - else gridDashedLines '=' - guard $ length dashes == length underDashes - let lines' = map (snd . fst) underDashes - let indices = scanl (+) 0 lines' - let aligns = map snd underDashes - let rawHeads = if headless - then replicate (length underDashes) "" - else map (unlines . map trim) $ transpose - $ map (gridTableSplitLine indices) rawContent - heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads - return (heads, aligns, indices) - -gridTableRawLine :: PandocMonad m => [Int] -> MarkdownParser m [String] -gridTableRawLine indices = do - char '|' - line <- anyLine - return (gridTableSplitLine indices line) - --- | Parse row of grid table. -gridTableRow :: PandocMonad m => [Int] - -> MarkdownParser m (F [Blocks]) -gridTableRow indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ - transpose colLines - fmap compactify <$> fmap sequence (mapM (parseFromString parseBlocks) cols) - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = - if all startsWithSpace xs - then map (drop 1) xs - else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' - --- | Parse footer for a grid table. -gridTableFooter :: PandocMonad m => MarkdownParser m [Char] -gridTableFooter = blanklines +gridTable headless = gridTableWith' parseBlocks headless pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int]) pipeBreak = try $ do @@ -1414,7 +1343,7 @@ pipeTableRow = try $ do let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') <|> void (noneOf "|\n\r") let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>= - parseFromString pipeTableCell + parseFromString' pipeTableCell cells <- cellContents `sepEndBy1` (char '|') -- surrounding pipes needed for a one-column table: guard $ not (length cells == 1 && not openPipe) @@ -1522,6 +1451,7 @@ inline = choice [ whitespace , autoLink , spanHtml , rawHtmlInline + , escapedNewline , escapedChar , rawLaTeXInline' , exampleRef @@ -1538,16 +1468,20 @@ escapedChar' = try $ do (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) <|> (guardEnabled Ext_angle_brackets_escapable >> oneOf "\\`*_{}[]()>#+-.!~\"<>") - <|> (guardEnabled Ext_escaped_line_breaks >> char '\n') <|> oneOf "\\`*_{}[]()>#+-.!~\"" +escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines) +escapedNewline = try $ do + guardEnabled Ext_escaped_line_breaks + char '\\' + lookAhead (char '\n') -- don't consume the newline (see #3730) + return $ return B.linebreak + escapedChar :: PandocMonad m => MarkdownParser m (F Inlines) escapedChar = do result <- escapedChar' case result of ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space - '\n' -> guardEnabled Ext_escaped_line_breaks >> - return (return B.linebreak) -- "\[newline]" is a linebreak _ -> return $ return $ B.str [result] ltSign :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1629,9 +1563,9 @@ ender c n = try $ do three :: PandocMonad m => Char -> MarkdownParser m (F Inlines) three c = do contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline) - (ender c 3 >> return ((B.strong . B.emph) <$> contents)) - <|> (ender c 2 >> one c (B.strong <$> contents)) - <|> (ender c 1 >> two c (B.emph <$> contents)) + (ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents)) + <|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents)) + <|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents)) <|> return (return (B.str [c,c,c]) <> contents) -- Parse inlines til you hit two c's, and emit strong. @@ -1639,7 +1573,8 @@ three c = do two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines) two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) - (ender c 2 >> return (B.strong <$> (prefix' <> contents))) + (ender c 2 >> updateLastStrPos >> + return (B.strong <$> (prefix' <> contents))) <|> return (return (B.str [c,c]) <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. @@ -1650,7 +1585,7 @@ one c prefix' = do <|> try (string [c,c] >> notFollowedBy (ender c 1) >> two c mempty) ) - (ender c 1 >> return (B.emph <$> (prefix' <> contents))) + (ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents))) <|> return (return (B.str [c]) <> (prefix' <> contents)) strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1814,15 +1749,17 @@ referenceLink :: PandocMonad m referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False (_,raw') <- option (mempty, "") $ - lookAhead (try (guardEnabled Ext_citations >> - spnl >> normalCite >> return (mempty, ""))) + lookAhead (try (do guardEnabled Ext_citations + guardDisabled Ext_spaced_reference_links <|> spnl + normalCite + return (mempty, ""))) <|> - try (spnl >> reference) + try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference) when (raw' == "") $ guardEnabled Ext_shortcut_reference_links let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' - parsedRaw <- parseFromString (mconcat <$> many inline) raw' - fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw + parsedRaw <- parseFromString' (mconcat <$> many inline) raw' + fallback <- parseFromString' (mconcat <$> many inline) $ dropBrackets raw implicitHeaderRefs <- option False $ True <$ guardEnabled Ext_implicit_header_references let makeFallback = do @@ -1887,16 +1824,17 @@ note :: PandocMonad m => MarkdownParser m (F Inlines) note = try $ do guardEnabled Ext_footnotes ref <- noteMarker + updateState $ \st -> st{ stateNoteRefs = Set.insert ref (stateNoteRefs st) } return $ do notes <- asksF stateNotes' - case lookup ref notes of + case M.lookup ref notes of Nothing -> return $ B.str $ "[^" ++ ref ++ "]" - Just contents -> do + Just (_pos, contents) -> do st <- askF -- process the note in a context that doesn't resolve -- notes, to avoid infinite looping with notes inside -- notes: - let contents' = runF contents st{ stateNotes' = [] } + let contents' = runF contents st{ stateNotes' = M.empty } return $ B.note contents' inlineNote :: PandocMonad m => MarkdownParser m (F Inlines) @@ -2028,7 +1966,7 @@ textualCite = try $ do let (spaces',raw') = span isSpace raw spc | null spaces' = mempty | otherwise = B.space - lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw' + lab <- parseFromString' (mconcat <$> many inline) $ dropBrackets raw' fallback <- referenceLink B.linkWith (lab,raw') return $ do fallback' <- fallback diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index b35f39aad..a3ff60c14 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -3,7 +3,7 @@ {-# LANGUAGE TypeSynonymInstances #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- - Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> + Copyright (C) 2012-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.MediaWiki - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -41,6 +41,7 @@ module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isDigit, isSpace) +import Data.Text (Text, unpack) import qualified Data.Foldable as F import Data.List (intercalate, intersperse, isPrefixOf) import qualified Data.Map as M @@ -64,7 +65,7 @@ import Text.Pandoc.XML (fromEntities) -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readMediaWiki opts s = do parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts @@ -74,8 +75,9 @@ readMediaWiki opts s = do , mwHeaderMap = M.empty , mwIdentifierList = Set.empty , mwLogMessages = [] + , mwInTT = False } - (s ++ "\n") + (unpack s ++ "\n") case parsed of Right result -> return result Left e -> throwError e @@ -87,6 +89,7 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwHeaderMap :: M.Map Inlines String , mwIdentifierList :: Set.Set String , mwLogMessages :: [LogMessage] + , mwInTT :: Bool } type MWParser m = ParserT [Char] MWState m @@ -569,7 +572,12 @@ inlineTag = do TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub" TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup" TagOpen "code" _ -> encode <$> inlinesInTags "code" - TagOpen "tt" _ -> encode <$> inlinesInTags "tt" + TagOpen "tt" _ -> do + inTT <- mwInTT <$> getState + updateState $ \st -> st{ mwInTT = True } + result <- encode <$> inlinesInTags "tt" + updateState $ \st -> st{ mwInTT = inTT } + return result TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) @@ -688,6 +696,10 @@ strong = B.strong <$> nested (inlinesBetween start end) end = try $ sym "'''" doubleQuotes :: PandocMonad m => MWParser m Inlines -doubleQuotes = B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote) +doubleQuotes = do + guardEnabled Ext_smart + inTT <- mwInTT <$> getState + guard (not inTT) + B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote) where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar closeDoubleQuote = try $ sym "\"" diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 2e307fa4f..abc2ed38a 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Native - Copyright : Copyright (C) 2011-2015 John MacFarlane + Copyright : Copyright (C) 2011-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -37,6 +37,7 @@ import Text.Pandoc.Shared (safeRead) import Control.Monad.Except (throwError) import Text.Pandoc.Class import Text.Pandoc.Error +import Data.Text (Text, unpack) -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, @@ -50,22 +51,22 @@ import Text.Pandoc.Error -- readNative :: PandocMonad m => ReaderOptions - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readNative _ s = - case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of + case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead (unpack s)) of Right doc -> return doc Left _ -> throwError $ PandocParseError "couldn't read native" -readBlocks :: String -> Either PandocError [Block] -readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) +readBlocks :: Text -> Either PandocError [Block] +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead (unpack s)) -readBlock :: String -> Either PandocError Block -readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) +readBlock :: Text -> Either PandocError Block +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead (unpack s)) -readInlines :: String -> Either PandocError [Inline] -readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) +readInlines :: Text -> Either PandocError [Inline] +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead (unpack s)) -readInline :: String -> Either PandocError Inline -readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ s) Right (safeRead s) +readInline :: Text -> Either PandocError Inline +readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ unpack s) Right (safeRead (unpack s)) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index cf1c8f479..591d7590e 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -2,6 +2,7 @@ module Text.Pandoc.Readers.OPML ( readOPML ) where import Control.Monad.State import Data.Char (toUpper) +import Data.Text (Text, unpack, pack) import Data.Default import Data.Generics import Text.HTML.TagSoup.Entity (lookupEntity) @@ -28,9 +29,10 @@ instance Default OPMLState where , opmlDocDate = mempty } -readOPML :: PandocMonad m => ReaderOptions -> String -> m Pandoc +readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML _ inp = do - (bs, st') <- flip runStateT def (mapM parseBlock $ normalizeTree $ parseXML inp) + (bs, st') <- flip runStateT def + (mapM parseBlock $ normalizeTree $ parseXML (unpack inp)) return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ @@ -69,10 +71,10 @@ asHtml :: PandocMonad m => String -> OPML m Inlines asHtml s = (\(Pandoc _ bs) -> case bs of [Plain ils] -> fromList ils - _ -> mempty) <$> (lift $ readHtml def s) + _ -> mempty) <$> (lift $ readHtml def (pack s)) asMarkdown :: PandocMonad m => String -> OPML m Blocks -asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def s) +asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def (pack s)) getBlocks :: PandocMonad m => Element -> OPML m Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs index b056f1ecc..3d716ba19 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs @@ -59,10 +59,6 @@ withState :: (state -> a -> (state, b)) -> ArrowState state a b withState = ArrowState . uncurry -- | Constructor -withState' :: ((state, a) -> (state, b)) -> ArrowState state a b -withState' = ArrowState - --- | Constructor modifyState :: (state -> state ) -> ArrowState state a a modifyState = ArrowState . first @@ -79,10 +75,6 @@ extractFromState :: (state -> b ) -> ArrowState state x b extractFromState f = ArrowState $ \(state,_) -> (state, f state) -- | Constructor -withUnchangedState :: (state -> a -> b ) -> ArrowState state a b -withUnchangedState f = ArrowState $ \(state,a) -> (state, f state a) - --- | Constructor tryModifyState :: (state -> Either f state) -> ArrowState state a (Either f a) tryModifyState f = ArrowState $ \(state,a) @@ -107,43 +99,9 @@ instance ArrowChoice (ArrowState state) where Left l -> (s, Left l) Right r -> second Right $ runArrowState a (s,r) -instance ArrowLoop (ArrowState state) where - loop a = ArrowState $ \(s, x) - -> let (s', (x', _d)) = runArrowState a (s, (x, _d)) - in (s', x') - instance ArrowApply (ArrowState state) where app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b) - --- | Embedding of a state arrow in a state arrow with a different state type. -switchState :: (s -> s') -> (s' -> s) -> ArrowState s' x y -> ArrowState s x y -switchState there back a = ArrowState $ first there - >>> runArrowState a - >>> first back - --- | Lift a state arrow to modify the state of an arrow --- with a different state type. -liftToState :: (s -> s') -> ArrowState s' s s -> ArrowState s x x -liftToState unlift a = modifyState $ unlift &&& id - >>> runArrowState a - >>> snd - --- | Switches the type of the state temporarily. --- Drops the intermediate result state, behaving like the identity arrow, --- save for side effects in the state. -withSubState :: ArrowState s x s2 -> ArrowState s2 s s -> ArrowState s x x -withSubState unlift a = keepingTheValue (withSubState unlift a) >>^ fst - --- | Switches the type of the state temporarily. --- Returns the resulting sub-state. -withSubState' :: ArrowState s x s' -> ArrowState s' s s -> ArrowState s x s' -withSubState' unlift a = ArrowState $ runArrowState unlift - >>> switch - >>> runArrowState a - >>> switch - where switch (x,y) = (y,x) - -- | Switches the type of the state temporarily. -- Drops the intermediate result state, behaving like a fallible -- identity arrow, save for side effects in the state. @@ -175,42 +133,6 @@ foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f where a' x (s',m) = second (m <>) $ runArrowState a (s',x) --- | Fold a state arrow through something 'Foldable'. Collect the results --- in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. -foldSL :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m -foldSL a = ArrowState $ \(s,f) -> foldl a' (s,mempty) f - where a' (s',m) x = second (m <>) $ runArrowState a (s',x) - --- | Fold a fallible state arrow through something 'Foldable'. Collect the --- results in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. --- If the iteration fails, the state will be reset to the initial one. -foldS' :: (Foldable f, Monoid m) - => ArrowState s x (Either e m) - -> ArrowState s (f x) (Either e m) -foldS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mempty) f - where a' s x (s',Right m) = case runArrowState a (s',x) of - (s'',Right m') -> (s'', Right (m <> m')) - (_ ,Left e ) -> (s , Left e) - a' _ _ e = e - --- | Fold a fallible state arrow through something 'Foldable'. Collect the --- results in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. --- If the iteration fails, the state will be reset to the initial one. -foldSL' :: (Foldable f, Monoid m) - => ArrowState s x (Either e m) - -> ArrowState s (f x) (Either e m) -foldSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mempty) f - where a' s (s',Right m) x = case runArrowState a (s',x) of - (s'',Right m') -> (s'', Right (m <> m')) - (_ ,Left e ) -> (s , Left e) - a' _ e _ = e - -- | Fold a state arrow through something 'Foldable'. Collect the results in a -- 'MonadPlus'. iterateS :: (Foldable f, MonadPlus m) @@ -239,15 +161,3 @@ iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f (s'',Right m') -> (s'',Right $ mplus m $ return m') (_ ,Left e ) -> (s ,Left e ) a' _ _ e = e - --- | Fold a fallible state arrow through something 'Foldable'. --- Collect the results in a 'MonadPlus'. --- If the iteration fails, the state will be reset to the initial one. -iterateSL' :: (Foldable f, MonadPlus m) - => ArrowState s x (Either e y ) - -> ArrowState s (f x) (Either e (m y)) -iterateSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mzero) f - where a' s (s',Right m) x = case runArrowState a (s',x) of - (s'',Right m') -> (s'',Right $ mplus m $ return m') - (_ ,Left e ) -> (s ,Left e ) - a' _ e _ = e diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs index 218a85661..ecef8b6e3 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs @@ -40,10 +40,7 @@ with an equivalent return value. module Text.Pandoc.Readers.Odt.Arrows.Utils where import Control.Arrow -import Control.Monad ( join, MonadPlus(..) ) - -import qualified Data.Foldable as F -import Data.Monoid +import Control.Monad ( join ) import Text.Pandoc.Readers.Odt.Generic.Fallible import Text.Pandoc.Readers.Odt.Generic.Utils @@ -63,12 +60,6 @@ and5 :: (Arrow a) and6 :: (Arrow a) => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5 -> a b (c0,c1,c2,c3,c4,c5 ) -and7 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6 - -> a b (c0,c1,c2,c3,c4,c5,c6 ) -and8 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6->a b c7 - -> a b (c0,c1,c2,c3,c4,c5,c6,c7) and3 a b c = (and2 a b ) &&& c >>^ \((z,y ) , x) -> (z,y,x ) @@ -78,10 +69,6 @@ and5 a b c d e = (and4 a b c d ) &&& e >>^ \((z,y,x,w ) , v) -> (z,y,x,w,v ) and6 a b c d e f = (and5 a b c d e ) &&& f >>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u ) -and7 a b c d e f g = (and6 a b c d e f ) &&& g - >>^ \((z,y,x,w,v,u ) , t) -> (z,y,x,w,v,u,t ) -and8 a b c d e f g h = (and7 a b c d e f g) &&& h - >>^ \((z,y,x,w,v,u,t) , s) -> (z,y,x,w,v,u,t,s) liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z liftA2 f a b = a &&& b >>^ uncurry f @@ -98,19 +85,11 @@ liftA5 :: (Arrow a) => (z->y->x->w->v -> r) liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r) -> a b z->a b y->a b x->a b w->a b v->a b u -> a b r -liftA7 :: (Arrow a) => (z->y->x->w->v->u->t -> r) - -> a b z->a b y->a b x->a b w->a b v->a b u->a b t - -> a b r -liftA8 :: (Arrow a) => (z->y->x->w->v->u->t->s -> r) - -> a b z->a b y->a b x->a b w->a b v->a b u->a b t->a b s - -> a b r liftA3 fun a b c = and3 a b c >>^ uncurry3 fun liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun -liftA7 fun a b c d e f g = and7 a b c d e f g >>^ uncurry7 fun -liftA8 fun a b c d e f g h = and8 a b c d e f g h >>^ uncurry8 fun liftA :: (Arrow a) => (y -> z) -> a b y -> a b z liftA fun a = a >>^ fun @@ -124,28 +103,12 @@ liftA fun a = a >>^ fun duplicate :: (Arrow a) => a b (b,b) duplicate = arr $ join (,) --- | Lifts the combination of two values into an arrow. -joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z -joinOn = arr.uncurry - -- | Applies a function to the uncurried result-pair of an arrow-application. -- (The %-symbol was chosen to evoke an association with pairs.) (>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d a >>% f = a >>^ uncurry f --- | '(>>%)' with its arguments flipped -(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d -(%<<) = flip (>>%) - --- | Precomposition with an uncurried function -(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r -f %>> a = uncurry f ^>> a - --- | Precomposition with an uncurried function (right to left variant) -(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r -(<<%) = flip (%>>) - -infixr 2 >>%, %<<, %>>, <<% +infixr 2 >>% -- | Duplicate a value and apply an arrow to the second instance. @@ -156,56 +119,6 @@ infixr 2 >>%, %<<, %>>, <<% keepingTheValue :: (Arrow a) => a b c -> a b (b,c) keepingTheValue a = returnA &&& a --- | Duplicate a value and apply an arrow to the first instance. --- Aequivalent to --- > \a -> duplicate >>> first a --- or --- > \a -> a &&& returnA -keepingTheValue' :: (Arrow a) => a b c -> a b (c,b) -keepingTheValue' a = a &&& returnA - --- | 'bind' from the "Maybe"-Monad lifted into an 'ArrowChoice'. --- Actually, it's the more complex '(>=>)', because 'bind' alone does not --- combine as nicely in arrow form. --- The current implementation is not the most efficient one, because it can --- not return directly if a 'Nothing' is encountered. That in turn follows --- from the type system, as 'Nothing' has an "invisible" type parameter that --- can not be dropped early. --- --- Also, there probably is a way to generalize this to other monads --- or applicatives, but I'm leaving that as an exercise to the reader. --- I have a feeling there is a new Arrow-typeclass to be found that is less --- restrictive than 'ArrowApply'. If it is already out there, --- I have not seen it yet. ('ArrowPlus' for example is not general enough.) -(>>>=) :: (ArrowChoice a) => a x (Maybe b) -> a b (Maybe c) -> a x (Maybe c) -a1 >>>= a2 = a1 >>> maybeToChoice >>> right a2 >>> choiceToMaybe >>^ join - -infixr 2 >>>= - --- | 'mplus' Lifted into an arrow. No 'ArrowPlus' required. --- (But still different from a true bind) -(>++<) :: (Arrow a, MonadPlus m) => a x (m b) -> a x (m b) -> a x (m b) -(>++<) = liftA2 mplus - --- | Left-compose with a pure function -leftLift :: (ArrowChoice a) => (l -> l') -> a (Either l r) (Either l' r) -leftLift = left.arr - --- | Right-compose with a pure function -rightLift :: (ArrowChoice a) => (r -> r') -> a (Either l r) (Either l r') -rightLift = right.arr - - -( ^+++ ) :: (ArrowChoice a) => (b -> c) -> a b' c' -> a (Either b b') (Either c c') -( +++^ ) :: (ArrowChoice a) => a b c -> (b' -> c') -> a (Either b b') (Either c c') -( ^+++^ ) :: (ArrowChoice a) => (b -> c) -> (b' -> c') -> a (Either b b') (Either c c') - -l ^+++ r = leftLift l >>> right r -l +++^ r = left l >>> rightLift r -l ^+++^ r = leftLift l >>> rightLift r - -infixr 2 ^+++, +++^, ^+++^ - ( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d ( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d ( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d @@ -218,33 +131,12 @@ infixr 2 ^||| , |||^, ^|||^ ( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c') ( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c') -( ^&&&^ ) :: (Arrow a) => (b -> c) -> (b -> c') -> a b (c,c') l ^&&& r = arr l &&& r l &&&^ r = l &&& arr r -l ^&&&^ r = arr l &&& arr r - -infixr 3 ^&&&, &&&^, ^&&&^ -( ^*** ) :: (Arrow a) => (b -> c) -> a b' c' -> a (b,b') (c,c') -( ***^ ) :: (Arrow a) => a b c -> (b' -> c') -> a (b,b') (c,c') -( ^***^ ) :: (Arrow a) => (b -> c) -> (b' -> c') -> a (b,b') (c,c') +infixr 3 ^&&&, &&&^ -l ^*** r = arr l *** r -l ***^ r = l *** arr r -l ^***^ r = arr l *** arr r - -infixr 3 ^***, ***^, ^***^ - --- | A version of --- --- >>> \p -> arr (\x -> if p x the Right x else Left x) --- --- but with p being an arrow -choose :: (ArrowChoice a) => a b Bool -> a b (Either b b) -choose checkValue = keepingTheValue checkValue >>^ select - where select (x,True ) = Right x - select (x,False ) = Left x -- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@. choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r) @@ -258,130 +150,15 @@ maybeToChoice = arr maybeToEither returnV :: (Arrow a) => c -> a x c returnV = arr.const --- | 'returnA' dropping everything -returnA_ :: (Arrow a) => a _b () -returnA_ = returnV () - --- | Wrapper for an arrow that can be evaluated im parallel. All --- Arrows can be evaluated in parallel, as long as they return a --- monoid. -newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c } - deriving (Eq, Ord, Show) - -instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where - mempty = CoEval $ returnV mempty - (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend - --- | Evaluates a collection of arrows in a parallel fashion. --- --- This is in essence a fold of '(&&&)' over the collection, --- so the actual execution order and parallelity depends on the --- implementation of '(&&&)' in the arrow in question. --- The default implementation of '(&&&)' for example keeps the --- order as given in the collection. --- --- This function can be seen as a generalization of --- 'Control.Applicative.sequenceA' to arrows or as an alternative to --- a fold with 'Control.Applicative.WrappedArrow', which --- substitutes the monoid with function application. --- -coEval :: (Arrow a, F.Foldable f, Monoid m) => f (a b m) -> a b m -coEval = evalParallelArrow . (F.foldMap CoEval) - -- | Defines Left as failure, Right as success type FallibleArrow a input failure success = a input (Either failure success) -type ReFallibleArrow a failure success success' - = FallibleArrow a (Either failure success) failure success' - --- | Wrapper for fallible arrows. Fallible arrows are all arrows that return --- an Either value where left is a faliure and right is a success value. -newtype AlternativeArrow a input failure success - = TryArrow { evalAlternativeArrow :: FallibleArrow a input failure success } - - -instance (ArrowChoice a, Monoid failure) - => Monoid (AlternativeArrow a input failure success) where - mempty = TryArrow $ returnV $ Left mempty - (TryArrow a) `mappend` (TryArrow b) - = TryArrow $ a &&& b - >>^ \(a',~b') - -> ( (\a'' -> left (mappend a'') b') ||| Right ) - a' - --- | Evaluates a collection of fallible arrows, trying each one in succession. --- Left values are interpreted as failures, right values as successes. --- --- The evaluation is stopped once an arrow succeeds. --- Up to that point, all failures are collected in the failure-monoid. --- Note that '()' is a monoid, and thus can serve as a failure-collector if --- you are uninterested in the exact failures. --- --- This is in essence a fold of '(&&&)' over the collection, enhanced with a --- little bit of repackaging, so the actual execution order depends on the --- implementation of '(&&&)' in the arrow in question. --- The default implementation of '(&&&)' for example keeps the --- order as given in the collection. --- -tryArrows :: (ArrowChoice a, F.Foldable f, Monoid failure) - => f (FallibleArrow a b failure success) - -> FallibleArrow a b failure success -tryArrows = evalAlternativeArrow . (F.foldMap TryArrow) - --- -liftSuccess :: (ArrowChoice a) - => (success -> success') - -> ReFallibleArrow a failure success success' -liftSuccess = rightLift - -- liftAsSuccess :: (ArrowChoice a) => a x success -> FallibleArrow a x failure success liftAsSuccess a = a >>^ Right --- -asFallibleArrow :: (ArrowChoice a) - => a x success - -> FallibleArrow a x failure success -asFallibleArrow a = a >>^ Right - --- | Raises an error into a 'ReFallibleArrow' if the arrow is already in --- "error mode" -liftError :: (ArrowChoice a, Monoid failure) - => failure - -> ReFallibleArrow a failure success success -liftError e = leftLift (e <>) - --- | Raises an error into a 'FallibleArrow', droping both the arrow input --- and any previously stored error value. -_raiseA :: (ArrowChoice a) - => failure - -> FallibleArrow a x failure success -_raiseA e = returnV (Left e) - --- | Raises an empty error into a 'FallibleArrow', droping both the arrow input --- and any previously stored error value. -_raiseAEmpty :: (ArrowChoice a, Monoid failure) - => FallibleArrow a x failure success -_raiseAEmpty = _raiseA mempty - --- | Raises an error into a 'ReFallibleArrow', possibly appending the new error --- to an existing one -raiseA :: (ArrowChoice a, Monoid failure) - => failure - -> ReFallibleArrow a failure success success -raiseA e = arr $ Left.(either (<> e) (const e)) - --- | Raises an empty error into a 'ReFallibleArrow'. If there already is an --- error, nothing changes. --- (Note that this function is only aequivalent to @raiseA mempty@ iff the --- failure monoid follows the monoid laws.) -raiseAEmpty :: (ArrowChoice a, Monoid failure) - => ReFallibleArrow a failure success success -raiseAEmpty = arr (fromRight (const mempty) >>> Left) - - -- | Execute the second arrow if the first succeeds (>>?) :: (ArrowChoice a) => FallibleArrow a x failure success @@ -410,20 +187,6 @@ a >>?^? b = a >>> Left ^|||^ b -> FallibleArrow a x failure success' a ^>>? b = a ^>> Left ^||| b --- | Execute the lifted second arrow if the lifted first arrow succeeds -(^>>?^) :: (ArrowChoice a) - => (x -> Either failure success) - -> (success -> success') - -> FallibleArrow a x failure success' -a ^>>?^ f = arr $ a >>> right f - --- | Execute the lifted second arrow if the lifted first arrow succeeds -(^>>?^?) :: (ArrowChoice a) - => (x -> Either failure success) - -> (success -> Either failure success') - -> FallibleArrow a x failure success' -a ^>>?^? f = a ^>> Left ^|||^ f - -- | Execute the second, non-fallible arrow if the first arrow succeeds (>>?!) :: (ArrowChoice a) => FallibleArrow a x failure success @@ -453,33 +216,9 @@ a ^>>?% f = arr a >>?^ (uncurry f) a >>?%? f = a >>?^? (uncurry f) infixr 1 >>?, >>?^, >>?^? -infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?! +infixr 1 ^>>?, >>?! infixr 1 >>?%, ^>>?%, >>?%? --- | Keep values that are Right, replace Left values by a constant. -ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v -ifFailedUse v = arr $ either (const v) id - --- | '(&&)' lifted into an arrow -(<&&>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool -(<&&>) = liftA2 (&&) - --- | '(||)' lifted into an arrow -(<||>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool -(<||>) = liftA2 (||) - --- | An equivalent of '(&&)' in a fallible arrow -(>&&<) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f s - -> FallibleArrow a x f s' - -> FallibleArrow a x f (s,s') -(>&&<) = liftA2 chooseMin - --- | An equivalent of '(||)' in some forms of fallible arrows -(>||<) :: (ArrowChoice a, Monoid f, Monoid s) => FallibleArrow a x f s - -> FallibleArrow a x f s - -> FallibleArrow a x f s -(>||<) = liftA2 chooseMax - -- | An arrow version of a short-circuit (<|>) ifFailedDo :: (ArrowChoice a) => FallibleArrow a x f y @@ -489,7 +228,4 @@ ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right) where repackage (x , Left _) = Left x repackage (_ , Right y) = Right y -infixr 4 <&&>, <||>, >&&<, >||< infixr 1 `ifFailedDo` - - diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index a1bd8cb59..777c10df5 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -94,8 +94,6 @@ data ReaderState , envMedia :: Media -- | Hold binary resources used in the document , odtMediaBag :: MediaBag --- , sequences --- , trackedChangeIDs } deriving ( Show ) @@ -899,9 +897,6 @@ read_reference_ref = matchingElement NsText "reference-ref" -- Entry point ---------------------- ---read_plain_content :: OdtReaderSafe _x Inlines ---read_plain_content = strContent >>^ text - read_text :: OdtReaderSafe _x Pandoc read_text = matchChildContent' [ read_header , read_paragraph diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs index 877443543..4d6a67b8e 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs @@ -39,10 +39,6 @@ compatible instances of "ArrowChoice". -- We export everything module Text.Pandoc.Readers.Odt.Generic.Fallible where -import Control.Applicative -import Control.Monad - -import qualified Data.Foldable as F import Data.Monoid ((<>)) -- | Default for now. Will probably become a class at some point. @@ -51,16 +47,6 @@ type Failure = () type Fallible a = Either Failure a --- | False -> Left (), True -> Right () -boolToEither :: Bool -> Fallible () -boolToEither False = Left () -boolToEither True = Right () - --- | False -> Left (), True -> Right () -boolToChoice :: Bool -> Fallible () -boolToChoice False = Left () -boolToChoice True = Right () - -- maybeToEither :: Maybe a -> Fallible a maybeToEither (Just a) = Right a @@ -71,21 +57,11 @@ eitherToMaybe :: Either _l a -> Maybe a eitherToMaybe (Left _) = Nothing eitherToMaybe (Right a) = Just a --- | > untagEither === either id id -untagEither :: Either a a -> a -untagEither (Left a) = a -untagEither (Right a) = a - -- | > fromLeft f === either f id fromLeft :: (a -> b) -> Either a b -> b fromLeft f (Left a) = f a fromLeft _ (Right b) = b --- | > fromRight f === either id f -fromRight :: (a -> b) -> Either b a -> b -fromRight _ (Left b) = b -fromRight f (Right a) = f a - -- | > recover a === fromLeft (const a) === either (const a) id recover :: a -> Either _f a -> a recover a (Left _) = a @@ -110,24 +86,6 @@ collapseEither (Left f ) = Left f collapseEither (Right (Left f)) = Left f collapseEither (Right (Right x)) = Right x --- | If either of the values represents an error, the result is a --- (possibly combined) error. If both values represent a success, --- both are returned. -chooseMin :: (Monoid a) => Either a b -> Either a b' -> Either a (b,b') -chooseMin = chooseMinWith (,) - --- | If either of the values represents an error, the result is a --- (possibly combined) error. If both values represent a success, --- a combination is returned. -chooseMinWith :: (Monoid a) => (b -> b' -> c) - -> Either a b - -> Either a b' - -> Either a c -chooseMinWith (><) (Right a) (Right b) = Right $ a >< b -chooseMinWith _ (Left a) (Left b) = Left $ a <> b -chooseMinWith _ (Left a) _ = Left a -chooseMinWith _ _ (Left b) = Left b - -- | If either of the values represents a non-error, the result is a -- (possibly combined) non-error. If both values represent an error, an error -- is returned. @@ -152,87 +110,11 @@ chooseMaxWith _ _ (Right b) = Right b class ChoiceVector v where spreadChoice :: v (Either f a) -> Either f (v a) --- Let's do a few examples first - -instance ChoiceVector Maybe where - spreadChoice (Just (Left f)) = Left f - spreadChoice (Just (Right x)) = Right (Just x) - spreadChoice Nothing = Right Nothing - -instance ChoiceVector (Either l) where - spreadChoice (Right (Left f)) = Left f - spreadChoice (Right (Right x)) = Right (Right x) - spreadChoice (Left x ) = Right (Left x) - instance ChoiceVector ((,) a) where spreadChoice (_, Left f) = Left f spreadChoice (x, Right y) = Right (x,y) -- Wasn't there a newtype somewhere with the elements flipped? --- --- More instances later, first some discussion. --- --- I'll have to freshen up on type system details to see how (or if) to do --- something like --- --- > instance (ChoiceVector a, ChoiceVector b) => ChoiceVector (a b) where --- > : --- --- But maybe it would be even better to use something like --- --- > class ChoiceVector v v' f | v -> v' f where --- > spreadChoice :: v -> Either f v' --- --- That way, more places in @v@ could spread the cheer, e.g.: --- --- As before: --- -- ( a , Either f b) (a , b) f --- > instance ChoiceVector ((,) a (Either f b)) ((,) a b) f where --- > spreadChoice (_, Left f) = Left f --- > spreadChoice (a, Right b) = Right (a,b) --- --- But also: --- -- ( Either f a , b) (a , b) f --- > instance ChoiceVector ((,) (Either f a) b) ((,) a b) f where --- > spreadChoice (Right a,b) = Right (a,b) --- > spreadChoice (Left f,_) = Left f --- --- And maybe even: --- -- ( Either f a , Either f b) (a , b) f --- > instance ChoiceVector ((,) (Either f a) (Either f b)) ((,) a b) f where --- > spreadChoice (Right a , Right b) = Right (a,b) --- > spreadChoice (Left f , _ ) = Left f --- > spreadChoice ( _ , Left f) = Left f --- --- Of course that would lead to a lot of overlapping instances... --- But I can't think of a different way. A selector function might help, --- but not even a "Data.Traversable" is powerful enough for that. --- But maybe someone has already solved all this with a lens library. --- --- Well, it's an interesting academic question. But for practical purposes, --- I have more than enough right now. - -instance ChoiceVector ((,,) a b) where - spreadChoice (_,_, Left f) = Left f - spreadChoice (a,b, Right x) = Right (a,b,x) - -instance ChoiceVector ((,,,) a b c) where - spreadChoice (_,_,_, Left f) = Left f - spreadChoice (a,b,c, Right x) = Right (a,b,c,x) - -instance ChoiceVector ((,,,,) a b c d) where - spreadChoice (_,_,_,_, Left f) = Left f - spreadChoice (a,b,c,d, Right x) = Right (a,b,c,d,x) - -instance ChoiceVector (Const a) where - spreadChoice (Const c) = Right (Const c) -- need to repackage because of implicit types - --- | Fails on the first error -instance ChoiceVector [] where - spreadChoice = sequence -- using the monad instance of Either. - -- Could be generalized to "Data.Traversable" - but why play - -- with UndecidableInstances unless this is really needed. - -- | Wrapper for a list. While the normal list instance of 'ChoiceVector' -- fails whenever it can, this type will never fail. newtype SuccessList a = SuccessList { collectNonFailing :: [a] } @@ -247,14 +129,3 @@ instance ChoiceVector SuccessList where collectRights :: [Either _l r] -> [r] collectRights = collectNonFailing . untag . spreadChoice . SuccessList where untag = fromLeft (error "Unexpected Left") - --- | A version of 'collectRights' generalized to other containers. The --- container must be both "reducible" and "buildable". Most general containers --- should fullfill these requirements, but there is no single typeclass --- (that I know of) for that. --- Therefore, they are split between 'Foldable' and 'MonadPlus'. --- (Note that 'Data.Traversable.Traversable' alone would not be enough, either.) -collectRightsF :: (F.Foldable c, MonadPlus c) => c (Either _l r) -> c r -collectRightsF = F.foldr unTagRight mzero - where unTagRight (Right x) = mplus $ return x - unTagRight _ = id diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs index 6c10ed61d..4af4242b6 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs @@ -38,8 +38,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils , uncurry4 , uncurry5 , uncurry6 -, uncurry7 -, uncurry8 , swap , reverseComposition , bool @@ -148,15 +146,11 @@ uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z -uncurry7 :: (a->b->c->d->e->f->g -> z) -> (a,b,c,d,e,f,g ) -> z -uncurry8 :: (a->b->c->d->e->f->g->h -> z) -> (a,b,c,d,e,f,g,h) -> z uncurry3 fun (a,b,c ) = fun a b c uncurry4 fun (a,b,c,d ) = fun a b c d uncurry5 fun (a,b,c,d,e ) = fun a b c d e uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f -uncurry7 fun (a,b,c,d,e,f,g ) = fun a b c d e f g -uncurry8 fun (a,b,c,d,e,f,g,h) = fun a b c d e f g h swap :: (a,b) -> (b,a) swap (a,b) = (b,a) @@ -168,4 +162,3 @@ findBy :: (a -> Maybe b) -> [a] -> Maybe b findBy _ [] = Nothing findBy f ((f -> Just x):_ ) = Just x findBy f ( _:xs) = findBy f xs - diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 8c03d1a09..1c3e08a7f 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -41,50 +41,17 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter , XMLConverterState , XMLConverter , FallibleXMLConverter -, swapPosition -, runConverter -, runConverter'' , runConverter' -, runConverterF' -, runConverterF -, getCurrentElement , getExtraState , setExtraState , modifyExtraState -, convertingExtraState , producingExtraState -, lookupNSiri -, lookupNSprefix -, readNSattributes -, elemName -, elemNameIs -, strContent -, elContent -, currentElem -, currentElemIs -, expectElement -, elChildren -, findChildren -, filterChildren -, filterChildrenName , findChild' -, findChild -, filterChild' -, filterChild -, filterChildName' -, filterChildName -, isSet , isSet' , isSetWithDefault -, hasAttrValueOf' -, failIfNotAttrValueOf -, isThatTheAttrValue -, searchAttrIn -, searchAttrWith , searchAttr , lookupAttr , lookupAttr' -, lookupAttrWithDefault , lookupDefaultingAttr , findAttr' , findAttr @@ -93,25 +60,9 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter , readAttr' , readAttrWithDefault , getAttr --- , (>/<) --- , (?>/<) , executeIn -, collectEvery , withEveryL -, withEvery , tryAll -, tryAll' -, IdXMLConverter -, MaybeEConverter -, ElementMatchConverter -, MaybeCConverter -, ContentMatchConverter -, makeMatcherE -, makeMatcherC -, prepareMatchersE -, prepareMatchersC -, matchChildren -, matchContent'' , matchContent' , matchContent ) where @@ -121,7 +72,6 @@ import Control.Monad ( MonadPlus ) import Control.Arrow import qualified Data.Map as M -import qualified Data.Foldable as F import Data.Default import Data.Maybe @@ -210,17 +160,6 @@ currentElement state = head (parentElements state) -- | Replace the current position by another, modifying the extra state -- in the process -swapPosition :: (extraState -> extraState') - -> [XML.Element] - -> XMLConverterState nsID extraState - -> XMLConverterState nsID extraState' -swapPosition f stack state - = state { parentElements = stack - , moreState = f (moreState state) - } - --- | Replace the current position by another, modifying the extra state --- in the process swapStack' :: XMLConverterState nsID extraState -> [XML.Element] -> ( XMLConverterState nsID extraState , [XML.Element] ) @@ -264,14 +203,6 @@ runConverter :: XMLConverter nsID extraState input output -> output runConverter converter state input = snd $ runArrowState converter (state,input) --- -runConverter'' :: (NameSpaceID nsID) - => XMLConverter nsID extraState (Fallible ()) output - -> extraState - -> XML.Element - -> output -runConverter'' converter extraState element = runConverter (readNSattributes >>> converter) (createStartState element extraState) () - runConverter' :: (NameSpaceID nsID) => FallibleXMLConverter nsID extraState () success -> extraState @@ -280,20 +211,6 @@ runConverter' :: (NameSpaceID nsID) runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) () -- -runConverterF' :: FallibleXMLConverter nsID extraState x y - -> XMLConverterState nsID extraState - -> Fallible x -> Fallible y -runConverterF' a s e = runConverter (returnV e >>? a) s e - --- -runConverterF :: (NameSpaceID nsID) - => FallibleXMLConverter nsID extraState XML.Element x - -> extraState - -> Fallible XML.Element -> Fallible x -runConverterF a s = either failWith - (\e -> runConverter a (createStartState e s) e) - --- getCurrentElement :: XMLConverter nsID extraState x XML.Element getCurrentElement = extractFromState currentElement @@ -430,57 +347,15 @@ elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName -------------------------------------------------------------------------------- -- -strContent :: XMLConverter nsID extraState x String -strContent = getCurrentElement - >>^ XML.strContent - --- elContent :: XMLConverter nsID extraState x [XML.Content] elContent = getCurrentElement >>^ XML.elContent -------------------------------------------------------------------------------- --- Current element --------------------------------------------------------------------------------- - --- -currentElem :: XMLConverter nsID extraState x (XML.QName) -currentElem = getCurrentElement - >>^ XML.elName - -currentElemIs :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState x Bool -currentElemIs nsID name = getCurrentElement - >>> elemNameIs nsID name - - - -{- -currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>> - (XML.qName >>^ (&&).(== name) ) - ^&&&^ - (XML.qIRI >>^ (==) ) - ) >>% (.) - ) &&& lookupNSiri nsID >>% ($) --} - --- -expectElement :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState x () -expectElement nsID name = currentElemIs nsID name - >>^ boolToChoice - --------------------------------------------------------------------------------- -- Chilren -------------------------------------------------------------------------------- -- -elChildren :: XMLConverter nsID extraState x [XML.Element] -elChildren = getCurrentElement - >>^ XML.elChildren - -- findChildren :: (NameSpaceID nsID) => nsID -> ElementName @@ -490,18 +365,6 @@ findChildren nsID name = elemName nsID name >>% XML.findChildren -- -filterChildren :: (XML.Element -> Bool) - -> XMLConverter nsID extraState x [XML.Element] -filterChildren p = getCurrentElement - >>^ XML.filterChildren p - --- -filterChildrenName :: (XML.QName -> Bool) - -> XMLConverter nsID extraState x [XML.Element] -filterChildrenName p = getCurrentElement - >>^ XML.filterChildrenName p - --- findChild' :: (NameSpaceID nsID) => nsID -> ElementName @@ -517,45 +380,12 @@ findChild :: (NameSpaceID nsID) findChild nsID name = findChild' nsID name >>> maybeToChoice --- -filterChild' :: (XML.Element -> Bool) - -> XMLConverter nsID extraState x (Maybe XML.Element) -filterChild' p = getCurrentElement - >>^ XML.filterChild p - --- -filterChild :: (XML.Element -> Bool) - -> FallibleXMLConverter nsID extraState x XML.Element -filterChild p = filterChild' p - >>> maybeToChoice - --- -filterChildName' :: (XML.QName -> Bool) - -> XMLConverter nsID extraState x (Maybe XML.Element) -filterChildName' p = getCurrentElement - >>^ XML.filterChildName p - --- -filterChildName :: (XML.QName -> Bool) - -> FallibleXMLConverter nsID extraState x XML.Element -filterChildName p = filterChildName' p - >>> maybeToChoice - -------------------------------------------------------------------------------- -- Attributes -------------------------------------------------------------------------------- -- -isSet :: (NameSpaceID nsID) - => nsID -> AttributeName - -> (Either Failure Bool) - -> FallibleXMLConverter nsID extraState x Bool -isSet nsID attrName deflt - = findAttr' nsID attrName - >>^ maybe deflt stringToBool - --- isSet' :: (NameSpaceID nsID) => nsID -> AttributeName -> XMLConverter nsID extraState x (Maybe Bool) @@ -570,34 +400,6 @@ isSetWithDefault nsID attrName def' = isSet' nsID attrName >>^ fromMaybe def' --- -hasAttrValueOf' :: (NameSpaceID nsID) - => nsID -> AttributeName - -> AttributeValue - -> XMLConverter nsID extraState x Bool -hasAttrValueOf' nsID attrName attrValue - = findAttr nsID attrName - >>> ( const False ^|||^ (==attrValue)) - --- -failIfNotAttrValueOf :: (NameSpaceID nsID) - => nsID -> AttributeName - -> AttributeValue - -> FallibleXMLConverter nsID extraState x () -failIfNotAttrValueOf nsID attrName attrValue - = hasAttrValueOf' nsID attrName attrValue - >>^ boolToChoice - --- | Is the value that is currently transported in the arrow the value of --- the specified attribute? -isThatTheAttrValue :: (NameSpaceID nsID) - => nsID -> AttributeName - -> FallibleXMLConverter nsID extraState AttributeValue Bool -isThatTheAttrValue nsID attrName - = keepingTheValue - (findAttr nsID attrName) - >>% right.(==) - -- | Lookup value in a dictionary, fail if no attribute found or value -- not in dictionary searchAttrIn :: (NameSpaceID nsID) @@ -608,18 +410,6 @@ searchAttrIn nsID attrName dict = findAttr nsID attrName >>?^? maybeToChoice.(`lookup` dict ) - --- | Lookup value in a dictionary. Fail if no attribute found. If value not in --- dictionary, return default value -searchAttrWith :: (NameSpaceID nsID) - => nsID -> AttributeName - -> a - -> [(AttributeValue,a)] - -> FallibleXMLConverter nsID extraState x a -searchAttrWith nsID attrName defV dict - = findAttr nsID attrName - >>?^ (fromMaybe defV).(`lookup` dict ) - -- | Lookup value in a dictionary. If attribute or value not found, -- return default value searchAttr :: (NameSpaceID nsID) @@ -789,16 +579,6 @@ prepareIteration nsID name = keepingTheValue (findChildren nsID name) >>% distributeValue --- | Applies a converter to every child element of a specific type. --- Collects results in a 'Monoid'. --- Fails completely if any conversion fails. -collectEvery :: (NameSpaceID nsID, Monoid m) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a m - -> FallibleXMLConverter nsID extraState a m -collectEvery nsID name a = prepareIteration nsID name - >>> foldS' (switchingTheStack a) - -- withEveryL :: (NameSpaceID nsID) => nsID -> ElementName @@ -826,16 +606,6 @@ tryAll nsID name a = prepareIteration nsID name >>> iterateS (switchingTheStack a) >>^ collectRights --- | Applies a converter to every child element of a specific type. --- Collects all successful results. -tryAll' :: (NameSpaceID nsID, F.Foldable c, MonadPlus c) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState b a - -> XMLConverter nsID extraState b (c a) -tryAll' nsID name a = prepareIteration nsID name - >>> iterateS (switchingTheStack a) - >>^ collectRightsF - -------------------------------------------------------------------------------- -- Matching children -------------------------------------------------------------------------------- @@ -843,15 +613,6 @@ tryAll' nsID name a = prepareIteration nsID name type IdXMLConverter nsID moreState x = XMLConverter nsID moreState x x -type MaybeEConverter nsID moreState x - = Maybe (IdXMLConverter nsID moreState (x, XML.Element)) - --- Chainable converter that helps deciding which converter to actually use. -type ElementMatchConverter nsID extraState x - = IdXMLConverter nsID - extraState - (MaybeEConverter nsID extraState x, XML.Element) - type MaybeCConverter nsID moreState x = Maybe (IdXMLConverter nsID moreState (x, XML.Content)) @@ -862,26 +623,6 @@ type ContentMatchConverter nsID extraState x (MaybeCConverter nsID extraState x, XML.Content) -- Helper function: The @c@ is actually a converter that is to be selected by --- matching XML elements to the first two parameters. --- The fold used to match elements however is very simple, so to use it, --- this function wraps the converter in another converter that unifies --- the accumulator. Think of a lot of converters with the resulting type --- chained together. The accumulator not only transports the element --- unchanged to the next matcher, it also does the actual selecting by --- combining the intermediate results with '(<|>)'. -makeMatcherE :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a a - -> ElementMatchConverter nsID extraState a -makeMatcherE nsID name c = ( second ( - elemNameIs nsID name - >>^ bool Nothing (Just tryC) - ) - >>% (<|>) - ) &&&^ snd - where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd - --- Helper function: The @c@ is actually a converter that is to be selected by -- matching XML content to the first two parameters. -- The fold used to match elements however is very simple, so to use it, -- this function wraps the converter in another converter that unifies @@ -914,13 +655,6 @@ makeMatcherC nsID name c = ( second ( contentToElem _ -> failEmpty -- Creates and chains a bunch of matchers -prepareMatchersE :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] - -> ElementMatchConverter nsID extraState x ---prepareMatchersE = foldSs . (map $ uncurry3 makeMatcherE) -prepareMatchersE = reverseComposition . (map $ uncurry3 makeMatcherE) - --- Creates and chains a bunch of matchers prepareMatchersC :: (NameSpaceID nsID) => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] -> ContentMatchConverter nsID extraState x @@ -928,52 +662,6 @@ prepareMatchersC :: (NameSpaceID nsID) prepareMatchersC = reverseComposition . (map $ uncurry3 makeMatcherC) -- | Takes a list of element-data - converter groups and --- * Finds all children of the current element --- * Matches each group to each child in order (at most one group per child) --- * Filters non-matched children --- * Chains all found converters in child-order --- * Applies the chain to the input element -matchChildren :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState a a -matchChildren lookups = let matcher = prepareMatchersE lookups - in keepingTheValue ( - elChildren - >>> map (Nothing,) - ^>> iterateSL matcher - >>^ catMaybes.map (\(m,e) -> fmap (swallowElem e) m) - -- >>> foldSs - >>> reverseComposition - ) - >>> swap - ^>> app - where - -- let the converter swallow the element and drop the element - -- in the return value - swallowElem element converter = (,element) ^>> converter >>^ fst - --- -matchContent'' :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState a a -matchContent'' lookups = let matcher = prepareMatchersC lookups - in keepingTheValue ( - elContent - >>> map (Nothing,) - ^>> iterateSL matcher - >>^ catMaybes.map (\(m,c) -> fmap (swallowContent c) m) - -- >>> foldSs - >>> reverseComposition - ) - >>> swap - ^>> app - where - -- let the converter swallow the content and drop the content - -- in the return value - swallowContent content converter = (,content) ^>> converter >>^ fst - - --- | Takes a list of element-data - converter groups and -- * Finds all content of the current element -- * Matches each group to each piece of content in order -- (at most one group per piece of content) @@ -1018,14 +706,6 @@ matchContent lookups fallback -- Internals -------------------------------------------------------------------------------- -stringToBool :: (Monoid failure) => String -> Either failure Bool -stringToBool val -- stringToBool' val >>> maybeToChoice - | val `elem` trueValues = succeedWith True - | val `elem` falseValues = succeedWith False - | otherwise = failEmpty - where trueValues = ["true" ,"on" ,"1"] - falseValues = ["false","off","0"] - stringToBool' :: String -> Maybe Bool stringToBool' val | val `elem` trueValues = Just True | val `elem` falseValues = Just False diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 26ba6df82..87a6dc91c 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -50,23 +50,11 @@ module Text.Pandoc.Readers.Odt.StyleReader , ListLevelType (..) , LengthOrPercent (..) , lookupStyle -, getTextProperty -, getTextProperty' -, getParaProperty -, getListStyle , getListLevelStyle , getStyleFamily -, lookupDefaultStyle , lookupDefaultStyle' , lookupListStyleByName -, getPropertyChain -, textPropertyChain -, stylePropertyChain -, stylePropertyChain' -, getStylePropertyChain , extendedStylePropertyChain -, extendedStylePropertyChain' -, liftStyles , readStylesAt ) where @@ -83,7 +71,6 @@ import Data.Maybe import qualified Text.XML.Light as XML -import Text.Pandoc.Readers.Odt.Arrows.State import Text.Pandoc.Readers.Odt.Arrows.Utils import Text.Pandoc.Readers.Odt.Generic.Utils @@ -624,20 +611,11 @@ lookupStyle :: StyleName -> Styles -> Maybe Style lookupStyle name Styles{..} = M.lookup name stylesByName -- -lookupDefaultStyle :: StyleFamily -> Styles -> StyleProperties -lookupDefaultStyle family Styles{..} = fromMaybe def - (M.lookup family defaultStyleMap) - --- lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties lookupDefaultStyle' Styles{..} family = fromMaybe def (M.lookup family defaultStyleMap) -- -getListStyle :: Style -> Styles -> Maybe ListStyle -getListStyle Style{..} styles = listStyle >>= (`lookupListStyleByName` styles) - --- lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle lookupListStyleByName name Styles{..} = M.lookup name listStylesByName @@ -681,64 +659,3 @@ extendedStylePropertyChain [style] styles = (stylePropertyChain style s ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) extendedStylePropertyChain (style:trace) styles = (stylePropertyChain style styles) ++ (extendedStylePropertyChain trace styles) --- Optimizable with Data.Sequence - --- -extendedStylePropertyChain' :: [Style] -> Styles -> Maybe [StyleProperties] -extendedStylePropertyChain' [] _ = Nothing -extendedStylePropertyChain' [style] styles = Just ( - (stylePropertyChain style styles) - ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) - ) -extendedStylePropertyChain' (style:trace) styles = fmap ((stylePropertyChain style styles) ++) - (extendedStylePropertyChain' trace styles) - --- -stylePropertyChain' :: Styles -> Style -> [StyleProperties] -stylePropertyChain' = flip stylePropertyChain - --- -getStylePropertyChain :: StyleName -> Styles -> [StyleProperties] -getStylePropertyChain name styles = maybe [] - (`stylePropertyChain` styles) - (lookupStyle name styles) - --- -getPropertyChain :: (StyleProperties -> Maybe a) -> Style -> Styles -> [a] -getPropertyChain extract style styles = catMaybes - $ map extract - $ stylePropertyChain style styles - --- -textPropertyChain :: Style -> Styles -> [TextProperties] -textPropertyChain = getPropertyChain textProperties - --- -paraPropertyChain :: Style -> Styles -> [ParaProperties] -paraPropertyChain = getPropertyChain paraProperties - --- -getTextProperty :: (TextProperties -> a) -> Style -> Styles -> Maybe a -getTextProperty extract style styles = fmap extract - $ listToMaybe - $ textPropertyChain style styles - --- -getTextProperty' :: (TextProperties -> Maybe a) -> Style -> Styles -> Maybe a -getTextProperty' extract style styles = F.asum - $ map extract - $ textPropertyChain style styles - --- -getParaProperty :: (ParaProperties -> a) -> Style -> Styles -> Maybe a -getParaProperty extract style styles = fmap extract - $ listToMaybe - $ paraPropertyChain style styles - --- | Lifts the reader into another readers' state. -liftStyles :: (OdtConverterState s -> OdtConverterState Styles) - -> (OdtConverterState Styles -> OdtConverterState s ) - -> XMLReader s x x -liftStyles extract inject = switchState extract inject - $ convertingExtraState M.empty readAllStyles - diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5e509178d..5e0d67d10 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Org - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -40,15 +40,18 @@ import Text.Pandoc.Parsing (reportLogMessages) import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) +import Data.Text (Text) +import qualified Data.Text as T -- | Parse org-mode string and return a Pandoc document. readOrg :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readOrg opts s = do parsed <- flip runReaderT def $ - readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n") + readWithM parseOrg (optionsToParserState opts) + (T.unpack s ++ "\n\n") case parsed of Right result -> return result Left _ -> throwError $ PandocParseError "problem parsing org" diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs index cc2e82d5b..9c6614c99 100644 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.BlockStarts + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -61,8 +61,12 @@ headerStart = try $ tableStart :: Monad m => OrgParser m Char tableStart = try $ skipSpaces *> char '|' +gridTableStart :: Monad m => OrgParser m () +gridTableStart = try $ skipSpaces <* char '+' <* char '-' + + latexEnvStart :: Monad m => OrgParser m String -latexEnvStart = try $ do +latexEnvStart = try $ skipSpaces *> string "\\begin{" *> latexEnvName <* string "}" @@ -93,8 +97,7 @@ orderedListStart = genericListStart orderedListMarker where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") drawerStart :: Monad m => OrgParser m String -drawerStart = try $ - skipSpaces *> drawerName <* skipSpaces <* newline +drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline where drawerName = char ':' *> manyTill nonspaceChar (char ':') metaLineStart :: Monad m => OrgParser m () @@ -116,8 +119,8 @@ noteMarker = try $ do -- | Succeeds if the parser is at the end of a block. endOfBlock :: Monad m => OrgParser m () -endOfBlock = lookAhead . try $ do - void blankline <|> anyBlockStart +endOfBlock = lookAhead . try $ + void blankline <|> anyBlockStart where -- Succeeds if there is a new block starting at this position. anyBlockStart :: Monad m => OrgParser m () @@ -126,6 +129,7 @@ endOfBlock = lookAhead . try $ do , hline , metaLineStart , commentLineStart + , gridTableStart , void noteMarker , void tableStart , void drawerStart @@ -134,4 +138,3 @@ endOfBlock = lookAhead . try $ do , void bulletListStart , void orderedListStart ] - diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index b0a19b833..3e0ab0127 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,6 +1,3 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE ViewPatterns #-} {- Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -18,9 +15,10 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} - +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} {- | - Module : Text.Pandoc.Readers.Org.Options + Module : Text.Pandoc.Readers.Org.Blocks Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above @@ -34,6 +32,7 @@ module Text.Pandoc.Readers.Org.Blocks ) where import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.DocumentTree (documentTree, headlineToBlocks) import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) import Text.Pandoc.Readers.Org.ParserState @@ -52,211 +51,21 @@ import Control.Monad (foldM, guard, mzero, void) import Data.Char (isSpace, toLower, toUpper) import Data.Default (Default) import Data.List (foldl', isPrefixOf) -import Data.Maybe (fromMaybe, isNothing) +import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid ((<>)) -- --- Org headers --- -newtype Tag = Tag { fromTag :: String } - deriving (Show, Eq) - --- | Create a tag containing the given string. -toTag :: String -> Tag -toTag = Tag - --- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } - deriving (Show, Eq, Ord) - --- | Create a property key containing the given string. Org mode keys are --- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower - --- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } - --- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue -toPropertyValue = PropertyValue - --- | Check whether the property value is non-nil (i.e. truish). -isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] - --- | Key/value pairs from a PROPERTIES drawer -type Properties = [(PropertyKey, PropertyValue)] - --- | Org mode headline (i.e. a document subtree). -data Headline = Headline - { headlineLevel :: Int - , headlineTodoMarker :: Maybe TodoMarker - , headlineText :: Inlines - , headlineTags :: [Tag] - , headlineProperties :: Properties - , headlineContents :: Blocks - , headlineChildren :: [Headline] - } - --- --- Parsing headlines and subtrees --- - --- | Read an Org mode headline and its contents (i.e. a document subtree). --- @lvl@ gives the minimum acceptable level of the tree. -headline :: PandocMonad m => Int -> OrgParser m (F Headline) -headline lvl = try $ do - level <- headerStart - guard (lvl <= level) - todoKw <- optionMaybe todoKeyword - title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle - tags <- option [] headerTags - newline - properties <- option mempty propertiesDrawer - contents <- blocks - children <- many (headline (level + 1)) - return $ do - title' <- title - contents' <- contents - children' <- sequence children - return $ Headline - { headlineLevel = level - , headlineTodoMarker = todoKw - , headlineText = title' - , headlineTags = tags - , headlineProperties = properties - , headlineContents = contents' - , headlineChildren = children' - } - where - endOfTitle :: Monad m => OrgParser m () - endOfTitle = void . lookAhead $ optional headerTags *> newline - - headerTags :: Monad m => OrgParser m [Tag] - headerTags = try $ - let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' - in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) - --- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks -headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks -headlineToBlocks hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - case () of - _ | any isNoExportTag headlineTags -> return mempty - _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln - _ | isCommentTitle headlineText -> return mempty - _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln - _ | otherwise -> headlineToHeaderWithContents hdln - -isNoExportTag :: Tag -> Bool -isNoExportTag = (== toTag "noexport") - -isArchiveTag :: Tag -> Bool -isArchiveTag = (== toTag "ARCHIVE") - --- | Check if the title starts with COMMENT. --- FIXME: This accesses builder internals not intended for use in situations --- like these. Replace once keyword parsing is supported. -isCommentTitle :: Inlines -> Bool -isCommentTitle (B.toList -> (Str "COMMENT":_)) = True -isCommentTitle _ = False - -archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks -archivedHeadlineToBlocks hdln = do - archivedTreesOption <- getExportSetting exportArchivedTrees - case archivedTreesOption of - ArchivedTreesNoExport -> return mempty - ArchivedTreesExport -> headlineToHeaderWithContents hdln - ArchivedTreesHeadlineOnly -> headlineToHeader hdln - -headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithList hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - header <- headlineToHeader hdln - listElements <- sequence (map headlineToBlocks headlineChildren) - let listBlock = if null listElements - then mempty - else B.orderedList listElements - let headerText = if maxHeadlineLevels == headlineLevel - then header - else flattenHeader header - return $ headerText <> headlineContents <> listBlock - where - flattenHeader :: Blocks -> Blocks - flattenHeader blks = - case B.toList blks of - (Header _ _ inlns:_) -> B.para (B.fromList inlns) - _ -> mempty - -headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithContents hdln@(Headline {..}) = do - header <- headlineToHeader hdln - childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren) - return $ header <> headlineContents <> childrenBlocks - -headlineToHeader :: Monad m => Headline -> OrgParser m Blocks -headlineToHeader (Headline {..}) = do - exportTodoKeyword <- getExportSetting exportWithTodoKeywords - let todoText = if exportTodoKeyword - then case headlineTodoMarker of - Just kw -> todoKeywordToInlines kw <> B.space - Nothing -> mempty - else mempty - let text = tagTitle (todoText <> headlineText) headlineTags - let propAttr = propertiesToAttr headlineProperties - attr <- registerHeader propAttr headlineText - return $ B.headerWith attr headlineLevel text - -todoKeyword :: Monad m => OrgParser m TodoMarker -todoKeyword = try $ do - taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar) - choice (map kwParser taskStates) - -todoKeywordToInlines :: TodoMarker -> Inlines -todoKeywordToInlines tdm = - let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm - classes = [todoState, todoText] - in B.spanWith (mempty, classes, mempty) (B.str todoText) - -propertiesToAttr :: Properties -> Attr -propertiesToAttr properties = - let - toStringPair prop = (fromKey (fst prop), fromValue (snd prop)) - customIdKey = toPropertyKey "custom_id" - classKey = toPropertyKey "class" - unnumberedKey = toPropertyKey "unnumbered" - specialProperties = [customIdKey, classKey, unnumberedKey] - id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties - cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) - $ properties - isUnnumbered = - fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties - in - (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs') - -tagTitle :: Inlines -> [Tag] -> Inlines -tagTitle title tags = title <> (mconcat $ map tagToInline tags) - -tagToInline :: Tag -> Inlines -tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty - - --- -- parsing blocks -- -- | Get a list of blocks. blockList :: PandocMonad m => OrgParser m [Block] blockList = do - initialBlocks <- blocks - headlines <- sequence <$> manyTill (headline 1) eof + headlines <- documentTree blocks inline st <- getState - headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st - return . B.toList $ (runF initialBlocks st) <> headlineBlocks + headlineBlocks <- headlineToBlocks $ runF headlines st + -- ignore first headline, it's the document's title + return . drop 1 . B.toList $ headlineBlocks -- | Get the meta information saved in the state. meta :: Monad m => OrgParser m Meta @@ -274,6 +83,7 @@ block = choice [ mempty <$ blanklines , figure , example , genericDrawer + , include , specialLine , horizontalRule , list @@ -302,7 +112,7 @@ data BlockAttributes = BlockAttributes -- | Convert BlockAttributes into pandoc Attr attrFromBlockAttributes :: BlockAttributes -> Attr -attrFromBlockAttributes (BlockAttributes{..}) = +attrFromBlockAttributes BlockAttributes{..} = let ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues classes = case lookup "class" blockAttrKeyValues of @@ -311,18 +121,18 @@ attrFromBlockAttributes (BlockAttributes{..}) = kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) -stringyMetaAttribute :: Monad m => (String -> Bool) -> OrgParser m (String, String) -stringyMetaAttribute attrCheck = try $ do +stringyMetaAttribute :: Monad m => OrgParser m (String, String) +stringyMetaAttribute = try $ do metaLineStart attrName <- map toUpper <$> many1Till nonspaceChar (char ':') - guard $ attrCheck attrName skipSpaces - attrValue <- anyLine + attrValue <- anyLine <|> ("" <$ newline) return (attrName, attrValue) blockAttributes :: PandocMonad m => OrgParser m BlockAttributes blockAttributes = try $ do - kv <- many (stringyMetaAttribute attrCheck) + kv <- many stringyMetaAttribute + guard $ all (attrCheck . fst) kv let caption = foldl' (appendValues "CAPTION") Nothing kv let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv let name = lookup "NAME" kv @@ -331,7 +141,7 @@ blockAttributes = try $ do Nothing -> return Nothing Just s -> Just <$> parseFromString inlines (s ++ "\n") kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs - return $ BlockAttributes + return BlockAttributes { blockAttrName = name , blockAttrLabel = label , blockAttrCaption = caption' @@ -339,13 +149,7 @@ blockAttributes = try $ do } where attrCheck :: String -> Bool - attrCheck attr = - case attr of - "NAME" -> True - "LABEL" -> True - "CAPTION" -> True - "ATTR_HTML" -> True - _ -> False + attrCheck x = x `elem` ["NAME", "LABEL", "CAPTION", "ATTR_HTML", "RESULTS"] appendValues :: String -> Maybe String -> (String, String) -> Maybe String appendValues attrName accValue (key, value) = @@ -355,6 +159,7 @@ blockAttributes = try $ do Just acc -> Just $ acc ++ ' ':value Nothing -> Just value +-- | Parse key-value pairs for HTML attributes keyValues :: Monad m => OrgParser m [(String, String)] keyValues = try $ manyTill ((,) <$> key <*> value) newline @@ -381,7 +186,7 @@ orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart ($ blkType) $ - case (map toLower blkType) of + case map toLower blkType of "export" -> exportBlock "comment" -> rawBlockLines (const mempty) "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) @@ -402,10 +207,10 @@ orgBlock = try $ do lowercase = map toLower rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) -rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType)) +rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) -parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent)) +parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent) where parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do @@ -433,8 +238,7 @@ rawBlockContent blockType = try $ do stripIndent strs = map (drop (shortestIndent strs)) strs shortestIndent :: [String] -> Int - shortestIndent = foldr min maxBound - . map (length . takeWhile isSpace) + shortestIndent = foldr (min . length . takeWhile isSpace) maxBound . filter (not . null) tabsToSpaces :: Int -> String -> String @@ -442,7 +246,7 @@ rawBlockContent blockType = try $ do tabsToSpaces tabLen cs'@(c:cs) = case c of ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs + '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs _ -> cs' commaEscaped :: String -> String @@ -490,16 +294,15 @@ codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) content <- rawBlockContent blockType - resultsContent <- trailingResultsBlock + resultsContent <- option mempty babelResultsBlock let id' = fromMaybe mempty $ blockAttrName blockAttrs let codeBlck = B.codeBlockWith ( id', classes, kv ) content let labelledBlck = maybe (pure codeBlck) (labelDiv codeBlck) (blockAttrCaption blockAttrs) - let resultBlck = fromMaybe mempty resultsContent return $ - (if exportsCode kv then labelledBlck else mempty) <> - (if exportsResults kv then resultBlck else mempty) + (if exportsCode kv then labelledBlck else mempty) <> + (if exportsResults kv then resultsContent else mempty) where labelDiv :: Blocks -> F Inlines -> F Blocks labelDiv blk value = @@ -514,12 +317,16 @@ codeBlock blockAttrs blockType = do exportsResults :: [(String, String)] -> Bool exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" -trailingResultsBlock :: PandocMonad m => OrgParser m (Maybe (F Blocks)) -trailingResultsBlock = optionMaybe . try $ do +-- | Parse the result of an evaluated babel code block. +babelResultsBlock :: PandocMonad m => OrgParser m (F Blocks) +babelResultsBlock = try $ do blanklines - stringAnyCase "#+RESULTS:" - blankline + resultsMarker <|> + (lookAhead . void . try $ + manyTill (metaLineStart *> anyLineNewline) resultsMarker) block + where + resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline -- | Parse code block arguments codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) @@ -527,13 +334,13 @@ codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord (switchClasses, switchKv) <- switchesAsAttributes parameters <- manyTill blockOption newline - return $ ( translateLang language : switchClasses - , originalLang language <> switchKv <> parameters - ) + return ( translateLang language : switchClasses + , originalLang language <> switchKv <> parameters + ) switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)]) switchesAsAttributes = try $ do - switches <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) + switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar) return $ foldr addToAttr ([], []) switches where addToAttr :: (Char, Maybe String, SwitchPolarity) @@ -541,10 +348,10 @@ switchesAsAttributes = try $ do -> ([String], [(String, String)]) addToAttr ('n', lineNum, pol) (cls, kv) = let kv' = case lineNum of - Just num -> (("startFrom", num):kv) + Just num -> ("startFrom", num):kv Nothing -> kv cls' = case pol of - SwitchPlus -> "continuedSourceBlock":cls + SwitchPlus -> "continuedSourceBlock":cls SwitchMinus -> cls in ("numberLines":cls', kv') addToAttr _ x = x @@ -573,7 +380,7 @@ genericSwitch :: Monad m genericSwitch c p = try $ do polarity <- switchPolarity <* char c <* skipSpaces arg <- optionMaybe p - return $ (c, arg, polarity) + return (c, arg, polarity) -- | Reads a line number switch option. The line number switch can be used with -- example and source blocks. @@ -593,8 +400,8 @@ orgParamValue = try $ *> noneOf "\n\r" `many1Till` endOfValue <* skipSpaces where - endOfValue = lookAhead $ (try $ skipSpaces <* oneOf "\n\r") - <|> (try $ skipSpaces1 <* orgArgKey) + endOfValue = lookAhead $ try (skipSpaces <* oneOf "\n\r") + <|> try (skipSpaces1 <* orgArgKey) -- @@ -612,7 +419,7 @@ genericDrawer = try $ do -- Include drawer if it is explicitly included in or not explicitly excluded -- from the list of drawers that should be exported. PROPERTIES drawers are -- never exported. - case (exportDrawers . orgStateExportSettings $ state) of + case exportDrawers . orgStateExportSettings $ state of _ | name == "PROPERTIES" -> return mempty Left names | name `elem` names -> return mempty Right names | name `notElem` names -> return mempty @@ -631,25 +438,6 @@ drawerEnd :: Monad m => OrgParser m String drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline --- | Read a :PROPERTIES: drawer and return the key/value pairs contained --- within. -propertiesDrawer :: Monad m => OrgParser m Properties -propertiesDrawer = try $ do - drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" - manyTill property (try drawerEnd) - where - property :: Monad m => OrgParser m (PropertyKey, PropertyValue) - property = try $ (,) <$> key <*> value - - key :: Monad m => OrgParser m PropertyKey - key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') - - value :: Monad m => OrgParser m PropertyValue - value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) - -- -- Figures @@ -665,7 +453,7 @@ figure = try $ do Nothing -> mzero Just imgSrc -> do guard (isImageFilename imgSrc) - let isFigure = not . isNothing $ blockAttrCaption figAttrs + let isFigure = isJust $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where selfTarget :: PandocMonad m => OrgParser m String @@ -700,8 +488,7 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- | Example code marked up by a leading colon. example :: Monad m => OrgParser m (F Blocks) -example = try $ do - returnF . exampleCode =<< unlines <$> many1 exampleLine +example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine where exampleLine :: Monad m => OrgParser m String exampleLine = try $ exampleLineStart *> anyLine @@ -717,6 +504,34 @@ exampleCode = B.codeBlockWith ("", ["example"], []) specialLine :: PandocMonad m => OrgParser m (F Blocks) specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine +-- | Include the content of a file. +include :: PandocMonad m => OrgParser m (F Blocks) +include = try $ do + metaLineStart <* stringAnyCase "include:" <* skipSpaces + filename <- includeTarget + blockType <- optionMaybe $ skipSpaces *> many1 alphaNum + blocksParser <- case blockType of + Just "example" -> + return $ pure . B.codeBlock <$> parseRaw + Just "export" -> do + format <- skipSpaces *> many (noneOf "\n\r\t ") + return $ pure . B.rawBlock format <$> parseRaw + Just "src" -> do + language <- skipSpaces *> many (noneOf "\n\r\t ") + let attr = (mempty, [language], mempty) + return $ pure . B.codeBlockWith attr <$> parseRaw + _ -> return $ pure . B.fromList <$> blockList + anyLine + insertIncludedFileF blocksParser ["."] filename + where + includeTarget :: PandocMonad m => OrgParser m FilePath + includeTarget = do + char '"' + manyTill (noneOf "\n\r\t") (char '"') + + parseRaw :: PandocMonad m => OrgParser m String + parseRaw = many anyChar + rawExportLine :: PandocMonad m => OrgParser m Blocks rawExportLine = try $ do metaLineStart @@ -755,11 +570,15 @@ data OrgTable = OrgTable } table :: PandocMonad m => OrgParser m (F Blocks) -table = try $ do +table = gridTableWith blocks True <|> orgTable + +-- | A normal org table +orgTable :: PandocMonad m => OrgParser m (F Blocks) +orgTable = try $ do -- don't allow a table on the first line of a list item; org requires that -- tables start at first non-space character on the line - let isFirstInListItem st = (orgStateParserContext st == ListItemState) && - (orgStateLastPreCharPos st == Nothing) + let isFirstInListItem st = orgStateParserContext st == ListItemState && + isNothing (orgStateLastPreCharPos st) guard =<< not . isFirstInListItem <$> getState blockAttrs <- blockAttributes lookAhead tableStart @@ -772,7 +591,7 @@ orgToPandocTable :: OrgTable -> Inlines -> Blocks orgToPandocTable (OrgTable colProps heads lns) caption = - let totalWidth = if any (not . isNothing) (map columnRelWidth colProps) + let totalWidth = if any isJust (map columnRelWidth colProps) then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps else Nothing in B.table caption (map (convertColProp totalWidth) colProps) heads lns @@ -782,7 +601,7 @@ orgToPandocTable (OrgTable colProps heads lns) caption = let align' = fromMaybe AlignDefault $ columnAlignment colProp width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t)) - <$> (columnRelWidth colProp) + <$> columnRelWidth colProp <*> totalWidth in (align', width') @@ -808,7 +627,7 @@ tableAlignRow = try $ do columnPropertyCell :: Monad m => OrgParser m ColumnProperty columnPropertyCell = emptyCell <|> propCell <?> "alignment info" where - emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell) + emptyCell = ColumnProperty Nothing Nothing <$ try (skipSpaces *> endOfCell) propCell = try $ ColumnProperty <$> (skipSpaces *> char '<' @@ -854,28 +673,28 @@ normalizeTable (OrgTable colProps heads rows) = rowToContent :: OrgTable -> OrgTableRow -> F OrgTable -rowToContent orgTable row = +rowToContent tbl row = case row of OrgHlineRow -> return singleRowPromotedToHeader OrgAlignRow props -> return . setProperties $ props OrgContentRow cs -> appendToBody cs where singleRowPromotedToHeader :: OrgTable - singleRowPromotedToHeader = case orgTable of - OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - orgTable{ orgTableHeader = b , orgTableRows = [] } - _ -> orgTable + singleRowPromotedToHeader = case tbl of + OrgTable{ orgTableHeader = [], orgTableRows = [b] } -> + tbl{ orgTableHeader = b , orgTableRows = [] } + _ -> tbl setProperties :: [ColumnProperty] -> OrgTable - setProperties ps = orgTable{ orgTableColumnProperties = ps } + setProperties ps = tbl{ orgTableColumnProperties = ps } appendToBody :: F [Blocks] -> F OrgTable appendToBody frow = do newRow <- frow - let oldRows = orgTableRows orgTable + let oldRows = orgTableRows tbl -- NOTE: This is an inefficient O(n) operation. This should be changed -- if performance ever becomes a problem. - return orgTable{ orgTableRows = oldRows ++ [newRow] } + return tbl{ orgTableRows = oldRows ++ [newRow] } -- @@ -917,7 +736,7 @@ noteBlock = try $ do paraOrPlain :: PandocMonad m => OrgParser m (F Blocks) paraOrPlain = try $ do -- Make sure we are not looking at a headline - notFollowedBy' (char '*' *> (oneOf " *")) + notFollowedBy' (char '*' *> oneOf " *") ils <- inlines nl <- option False (newline *> return True) -- Read block as paragraph, except if we are in a list context and the block @@ -926,7 +745,7 @@ paraOrPlain = try $ do try (guard nl *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart)) *> return (B.para <$> ils)) - <|> (return (B.plain <$> ils)) + <|> return (B.plain <$> ils) -- @@ -938,16 +757,16 @@ list = choice [ definitionList, bulletList, orderedList ] <?> "list" definitionList :: PandocMonad m => OrgParser m (F Blocks) definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.definitionList . fmap compactifyDL . sequence + fmap (B.definitionList . compactifyDL) . sequence <$> many1 (definitionListItem $ bulletListStart' (Just n)) bulletList :: PandocMonad m => OrgParser m (F Blocks) bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.bulletList . fmap compactify . sequence + fmap (B.bulletList . compactify) . sequence <$> many1 (listItem (bulletListStart' $ Just n)) orderedList :: PandocMonad m => OrgParser m (F Blocks) -orderedList = fmap B.orderedList . fmap compactify . sequence +orderedList = fmap (B.orderedList . compactify) . sequence <$> many1 (listItem orderedListStart) bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int @@ -1004,16 +823,3 @@ listContinuation markerLength = try $ <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline - - -- indent by specified number of spaces (or equiv. tabs) - indentWith :: Monad m => Int -> OrgParser m String - indentWith num = do - tabStop <- getOption readerTabStop - if num < tabStop - then count num (char ' ') - else choice [ try (count num (char ' ')) - , try (char '\t' >> count (num - tabStop) (char ' ')) ] - --- | Parse any line, include the final newline in the output. -anyLineNewline :: Monad m => OrgParser m String -anyLineNewline = (++ "\n") <$> anyLine diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs new file mode 100644 index 000000000..743f6cc0e --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -0,0 +1,304 @@ +{- +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE ViewPatterns #-} +{- | + Module : Text.Pandoc.Readers.Org.DocumentTree + Copyright : Copyright (C) 2014-2017 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +Parsers for org-mode headlines and document subtrees +-} +module Text.Pandoc.Readers.Org.DocumentTree + ( documentTree + , headlineToBlocks + ) where + +import Control.Arrow ((***)) +import Control.Monad (guard, void) +import Data.Char (toLower, toUpper) +import Data.List (intersperse) +import Data.Monoid ((<>)) +import Text.Pandoc.Builder (Blocks, Inlines) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing + +import qualified Data.Map as Map +import qualified Text.Pandoc.Builder as B + +-- +-- Org headers +-- + +-- | Parse input as org document tree. +documentTree :: PandocMonad m + => OrgParser m (F Blocks) + -> OrgParser m (F Inlines) + -> OrgParser m (F Headline) +documentTree blocks inline = do + initialBlocks <- blocks + headlines <- sequence <$> manyTill (headline blocks inline 1) eof + title <- fmap (getTitle . unMeta) . orgStateMeta <$> getState + return $ do + headlines' <- headlines + initialBlocks' <- initialBlocks + title' <- title + return Headline + { headlineLevel = 0 + , headlineTodoMarker = Nothing + , headlineText = B.fromList title' + , headlineTags = mempty + , headlineProperties = mempty + , headlineContents = initialBlocks' + , headlineChildren = headlines' + } + where + getTitle :: Map.Map String MetaValue -> [Inline] + getTitle metamap = + case Map.lookup "title" metamap of + Just (MetaInlines inlns) -> inlns + _ -> [] + +newtype Tag = Tag { fromTag :: String } + deriving (Show, Eq) + +-- | Create a tag containing the given string. +toTag :: String -> Tag +toTag = Tag + +-- | The key (also called name or type) of a property. +newtype PropertyKey = PropertyKey { fromKey :: String } + deriving (Show, Eq, Ord) + +-- | Create a property key containing the given string. Org mode keys are +-- case insensitive and are hence converted to lower case. +toPropertyKey :: String -> PropertyKey +toPropertyKey = PropertyKey . map toLower + +-- | The value assigned to a property. +newtype PropertyValue = PropertyValue { fromValue :: String } + +-- | Create a property value containing the given string. +toPropertyValue :: String -> PropertyValue +toPropertyValue = PropertyValue + +-- | Check whether the property value is non-nil (i.e. truish). +isNonNil :: PropertyValue -> Bool +isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] + +-- | Key/value pairs from a PROPERTIES drawer +type Properties = [(PropertyKey, PropertyValue)] + +-- | Org mode headline (i.e. a document subtree). +data Headline = Headline + { headlineLevel :: Int + , headlineTodoMarker :: Maybe TodoMarker + , headlineText :: Inlines + , headlineTags :: [Tag] + , headlineProperties :: Properties + , headlineContents :: Blocks + , headlineChildren :: [Headline] + } + +-- | Read an Org mode headline and its contents (i.e. a document subtree). +-- @lvl@ gives the minimum acceptable level of the tree. +headline :: PandocMonad m + => OrgParser m (F Blocks) + -> OrgParser m (F Inlines) + -> Int + -> OrgParser m (F Headline) +headline blocks inline lvl = try $ do + level <- headerStart + guard (lvl <= level) + todoKw <- optionMaybe todoKeyword + title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle + tags <- option [] headerTags + newline + properties <- option mempty propertiesDrawer + contents <- blocks + children <- many (headline blocks inline (level + 1)) + return $ do + title' <- title + contents' <- contents + children' <- sequence children + return Headline + { headlineLevel = level + , headlineTodoMarker = todoKw + , headlineText = title' + , headlineTags = tags + , headlineProperties = properties + , headlineContents = contents' + , headlineChildren = children' + } + where + endOfTitle :: Monad m => OrgParser m () + endOfTitle = void . lookAhead $ optional headerTags *> newline + + headerTags :: Monad m => OrgParser m [Tag] + headerTags = try $ + let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' + in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) + +-- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks +headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks +headlineToBlocks hdln@Headline {..} = do + maxHeadlineLevels <- getExportSetting exportHeadlineLevels + case () of + _ | any isNoExportTag headlineTags -> return mempty + _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln + _ | isCommentTitle headlineText -> return mempty + _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln + _ | otherwise -> headlineToHeaderWithContents hdln + +isNoExportTag :: Tag -> Bool +isNoExportTag = (== toTag "noexport") + +isArchiveTag :: Tag -> Bool +isArchiveTag = (== toTag "ARCHIVE") + +-- | Check if the title starts with COMMENT. +-- FIXME: This accesses builder internals not intended for use in situations +-- like these. Replace once keyword parsing is supported. +isCommentTitle :: Inlines -> Bool +isCommentTitle (B.toList -> (Str "COMMENT":_)) = True +isCommentTitle _ = False + +archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks +archivedHeadlineToBlocks hdln = do + archivedTreesOption <- getExportSetting exportArchivedTrees + case archivedTreesOption of + ArchivedTreesNoExport -> return mempty + ArchivedTreesExport -> headlineToHeaderWithContents hdln + ArchivedTreesHeadlineOnly -> headlineToHeader hdln + +headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks +headlineToHeaderWithList hdln@Headline {..} = do + maxHeadlineLevels <- getExportSetting exportHeadlineLevels + header <- headlineToHeader hdln + listElements <- mapM headlineToBlocks headlineChildren + let listBlock = if null listElements + then mempty + else B.orderedList listElements + let headerText = if maxHeadlineLevels == headlineLevel + then header + else flattenHeader header + return $ headerText <> headlineContents <> listBlock + where + flattenHeader :: Blocks -> Blocks + flattenHeader blks = + case B.toList blks of + (Header _ _ inlns:_) -> B.para (B.fromList inlns) + _ -> mempty + +headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks +headlineToHeaderWithContents hdln@Headline {..} = do + header <- headlineToHeader hdln + childrenBlocks <- mconcat <$> mapM headlineToBlocks headlineChildren + return $ header <> headlineContents <> childrenBlocks + +headlineToHeader :: Monad m => Headline -> OrgParser m Blocks +headlineToHeader Headline {..} = do + exportTodoKeyword <- getExportSetting exportWithTodoKeywords + exportTags <- getExportSetting exportWithTags + let todoText = if exportTodoKeyword + then case headlineTodoMarker of + Just kw -> todoKeywordToInlines kw <> B.space + Nothing -> mempty + else mempty + let text = todoText <> headlineText <> + if exportTags + then tagsToInlines headlineTags + else mempty + let propAttr = propertiesToAttr headlineProperties + attr <- registerHeader propAttr headlineText + return $ B.headerWith attr headlineLevel text + +todoKeyword :: Monad m => OrgParser m TodoMarker +todoKeyword = try $ do + taskStates <- activeTodoMarkers <$> getState + let kwParser tdm = try (tdm <$ string (todoMarkerName tdm) <* spaceChar) + choice (map kwParser taskStates) + +todoKeywordToInlines :: TodoMarker -> Inlines +todoKeywordToInlines tdm = + let todoText = todoMarkerName tdm + todoState = map toLower . show $ todoMarkerState tdm + classes = [todoState, todoText] + in B.spanWith (mempty, classes, mempty) (B.str todoText) + +propertiesToAttr :: Properties -> Attr +propertiesToAttr properties = + let + toStringPair = fromKey *** fromValue + customIdKey = toPropertyKey "custom_id" + classKey = toPropertyKey "class" + unnumberedKey = toPropertyKey "unnumbered" + specialProperties = [customIdKey, classKey, unnumberedKey] + id' = maybe mempty fromValue . lookup customIdKey $ properties + cls = maybe mempty fromValue . lookup classKey $ properties + kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) + $ properties + isUnnumbered = + maybe False isNonNil . lookup unnumberedKey $ properties + in + (id', words cls ++ ["unnumbered" | isUnnumbered], kvs') + +tagsToInlines :: [Tag] -> Inlines +tagsToInlines [] = mempty +tagsToInlines tags = + (B.space <>) . mconcat . intersperse (B.str "\160") . map tagToInline $ tags + where + tagToInline :: Tag -> Inlines + tagToInline t = tagSpan t . B.smallcaps . B.str $ fromTag t + +-- | Wrap the given inline in a span, marking it as a tag. +tagSpan :: Tag -> Inlines -> Inlines +tagSpan t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) + + + + + +-- | Read a :PROPERTIES: drawer and return the key/value pairs contained +-- within. +propertiesDrawer :: Monad m => OrgParser m Properties +propertiesDrawer = try $ do + drawerType <- drawerStart + guard $ map toUpper drawerType == "PROPERTIES" + manyTill property (try endOfDrawer) + where + property :: Monad m => OrgParser m (PropertyKey, PropertyValue) + property = try $ (,) <$> key <*> value + + key :: Monad m => OrgParser m PropertyKey + key = fmap toPropertyKey . try $ + skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') + + value :: Monad m => OrgParser m PropertyValue + value = fmap toPropertyValue . try $ + skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) + + endOfDrawer :: Monad m => OrgParser m String + endOfDrawer = try $ + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index 1d6fdd7e1..11f0972d5 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.ExportSettings + Copyright : © 2016–2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -71,7 +71,7 @@ exportSetting = choice , ignoredSetting "pri" , ignoredSetting "prop" , ignoredSetting "stat" - , ignoredSetting "tags" + , booleanSetting "tags" (\val es -> es { exportWithTags = val }) , ignoredSetting "tasks" , ignoredSetting "tex" , ignoredSetting "timestamp" diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 64ffb8ef5..66273e05d 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Inlines + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -48,7 +48,7 @@ import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.TeXMath (DisplayType (..), readTeX, writePandoc) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Monad (guard, mplus, mzero, void, when) +import Control.Monad (guard, mplus, mzero, unless, void, when) import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) import Data.List (intersperse) @@ -63,7 +63,7 @@ import Prelude hiding (sequence) -- recordAnchorId :: PandocMonad m => String -> OrgParser m () recordAnchorId i = updateState $ \s -> - s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + s{ orgStateAnchorIds = i : orgStateAnchorIds s } pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m () pushToInlineCharStack c = updateState $ \s -> @@ -120,6 +120,7 @@ inline = , superscript , inlineLaTeX , exportSnippet + , macro , smart , symbol ] <* (guard =<< newlinesCountWithinLimits) @@ -183,7 +184,7 @@ cite = try $ berkeleyCite <|> do , orgRefCite , berkeleyTextualCite ] - return $ (flip B.cite (B.text raw)) <$> cs + return $ flip B.cite (B.text raw) <$> cs -- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) @@ -208,7 +209,7 @@ normalOrgRefCite = try $ do orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation) orgRefCiteList citeMode = try $ do key <- orgRefCiteKey - returnF $ Citation + returnF Citation { citationId = key , citationPrefix = mempty , citationSuffix = mempty @@ -231,11 +232,11 @@ berkeleyCite = try $ do return $ if parens then toCite - . maybe id (\p -> alterFirst (prependPrefix p)) prefix - . maybe id (\s -> alterLast (appendSuffix s)) suffix + . maybe id (alterFirst . prependPrefix) prefix + . maybe id (alterLast . appendSuffix) suffix $ citationList else maybe mempty (<> " ") prefix - <> (toListOfCites $ map toInTextMode citationList) + <> toListOfCites (map toInTextMode citationList) <> maybe mempty (", " <>) suffix where toCite :: [Citation] -> Inlines @@ -249,7 +250,7 @@ berkeleyCite = try $ do alterFirst, alterLast :: (a -> a) -> [a] -> [a] alterFirst _ [] = [] - alterFirst f (c:cs) = (f c):cs + alterFirst f (c:cs) = f c : cs alterLast f = reverse . alterFirst f . reverse prependPrefix, appendSuffix :: Inlines -> Citation -> Citation @@ -270,7 +271,7 @@ berkeleyCitationList = try $ do skipSpaces commonPrefix <- optionMaybe (try $ citationListPart <* char ';') citations <- citeList - commonSuffix <- optionMaybe (try $ citationListPart) + commonSuffix <- optionMaybe (try citationListPart) char ']' return (BerkeleyCitationList parens <$> sequence commonPrefix @@ -338,8 +339,15 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. orgRefCiteKey :: PandocMonad m => OrgParser m String -orgRefCiteKey = try . many1 . satisfy $ \c -> - isAlphaNum c || c `elem` ("-_:\\./"::String) +orgRefCiteKey = + let citeKeySpecialChars = "-_:\\./," :: String + isCiteKeySpecialChar c = c `elem` citeKeySpecialChars + isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c + endOfCitation = try $ do + many $ satisfy isCiteKeySpecialChar + satisfy $ not . isCiteKeyChar + in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation + -- | Supported citation types. Only a small subset of org-ref types is -- supported for now. TODO: rewrite this, use LaTeX reader as template. @@ -365,15 +373,16 @@ citation = try $ do return $ do x <- pref y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } + return Citation + { citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } where prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) @@ -395,7 +404,7 @@ inlineNote = try $ do ref <- many alphaNum char ':' note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - when (not $ null ref) $ + unless (null ref) $ addToNotesTable ("fn:" ++ ref, note) return $ B.note <$> note @@ -405,7 +414,7 @@ referencedNote = try $ do return $ do notes <- asksF orgStateNotes' case lookup ref notes of - Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Nothing -> return . B.str $ "[" ++ ref ++ "]" Just contents -> do st <- askF let contents' = runF contents st{ orgStateNotes' = [] } @@ -429,7 +438,7 @@ explicitOrImageLink = try $ do src <- srcF case cleanLinkString title of Just imgSrc | isImageFilename imgSrc -> - pure $ B.link src "" $ B.image imgSrc mempty mempty + pure . B.link src "" $ B.image imgSrc mempty mempty _ -> linkToInlinesF src =<< title' @@ -686,13 +695,13 @@ mathEnd c = try $ do return res -enclosedInlines :: PandocMonad m => OrgParser m a +enclosedInlines :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b -> OrgParser m (F Inlines) enclosedInlines start end = try $ trimInlinesF . mconcat <$> enclosed start end inline -enclosedRaw :: PandocMonad m => OrgParser m a +enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b -> OrgParser m String enclosedRaw start end = try $ @@ -771,7 +780,7 @@ notAfterForbiddenBorderChar = do -- | Read a sub- or superscript expression subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ - choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") + choice [ charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") , simpleSubOrSuperString ] >>= parseFromString (mconcat <$> many inline) @@ -809,7 +818,7 @@ inlineLaTeX = try $ do enableExtension Ext_raw_tex (readerExtensions def) } } texMathToPandoc :: String -> Maybe [Inline] - texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline + texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just @@ -839,26 +848,49 @@ exportSnippet = try $ do snippet <- manyTill anyChar (try $ string "@@") returnF $ B.rawInline format snippet +macro :: PandocMonad m => OrgParser m (F Inlines) +macro = try $ do + recursionDepth <- orgStateMacroDepth <$> getState + guard $ recursionDepth < 15 + string "{{{" + name <- many alphaNum + args <- ([] <$ string "}}}") + <|> char '(' *> argument `sepBy` char ',' <* eoa + expander <- lookupMacro name <$> getState + case expander of + Nothing -> mzero + Just fn -> do + updateState $ \s -> s { orgStateMacroDepth = recursionDepth + 1 } + res <- parseFromString (mconcat <$> many inline) $ fn args + updateState $ \s -> s { orgStateMacroDepth = recursionDepth } + return res + where + argument = many $ notFollowedBy eoa *> noneOf "," + eoa = string ")}}}" + smart :: PandocMonad m => OrgParser m (F Inlines) -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) +smart = choice [doubleQuoted, singleQuoted, orgApostrophe, orgDash, orgEllipses] where orgDash = do - guard =<< getExportSetting exportSpecialStrings - dash <* updatePositions '-' + guardOrSmartEnabled =<< getExportSetting exportSpecialStrings + pure <$> dash <* updatePositions '-' orgEllipses = do - guard =<< getExportSetting exportSpecialStrings - ellipses <* updatePositions '.' - orgApostrophe = - (char '\'' <|> char '\8217') <* updateLastPreCharPos - <* updateLastForbiddenCharPos - *> return (B.str "\x2019") + guardOrSmartEnabled =<< getExportSetting exportSpecialStrings + pure <$> ellipses <* updatePositions '.' + orgApostrophe = do + guardEnabled Ext_smart + (char '\'' <|> char '\8217') <* updateLastPreCharPos + <* updateLastForbiddenCharPos + returnF (B.str "\x2019") + +guardOrSmartEnabled :: PandocMonad m => Bool -> OrgParser m () +guardOrSmartEnabled b = do + smartExtension <- extensionEnabled Ext_smart <$> getOption readerExtensions + guard (b || smartExtension) singleQuoted :: PandocMonad m => OrgParser m (F Inlines) singleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes + guardOrSmartEnabled =<< getExportSetting exportSmartQuotes singleQuoteStart updatePositions '\'' withQuoteContext InSingleQuote $ @@ -870,10 +902,13 @@ singleQuoted = try $ do -- in the same paragraph. doubleQuoted :: PandocMonad m => OrgParser m (F Inlines) doubleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes + guardOrSmartEnabled =<< getExportSetting exportSmartQuotes doubleQuoteStart updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) + let doubleQuotedContent = withQuoteContext InDoubleQuote $ do + doubleQuoteEnd + updateLastForbiddenCharPos + return . fmap B.doubleQuoted . trimInlinesF $ contents + let leftQuoteAndContent = return $ pure (B.str "\8220") <> contents + doubleQuotedContent <|> leftQuoteAndContent diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 7938fc6c6..d22902eae 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -44,7 +44,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition -import Control.Monad (mzero, void) +import Control.Monad (mzero, void, when) import Data.Char (toLower) import Data.List (intersperse) import qualified Data.Map as M @@ -75,14 +75,16 @@ declarationLine :: PandocMonad m => OrgParser m () declarationLine = try $ do key <- map toLower <$> metaKey (key', value) <- metaValue key - updateState $ \st -> st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } + let addMetaValue st = + st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } + when (key' /= "results") $ updateState addMetaValue metaKey :: Monad m => OrgParser m String metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces -metaValue :: PandocMonad m => String -> OrgParser m (String, (F MetaValue)) +metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue) metaValue key = let inclKey = "header-includes" in case key of @@ -109,7 +111,7 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) metaInlinesCommaSeparated = do - itemStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',') + itemStrs <- many1 (noneOf ",\n") `sepBy1` char ',' newline items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs let toMetaInlines = MetaInlines . B.toList @@ -151,6 +153,7 @@ optionLine = try $ do "todo" -> todoSequence >>= updateState . registerTodoSequence "seq_todo" -> todoSequence >>= updateState . registerTodoSequence "typ_todo" -> todoSequence >>= updateState . registerTodoSequence + "macro" -> macroDefinition >>= updateState . registerMacro _ -> mzero addLinkFormat :: Monad m => String @@ -160,7 +163,7 @@ addLinkFormat key formatter = updateState $ \s -> let fs = orgStateLinkFormatters s in s{ orgStateLinkFormatters = M.insert key formatter fs } -parseLinkFormat :: Monad m => OrgParser m ((String, String -> String)) +parseLinkFormat :: Monad m => OrgParser m (String, String -> String) parseLinkFormat = try $ do linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces linkSubst <- parseFormat @@ -169,8 +172,7 @@ parseLinkFormat = try $ do -- | An ad-hoc, single-argument-only implementation of a printf-style format -- parser. parseFormat :: Monad m => OrgParser m (String -> String) -parseFormat = try $ do - replacePlain <|> replaceUrl <|> justAppend +parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend where -- inefficient, but who cares replacePlain = try $ (\x -> concat . flip intersperse x) @@ -218,3 +220,27 @@ todoSequence = try $ do let todoMarkers = map (TodoMarker Todo) todo doneMarkers = map (TodoMarker Done) done in todoMarkers ++ doneMarkers + +macroDefinition :: Monad m => OrgParser m (String, [String] -> String) +macroDefinition = try $ do + macroName <- many1 nonspaceChar <* skipSpaces + firstPart <- expansionPart + (elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart) + let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder + return (macroName, expander) + where + placeholder :: Monad m => OrgParser m Int + placeholder = try . fmap read $ char '$' *> many1 digit + + expansionPart :: Monad m => OrgParser m String + expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r") + + alternate :: [a] -> [a] -> [a] + alternate [] ys = ys + alternate xs [] = xs + alternate (x:xs) (y:ys) = x : y : alternate xs ys + + reorder :: [Int] -> [String] -> [String] + reorder perm xs = + let element n = take 1 $ drop (n - 1) xs + in concatMap element perm diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index bdd1dc951..92f868516 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -1,8 +1,7 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,8 +19,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.ParserState + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -39,6 +38,9 @@ module Text.Pandoc.Readers.Org.ParserState , TodoState (..) , activeTodoMarkers , registerTodoSequence + , MacroExpander + , lookupMacro + , registerMacro , F , askF , asksF @@ -58,14 +60,14 @@ import qualified Data.Set as Set import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Definition (Meta (..), nullMeta) -import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Logging -import Text.Pandoc.Parsing (HasHeaderMap (..), HasIdentifierList (..), - HasLogMessages (..), - HasLastStrPosition (..), HasQuoteContext (..), +import Text.Pandoc.Options (ReaderOptions (..)) +import Text.Pandoc.Parsing (Future, HasHeaderMap (..), HasIdentifierList (..), + HasIncludeFiles (..), HasLastStrPosition (..), + HasLogMessages (..), HasQuoteContext (..), HasReaderOptions (..), ParserContext (..), - QuoteContext (..), SourcePos, Future, - askF, asksF, returnF, runF, trimInlinesF) + QuoteContext (..), SourcePos, askF, asksF, returnF, + runF, trimInlinesF) -- | This is used to delay evaluation until all relevant information has been -- parsed and made available in the parser state. @@ -78,6 +80,8 @@ type OrgNoteTable = [OrgNoteRecord] -- | Map of functions for link transformations. The map key is refers to the -- link-type, the corresponding function transforms the given link string. type OrgLinkFormatters = M.Map String (String -> String) +-- | Macro expander function +type MacroExpander = [String] -> String -- | The states in which a todo item can be data TodoState = Todo | Done @@ -101,10 +105,13 @@ data OrgParserState = OrgParserState , orgStateExportSettings :: ExportSettings , orgStateHeaderMap :: M.Map Inlines String , orgStateIdentifiers :: Set.Set String + , orgStateIncludeFiles :: [String] , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters + , orgStateMacros :: M.Map String MacroExpander + , orgStateMacroDepth :: Int , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable , orgStateOptions :: ReaderOptions @@ -141,6 +148,12 @@ instance HasLogMessages OrgParserState where addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st } getLogMessages st = reverse $ orgLogMessages st +instance HasIncludeFiles OrgParserState where + getIncludeFiles = orgStateIncludeFiles + addIncludeFile f st = st { orgStateIncludeFiles = f : orgStateIncludeFiles st } + dropLatestIncludeFile st = + st { orgStateIncludeFiles = drop 1 $ orgStateIncludeFiles st } + instance Default OrgParserState where def = defaultOrgParserState @@ -152,10 +165,13 @@ defaultOrgParserState = OrgParserState , orgStateExportSettings = def , orgStateHeaderMap = M.empty , orgStateIdentifiers = Set.empty + , orgStateIncludeFiles = [] , orgStateLastForbiddenCharPos = Nothing , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing , orgStateLinkFormatters = M.empty + , orgStateMacros = M.empty + , orgStateMacroDepth = 0 , orgStateMeta = return nullMeta , orgStateNotes' = [] , orgStateOptions = def @@ -185,6 +201,15 @@ activeTodoSequences st = activeTodoMarkers :: OrgParserState -> TodoSequence activeTodoMarkers = concat . activeTodoSequences +lookupMacro :: String -> OrgParserState -> Maybe MacroExpander +lookupMacro macroName = M.lookup macroName . orgStateMacros + +registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState +registerMacro (name, expander) st = + let curMacros = orgStateMacros st + in st{ orgStateMacros = M.insert name expander curMacros } + + -- -- Export Settings @@ -213,6 +238,7 @@ data ExportSettings = ExportSettings , exportWithAuthor :: Bool -- ^ Include author in final meta-data , exportWithCreator :: Bool -- ^ Include creator in final meta-data , exportWithEmail :: Bool -- ^ Include email in final meta-data + , exportWithTags :: Bool -- ^ Keep tags as part of headlines , exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers } @@ -225,11 +251,12 @@ defaultExportSettings = ExportSettings , exportDrawers = Left ["LOGBOOK"] , exportEmphasizedText = True , exportHeadlineLevels = 3 - , exportSmartQuotes = True + , exportSmartQuotes = False , exportSpecialStrings = True , exportSubSuperscripts = True , exportWithAuthor = True , exportWithCreator = True , exportWithEmail = True + , exportWithTags = True , exportWithTodoKeywords = True } diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 464ef9ca6..3273c92e4 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Parsing + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -31,6 +31,8 @@ functions are adapted to Org-mode specific functionality. module Text.Pandoc.Readers.Org.Parsing ( OrgParser , anyLine + , anyLineNewline + , indentWith , blanklines , newline , parseFromString @@ -70,6 +72,8 @@ module Text.Pandoc.Readers.Org.Parsing , dash , ellipses , citeKey + , gridTableWith + , insertIncludedFileF -- * Re-exports from Text.Pandoc.Parsec , runParser , runParserT diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index f89ce6732..952082ec1 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Shared + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -56,7 +56,7 @@ cleanLinkString s = '.':'/':_ -> Just s -- relative path '.':'.':'/':_ -> Just s -- relative path -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s' + 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s' _ | isUrl s -> Just s -- URL _ -> Nothing where diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 7564998ff..fb5f6f2d4 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -32,10 +32,11 @@ Conversion from reStructuredText to 'Pandoc' document. -} module Text.Pandoc.Readers.RST ( readRST ) where import Control.Monad (guard, liftM, mzero, when) +import Control.Monad.Identity (Identity(..)) import Control.Monad.Except (throwError) import Data.Char (isHexDigit, isSpace, toLower, toUpper) -import Data.List (deleteFirstsBy, findIndex, intercalate, isInfixOf, isSuffixOf, - nub, sort, transpose, union) +import Data.List (deleteFirstsBy, findIndex, intercalate, isInfixOf, + isSuffixOf, nub, sort, transpose, union) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust) import Data.Monoid ((<>)) @@ -52,20 +53,22 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Shared import Text.Printf (printf) +import Data.Text (Text) +import qualified Data.Text as T -- TODO: -- [ ] .. parsed-literal -- [ ] :widths: attribute in .. table -- [ ] .. csv-table --- [ ] .. list-table -- | Parse reStructuredText string and return Pandoc document. readRST :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readRST opts s = do - parsed <- (readWithM parseRST) def{ stateOptions = opts } (s ++ "\n\n") + parsed <- (readWithM parseRST) def{ stateOptions = opts } + (T.unpack s ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -131,7 +134,10 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" $ M.adjust toPlain "date" $ M.adjust toPlain "title" - $ M.mapKeys (\k -> if k == "authors" then "author" else k) + $ M.mapKeys (\k -> + if k == "authors" + then "author" + else k) $ metamap toPlain (MetaBlocks [Para xs]) = MetaInlines xs toPlain x = x @@ -193,7 +199,7 @@ parseRST = do parseCitation :: PandocMonad m => (String, String) -> RSTParser m (Inlines, [Blocks]) parseCitation (ref, raw) = do - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw return $ (B.spanWith (ref, ["citation-label"], []) (B.str ref), [contents]) @@ -243,7 +249,7 @@ fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent term <- parseInlineFromString name - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw optional blanklines return (term, [contents]) @@ -442,7 +448,7 @@ blockQuote :: PandocMonad m => RSTParser m Blocks blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ raw ++ "\n\n" return $ B.blockQuote contents {- @@ -530,7 +536,7 @@ definitionListItem = try $ do term <- trimInlines . mconcat <$> many1Till inline endline raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n" + contents <- parseFromString' parseBlocks $ raw ++ "\n" return (term, [contents]) definitionList :: PandocMonad m => RSTParser m Blocks @@ -558,26 +564,16 @@ listLine :: Monad m => Int -> RSTParser m [Char] listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength - line <- anyLine - return $ line ++ "\n" - --- indent by specified number of spaces (or equiv. tabs) -indentWith :: Monad m => Int -> RSTParser m [Char] -indentWith num = do - tabStop <- getOption readerTabStop - if (num < tabStop) - then count num (char ' ') - else choice [ try (count num (char ' ')), - (try (char '\t' >> count (num - tabStop) (char ' '))) ] + anyLineNewline -- parse raw text for one list item, excluding start marker and continuations rawListItem :: Monad m => RSTParser m Int -> RSTParser m (Int, [Char]) rawListItem start = try $ do markerLength <- start - firstLine <- anyLine + firstLine <- anyLineNewline restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + return (markerLength, firstLine ++ concat restLines) -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. @@ -602,13 +598,17 @@ listItem start = try $ do let oldContext = stateParserContext state setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString parseBlocks $ concat (first:rest) ++ "\n" + parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n" updateState (\st -> st {stateParserContext = oldContext}) return $ case B.toList parsed of - [Para xs] -> B.singleton $ Plain xs - [Para xs, BulletList ys] -> B.fromList [Plain xs, BulletList ys] - [Para xs, OrderedList s ys] -> B.fromList [Plain xs, OrderedList s ys] - [Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys] + [Para xs] -> + B.singleton $ Plain xs + [Para xs, BulletList ys] -> + B.fromList [Plain xs, BulletList ys] + [Para xs, OrderedList s ys] -> + B.fromList [Plain xs, OrderedList s ys] + [Para xs, DefinitionList ys] -> + B.fromList [Plain xs, DefinitionList ys] _ -> parsed orderedList :: PandocMonad m => RSTParser m Blocks @@ -685,22 +685,23 @@ directive' = do (lengthToDim . filter (not . isSpace)) case label of "table" -> tableDirective top fields body' + "list-table" -> listTableDirective top fields body' "line-block" -> lineBlockDirective body' "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields - "container" -> parseFromString parseBlocks body' + "container" -> parseFromString' parseBlocks body' "replace" -> B.para <$> -- consumed by substKey parseInlineFromString (trim top) "unicode" -> B.para <$> -- consumed by substKey parseInlineFromString (trim $ unicodeTransform top) - "compound" -> parseFromString parseBlocks body' - "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body' - "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body' - "highlights" -> B.blockQuote <$> parseFromString parseBlocks body' + "compound" -> parseFromString' parseBlocks body' + "pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body' + "epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body' + "highlights" -> B.blockQuote <$> parseFromString' parseBlocks body' "rubric" -> B.para . B.strong <$> parseInlineFromString top _ | label `elem` ["attention","caution","danger","error","hint", "important","note","tip","warning","admonition"] -> - do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' + do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' let lab = case label of "admonition" -> mempty (l:ls) -> B.divWith ("",["admonition-title"],[]) @@ -713,11 +714,11 @@ directive' = do (trim top ++ if null subtit then "" else (": " ++ subtit)) - bod <- parseFromString parseBlocks body' + bod <- parseFromString' parseBlocks body' return $ B.divWith ("",["sidebar"],[]) $ tit <> bod "topic" -> do tit <- B.para . B.strong <$> parseInlineFromString top - bod <- parseFromString parseBlocks body' + bod <- parseFromString' parseBlocks body' return $ B.divWith ("",["topic"],[]) $ tit <> bod "default-role" -> mempty <$ updateState (\s -> s { stateRstDefaultRole = @@ -733,9 +734,10 @@ directive' = do "math" -> return $ B.para $ mconcat $ map B.displayMath $ toChunks $ top ++ "\n\n" ++ body "figure" -> do - (caption, legend) <- parseFromString extractCaption body' + (caption, legend) <- parseFromString' extractCaption body' let src = escapeURI $ trim top - return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" caption) <> legend + return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" + caption) <> legend "image" -> do let src = escapeURI $ trim top let alt = B.str $ maybe "image" trim $ lookup "alt" fields @@ -746,38 +748,74 @@ directive' = do $ B.imageWith attr src "" alt Nothing -> B.imageWith attr src "" alt "class" -> do - let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields) + let attrs = ("", (splitBy isSpace $ trim top), + map (\(k,v) -> (k, trimr v)) fields) -- directive content or the first immediately following element children <- case body of "" -> block - _ -> parseFromString parseBlocks body' + _ -> parseFromString' parseBlocks body' return $ B.divWith attrs children other -> do pos <- getPosition logMessage $ SkippedContent (".. " ++ other) pos - bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' + bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' return $ B.divWith ("",[other],[]) bod tableDirective :: PandocMonad m => String -> [(String, String)] -> String -> RSTParser m Blocks tableDirective top _fields body = do - bs <- parseFromString parseBlocks body + bs <- parseFromString' parseBlocks body case B.toList bs of [Table _ aligns' widths' header' rows'] -> do - title <- parseFromString (trimInlines . mconcat <$> many inline) top + title <- parseFromString' (trimInlines . mconcat <$> many inline) top -- TODO widths -- align is not applicable since we can't represent whole table align return $ B.singleton $ Table (B.toList title) aligns' widths' header' rows' _ -> return mempty + +-- TODO: :stub-columns:. +-- Only the first row becomes the header even if header-rows: > 1, +-- since Pandoc doesn't support a table with multiple header rows. +-- We don't need to parse :align: as it represents the whole table align. +listTableDirective :: PandocMonad m + => String -> [(String, String)] -> String + -> RSTParser m Blocks +listTableDirective top fields body = do + bs <- parseFromString' parseBlocks body + title <- parseFromString' (trimInlines . mconcat <$> many inline) top + let rows = takeRows $ B.toList bs + headerRowsNum = fromMaybe (0 :: Int) $ + lookup "header-rows" fields >>= safeRead + (headerRow,bodyRows,numOfCols) = case rows of + x:xs -> if headerRowsNum > 0 + then (x, xs, length x) + else ([], rows, length x) + _ -> ([],[],0) + widths = case trim <$> lookup "widths" fields of + Just "auto" -> replicate numOfCols 0 + Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $ + splitBy (`elem` (" ," :: String)) specs + _ -> replicate numOfCols 0 + return $ B.table title + (zip (replicate numOfCols AlignDefault) widths) + headerRow + bodyRows + where takeRows [BulletList rows] = map takeCells rows + takeRows _ = [] + takeCells [BulletList cells] = map B.fromList cells + takeCells _ = [] + normWidths ws = map (/ max 1 (sum ws)) ws + -- TODO: -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix -addNewRole :: PandocMonad m => String -> [(String, String)] -> RSTParser m Blocks +addNewRole :: PandocMonad m + => String -> [(String, String)] -> RSTParser m Blocks addNewRole roleString fields = do pos <- getPosition - (role, parentRole) <- parseFromString inheritedRole roleString + (role, parentRole) <- parseFromString' inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState let getBaseRole (r, f, a) roles = case M.lookup r roles of @@ -804,7 +842,8 @@ addNewRole roleString fields = do SkippedContent ":format: [because parent of role is not :raw:]" pos _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos when (parentRole == "raw" && countKeys "format" > 1) $ - logMessage $ SkippedContent ":format: [after first in definition of role]" + logMessage $ SkippedContent + ":format: [after first in definition of role]" pos when (parentRole == "code" && countKeys "language" > 1) $ logMessage $ SkippedContent @@ -819,7 +858,8 @@ addNewRole roleString fields = do where countKeys k = length . filter (== k) . map fst $ fields inheritedRole = - (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span") + (,) <$> roleName <*> ((char '(' *> roleName <* char ')') + <|> pure "span") -- Can contain character codes as decimal numbers or @@ -996,7 +1036,8 @@ substKey = try $ do [Para ils] -> return $ B.fromList ils _ -> mzero let key = toKey $ stripFirstAndLast ref - updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s } + updateState $ \s -> s{ stateSubstitutions = + M.insert key il $ stateSubstitutions s } anonymousKey :: Monad m => RSTParser m () anonymousKey = try $ do @@ -1005,7 +1046,8 @@ anonymousKey = try $ do pos <- getPosition let key = toKey $ "_" ++ printf "%09d" (sourceLine pos) --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ + stateKeys s } stripTicks :: String -> String stripTicks = reverse . stripTick . reverse . stripTick @@ -1020,7 +1062,8 @@ regularKey = try $ do src <- targetURI let key = toKey $ stripTicks ref --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ + stateKeys s } headerBlock :: PandocMonad m => RSTParser m [Char] headerBlock = do @@ -1087,7 +1130,7 @@ simpleTableRow indices = do let cols = map unlines . transpose $ firstLine : conLines ++ [replicate (length indices) "" | not (null conLines)] - mapM (parseFromString parseBlocks) cols + mapM (parseFromString' parseBlocks) cols simpleTableSplitLine :: [Int] -> String -> [String] simpleTableSplitLine indices line = @@ -1110,7 +1153,7 @@ simpleTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else simpleTableSplitLine indices rawContent - heads <- mapM (parseFromString (mconcat <$> many plain)) $ + heads <- mapM (parseFromString' (mconcat <$> many plain)) $ map trim rawHeads return (heads, aligns, indices) @@ -1119,8 +1162,12 @@ simpleTable :: PandocMonad m => Bool -- ^ Headerless table -> RSTParser m Blocks simpleTable headless = do - tbl <- tableWith (simpleTableHeader headless) simpleTableRow - sep simpleTableFooter + let wrapIdFst (a, b, c) = (Identity a, b, c) + wrapId = fmap Identity + tbl <- runIdentity <$> tableWith + (wrapIdFst <$> simpleTableHeader headless) + (wrapId <$> simpleTableRow) + sep simpleTableFooter -- Simple tables get 0s for relative column widths (i.e., use default) case B.toList tbl of [Table c a _w h l] -> return $ B.singleton $ @@ -1134,7 +1181,8 @@ simpleTable headless = do gridTable :: PandocMonad m => Bool -- ^ Headerless table -> RSTParser m Blocks -gridTable headerless = gridTableWith parseBlocks headerless +gridTable headerless = runIdentity <$> + gridTableWith (Identity <$> parseBlocks) headerless table :: PandocMonad m => RSTParser m Blocks table = gridTable False <|> simpleTable False <|> @@ -1161,7 +1209,7 @@ inline = choice [ note -- can start with whitespace, so try before ws , symbol ] <?> "inline" parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines -parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline) +parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline) hyphens :: Monad m => RSTParser m Inlines hyphens = do @@ -1220,7 +1268,8 @@ interpretedRole = try $ do (role, contents) <- roleBefore <|> roleAfter renderRole contents Nothing role nullAttr -renderRole :: PandocMonad m => String -> Maybe String -> String -> Attr -> RSTParser m Inlines +renderRole :: PandocMonad m + => String -> Maybe String -> String -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of "sup" -> return $ B.superscript $ B.str contents "superscript" -> return $ B.superscript $ B.str contents @@ -1353,7 +1402,8 @@ referenceLink = try $ do (k:_) -> return k ((src,tit), attr) <- lookupKey [] key -- if anonymous link, remove key so it won't be used again - when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } + when (isAnonKey key) $ updateState $ \s -> + s{ stateKeys = M.delete key keyTable } return $ B.linkWith attr src tit label' -- We keep a list of oldkeys so we can detect lookup loops. @@ -1423,7 +1473,7 @@ note = try $ do -- Note references inside other notes are allowed in reST, but -- not yet in this implementation. updateState $ \st -> st{ stateNotes = [] } - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw let newnotes = if (ref == "*" || ref == "#") -- auto-numbered -- delete the note so the next auto-numbered note -- doesn't get the same contents: diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index ecb609ae9..9e544c4ac 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -49,14 +49,17 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, macro, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) import Text.Pandoc.XML (fromEntities) +import Data.Text (Text) +import qualified Data.Text as T -- | Read twiki from an input string and return a Pandoc document. readTWiki :: PandocMonad m => ReaderOptions - -> String + -> Text -> m Pandoc readTWiki opts s = do - res <- readWithM parseTWiki def{ stateOptions = opts } (s ++ "\n\n") + res <- readWithM parseTWiki def{ stateOptions = opts } + (T.unpack s ++ "\n\n") case res of Left e -> throwError e Right d -> return d @@ -106,7 +109,7 @@ parseHtmlContentWithAttrs tag parser = do parsedContent <- try $ parseContent content return (attr, parsedContent) where - parseContent = parseFromString $ nested $ manyTill parser endOfContent + parseContent = parseFromString' $ nested $ manyTill parser endOfContent endOfContent = try $ skipMany blankline >> skipSpaces >> eof parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a] @@ -233,7 +236,7 @@ listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat filterSpaces = reverse . dropWhile (== ' ') . reverse listContinuation = notFollowedBy (string prefix >> marker) >> string " " >> lineContent - parseContent = parseFromString $ many1 $ nestedList <|> parseInline + parseContent = parseFromString' $ many1 $ nestedList <|> parseInline parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>= return . B.plain . mconcat nestedList = list prefix @@ -297,7 +300,7 @@ noautolink = do setState $ st{ stateAllowLinks = True } return $ mconcat blocks where - parseContent = parseFromString $ many $ block + parseContent = parseFromString' $ many $ block para :: PandocMonad m => TWParser m B.Blocks para = many1Till inline endOfParaElement >>= return . result . mconcat @@ -349,13 +352,13 @@ linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) where lastNewline = eof >> return mempty innerNewline = return B.space -between :: (Monoid c, PandocMonad m) +between :: (Monoid c, PandocMonad m, Show b) => TWParser m a -> TWParser m b -> (TWParser m b -> TWParser m c) -> TWParser m c between start end p = mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) -enclosed :: (Monoid b, PandocMonad m) +enclosed :: (Monoid b, PandocMonad m, Show a) => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b enclosed sep p = between sep (try $ sep <* endMarker) p where @@ -525,4 +528,4 @@ linkText = do return (url, "", content) where linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent - parseLinkContent = parseFromString $ many1 inline + parseLinkContent = parseFromString' $ many1 inline diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 047aa061c..1669e3e51 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -1,6 +1,6 @@ {- -Copyright (C) 2010-2015 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@' - and John MacFarlane +Copyright (C) 2010-2012 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@' + 2010-2017 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Textile - Copyright : Copyright (C) 2010-2015 Paul Rivier and John MacFarlane + Copyright : Copyright (C) 2010-2012 Paul Rivier + 2010-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Paul Rivier <paul*rivier#demotera*com> @@ -69,14 +70,17 @@ import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared (trim) +import Data.Text (Text) +import qualified Data.Text as T -- | Parse a Textile text and return a Pandoc document. readTextile :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readTextile opts s = do - parsed <- readWithM parseTextile def{ stateOptions = opts } (s ++ "\n\n") + parsed <- readWithM parseTextile def{ stateOptions = opts } + (T.unpack s ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -314,7 +318,7 @@ definitionListItem = try $ do optional whitespace >> newline s <- many1Till anyChar (try (string "=:" >> newline)) -- this ++ "\n\n" does not look very good - ds <- parseFromString parseBlocks (s ++ "\n\n") + ds <- parseFromString' parseBlocks (s ++ "\n\n") return [ds] -- raw content @@ -366,7 +370,7 @@ tableCell = try $ do notFollowedBy blankline raw <- trim <$> many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline)) - content <- mconcat <$> parseFromString (many inline) raw + content <- mconcat <$> parseFromString' (many inline) raw return ((isHeader, alignment), B.plain content) -- | A table row is made of many table cells @@ -388,7 +392,7 @@ table = try $ do _ <- attributes char '.' rawcapt <- trim <$> anyLine - parseFromString (mconcat <$> many inline) rawcapt + parseFromString' (mconcat <$> many inline) rawcapt rawrows <- many1 $ (skipMany ignorableRow) >> tableRow skipMany ignorableRow blanklines @@ -506,7 +510,7 @@ note = try $ do notes <- stateNotes <$> getState case lookup ref notes of Nothing -> fail "note not found" - Just raw -> B.note <$> parseFromString parseBlocks raw + Just raw -> B.note <$> parseFromString' parseBlocks raw -- | Special chars markupChars :: [Char] @@ -585,8 +589,9 @@ link = try $ do char ':' let stop = if bracketed then char ']' - else lookAhead $ space <|> - try (oneOf "!.,;:" *> (space <|> newline)) + else lookAhead $ space <|> eof' <|> + try (oneOf "!.,;:" *> + (space <|> newline <|> eof')) url <- many1Till nonspaceChar stop let name' = if B.toList name == [Str "$"] then B.str url else name return $ if attr == nullAttr @@ -690,7 +695,7 @@ langAttr = do return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) -- | Parses material surrounded by a parser. -surrounded :: PandocMonad m +surrounded :: (PandocMonad m, Show t) => ParserT [Char] st m t -- ^ surrounding parser -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly) -> ParserT [Char] st m [a] @@ -727,3 +732,5 @@ groupedInlineMarkup = try $ do singleton :: a -> [a] singleton x = [x] +eof' :: Monad m => ParserT [Char] s m Char +eof' = '\n' <$ eof diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 33f785109..260bb7fff 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -42,11 +42,11 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (macro, space, spaces, uri) import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI) ---import Network.URI (isURI) -- Not sure whether to use this function import Control.Monad (guard, void, when) import Control.Monad.Reader (Reader, asks, runReader) import Data.Default - +import Data.Text (Text) +import qualified Data.Text as T import Control.Monad.Except (catchError, throwError) import Data.Time.Format (formatTime) import Text.Pandoc.Class (PandocMonad) @@ -91,11 +91,11 @@ getT2TMeta = do -- | Read Txt2Tags from an input string returning a Pandoc document readTxt2Tags :: PandocMonad m => ReaderOptions - -> String + -> Text -> m Pandoc readTxt2Tags opts s = do meta <- getT2TMeta - let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") + let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (T.unpack s ++ "\n\n") case parsed of Right result -> return $ result Left e -> throwError e @@ -213,7 +213,7 @@ quote :: T2T Blocks quote = try $ do lookAhead tab rawQuote <- many1 (tab *> optional spaces *> anyLine) - contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n") + contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n") return $ B.blockQuote contents commentLine :: T2T Inlines @@ -265,7 +265,7 @@ listItem start end = try $ do firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- concat <$> many (listContinuation markerLength) - parseFromString end $ firstLine ++ blank ++ rest + parseFromString' end $ firstLine ++ blank ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. @@ -277,12 +277,6 @@ listContinuation markerLength = try $ <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline -anyLineNewline :: T2T String -anyLineNewline = (++ "\n") <$> anyLine - -indentWith :: Int -> T2T String -indentWith n = count n space - -- Table table :: T2T Blocks @@ -446,7 +440,7 @@ inlineMarkup p f c special = try $ do Just middle -> do lastChar <- anyChar end <- many1 (char c) - let parser inp = parseFromString (mconcat <$> many p) inp + let parser inp = parseFromString' (mconcat <$> many p) inp let start' = case drop 2 start of "" -> mempty xs -> special xs diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 53cb4a4b5..55df147b6 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.SelfContained - Copyright : Copyright (C) 2011-2016 John MacFarlane + Copyright : Copyright (C) 2011-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -35,13 +35,14 @@ import Codec.Compression.GZip as Gzip import Control.Applicative ((<|>)) import Control.Monad.Except (throwError) import Control.Monad.Trans (lift) +import Data.Monoid ((<>)) import Data.ByteString (ByteString) import Data.ByteString.Base64 import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy as L import Data.Char (isAlphaNum, isAscii, toLower) import Data.List (isPrefixOf) -import Network.URI (URI (..), escapeURIString, isURI, parseURI) +import Network.URI (URI (..), escapeURIString, parseURI) import System.FilePath (takeDirectory, takeExtension, (</>)) import Text.HTML.TagSoup import Text.Pandoc.Class (PandocMonad (..), fetchItem, report) @@ -49,7 +50,7 @@ import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Options (WriterOptions (..)) -import Text.Pandoc.Shared (renderTags', trim) +import Text.Pandoc.Shared (isURI, renderTags', trim) import Text.Pandoc.UTF8 (toString) import Text.Parsec (ParsecT, runParserT) import qualified Text.Parsec as P @@ -95,9 +96,9 @@ convertTags sourceURL (t@(TagOpen "script" as):TagClose "script":ts) = (("src",dataUri) : [(x,y) | (x,y) <- as, x /= "src"]) : TagClose "script" : rest Right (mime, bs) - | (mime == "text/javascript" || - mime == "application/javascript" || - mime == "application/x-javascript") && + | ("text/javascript" `isPrefixOf` mime || + "application/javascript" `isPrefixOf` mime || + "application/x-javascript" `isPrefixOf` mime) && not ("</script" `B.isInfixOf` bs) -> return $ TagOpen "script" [("type", typeAttr)|not (null typeAttr)] @@ -121,11 +122,12 @@ convertTags sourceURL (t@(TagOpen "link" as):ts) = (("href",dataUri) : [(x,y) | (x,y) <- as, x /= "href"]) : rest Right (mime, bs) - | mime == "text/css" && not ("</" `B.isInfixOf` bs) -> do + | "text/css" `isPrefixOf` mime + && not ("</" `B.isInfixOf` bs) -> do rest <- convertTags sourceURL $ dropWhile (==TagClose "link") ts return $ - TagOpen "style" [("type", "text/css")] + TagOpen "style" [("type", mime)] : TagText (toString bs) : TagClose "style" : rest @@ -149,7 +151,21 @@ cssURLs sourceURL d orig = do parseCSSUrls :: PandocMonad m => Maybe String -> FilePath -> ParsecT ByteString () m ByteString parseCSSUrls sourceURL d = B.concat <$> P.many - (pCSSWhite <|> pCSSComment <|> pCSSUrl sourceURL d <|> pCSSOther) + (pCSSWhite <|> pCSSComment <|> pCSSImport sourceURL d <|> + pCSSUrl sourceURL d <|> pCSSOther) + +pCSSImport :: PandocMonad m => Maybe String -> FilePath + -> ParsecT ByteString () m ByteString +pCSSImport sourceURL d = P.try $ do + P.string "@import" + P.spaces + res <- (pQuoted <|> pUrl) >>= handleCSSUrl sourceURL d + P.spaces + P.char ';' + P.spaces + case res of + Left b -> return $ B.pack "@import " <> b + Right (_, b) -> return b -- Note: some whitespace in CSS is significant, so we can't collapse it! pCSSWhite :: PandocMonad m => ParsecT ByteString () m ByteString @@ -170,6 +186,24 @@ pCSSOther = do pCSSUrl :: PandocMonad m => Maybe String -> FilePath -> ParsecT ByteString () m ByteString pCSSUrl sourceURL d = P.try $ do + res <- pUrl >>= handleCSSUrl sourceURL d + case res of + Left b -> return b + Right (mt,b) -> do + let enc = makeDataURI (mt, b) + return (B.pack $ "url(" ++ enc ++ ")") + +pQuoted :: PandocMonad m + => ParsecT ByteString () m (String, ByteString) +pQuoted = P.try $ do + quote <- P.oneOf "\"'" + url <- P.manyTill P.anyChar (P.char quote) + let fallback = B.pack ([quote] ++ trim url ++ [quote]) + return (url, fallback) + +pUrl :: PandocMonad m + => ParsecT ByteString () m (String, ByteString) +pUrl = P.try $ do P.string "url(" P.spaces quote <- P.option Nothing (Just <$> P.oneOf "\"'") @@ -178,12 +212,29 @@ pCSSUrl sourceURL d = P.try $ do P.char ')' let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++ maybe "" (:[]) quote ++ ")") - case trim url of - '#':_ -> return fallback - 'd':'a':'t':'a':':':_ -> return fallback + return (url, fallback) + +handleCSSUrl :: PandocMonad m + => Maybe String -> FilePath -> (String, ByteString) + -> ParsecT ByteString () m + (Either ByteString (MimeType, ByteString)) +handleCSSUrl sourceURL d (url, fallback) = do + -- pipes are used in URLs provided by Google Code fonts + -- but parseURI doesn't like them, so we escape them: + case escapeURIString (/='|') (trim url) of + '#':_ -> return $ Left fallback + 'd':'a':'t':'a':':':_ -> return $ Left fallback u -> do let url' = if isURI u then u else d </> u - enc <- lift $ getDataURI sourceURL "" url' - return (B.pack $ "url(" ++ enc ++ ")") + res <- lift $ getData sourceURL "" url' + case res of + Left uri -> return $ Left (B.pack $ "url(" ++ uri ++ ")") + Right (mt, raw) -> do + -- note that the downloaded CSS may + -- itself contain url(...). + b <- if "text/css" `isPrefixOf` mt + then cssURLs sourceURL d raw + else return raw + return $ Right (mt, b) getDataURI :: PandocMonad m => Maybe String -> MimeType -> String -> m String getDataURI sourceURL mimetype src = do @@ -215,7 +266,7 @@ getData sourceURL mimetype src = do uriQuery = "", uriFragment = "" } _ -> Nothing - result <- if mime == "text/css" + result <- if "text/css" `isPrefixOf` mime then cssURLs cssSourceURL (takeDirectory src) raw' else return raw' return $ Right (mime, result) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 8256d14c0..745e809d0 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -2,7 +2,7 @@ FlexibleContexts, ScopedTypeVariables, PatternGuards, ViewPatterns #-} {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -70,6 +70,7 @@ module Text.Pandoc.Shared ( isTightList, addMetaField, makeMeta, + eastAsianLineBreakFilter, -- * TagSoup HTML handling renderTags', -- * File handling @@ -81,6 +82,9 @@ module Text.Pandoc.Shared ( openURL, collapseFilePath, filteredFilesFromArchive, + -- * URI handling + schemes, + isURI, -- * Error handling mapLeft, -- * for squashing blocks @@ -104,7 +108,7 @@ import Data.List ( find, stripPrefix, intercalate ) import Data.Maybe (mapMaybe) import Data.Version ( showVersion ) import qualified Data.Map as M -import Network.URI ( escapeURIString, unEscapeString ) +import Network.URI ( URI(uriScheme), escapeURIString, unEscapeString, parseURI ) import qualified Data.Set as Set import System.Directory import System.FilePath (splitDirectories, isPathSeparator) @@ -117,6 +121,7 @@ import qualified Control.Monad.State as S import qualified Control.Exception as E import Control.Monad (msum, unless, MonadPlus(..)) import Text.Pandoc.Pretty (charWidth) +import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.Compat.Time import Data.Time.Clock.POSIX import System.IO.Error @@ -128,7 +133,7 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 import Data.ByteString.Base64 (decodeLenient) import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) -import qualified Data.Text as T (toUpper, pack, unpack) +import qualified Data.Text as T import Data.ByteString.Lazy (toChunks, fromChunks) import qualified Data.ByteString.Lazy as BL import Paths_pandoc (version) @@ -140,9 +145,9 @@ import Text.Pandoc.Data (dataFiles) #else import Paths_pandoc (getDataFileName) #endif -#ifdef HTTP_CLIENT import Network.HTTP.Client (httpLbs, responseBody, responseHeaders, - Request(port,host,requestHeaders)) + Request(port,host,requestHeaders), + HttpException) import Network.HTTP.Client (parseRequest) import Network.HTTP.Client (newManager) import Network.HTTP.Client.Internal (addProxy) @@ -150,12 +155,6 @@ import Network.HTTP.Client.TLS (tlsManagerSettings) import System.Environment (getEnv) import Network.HTTP.Types.Header ( hContentType, hUserAgent) import Network (withSocketsDo) -#else -import Network.URI (parseURI) -import Network.HTTP (findHeader, rspBody, - RequestMethod(..), HeaderName(..), mkRequest) -import Network.Browser (browse, setAllowRedirects, setOutHandler, request) -#endif -- | Version number of pandoc library. pandocVersion :: String @@ -280,26 +279,20 @@ escapeURI = escapeURIString (not . needsEscaping) where needsEscaping c = isSpace c || c `elem` ['<','>','|','"','{','}','[',']','^', '`'] - -- | Convert tabs to spaces and filter out DOS line endings. -- Tabs will be preserved if tab stop is set to 0. tabFilter :: Int -- ^ Tab stop - -> String -- ^ Input - -> String -tabFilter tabStop = - let go _ [] = "" - go _ ('\n':xs) = '\n' : go tabStop xs - go _ ('\r':'\n':xs) = '\n' : go tabStop xs - go _ ('\r':xs) = '\n' : go tabStop xs - go spsToNextStop ('\t':xs) = - if tabStop == 0 - then '\t' : go tabStop xs - else replicate spsToNextStop ' ' ++ go tabStop xs - go 1 (x:xs) = - x : go tabStop xs - go spsToNextStop (x:xs) = - x : go (spsToNextStop - 1) xs - in go tabStop + -> T.Text -- ^ Input + -> T.Text +tabFilter tabStop = T.filter (/= '\r') . T.unlines . + (if tabStop == 0 then id else map go) . T.lines + where go s = + let (s1, s2) = T.break (== '\t') s + in if T.null s2 + then s1 + else s1 <> T.replicate + (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ") + <> go (T.drop 1 s2) -- -- Date/time @@ -581,6 +574,16 @@ makeMeta title authors date = $ addMetaField "date" (B.fromList date) $ nullMeta +-- | Remove soft breaks between East Asian characters. +eastAsianLineBreakFilter :: Pandoc -> Pandoc +eastAsianLineBreakFilter = bottomUp go + where go (x:SoftBreak:y:zs) = + case (stringify x, stringify y) of + (xs@(_:_), (c:_)) + | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs + _ -> x:SoftBreak:y:zs + go xs = xs + -- -- TagSoup HTML handling -- @@ -709,14 +712,13 @@ readDataFileUTF8 userDir fname = UTF8.toString `fmap` readDataFile userDir fname -- | Read from a URL and return raw data and maybe mime type. -openURL :: String -> IO (BS.ByteString, Maybe MimeType) +openURL :: String -> IO (Either HttpException (BS.ByteString, Maybe MimeType)) openURL u | Just u'' <- stripPrefix "data:" u = let mime = takeWhile (/=',') u'' contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u'' - in return (decodeLenient contents, Just mime) -#ifdef HTTP_CLIENT - | otherwise = withSocketsDo $ do + in return $ Right (decodeLenient contents, Just mime) + | otherwise = E.try $ withSocketsDo $ do let parseReq = parseRequest (proxy :: Either IOError String) <- tryIOError $ getEnv "http_proxy" @@ -738,19 +740,6 @@ openURL u resp <- newManager tlsManagerSettings >>= httpLbs req'' return (BS.concat $ toChunks $ responseBody resp, UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) -#else - | otherwise = getBodyAndMimeType `fmap` browse - (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." - setOutHandler $ const (return ()) - setAllowRedirects True - request (getRequest' u')) - where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r) - getRequest' uriString = case parseURI uriString of - Nothing -> error ("Not a valid URL: " ++ - uriString) - Just v -> mkRequest GET v - u' = escapeURIString (/= '|') u -- pipes are rejected by Network.URI -#endif -- -- Error reporting @@ -794,6 +783,71 @@ filteredFilesFromArchive zf f = fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString) fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e) + +-- +-- IANA URIs +-- + +-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus +-- the unofficial schemes doi, javascript, isbn, pmid. +schemes :: Set.Set String +schemes = Set.fromList + -- Official IANA schemes + [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs" + , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin" + , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension" + , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs" + , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle" + , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed" + , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg" + , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham" + , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon" + , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6" + , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs" + , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap" + , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market" + , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access" + , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel" + , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath" + , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint" + , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller" + , "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode" + , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular" + , "ms-settings-cloudstorage", "ms-settings-connectabledevices" + , "ms-settings-displays-topology", "ms-settings-emailandaccounts" + , "ms-settings-language", "ms-settings-location", "ms-settings-lock" + , "ms-settings-nfctransactions", "ms-settings-notifications" + , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity" + , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace" + , "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad" + , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word" + , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs" + , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd" + , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop" + , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis" + , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp" + , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn" + , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews" + , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam" + , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid" + , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn" + , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi" + , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid" + , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire" + , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r" + , "z39.50s" + -- Inofficial schemes + , "doi", "isbn", "javascript", "pmid" + ] + +-- | Check if the string is a valid URL with a IANA or frequently used but +-- unofficial scheme (see @schemes@). +isURI :: String -> Bool +isURI = maybe False hasKnownScheme . parseURI + where + hasKnownScheme = (`Set.member` schemes) . map toLower . + filter (/= ':') . uriScheme + --- --- Squash blocks into inlines --- diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index b53e0eb6d..cd7695dbe 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Slides - Copyright : Copyright (C) 2012-2016 John MacFarlane + Copyright : Copyright (C) 2012-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 26aeb9a73..9b635a97b 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -3,7 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeSynonymInstances #-} {- -Copyright (C) 2009-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2009-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Templates - Copyright : Copyright (C) 2009-2016 John MacFarlane + Copyright : Copyright (C) 2009-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index d88a44948..663f30d92 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UTF8 - Copyright : Copyright (C) 2010-2016 John MacFarlane + Copyright : Copyright (C) 2010-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -28,16 +29,25 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7. -} module Text.Pandoc.UTF8 ( readFile - , writeFile , getContents + , writeFileWith + , writeFile + , putStrWith , putStr + , putStrLnWith , putStrLn + , hPutStrWith , hPutStr + , hPutStrLnWith , hPutStrLn , hGetContents , toString + , toText , fromString + , fromText , toStringLazy + , fromTextLazy + , toTextLazy , fromStringLazy , encodePath , decodeArg @@ -46,7 +56,7 @@ module Text.Pandoc.UTF8 ( readFile where import qualified Data.ByteString.Char8 as B -import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Lazy.Char8 as BL import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Lazy as TL @@ -61,23 +71,43 @@ readFile f = do h <- openFile (encodePath f) ReadMode hGetContents h -writeFile :: FilePath -> String -> IO () -writeFile f s = withFile (encodePath f) WriteMode $ \h -> hPutStr h s - getContents :: IO String getContents = hGetContents stdin +writeFileWith :: Newline -> FilePath -> String -> IO () +writeFileWith eol f s = + withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s + +writeFile :: FilePath -> String -> IO () +writeFile = writeFileWith nativeNewline + +putStrWith :: Newline -> String -> IO () +putStrWith eol s = hPutStrWith eol stdout s + putStr :: String -> IO () -putStr s = hPutStr stdout s +putStr = putStrWith nativeNewline + +putStrLnWith :: Newline -> String -> IO () +putStrLnWith eol s = hPutStrLnWith eol stdout s putStrLn :: String -> IO () -putStrLn s = hPutStrLn stdout s +putStrLn = putStrLnWith nativeNewline + +hPutStrWith :: Newline -> Handle -> String -> IO () +hPutStrWith eol h s = + hSetNewlineMode h (NewlineMode eol eol) >> + hSetEncoding h utf8 >> IO.hPutStr h s hPutStr :: Handle -> String -> IO () -hPutStr h s = hSetEncoding h utf8 >> IO.hPutStr h s +hPutStr = hPutStrWith nativeNewline + +hPutStrLnWith :: Newline -> Handle -> String -> IO () +hPutStrLnWith eol h s = + hSetNewlineMode h (NewlineMode eol eol) >> + hSetEncoding h utf8 >> IO.hPutStrLn h s hPutStrLn :: Handle -> String -> IO () -hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s +hPutStrLn = hPutStrLnWith nativeNewline hGetContents :: Handle -> IO String hGetContents = fmap toString . B.hGetContents @@ -85,34 +115,47 @@ hGetContents = fmap toString . B.hGetContents -- >> hSetNewlineMode h universalNewlineMode -- >> IO.hGetContents h --- | Drop BOM (byte order marker) if present at beginning of string. --- Note that Data.Text converts the BOM to code point FEFF, zero-width --- no-break space, so if the string begins with this we strip it off. -dropBOM :: String -> String -dropBOM ('\xFEFF':xs) = xs -dropBOM xs = xs - -filterCRs :: String -> String -filterCRs ('\r':'\n':xs) = '\n': filterCRs xs -filterCRs ('\r':xs) = '\n' : filterCRs xs -filterCRs (x:xs) = x : filterCRs xs -filterCRs [] = [] +-- | Convert UTF8-encoded ByteString to Text, also +-- removing '\r' characters. +toText :: B.ByteString -> T.Text +toText = T.decodeUtf8 . filterCRs . dropBOM + where dropBOM bs = + if "\xEF\xBB\xBF" `B.isPrefixOf` bs + then B.drop 3 bs + else bs + filterCRs = B.filter (/='\r') -- | Convert UTF8-encoded ByteString to String, also -- removing '\r' characters. toString :: B.ByteString -> String -toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8 +toString = T.unpack . toText -fromString :: String -> B.ByteString -fromString = T.encodeUtf8 . T.pack +-- | Convert UTF8-encoded ByteString to Text, also +-- removing '\r' characters. +toTextLazy :: BL.ByteString -> TL.Text +toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM + where dropBOM bs = + if "\xEF\xBB\xBF" `BL.isPrefixOf` bs + then BL.drop 3 bs + else bs + filterCRs = BL.filter (/='\r') -- | Convert UTF8-encoded ByteString to String, also -- removing '\r' characters. toStringLazy :: BL.ByteString -> String -toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8 +toStringLazy = TL.unpack . toTextLazy + +fromText :: T.Text -> B.ByteString +fromText = T.encodeUtf8 + +fromTextLazy :: TL.Text -> BL.ByteString +fromTextLazy = TL.encodeUtf8 + +fromString :: String -> B.ByteString +fromString = fromText . T.pack fromStringLazy :: String -> BL.ByteString -fromStringLazy = TL.encodeUtf8 . TL.pack +fromStringLazy = fromTextLazy . TL.pack encodePath :: FilePath -> FilePath encodePath = id diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs index 9446c4692..989dd20c6 100644 --- a/src/Text/Pandoc/UUID.hs +++ b/src/Text/Pandoc/UUID.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UUID - Copyright : Copyright (C) 2010-2016 John MacFarlane + Copyright : Copyright (C) 2010-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index 0181f41c9..dbe55449f 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -83,6 +83,7 @@ module Text.Pandoc.Writers import Data.Aeson import Data.List (intercalate) +import Data.Text (Text) import Text.Pandoc.Class import Text.Pandoc.Definition import Text.Pandoc.Options @@ -120,59 +121,59 @@ import Text.Parsec.Error import qualified Text.Pandoc.UTF8 as UTF8 import qualified Data.ByteString.Lazy as BL -data Writer m = StringWriter (WriterOptions -> Pandoc -> m String) +data Writer m = TextWriter (WriterOptions -> Pandoc -> m Text) | ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString) -- | Association list of formats and writers. writers :: PandocMonad m => [ ( String, Writer m) ] writers = [ - ("native" , StringWriter writeNative) - ,("json" , StringWriter $ \o d -> return $ writeJSON o d) + ("native" , TextWriter writeNative) + ,("json" , TextWriter $ \o d -> return $ writeJSON o d) ,("docx" , ByteStringWriter writeDocx) ,("odt" , ByteStringWriter writeODT) ,("epub" , ByteStringWriter writeEPUB3) ,("epub2" , ByteStringWriter writeEPUB2) ,("epub3" , ByteStringWriter writeEPUB3) - ,("fb2" , StringWriter writeFB2) - ,("html" , StringWriter writeHtml5String) - ,("html4" , StringWriter writeHtml4String) - ,("html5" , StringWriter writeHtml5String) - ,("icml" , StringWriter writeICML) - ,("s5" , StringWriter writeS5) - ,("slidy" , StringWriter writeSlidy) - ,("slideous" , StringWriter writeSlideous) - ,("dzslides" , StringWriter writeDZSlides) - ,("revealjs" , StringWriter writeRevealJs) - ,("docbook" , StringWriter writeDocbook5) - ,("docbook4" , StringWriter writeDocbook4) - ,("docbook5" , StringWriter writeDocbook5) - ,("jats" , StringWriter writeJATS) - ,("opml" , StringWriter writeOPML) - ,("opendocument" , StringWriter writeOpenDocument) - ,("latex" , StringWriter writeLaTeX) - ,("beamer" , StringWriter writeBeamer) - ,("context" , StringWriter writeConTeXt) - ,("texinfo" , StringWriter writeTexinfo) - ,("man" , StringWriter writeMan) - ,("ms" , StringWriter writeMs) - ,("markdown" , StringWriter writeMarkdown) - ,("markdown_strict" , StringWriter writeMarkdown) - ,("markdown_phpextra" , StringWriter writeMarkdown) - ,("markdown_github" , StringWriter writeMarkdown) - ,("markdown_mmd" , StringWriter writeMarkdown) - ,("plain" , StringWriter writePlain) - ,("rst" , StringWriter writeRST) - ,("mediawiki" , StringWriter writeMediaWiki) - ,("dokuwiki" , StringWriter writeDokuWiki) - ,("zimwiki" , StringWriter writeZimWiki) - ,("textile" , StringWriter writeTextile) - ,("rtf" , StringWriter writeRTF) - ,("org" , StringWriter writeOrg) - ,("asciidoc" , StringWriter writeAsciiDoc) - ,("haddock" , StringWriter writeHaddock) - ,("commonmark" , StringWriter writeCommonMark) - ,("tei" , StringWriter writeTEI) - ,("muse" , StringWriter writeMuse) + ,("fb2" , TextWriter writeFB2) + ,("html" , TextWriter writeHtml5String) + ,("html4" , TextWriter writeHtml4String) + ,("html5" , TextWriter writeHtml5String) + ,("icml" , TextWriter writeICML) + ,("s5" , TextWriter writeS5) + ,("slidy" , TextWriter writeSlidy) + ,("slideous" , TextWriter writeSlideous) + ,("dzslides" , TextWriter writeDZSlides) + ,("revealjs" , TextWriter writeRevealJs) + ,("docbook" , TextWriter writeDocbook5) + ,("docbook4" , TextWriter writeDocbook4) + ,("docbook5" , TextWriter writeDocbook5) + ,("jats" , TextWriter writeJATS) + ,("opml" , TextWriter writeOPML) + ,("opendocument" , TextWriter writeOpenDocument) + ,("latex" , TextWriter writeLaTeX) + ,("beamer" , TextWriter writeBeamer) + ,("context" , TextWriter writeConTeXt) + ,("texinfo" , TextWriter writeTexinfo) + ,("man" , TextWriter writeMan) + ,("ms" , TextWriter writeMs) + ,("markdown" , TextWriter writeMarkdown) + ,("markdown_strict" , TextWriter writeMarkdown) + ,("markdown_phpextra" , TextWriter writeMarkdown) + ,("markdown_github" , TextWriter writeMarkdown) + ,("markdown_mmd" , TextWriter writeMarkdown) + ,("plain" , TextWriter writePlain) + ,("rst" , TextWriter writeRST) + ,("mediawiki" , TextWriter writeMediaWiki) + ,("dokuwiki" , TextWriter writeDokuWiki) + ,("zimwiki" , TextWriter writeZimWiki) + ,("textile" , TextWriter writeTextile) + ,("rtf" , TextWriter writeRTF) + ,("org" , TextWriter writeOrg) + ,("asciidoc" , TextWriter writeAsciiDoc) + ,("haddock" , TextWriter writeHaddock) + ,("commonmark" , TextWriter writeCommonMark) + ,("tei" , TextWriter writeTEI) + ,("muse" , TextWriter writeMuse) ] getWriter :: PandocMonad m => String -> Either String (Writer m) @@ -182,12 +183,12 @@ getWriter s Right (writerName, setExts) -> case lookup writerName writers of Nothing -> Left $ "Unknown writer: " ++ writerName - Just (StringWriter r) -> Right $ StringWriter $ + Just (TextWriter r) -> Right $ TextWriter $ \o -> r o{ writerExtensions = setExts $ getDefaultExtensions writerName } Just (ByteStringWriter r) -> Right $ ByteStringWriter $ \o -> r o{ writerExtensions = setExts $ getDefaultExtensions writerName } -writeJSON :: WriterOptions -> Pandoc -> String -writeJSON _ = UTF8.toStringLazy . encode +writeJSON :: WriterOptions -> Pandoc -> Text +writeJSON _ = UTF8.toText . BL.toStrict . encode diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 20fa7c209..46dbe6eaf 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.AsciiDoc - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -43,6 +43,7 @@ import Data.Char (isPunctuation, isSpace) import Data.List (intercalate, intersperse, stripPrefix) import qualified Data.Map as M import Data.Maybe (fromMaybe) +import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition @@ -62,7 +63,7 @@ data WriterState = WriterState { defListMarker :: String } -- | Convert Pandoc to AsciiDoc. -writeAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeAsciiDoc opts document = evalStateT (pandocToAsciiDoc opts document) WriterState{ defListMarker = "::" @@ -74,16 +75,18 @@ writeAsciiDoc opts document = type ADW = StateT WriterState -- | Return asciidoc representation of document. -pandocToAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> ADW m String +pandocToAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> ADW m Text pandocToAsciiDoc opts (Pandoc meta blocks) = do let titleblock = not $ null (docTitle meta) && null (docAuthors meta) && null (docDate meta) let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToAsciiDoc opts) - (fmap (render colwidth) . inlineListToAsciiDoc opts) + (fmap render' . blockListToAsciiDoc opts) + (fmap render' . inlineListToAsciiDoc opts) meta let addTitleLine (String t) = String $ t <> "\n" <> T.replicate (T.length t) "=" diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 5e0a06bf0..ed316ced9 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -34,6 +34,7 @@ module Text.Pandoc.Writers.CommonMark (writeCommonMark) where import CMark import Control.Monad.State (State, get, modify, runState) import Data.Foldable (foldrM) +import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition @@ -45,7 +46,7 @@ import Text.Pandoc.Writers.HTML (writeHtml5String) import Text.Pandoc.Writers.Shared -- | Convert Pandoc to CommonMark. -writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeCommonMark opts (Pandoc meta blocks) = do let (blocks', notes) = runState (walkM processNotes blocks) [] notes' = if null notes @@ -71,7 +72,7 @@ processNotes x = return x node :: NodeType -> [Node] -> Node node = Node Nothing -blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m String +blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m Text blocksToCommonMark opts bs = do let cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] colwidth = if writerWrapText opts == WrapAuto @@ -79,14 +80,12 @@ blocksToCommonMark opts bs = do else Nothing nodes <- blocksToNodes bs return $ - T.unpack $ nodeToCommonmark cmarkOpts colwidth $ node DOCUMENT nodes -inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m String +inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m Text inlinesToCommonMark opts ils = return $ - T.unpack $ nodeToCommonmark cmarkOpts colwidth - $ node PARAGRAPH (inlinesToNodes ils) + nodeToCommonmark cmarkOpts colwidth $ node PARAGRAPH (inlinesToNodes ils) where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts @@ -139,7 +138,7 @@ blockToNodes (DefinitionList items) ns = blockToNodes (BulletList items') ns Para term : concat xs blockToNodes t@(Table _ _ _ _ _) ns = do s <- writeHtml5String def $! Pandoc nullMeta [t] - return (node (HTML_BLOCK (T.pack $! s)) [] : ns) + return (node (HTML_BLOCK s) [] : ns) blockToNodes Null ns = return ns inlinesToNodes :: [Inline] -> [Node] diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 57f920259..2da6a7f9a 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-2015 John MacFarlane + Copyright : Copyright (C) 2007-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -33,7 +33,8 @@ import Control.Monad.State import Data.Char (ord) import Data.List (intercalate, intersperse) import Data.Maybe (catMaybes) -import Network.URI (isURI, unEscapeString) +import Data.Text (Text) +import Network.URI (unEscapeString) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition @@ -56,7 +57,7 @@ orderedListStyles :: [Char] orderedListStyles = cycle "narg" -- | Convert Pandoc to ConTeXt. -writeConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeConTeXt options document = let defaultWriterState = WriterState { stNextRef = 1 , stOrderedListLevel = 0 @@ -66,17 +67,19 @@ writeConTeXt options document = type WM = StateT WriterState -pandocToConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> WM m String +pandocToConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> WM m Text pandocToConTeXt options (Pandoc meta blocks) = do let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON options - (fmap (render colwidth) . blockListToConTeXt) - (fmap (render colwidth) . inlineListToConTeXt) + (fmap render' . blockListToConTeXt) + (fmap render' . inlineListToConTeXt) meta body <- mapM (elementToConTeXt options) $ hierarchicalize blocks - let main = (render colwidth . vcat) body + let main = (render' . vcat) body let layoutFromMargins = intercalate [','] $ catMaybes $ map (\(x,y) -> ((x ++ "=") ++) <$> getField y metadata) diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index ce90e4834..1314ef844 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -6,7 +6,7 @@ #else {-# LANGUAGE OverlappingInstances #-} #endif -{- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> +{- Copyright (C) 2012-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Custom - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -41,6 +41,7 @@ import Control.Monad (when) import Data.Char (toLower) import Data.List (intersperse) import qualified Data.Map as M +import Data.Text (Text, pack) import Data.Typeable import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8) import Scripting.Lua (LuaState, StackValue, callfunc) @@ -116,7 +117,7 @@ data PandocLuaException = PandocLuaException String instance Exception PandocLuaException -- | Convert Pandoc to custom markup. -writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String +writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO Text writeCustom luaFile opts doc@(Pandoc meta _) = do luaScript <- UTF8.readFile luaFile enc <- getForeignEncoding @@ -139,8 +140,9 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do setForeignEncoding enc let body = rendered case writerTemplate opts of - Nothing -> return body - Just tpl -> return $ renderTemplate' tpl $ setField "body" body context + Nothing -> return $ pack body + Just tpl -> return $ pack $ + renderTemplate' tpl $ setField "body" body context docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO String docToCustom lua opts (Pandoc (Meta metamap) blocks) = do diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index dce2cbd3e..02ffbf831 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -32,6 +32,7 @@ Conversion of 'Pandoc' documents to Docbook XML. module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where import Control.Monad.Reader import Data.Char (toLower) +import Data.Text (Text) import Data.Generics (everywhere, mkT) import Data.List (intercalate, isPrefixOf, isSuffixOf, stripPrefix) import Data.Monoid (Any (..)) @@ -81,22 +82,23 @@ authorToDocbook opts name' = do in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ inTagsSimple "surname" (text $ escapeStringForXML lastname) -writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeDocbook4 opts d = runReaderT (writeDocbook opts d) DocBook4 -writeDocbook5 :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeDocbook5 :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeDocbook5 opts d = runReaderT (writeDocbook opts d) DocBook5 -- | Convert Pandoc document to string in Docbook format. -writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m String +writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m Text writeDocbook opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - let render' = render colwidth + let render' :: Doc -> Text + render' = render colwidth let opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr) (writerTemplate opts) && TopLevelDefault == writerTopLevelDivision opts) @@ -111,10 +113,10 @@ writeDocbook opts (Pandoc meta blocks) = do auths' <- mapM (authorToDocbook opts) $ docAuthors meta let meta' = B.setMeta "author" auths' meta metadata <- metaToJSON opts - (fmap (render colwidth . vcat) . + (fmap (render' . vcat) . (mapM (elementToDocbook opts' startLvl) . hierarchicalize)) - (fmap (render colwidth) . inlinesToDocbook opts') + (fmap render' . inlinesToDocbook opts') meta' main <- (render' . vcat) <$> (mapM (elementToDocbook opts' startLvl) elements) let context = defField "body" main diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index fddec91cc..63bb8a5ae 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -4,7 +4,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} {- -Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,7 +23,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docx - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -55,7 +55,6 @@ import Text.Pandoc.Class (PandocMonad, report) import qualified Text.Pandoc.Class as P import Text.Pandoc.Compat.Time import Text.Pandoc.Definition -import Text.Pandoc.Error import Text.Pandoc.Generic import Text.Pandoc.Highlighting (highlight) import Text.Pandoc.ImageSize @@ -497,6 +496,11 @@ writeDocx opts doc@(Pandoc meta _) = do , qName (elName e) == "abstractNum" ] ++ [Elem e | e <- allElts , qName (elName e) == "num" ] } + + let keywords = case lookupMeta "keywords" meta of + Just (MetaList xs) -> map stringify xs + _ -> [] + let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") @@ -506,6 +510,7 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] $ mknode "dc:title" [] (stringify $ docTitle meta) : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) + : mknode "cp:keywords" [] (intercalate ", " keywords) : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) @@ -876,7 +881,7 @@ blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do let prop = pCustomStyle $ if null alt then "Figure" - else "FigureWithCaption" + else "CaptionedFigure" paraProps <- local (\env -> env { envParaProperties = prop : envParaProperties env }) (getParaProps False) contents <- inlinesToOpenXML opts [Image attr alt (src,tit)] captionNode <- withParaProp (pCustomStyle "ImageCaption") @@ -954,7 +959,7 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do caption' ++ [mknode "w:tbl" [] ( mknode "w:tblPr" [] - ( mknode "w:tblStyle" [("w:val","TableNormal")] () : + ( mknode "w:tblStyle" [("w:val","Table")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () : [ mknode "w:tblCaption" [("w:val", captionStr)] () @@ -1060,13 +1065,24 @@ withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a withParaPropM = (. flip withParaProp) . (>>=) formattedString :: PandocMonad m => String -> WS m [Element] -formattedString str = do - props <- getTextProps +formattedString str = + -- properly handle soft hyphens + case splitBy (=='\173') str of + [w] -> formattedString' w + ws -> do + sh <- formattedRun [mknode "w:softHyphen" [] ()] + (intercalate sh) <$> mapM formattedString' ws + +formattedString' :: PandocMonad m => String -> WS m [Element] +formattedString' str = do inDel <- asks envInDel - return [ mknode "w:r" [] $ - props ++ - [ mknode (if inDel then "w:delText" else "w:t") - [("xml:space","preserve")] (stripInvalidChars str) ] ] + formattedRun [ mknode (if inDel then "w:delText" else "w:t") + [("xml:space","preserve")] (stripInvalidChars str) ] + +formattedRun :: PandocMonad m => [Element] -> WS m [Element] +formattedRun els = do + props <- getTextProps + return [ mknode "w:r" [] $ props ++ els ] setFirstPara :: PandocMonad m => WS m () setFirstPara = modify $ \s -> s { stFirstPara = True } @@ -1076,7 +1092,8 @@ inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] -inlineToOpenXML' _ (Str str) = formattedString str +inlineToOpenXML' _ (Str str) = + formattedString str inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ") inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ") inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do @@ -1303,12 +1320,10 @@ inlineToOpenXML' opts (Image attr alt (src, title)) = do M.insert src (ident, imgpath, mbMimeType, imgElt, img) $ stImages st } return [imgElt]) - (\e -> do case e of - PandocIOError _ e' -> - report $ CouldNotFetchResource src (show e') - e' -> report $ CouldNotFetchResource src (show e') - -- emit alt text - inlinesToOpenXML opts alt) + (\e -> do + report $ CouldNotFetchResource src (show e) + -- emit alt text + inlinesToOpenXML opts alt) br :: Element br = breakElement "textWrapping" diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 5e29acbaf..551a1b0b5 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.DokuWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane + Copyright : Copyright (C) 2008-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Clare Macrae <clare.macrae@googlemail.com> @@ -44,13 +44,13 @@ import Control.Monad.Reader (ReaderT, ask, local, runReaderT) import Control.Monad.State (StateT, evalStateT, gets, modify) import Data.Default (Default (..)) import Data.List (intercalate, intersect, isPrefixOf, transpose) -import Network.URI (isURI) +import Data.Text (Text, pack) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition import Text.Pandoc.ImageSize import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) -import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, linesToPara, +import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, isURI, linesToPara, removeFormatting, substitute, trimr) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Shared (defField, metaToJSON) @@ -76,7 +76,7 @@ instance Default WriterEnvironment where type DokuWiki m = ReaderT WriterEnvironment (StateT WriterState m) -- | Convert Pandoc to DokuWiki. -writeDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeDokuWiki opts document = runDokuWiki (pandocToDokuWiki opts document) @@ -85,7 +85,7 @@ runDokuWiki = flip evalStateT def . flip runReaderT def -- | Return DokuWiki representation of document. pandocToDokuWiki :: PandocMonad m - => WriterOptions -> Pandoc -> DokuWiki m String + => WriterOptions -> Pandoc -> DokuWiki m Text pandocToDokuWiki opts (Pandoc meta blocks) = do metadata <- metaToJSON opts (fmap trimr . blockListToDokuWiki opts) @@ -97,7 +97,7 @@ pandocToDokuWiki opts (Pandoc meta blocks) = do then "" -- TODO Was "\n<references />" Check whether I can really remove this: -- if it is definitely to do with footnotes, can remove this whole bit else "" - let main = body ++ notes + let main = pack $ body ++ notes let context = defField "body" main $ defField "toc" (writerTableOfContents opts) $ metadata diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 5b64564ce..d68283007 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -3,7 +3,7 @@ {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.EPUB - Copyright : Copyright (C) 2010-2015 John MacFarlane + Copyright : Copyright (C) 2010-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -40,6 +40,7 @@ import Control.Monad.State (State, StateT, evalState, evalStateT, get, gets, lift, modify, put) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 +import qualified Data.Text.Lazy as TL import Data.Char (isAlphaNum, isDigit, toLower) import Data.List (intercalate, isInfixOf, isPrefixOf) import qualified Data.Map as M @@ -373,8 +374,8 @@ pandocToEPUB :: PandocMonad m -> E m B.ByteString pandocToEPUB version opts doc@(Pandoc meta _) = do let epub3 = version == EPUB3 - let writeHtml o = fmap UTF8.fromStringLazy . - writeHtmlStringForEPUB version o + let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) . + writeHtmlStringForEPUB version o epochtime <- floor <$> lift P.getPOSIXTime metadata <- getEPUBMetadata opts meta let mkEntry path content = toEntry path epochtime content diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index fb232e278..213756330 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -1,8 +1,8 @@ {-# LANGUAGE PatternGuards #-} {- -Copyright (c) 2011-2012, Sergey Astanin -All rights reserved. +Copyright (c) 2011-2012 Sergey Astanin + 2012-2017 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,17 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -{- | Conversion of 'Pandoc' documents to FB2 (FictionBook2) format. +{- | +Module : Text.Pandoc.Writers.FB2 +Copyright : Copyright (C) 2011-2012 Sergey Astanin + 2012-2017 John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : John MacFarlane +Stability : alpha +Portability : portable + +Conversion of 'Pandoc' documents to FB2 (FictionBook2) format. FictionBook is an XML-based e-book format. For more information see: <http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1> @@ -34,9 +44,9 @@ import Data.ByteString.Base64 (encode) import qualified Data.ByteString.Char8 as B8 import Data.Char (isAscii, isControl, isSpace, toLower) import Data.Either (lefts, rights) +import Data.Text (Text, pack) import Data.List (intercalate, intersperse, isPrefixOf, stripPrefix) import Network.HTTP (urlEncode) -import Network.URI (isURI) import Text.XML.Light import qualified Text.XML.Light as X import qualified Text.XML.Light.Cursor as XC @@ -47,7 +57,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) -import Text.Pandoc.Shared (capitalize, isHeaderBlock, linesToPara, +import Text.Pandoc.Shared (capitalize, isHeaderBlock, isURI, linesToPara, orderedListMarkers) -- | Data to be written at the end of the document: @@ -77,13 +87,13 @@ instance Show ImageMode where writeFB2 :: PandocMonad m => WriterOptions -- ^ conversion options -> Pandoc -- ^ document to convert - -> m String -- ^ FictionBook2 document (not encoded yet) + -> m Text -- ^ FictionBook2 document (not encoded yet) writeFB2 opts doc = flip evalStateT newFB $ pandocToFB2 opts doc pandocToFB2 :: PandocMonad m => WriterOptions -> Pandoc - -> FBM m String + -> FBM m Text pandocToFB2 opts (Pandoc meta blocks) = do modify (\s -> s { writerOptions = opts }) desc <- description meta @@ -94,7 +104,7 @@ pandocToFB2 opts (Pandoc meta blocks) = do (imgs,missing) <- liftM imagesToFetch get >>= \s -> lift (fetchImages s) let body' = replaceImagesWithAlt missing body let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) - return $ xml_head ++ (showContent fb2_xml) ++ "\n" + return $ pack $ xml_head ++ (showContent fb2_xml) ++ "\n" where xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" fb2_attrs = diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 9f41f77d1..5ee8ab4ce 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -45,6 +45,8 @@ module Text.Pandoc.Writers.HTML ( ) where import Control.Monad.State import Data.Char (ord, toLower) +import Data.Text (Text) +import qualified Data.Text.Lazy as TL import Data.List (intersperse, isPrefixOf) import Data.Maybe (catMaybes, fromMaybe, isJust, isNothing) import Data.Monoid ((<>)) @@ -67,7 +69,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.XML (escapeStringForXML, fromEntities) #if MIN_VERSION_blaze_markup(0,6,3) #else -import Text.Blaze.Internal (preEscapedString) +import Text.Blaze.Internal (preEscapedString, preEscapedText) #endif #if MIN_VERSION_blaze_html(0,5,1) import qualified Text.Blaze.XHtml5 as H5 @@ -77,7 +79,7 @@ import qualified Text.Blaze.Html5 as H5 import Control.Monad.Except (throwError) import Data.Aeson (Value) import System.FilePath (takeExtension, takeBaseName) -import Text.Blaze.Html.Renderer.String (renderHtml) +import Text.Blaze.Html.Renderer.Text (renderHtml) import qualified Text.Blaze.XHtml1.Transitional as H import qualified Text.Blaze.XHtml1.Transitional.Attributes as A import Text.Pandoc.Class (PandocMonad, report) @@ -123,7 +125,7 @@ nl opts = if writerWrapText opts == WrapNone else preEscapedString "\n" -- | Convert Pandoc document to Html 5 string. -writeHtml5String :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeHtml5String :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeHtml5String = writeHtmlString' defaultWriterState{ stHtml5 = True } @@ -132,7 +134,7 @@ writeHtml5 :: PandocMonad m => WriterOptions -> Pandoc -> m Html writeHtml5 = writeHtml' defaultWriterState{ stHtml5 = True } -- | Convert Pandoc document to Html 4 string. -writeHtml4String :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeHtml4String :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeHtml4String = writeHtmlString' defaultWriterState{ stHtml5 = False } @@ -142,38 +144,39 @@ writeHtml4 = writeHtml' defaultWriterState{ stHtml5 = False } -- | Convert Pandoc document to Html appropriate for an epub version. writeHtmlStringForEPUB :: PandocMonad m - => EPUBVersion -> WriterOptions -> Pandoc -> m String -writeHtmlStringForEPUB version = writeHtmlString' + => EPUBVersion -> WriterOptions -> Pandoc + -> m Text +writeHtmlStringForEPUB version o = writeHtmlString' defaultWriterState{ stHtml5 = version == EPUB3, - stEPUBVersion = Just version } + stEPUBVersion = Just version } o -- | Convert Pandoc document to Reveal JS HTML slide show. writeRevealJs :: PandocMonad m - => WriterOptions -> Pandoc -> m String + => WriterOptions -> Pandoc -> m Text writeRevealJs = writeHtmlSlideShow' RevealJsSlides -- | Convert Pandoc document to S5 HTML slide show. writeS5 :: PandocMonad m - => WriterOptions -> Pandoc -> m String + => WriterOptions -> Pandoc -> m Text writeS5 = writeHtmlSlideShow' S5Slides -- | Convert Pandoc document to Slidy HTML slide show. writeSlidy :: PandocMonad m - => WriterOptions -> Pandoc -> m String + => WriterOptions -> Pandoc -> m Text writeSlidy = writeHtmlSlideShow' SlidySlides -- | Convert Pandoc document to Slideous HTML slide show. writeSlideous :: PandocMonad m - => WriterOptions -> Pandoc -> m String + => WriterOptions -> Pandoc -> m Text writeSlideous = writeHtmlSlideShow' SlideousSlides -- | Convert Pandoc document to DZSlides HTML slide show. writeDZSlides :: PandocMonad m - => WriterOptions -> Pandoc -> m String + => WriterOptions -> Pandoc -> m Text writeDZSlides = writeHtmlSlideShow' DZSlides writeHtmlSlideShow' :: PandocMonad m - => HTMLSlideVariant -> WriterOptions -> Pandoc -> m String + => HTMLSlideVariant -> WriterOptions -> Pandoc -> m Text writeHtmlSlideShow' variant = writeHtmlString' defaultWriterState{ stSlideVariant = variant , stHtml5 = case variant of @@ -185,12 +188,15 @@ writeHtmlSlideShow' variant = writeHtmlString' NoSlides -> False } +renderHtml' :: Html -> Text +renderHtml' = TL.toStrict . renderHtml + writeHtmlString' :: PandocMonad m - => WriterState -> WriterOptions -> Pandoc -> m String + => WriterState -> WriterOptions -> Pandoc -> m Text writeHtmlString' st opts d = do (body, context) <- evalStateT (pandocToHtml opts d) st case writerTemplate opts of - Nothing -> return $ renderHtml body + Nothing -> return $ renderHtml' body Just tpl -> do -- warn if empty lang when (isNothing (getField "lang" context :: Maybe String)) $ @@ -205,12 +211,12 @@ writeHtmlString' st opts d = do report $ NoTitleElement fallback return $ resetField "pagetitle" fallback context return $ renderTemplate' tpl $ - defField "body" (renderHtml body) context' + defField "body" (renderHtml' body) context' writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html writeHtml' st opts d = do case writerTemplate opts of - Just _ -> preEscapedString <$> writeHtmlString' st opts d + Just _ -> preEscapedText <$> writeHtmlString' st opts d Nothing -> do (body, _) <- evalStateT (pandocToHtml opts d) st return body @@ -222,8 +228,8 @@ pandocToHtml :: PandocMonad m -> StateT WriterState m (Html, Value) pandocToHtml opts (Pandoc meta blocks) = do metadata <- metaToJSON opts - (fmap renderHtml . blockListToHtml opts) - (fmap renderHtml . inlineListToHtml opts) + (fmap renderHtml' . blockListToHtml opts) + (fmap renderHtml' . inlineListToHtml opts) meta let stringifyHTML = escapeStringForXML . stringify let authsMeta = map stringifyHTML $ docAuthors meta @@ -277,10 +283,10 @@ pandocToHtml opts (Pandoc meta blocks) = do Nothing -> id else id) $ (if stMath st - then defField "math" (renderHtml math) + then defField "math" (renderHtml' math) else id) $ defField "quotes" (stQuotes st) $ - maybe id (defField "toc" . renderHtml) toc $ + maybe id (defField "toc" . renderHtml') toc $ defField "author-meta" authsMeta $ maybe id (defField "date-meta") (normalizeDate dateMeta) $ defField "pagetitle" (stringifyHTML (docTitle meta)) $ @@ -463,7 +469,7 @@ parseMailto s = do obfuscateLink :: PandocMonad m => WriterOptions -> Attr -> Html -> String -> m Html obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation = return $ addAttrs opts attr $ H.a ! A.href (toValue s) $ txt -obfuscateLink opts attr (renderHtml -> txt) s = +obfuscateLink opts attr (TL.unpack . renderHtml -> txt) s = let meth = writerEmailObfuscation opts s' = map toLower (take 7 s) ++ drop 7 s in case parseMailto s' of @@ -521,7 +527,7 @@ attrsToHtml opts (id',classes',keyvals) = imgAttrsToHtml :: WriterOptions -> Attr -> [Attribute] imgAttrsToHtml opts attr = attrsToHtml opts (ident,cls,kvs') ++ - toAttrs (dimensionsToAttrList opts attr) + toAttrs (dimensionsToAttrList attr) where (ident,cls,kvs) = attr kvs' = filter isNotDim kvs @@ -529,14 +535,13 @@ imgAttrsToHtml opts attr = isNotDim ("height", _) = False isNotDim _ = True -dimensionsToAttrList :: WriterOptions -> Attr -> [(String, String)] -dimensionsToAttrList opts attr = (go Width) ++ (go Height) +dimensionsToAttrList :: Attr -> [(String, String)] +dimensionsToAttrList attr = (go Width) ++ (go Height) where go dir = case (dimension dir attr) of - (Just (Percent a)) -> [("style", show dir ++ ":" ++ show (Percent a))] - (Just dim) -> [(show dir, showInPixel opts dim)] - _ -> [] - + (Just (Pixel a)) -> [(show dir, show a)] + (Just x) -> [("style", show dir ++ ":" ++ show x)] + Nothing -> [] imageExts :: [String] imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", @@ -974,7 +979,7 @@ inlineToHtml opts inline = do (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt lift $ obfuscateLink opts attr linkText s - (Link attr txt (s,tit)) -> do + (Link (ident,classes,kvs) txt (s,tit)) -> do linkText <- inlineListToHtml opts txt slideVariant <- gets stSlideVariant let s' = case s of @@ -984,13 +989,13 @@ inlineToHtml opts inline = do in '#' : prefix ++ xs _ -> s let link = H.a ! A.href (toValue s') $ linkText - let link' = if txt == [Str (unEscapeString s)] - then link ! A.class_ "uri" - else link - let link'' = addAttrs opts attr link' + let attr = if txt == [Str (unEscapeString s)] + then (ident, "uri" : classes, kvs) + else (ident, classes, kvs) + let link' = addAttrs opts attr link return $ if null tit - then link'' - else link'' ! A.title (toValue tit) + then link' + else link' ! A.title (toValue tit) (Image attr txt (s,tit)) | treatAsImage s -> do let alternate' = stringify txt slideVariant <- gets stSlideVariant diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index eae1377cd..1ad9acd40 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {- -Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2014-2015, 2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Haddock - Copyright : Copyright (C) 2014 John MacFarlane + Copyright : Copyright (C) 2014-2015,2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -35,8 +35,8 @@ Haddock: <http://www.haskell.org/haddock/doc/html/> module Text.Pandoc.Writers.Haddock (writeHaddock) where import Control.Monad.State import Data.Default +import Data.Text (Text) import Data.List (intersperse, transpose) -import Network.URI (isURI) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Logging @@ -53,14 +53,14 @@ instance Default WriterState where def = WriterState{ stNotes = [] } -- | Convert Pandoc to Haddock. -writeHaddock :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeHaddock :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeHaddock opts document = evalStateT (pandocToHaddock opts{ writerWrapText = writerWrapText opts } document) def -- | Return haddock representation of document. pandocToHaddock :: PandocMonad m - => WriterOptions -> Pandoc -> StateT WriterState m String + => WriterOptions -> Pandoc -> StateT WriterState m Text pandocToHaddock opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts @@ -68,13 +68,13 @@ pandocToHaddock opts (Pandoc meta blocks) = do body <- blockListToHaddock opts blocks st <- get notes' <- notesToHaddock opts (reverse $ stNotes st) - let render' :: Doc -> String + let render' :: Doc -> Text render' = render colwidth let main = render' $ body <> (if isEmpty notes' then empty else blankline <> notes') metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToHaddock opts) - (fmap (render colwidth) . inlineListToHaddock opts) + (fmap render' . blockListToHaddock opts) + (fmap render' . inlineListToHaddock opts) meta let context = defField "body" main $ metadata diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index cd3cac5a7..2884bc532 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -4,7 +4,7 @@ {- | Module : Text.Pandoc.Writers.ICML - Copyright : Copyright (C) 2013-2016 github.com/mb21 + Copyright : Copyright (C) 2013-2017 github.com/mb21 License : GNU GPL, version 2 or above Stability : alpha @@ -21,16 +21,15 @@ import Control.Monad.State import Data.List (intersperse, isInfixOf, isPrefixOf, stripPrefix) import qualified Data.Set as Set import Data.Text as Text (breakOnAll, pack) -import Network.URI (isURI) +import Data.Text (Text) import Text.Pandoc.Class (PandocMonad, report) import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition -import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Pretty -import Text.Pandoc.Shared (linesToPara, splitBy) +import Text.Pandoc.Shared (isURI, linesToPara, splitBy) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Math (texMathToInlines) import Text.Pandoc.Writers.Shared @@ -129,11 +128,12 @@ footnoteName = "Footnote" citeName = "Cite" -- | Convert Pandoc document to string in ICML format. -writeICML :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeICML :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeICML opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + render' :: Doc -> Text render' = render colwidth renderMeta f s = liftM (render' . fst) $ runStateT (f opts [] s) defaultWriterState metadata <- metaToJSON opts @@ -550,10 +550,7 @@ imageICML opts style attr (src, _) = do report $ CouldNotDetermineImageSize src msg return def) (\e -> do - case e of - PandocIOError _ e' -> - report $ CouldNotFetchResource src (show e') - e' -> report $ CouldNotFetchResource src (show e') + report $ CouldNotFetchResource src (show e) return def) let (ow, oh) = sizeInPoints imgS (imgWidth, imgHeight) = desiredSizeInPoints opts attr imgS diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index aca7dc969..1a8d80747 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,6 +33,7 @@ https://jats.nlm.nih.gov/publishing/tag-library/1.1d3/element/mml-math.html module Text.Pandoc.Writers.JATS ( writeJATS ) where import Control.Monad.Reader import Data.Char (toLower) +import Data.Text (Text) import Data.Generics (everywhere, mkT) import Data.List (intercalate, isSuffixOf, partition) import Data.Maybe (fromMaybe) @@ -81,12 +82,12 @@ authorToJATS opts name' = do in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ inTagsSimple "surname" (text $ escapeStringForXML lastname) -writeJATS :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeJATS :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeJATS opts d = runReaderT (docToJATS opts d) JATS1_1 -- | Convert Pandoc document to string in JATS format. -docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> DB m String +docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> DB m Text docToJATS opts (Pandoc meta blocks) = do let isBackBlock (Div ("refs",_,_) _) = True isBackBlock _ = False @@ -96,7 +97,8 @@ docToJATS opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - let render' = render colwidth + let render' :: Doc -> Text + render' = render colwidth let opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr) (writerTemplate opts) && TopLevelDefault == writerTopLevelDivision opts) @@ -111,10 +113,10 @@ docToJATS opts (Pandoc meta blocks) = do auths' <- mapM (authorToJATS opts) $ docAuthors meta let meta' = B.setMeta "author" auths' meta metadata <- metaToJSON opts - (fmap (render colwidth . vcat) . + (fmap (render' . vcat) . (mapM (elementToJATS opts' startLvl) . hierarchicalize)) - (fmap (render colwidth) . inlinesToJATS opts') + (fmap render' . inlinesToJATS opts') meta' main <- (render' . vcat) <$> (mapM (elementToJATS opts' startLvl) elements) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 000f4f8fb..80606d510 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -2,7 +2,7 @@ {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -42,8 +42,9 @@ import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord, import Data.List (foldl', intercalate, intersperse, isInfixOf, nub, nubBy, stripPrefix, (\\)) import Data.Maybe (catMaybes, fromMaybe, isJust) +import Data.Text (Text) import qualified Data.Text as T -import Network.URI (isURI, unEscapeString) +import Network.URI (unEscapeString) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, @@ -114,13 +115,13 @@ startingState options = WriterState { , stEmptyLine = True } -- | Convert Pandoc to LaTeX. -writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeLaTeX options document = evalStateT (pandocToLaTeX options document) $ startingState options -- | Convert Pandoc to LaTeX Beamer. -writeBeamer :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeBeamer :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeBeamer options document = evalStateT (pandocToLaTeX options document) $ (startingState options){ stBeamer = True } @@ -128,7 +129,7 @@ writeBeamer options document = type LW m = StateT WriterState m pandocToLaTeX :: PandocMonad m - => WriterOptions -> Pandoc -> LW m String + => WriterOptions -> Pandoc -> LW m Text pandocToLaTeX options (Pandoc meta blocks) = do -- Strip off final 'references' header if --natbib or --biblatex let method = writerCiteMethod options @@ -146,9 +147,11 @@ pandocToLaTeX options (Pandoc meta blocks) = do let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON options - (fmap (render colwidth) . blockListToLaTeX) - (fmap (render colwidth) . inlineListToLaTeX) + (fmap render' . blockListToLaTeX) + (fmap render' . inlineListToLaTeX) meta let bookClasses = ["memoir","book","report","scrreprt","scrbook"] let documentClass = case P.parse pDocumentClass "template" template of @@ -180,8 +183,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do then toSlides blocks'' else return blocks'' body <- mapM (elementToLaTeX options) $ hierarchicalize blocks''' - (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader - let main = render colwidth $ vsep body + (biblioTitle :: Text) <- render' <$> inlineListToLaTeX lastHeader + let main = render' $ vsep body st <- get titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta @@ -1062,6 +1065,9 @@ inlineToLaTeX (Link _ txt (src, _)) = src' <- stringToLaTeX URLString (escapeURI src) return $ text ("\\href{" ++ src' ++ "}{") <> contents <> char '}' +inlineToLaTeX il@(Image _ _ ('d':'a':'t':'a':':':_, _)) = do + report $ InlineNotRendered il + return empty inlineToLaTeX (Image attr _ (source, _)) = do setEmptyLine False modify $ \s -> s{ stGraphics = True } diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 1f3e17c16..0fc6afbdc 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007-2015 John MacFarlane + Copyright : Copyright (C) 2007-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,6 +35,8 @@ import Control.Monad.State import Data.List (intercalate, intersperse, stripPrefix, sort) import qualified Data.Map as Map import Data.Maybe (fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Builder (deleteMeta) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition @@ -62,36 +65,37 @@ defaultWriterState = WriterState { stNotes = [] , stHasTables = False } -- | Convert Pandoc to Man. -writeMan :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeMan :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMan opts document = evalStateT (pandocToMan opts document) defaultWriterState -- | Return groff man representation of document. -pandocToMan :: PandocMonad m => WriterOptions -> Pandoc -> StateT WriterState m String +pandocToMan :: PandocMonad m => WriterOptions -> Pandoc -> StateT WriterState m Text pandocToMan opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - let render' = render colwidth + let render' :: Doc -> Text + render' = render colwidth titleText <- inlineListToMan opts $ docTitle meta let title' = render' titleText let setFieldsFromTitle = - case break (== ' ') title' of - (cmdName, rest) -> case break (=='(') cmdName of - (xs, '(':ys) | not (null ys) && - last ys == ')' -> + case T.break (== ' ') title' of + (cmdName, rest) -> case T.break (=='(') cmdName of + (xs, ys) | "(" `T.isPrefixOf` ys + && ")" `T.isSuffixOf` ys -> defField "title" xs . - defField "section" (init ys) . - case splitBy (=='|') rest of + defField "section" (T.init $ T.drop 1 ys) . + case T.splitOn "|" rest of (ft:hds) -> - defField "footer" (trim ft) . + defField "footer" (T.strip ft) . defField "header" - (trim $ concat hds) + (T.strip $ mconcat hds) [] -> id _ -> defField "title" title' metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToMan opts) - (fmap (render colwidth) . inlineListToMan opts) + (fmap render' . blockListToMan opts) + (fmap render' . inlineListToMan opts) $ deleteMeta "title" meta body <- blockListToMan opts blocks notes <- gets stNotes diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 8e3ac3665..3ac677943 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,26 +34,25 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text. Markdown: <http://daringfireball.net/projects/markdown/> -} module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where -import Control.Monad.Except (throwError) import Control.Monad.Reader import Control.Monad.State import Data.Char (chr, isPunctuation, isSpace, ord) import Data.Default import qualified Data.HashMap.Strict as H +import qualified Data.Map as M import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose) import Data.Maybe (fromMaybe) import Data.Monoid (Any (..)) import Data.Ord (comparing) import qualified Data.Set as Set +import Data.Text (Text) import qualified Data.Text as T import qualified Data.Vector as V import Data.Yaml (Value (Array, Bool, Number, Object, String)) import Network.HTTP (urlEncode) -import Network.URI (isURI) import Text.HTML.TagSoup (Tag (..), isTagText, parseTags) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) @@ -91,6 +90,9 @@ instance Default WriterEnv data WriterState = WriterState { stNotes :: Notes , stRefs :: Refs + , stKeys :: M.Map Key + (M.Map (Target, Attr) Int) + , stLastIdx :: Int , stIds :: Set.Set String , stNoteNum :: Int } @@ -98,12 +100,14 @@ data WriterState = WriterState { stNotes :: Notes instance Default WriterState where def = WriterState{ stNotes = [] , stRefs = [] + , stKeys = M.empty + , stLastIdx = 0 , stIds = Set.empty , stNoteNum = 1 } -- | Convert Pandoc to Markdown. -writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMarkdown opts document = evalMD (pandocToMarkdown opts{ writerWrapText = if isEnabled Ext_hard_line_breaks opts @@ -113,7 +117,7 @@ writeMarkdown opts document = -- | Convert Pandoc to plain text (like markdown, but without links, -- pictures, or inline formatting). -writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m String +writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m Text writePlain opts document = evalMD (pandocToMarkdown opts document) def{ envPlain = True } def @@ -177,15 +181,17 @@ jsonToYaml (Number n) = text $ show n jsonToYaml _ = empty -- | Return markdown representation of document. -pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m String +pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m Text pandocToMarkdown opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing isPlain <- asks envPlain + let render' :: Doc -> Text + render' = render colwidth . chomp metadata <- metaToJSON' - (fmap (render colwidth) . blockListToMarkdown opts) - (fmap (render colwidth) . blockToMarkdown opts . Plain) + (fmap render' . blockListToMarkdown opts) + (fmap render' . blockToMarkdown opts . Plain) meta let title' = maybe empty text $ getField "title" metadata let authors' = maybe [] (map text) $ getField "author" metadata @@ -213,8 +219,6 @@ pandocToMarkdown opts (Pandoc meta blocks) = do else blocks body <- blockListToMarkdown opts blocks' notesAndRefs' <- notesAndRefs opts - let render' :: Doc -> String - render' = render colwidth . chomp let main = render' $ body <> notesAndRefs' let context = defField "toc" (render' toc) $ defField "body" main @@ -241,7 +245,7 @@ keyToMarkdown opts (label', (src, tit), attr) = do else space <> "\"" <> text tit <> "\"" return $ nest 2 $ hang 2 ("[" <> label' <> "]:" <> space) (text src <> tit') - <> linkAttributes opts attr + <+> linkAttributes opts attr -- | Return markdown representation of notes. notesToMarkdown :: PandocMonad m => WriterOptions -> [[Block]] -> MD m Doc @@ -471,6 +475,8 @@ blockToMarkdown' opts (Header level attr inlines) = do space <> attrsToMarkdown attr | otherwise -> empty contents <- inlineListToMarkdown opts $ + -- ensure no newlines; see #3736 + walk lineBreakToSpace $ if level == 1 && plain then capitalize inlines else inlines @@ -568,7 +574,7 @@ blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do gridTable opts blockListToMarkdown (all null headers) aligns' widths' headers rows | isEnabled Ext_raw_html opts -> fmap (id,) $ - text <$> + (text . T.unpack) <$> (writeHtml5String def $ Pandoc nullMeta [t]) | otherwise -> return $ (id, text "[TABLE]") return $ nst $ tbl $$ caption'' $$ blankline @@ -788,7 +794,7 @@ blockListToMarkdown opts blocks = do isListBlock _ = False commentSep = if isEnabled Ext_raw_html opts then RawBlock "html" "<!-- -->\n" - else RawBlock "markdown" " " + else RawBlock "markdown" " \n" mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat getKey :: Doc -> Key @@ -798,20 +804,49 @@ getKey = toKey . render Nothing -- Prefer label if possible; otherwise, generate a unique key. getReference :: PandocMonad m => Attr -> Doc -> Target -> MD m Doc getReference attr label target = do - st <- get - let keys = map (\(l,_,_) -> getKey l) (stRefs st) - case find (\(_,t,a) -> t == target && a == attr) (stRefs st) of + refs <- gets stRefs + case find (\(_,t,a) -> t == target && a == attr) refs of Just (ref, _, _) -> return ref Nothing -> do - label' <- case getKey label `elem` keys of - True -> -- label is used; generate numerical label - case find (\n -> Key n `notElem` keys) $ - map show [1..(10000 :: Integer)] of - Just x -> return $ text x - Nothing -> throwError $ PandocSomeError "no unique label" - False -> return label - modify (\s -> s{ stRefs = (label', target, attr) : stRefs st }) - return label' + keys <- gets stKeys + case M.lookup (getKey label) keys of + Nothing -> do -- no other refs with this label + (lab', idx) <- if isEmpty label + then do + i <- (+ 1) <$> gets stLastIdx + modify $ \s -> s{ stLastIdx = i } + return (text (show i), i) + else return (label, 0) + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert (getKey label) + (M.insert (target, attr) idx mempty) + (stKeys s) }) + return lab' + + Just km -> do -- we have refs with this label + case M.lookup (target, attr) km of + Just i -> do + let lab' = label <> if i == 0 + then mempty + else text (show i) + -- make sure it's in stRefs; it may be + -- a duplicate that was printed in a previous + -- block: + when ((lab', target, attr) `notElem` refs) $ + modify (\s -> s{ + stRefs = (lab', target, attr) : refs }) + return lab' + Nothing -> do -- but this one is to a new target + i <- (+ 1) <$> gets stLastIdx + modify $ \s -> s{ stLastIdx = i } + let lab' = text (show i) + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert (getKey label) + (M.insert (target, attr) i km) + (stKeys s) }) + return lab' -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m Doc @@ -821,7 +856,8 @@ inlineListToMarkdown opts lst = do where go [] = return empty go (i:is) = case i of (Link _ _ _) -> case is of - -- If a link is followed by another link or '[' we don't shortcut + -- If a link is followed by another link, or '[', '(' or ':' + -- then we don't shortcut (Link _ _ _):_ -> unshortcutable Space:(Link _ _ _):_ -> unshortcutable Space:(Str('[':_)):_ -> unshortcutable @@ -831,9 +867,17 @@ inlineListToMarkdown opts lst = do SoftBreak:(Str('[':_)):_ -> unshortcutable SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable SoftBreak:(Cite _ _):_ -> unshortcutable + LineBreak:(Link _ _ _):_ -> unshortcutable + LineBreak:(Str('[':_)):_ -> unshortcutable + LineBreak:(RawInline _ ('[':_)):_ -> unshortcutable + LineBreak:(Cite _ _):_ -> unshortcutable (Cite _ _):_ -> unshortcutable Str ('[':_):_ -> unshortcutable + Str ('(':_):_ -> unshortcutable + Str (':':_):_ -> unshortcutable (RawInline _ ('[':_)):_ -> unshortcutable + (RawInline _ ('(':_)):_ -> unshortcutable + (RawInline _ (':':_)):_ -> unshortcutable (RawInline _ (' ':'[':_)):_ -> unshortcutable _ -> shortcutable _ -> shortcutable @@ -890,12 +934,14 @@ inlineToMarkdown opts (Span attrs ils) = do isEnabled Ext_native_spans opts -> tagWithAttrs "span" attrs <> contents <> text "</span>" | otherwise -> contents +inlineToMarkdown _ (Emph []) = return empty inlineToMarkdown opts (Emph lst) = do plain <- asks envPlain contents <- inlineListToMarkdown opts lst return $ if plain then "_" <> contents <> "_" else "*" <> contents <> "*" +inlineToMarkdown _ (Strong []) = return empty inlineToMarkdown opts (Strong lst) = do plain <- asks envPlain if plain @@ -903,6 +949,7 @@ inlineToMarkdown opts (Strong lst) = do else do contents <- inlineListToMarkdown opts lst return $ "**" <> contents <> "**" +inlineToMarkdown _ (Strikeout []) = return empty inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst return $ if isEnabled Ext_strikeout opts @@ -910,6 +957,7 @@ inlineToMarkdown opts (Strikeout lst) = do else if isEnabled Ext_raw_html opts then "<s>" <> contents <> "</s>" else contents +inlineToMarkdown _ (Superscript []) = return empty inlineToMarkdown opts (Superscript lst) = local (\env -> env {envEscapeSpaces = True}) $ do contents <- inlineListToMarkdown opts lst @@ -922,6 +970,7 @@ inlineToMarkdown opts (Superscript lst) = in case mapM toSuperscript rendered of Just r -> text r Nothing -> text $ "^(" ++ rendered ++ ")" +inlineToMarkdown _ (Subscript []) = return empty inlineToMarkdown opts (Subscript lst) = local (\env -> env {envEscapeSpaces = True}) $ do contents <- inlineListToMarkdown opts lst @@ -1064,7 +1113,8 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [lnk]]) + (text . T.unpack . T.strip) <$> + writeHtml5String def (Pandoc nullMeta [Plain [lnk]]) | otherwise = do plain <- asks envPlain linktext <- inlineListToMarkdown opts txt @@ -1103,7 +1153,8 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [img]]) + (text . T.unpack . T.strip) <$> + writeHtml5String def (Pandoc nullMeta [Plain [img]]) | otherwise = do plain <- asks envPlain let txt = if null alternate || alternate == [Str source] @@ -1154,3 +1205,8 @@ toSubscript c Just $ chr (0x2080 + (ord c - 48)) | isSpace c = Just c | otherwise = Nothing + +lineBreakToSpace :: Inline -> Inline +lineBreakToSpace LineBreak = Space +lineBreakToSpace SoftBreak = Space +lineBreakToSpace x = x diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index def245e38..c70e5b786 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane + Copyright : Copyright (C) 2008-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,7 +34,7 @@ import Control.Monad.Reader import Control.Monad.State import Data.List (intercalate) import qualified Data.Set as Set -import Network.URI (isURI) +import Data.Text (Text, pack) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition @@ -60,14 +60,14 @@ data WriterReader = WriterReader { type MediaWikiWriter m = ReaderT WriterReader (StateT WriterState m) -- | Convert Pandoc to MediaWiki. -writeMediaWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeMediaWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMediaWiki opts document = let initialState = WriterState { stNotes = False, stOptions = opts } env = WriterReader { options = opts, listLevel = [], useTags = False } in evalStateT (runReaderT (pandocToMediaWiki document) env) initialState -- | Return MediaWiki representation of document. -pandocToMediaWiki :: PandocMonad m => Pandoc -> MediaWikiWriter m String +pandocToMediaWiki :: PandocMonad m => Pandoc -> MediaWikiWriter m Text pandocToMediaWiki (Pandoc meta blocks) = do opts <- asks options metadata <- metaToJSON opts @@ -82,7 +82,8 @@ pandocToMediaWiki (Pandoc meta blocks) = do let main = body ++ notes let context = defField "body" main $ defField "toc" (writerTableOfContents opts) metadata - return $ case writerTemplate opts of + return $ pack + $ case writerTemplate opts of Nothing -> main Just tpl -> renderTemplate' tpl context diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 534f26a5a..c5c3d9f5b 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Ms - Copyright : Copyright (C) 2007-2015 John MacFarlane + Copyright : Copyright (C) 2007-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -44,6 +44,7 @@ import Text.Pandoc.Options import Text.Pandoc.Writers.Math import Text.Printf ( printf ) import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Map as Map import Data.Maybe ( catMaybes, fromMaybe ) import Data.List ( intersperse, intercalate, sort ) @@ -85,17 +86,18 @@ type Note = [Block] type MS = StateT WriterState -- | Convert Pandoc to Ms. -writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMs opts document = evalStateT (pandocToMs opts document) defaultWriterState -- | Return groff ms representation of document. -pandocToMs :: PandocMonad m => WriterOptions -> Pandoc -> MS m String +pandocToMs :: PandocMonad m => WriterOptions -> Pandoc -> MS m Text pandocToMs opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - let render' = render colwidth + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON opts (fmap render' . blockListToMs opts) (fmap render' . inlineListToMs' opts) @@ -108,9 +110,9 @@ pandocToMs opts (Pandoc meta blocks) = do hasHighlighting <- gets stHighlighting let highlightingMacros = if hasHighlighting then case writerHighlightStyle opts of - Nothing -> "" + Nothing -> mempty Just sty -> render' $ styleToMs sty - else "" + else mempty let context = defField "body" main $ defField "has-inline-math" hasInlineMath diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index 8f6493975..85e0b5467 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -43,6 +43,7 @@ even though it is supported only in Emacs Muse. -} module Text.Pandoc.Writers.Muse (writeMuse) where import Control.Monad.State +import Data.Text (Text) import Data.List (intersperse, transpose, isInfixOf) import System.FilePath (takeExtension) import Text.Pandoc.Class (PandocMonad) @@ -53,6 +54,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared +import qualified Data.Set as Set type Notes = [[Block]] data WriterState = @@ -60,33 +62,37 @@ data WriterState = , stOptions :: WriterOptions , stTopLevel :: Bool , stInsideBlock :: Bool + , stIds :: Set.Set String } -- | Convert Pandoc to Muse. writeMuse :: PandocMonad m => WriterOptions -> Pandoc - -> m String + -> m Text writeMuse opts document = let st = WriterState { stNotes = [] , stOptions = opts , stTopLevel = True , stInsideBlock = False + , stIds = Set.empty } in evalStateT (pandocToMuse document) st -- | Return Muse representation of document. pandocToMuse :: PandocMonad m => Pandoc - -> StateT WriterState m String + -> StateT WriterState m Text pandocToMuse (Pandoc meta blocks) = do opts <- gets stOptions let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text + render' = render Nothing metadata <- metaToJSON opts - (fmap (render Nothing) . blockListToMuse) - (fmap (render Nothing) . inlineListToMuse) + (fmap render' . blockListToMuse) + (fmap render' . inlineListToMuse) meta body <- blockListToMuse blocks notes <- liftM (reverse . stNotes) get >>= notesToMuse @@ -184,8 +190,14 @@ blockToMuse (DefinitionList items) = do let ind = offset label'' return $ hang ind label'' contents blockToMuse (Header level (ident,_,_) inlines) = do + opts <- gets stOptions contents <- inlineListToMuse inlines - let attr' = if null ident + + ids <- gets stIds + let autoId = uniqueIdent inlines ids + modify $ \st -> st{ stIds = Set.insert autoId ids } + + let attr' = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) then empty else "#" <> text ident <> cr let header' = text $ replicate level '*' @@ -207,7 +219,7 @@ blockToMuse (Table caption _ _ headers rows) = do let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks where h = maximum (1 : map height blocks) sep' = lblock (length sep) $ vcat (map text $ replicate h sep) - let makeRow sep = hpipeBlocks sep . zipWith lblock widthsInChars + let makeRow sep = (" " <>) . (hpipeBlocks sep . zipWith lblock widthsInChars) let head' = makeRow " || " headers' let rowSeparator = if noHeaders then " | " else " | " rows'' <- mapM (\row -> do cols <- mapM blockListToMuse row @@ -215,7 +227,7 @@ blockToMuse (Table caption _ _ headers rows) = do let body = vcat rows'' return $ (if noHeaders then empty else head') $$ body - $$ (if null caption then empty else "|+ " <> caption' <> " +|") + $$ (if null caption then empty else " |+ " <> caption' <> " +|") $$ blankline blockToMuse (Div _ bs) = blockListToMuse bs blockToMuse Null = return empty diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index b031a0231..3ef33f05c 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Native - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,6 +30,7 @@ Conversion of a 'Pandoc' document to a string representation. -} module Text.Pandoc.Writers.Native ( writeNative ) where +import Data.Text (Text) import Data.List (intersperse) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition @@ -67,7 +68,7 @@ prettyBlock (Div attr blocks) = prettyBlock block = text $ show block -- | Prettyprint Pandoc document. -writeNative :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeNative :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeNative opts (Pandoc meta blocks) = return $ let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 395ef0a96..1da051380 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ODT - Copyright : Copyright (C) 2008-2015 John MacFarlane + Copyright : Copyright (C) 2008-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -35,18 +35,18 @@ import Control.Monad.State import qualified Data.ByteString.Lazy as B import Data.List (isPrefixOf) import Data.Maybe (fromMaybe) +import qualified Data.Text.Lazy as TL import System.FilePath (takeDirectory, takeExtension, (<.>)) import Text.Pandoc.Class (PandocMonad, report) import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition -import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.Pandoc.Pretty import Text.Pandoc.Shared (stringify) -import Text.Pandoc.UTF8 (fromStringLazy) +import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) import Text.Pandoc.Writers.Shared (fixDisplayMath) @@ -89,7 +89,7 @@ pandocToODT opts doc@(Pandoc meta _) = do newContents <- lift $ writeOpenDocument opts{writerWrapText = WrapNone} doc' epochtime <- floor `fmap` (lift P.getPOSIXTime) let contentEntry = toEntry "content.xml" epochtime - $ fromStringLazy newContents + $ fromTextLazy $ TL.fromStrict newContents picEntries <- gets stEntries let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries @@ -178,10 +178,7 @@ transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError modify $ \st -> st{ stEntries = entry : entries } return $ Image newattr lab (newsrc, t)) (\e -> do - case e of - PandocIOError _ e' -> - report $ CouldNotFetchResource src (show e') - e' -> report $ CouldNotFetchResource src (show e') + report $ CouldNotFetchResource src (show e) return $ Emph lab) transformPicMath _ (Math t math) = do diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 98510c40f..4a0a317fa 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -1,6 +1,6 @@ {-# LANGUAGE CPP #-} {- -Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OPML - Copyright : Copyright (C) 2013-2015 John MacFarlane + Copyright : Copyright (C) 2013-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,6 +30,8 @@ Conversion of 'Pandoc' documents to OPML XML. -} module Text.Pandoc.Writers.OPML ( writeOPML) where import Control.Monad.Except (throwError) +import Data.Text (Text, unpack) +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Compat.Time @@ -45,7 +47,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.XML -- | Convert Pandoc document to string in OPML format. -writeOPML :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeOPML :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeOPML opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks colwidth = if writerWrapText opts == WrapAuto @@ -54,7 +56,7 @@ writeOPML opts (Pandoc meta blocks) = do meta' = B.setMeta "date" (B.str $ convertDate $ docDate meta) meta metadata <- metaToJSON opts (writeMarkdown def . Pandoc nullMeta) - (\ils -> trimr <$> (writeMarkdown def $ Pandoc nullMeta [Plain ils])) + (\ils -> T.stripEnd <$> (writeMarkdown def $ Pandoc nullMeta [Plain ils])) meta' main <- (render colwidth . vcat) <$> (mapM (elementToOPML opts) elements) let context = defField "body" main metadata @@ -63,9 +65,9 @@ writeOPML opts (Pandoc meta blocks) = do Just tpl -> renderTemplate' tpl context -writeHtmlInlines :: PandocMonad m => [Inline] -> m String +writeHtmlInlines :: PandocMonad m => [Inline] -> m Text writeHtmlInlines ils = - trim <$> (writeHtml5String def $ Pandoc nullMeta [Plain ils]) + T.strip <$> (writeHtml5String def $ Pandoc nullMeta [Plain ils]) -- date format: RFC 822: Thu, 14 Jul 2005 23:41:05 GMT showDateTimeRFC822 :: UTCTime -> String @@ -95,9 +97,10 @@ elementToOPML opts (Sec _ _num _ title elements) = do (blocks, rest) = span isBlk elements htmlIls <- writeHtmlInlines title md <- if null blocks - then return [] + then return mempty else do blks <- mapM fromBlk blocks writeMarkdown def $ Pandoc nullMeta blks - let attrs = [("text", htmlIls)] ++ [("_note", md) | not (null blocks)] + let attrs = [("text", unpack htmlIls)] ++ + [("_note", unpack md) | not (null blocks)] o <- mapM (elementToOPML opts) rest return $ inTags True "outline" attrs $ vcat o diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 491069343..58295684e 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2008-2015 Andrea Rossato <andrea.rossato@ing.unitn.it> +Copyright (C) 2008-2017 Andrea Rossato <andrea.rossato@ing.unitn.it> and John MacFarlane. This program is free software; you can redistribute it and/or modify @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OpenDocument - Copyright : Copyright (C) 2008-2015 Andrea Rossato and John MacFarlane + Copyright : Copyright (C) 2008-2017 Andrea Rossato and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it> @@ -36,6 +36,7 @@ import Control.Arrow ((***), (>>>)) import Control.Monad.State hiding (when) import Data.Char (chr) import Data.List (sortBy) +import Data.Text (Text) import qualified Data.Map as Map import Data.Ord (comparing) import qualified Data.Set as Set @@ -195,17 +196,18 @@ handleSpaces s rm [] = empty -- | Convert Pandoc document to string in OpenDocument format. -writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeOpenDocument opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - let render' = render colwidth + let render' :: Doc -> Text + render' = render colwidth ((body, metadata),s) <- flip runStateT defaultWriterState $ do m <- metaToJSON opts - (fmap (render colwidth) . blocksToOpenDocument opts) - (fmap (render colwidth) . inlinesToOpenDocument opts) + (fmap render' . blocksToOpenDocument opts) + (fmap render' . inlinesToOpenDocument opts) meta b <- render' `fmap` blocksToOpenDocument opts blocks return (b, m) diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index fc6608450..e8f48da00 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -1,8 +1,8 @@ {-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2010-2015 Puneeth Chaganti <punchagan@gmail.com> - Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>, - and John MacFarlane <jgm@berkeley.edu> + 2010-2017 John MacFarlane <jgm@berkeley.edu> + 2016-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,10 +21,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Org - Copyright : Copyright (C) 2010-2015 Puneeth Chaganti and John MacFarlane + Copyright : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com> + 2010-2017 John MacFarlane <jgm@berkeley.edu> + 2016-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> License : GNU GPL, version 2 or above - Maintainer : Puneeth Chaganti <punchagan@gmail.com> + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Stability : alpha Portability : portable @@ -32,9 +34,10 @@ Conversion of 'Pandoc' documents to Emacs Org-Mode. Org-Mode: <http://orgmode.org> -} -module Text.Pandoc.Writers.Org ( writeOrg) where +module Text.Pandoc.Writers.Org (writeOrg) where import Control.Monad.State import Data.Char (isAlphaNum, toLower) +import Data.Text (Text) import Data.List (intersect, intersperse, isPrefixOf, partition, transpose) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition @@ -54,7 +57,7 @@ data WriterState = type Org = StateT WriterState -- | Convert Pandoc to Org. -writeOrg :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeOrg :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeOrg opts document = do let st = WriterState { stNotes = [], stHasMath = False, @@ -62,22 +65,24 @@ writeOrg opts document = do evalStateT (pandocToOrg document) st -- | Return Org representation of document. -pandocToOrg :: PandocMonad m => Pandoc -> Org m String +pandocToOrg :: PandocMonad m => Pandoc -> Org m Text pandocToOrg (Pandoc meta blocks) = do opts <- gets stOptions let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToOrg) - (fmap (render colwidth) . inlineListToOrg) + (fmap render' . blockListToOrg) + (fmap render' . inlineListToOrg) meta body <- blockListToOrg blocks notes <- gets (reverse . stNotes) >>= notesToOrg hasMath <- gets stHasMath - let main = render colwidth $ foldl ($+$) empty $ [body, notes] + let main = render colwidth . foldl ($+$) empty $ [body, notes] let context = defField "body" main - $ defField "math" hasMath + . defField "math" hasMath $ metadata case writerTemplate opts of Nothing -> return main @@ -86,8 +91,7 @@ pandocToOrg (Pandoc meta blocks) = do -- | Return Org representation of notes. notesToOrg :: PandocMonad m => [[Block]] -> Org m Doc notesToOrg notes = - mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>= - return . vsep + vsep <$> zipWithM noteToOrg [1..] notes -- | Return Org representation of a note. noteToOrg :: PandocMonad m => Int -> [Block] -> Org m Doc @@ -219,16 +223,16 @@ blockToOrg (Table caption' _ _ headers rows) = do -- FIXME: Org doesn't allow blocks with height more than 1. let hpipeBlocks blocks = hcat [beg, middle, end] where h = maximum (1 : map height blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") + sep' = lblock 3 $ vcat (replicate h (text " | ")) + beg = lblock 2 $ vcat (replicate h (text "| ")) + end = lblock 2 $ vcat (replicate h (text " |")) middle = hcat $ intersperse sep' blocks let makeRow = hpipeBlocks . zipWith lblock widthsInChars let head' = makeRow headers' rows' <- mapM (\row -> do cols <- mapM blockListToOrg row return $ makeRow cols) rows let border ch = char '|' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ + (hcat . intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> char ch <> char '|' let body = vcat rows' @@ -249,8 +253,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToOrg item num) $ - zip markers' items + contents <- zipWithM orderedListItemToOrg markers' items -- ensure that sublists have preceding blank line return $ blankline $$ vcat contents $$ blankline blockToOrg (DefinitionList items) = do @@ -277,8 +280,8 @@ definitionListItemToOrg :: PandocMonad m => ([Inline], [[Block]]) -> Org m Doc definitionListItemToOrg (label, defs) = do label' <- inlineListToOrg label - contents <- liftM vcat $ mapM blockListToOrg defs - return $ hang 2 "- " $ label' <> " :: " <> (contents <> cr) + contents <- vcat <$> mapM blockListToOrg defs + return . hang 2 "- " $ label' <> " :: " <> (contents <> cr) -- | Convert list of key/value pairs to Org :PROPERTIES: drawer. propertiesDrawer :: Attr -> Doc @@ -310,13 +313,13 @@ attrHtml (ident, classes, kvs) = blockListToOrg :: PandocMonad m => [Block] -- ^ List of block elements -> Org m Doc -blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat +blockListToOrg blocks = vcat <$> mapM blockToOrg blocks -- | Convert list of Pandoc inline elements to Org. inlineListToOrg :: PandocMonad m => [Inline] -> Org m Doc -inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat +inlineListToOrg lst = hcat <$> mapM inlineToOrg lst -- | Convert Pandoc inline element to Org. inlineToOrg :: PandocMonad m => Inline -> Org m Doc @@ -348,7 +351,7 @@ inlineToOrg (Quoted DoubleQuote lst) = do return $ "\"" <> contents <> "\"" inlineToOrg (Cite _ lst) = inlineListToOrg lst inlineToOrg (Code _ str) = return $ "=" <> text str <> "=" -inlineToOrg (Str str) = return $ text $ escapeString str +inlineToOrg (Str str) = return . text $ escapeString str inlineToOrg (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 24898d62e..59f6553e2 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -35,7 +35,7 @@ import Control.Monad.State import Data.Char (isSpace, toLower) import Data.List (isPrefixOf, stripPrefix) import Data.Maybe (fromMaybe) -import Network.URI (isURI) +import Data.Text (Text, stripEnd) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging @@ -57,33 +57,36 @@ data WriterState = , stHasRawTeX :: Bool , stOptions :: WriterOptions , stTopLevel :: Bool + , stLastNested :: Bool } type RST = StateT WriterState -- | Convert Pandoc to RST. -writeRST :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeRST :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeRST opts document = do let st = WriterState { stNotes = [], stLinks = [], stImages = [], stHasMath = False, stHasRawTeX = False, stOptions = opts, - stTopLevel = True} + stTopLevel = True, stLastNested = False} evalStateT (pandocToRST document) st -- | Return RST representation of document. -pandocToRST :: PandocMonad m => Pandoc -> RST m String +pandocToRST :: PandocMonad m => Pandoc -> RST m Text pandocToRST (Pandoc meta blocks) = do opts <- gets stOptions let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text + render' = render colwidth let subtit = case lookupMeta "subtitle" meta of Just (MetaBlocks [Plain xs]) -> xs _ -> [] title <- titleToRST (docTitle meta) subtit metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToRST) - (fmap (trimr . render colwidth) . inlineListToRST) + (fmap render' . blockListToRST) + (fmap (stripEnd . render') . inlineListToRST) $ B.deleteMeta "title" $ B.deleteMeta "subtitle" meta body <- blockListToRST' True $ case writerTemplate opts of Just _ -> normalizeHeadings 1 blocks @@ -94,7 +97,7 @@ pandocToRST (Pandoc meta blocks) = do pics <- gets (reverse . stImages) >>= pictRefsToRST hasMath <- gets stHasMath rawTeX <- gets stHasRawTeX - let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics] + let main = render' $ foldl ($+$) empty $ [body, notes, refs, pics] let context = defField "body" main $ defField "toc" (writerTableOfContents opts) $ defField "toc-depth" (show $ writerTOCDepth opts) @@ -343,11 +346,32 @@ blockListToRST' :: PandocMonad m -> RST m Doc blockListToRST' topLevel blocks = do tl <- gets stTopLevel - modify (\s->s{stTopLevel=topLevel}) - res <- vcat `fmap` mapM blockToRST blocks + modify (\s->s{stTopLevel=topLevel, stLastNested=False}) + res <- vcat `fmap` mapM blockToRST' blocks modify (\s->s{stTopLevel=tl}) return res +blockToRST' :: PandocMonad m => Block -> RST m Doc +blockToRST' (x@BlockQuote{}) = do + lastNested <- gets stLastNested + res <- blockToRST x + modify (\s -> s{stLastNested = True}) + return $ if lastNested + then ".." $+$ res + else res +blockToRST' x = do + modify (\s -> s{stLastNested = + case x of + Para [Image _ _ (_,'f':'i':'g':':':_)] -> True + Para{} -> False + Plain{} -> False + Header{} -> False + LineBlock{} -> False + HorizontalRule -> False + _ -> True + }) + blockToRST x + blockListToRST :: PandocMonad m => [Block] -- ^ List of block elements -> RST m Doc diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 67f0fc2e0..5c990f324 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,6 +34,8 @@ import Control.Monad.Except (catchError, throwError) import qualified Data.ByteString as B import Data.Char (chr, isDigit, ord) import Data.List (intercalate, isSuffixOf) +import Data.Text (Text) +import qualified Data.Text as T import qualified Data.Map as M import Text.Pandoc.Class (PandocMonad, report) import qualified Text.Pandoc.Class as P @@ -92,15 +94,12 @@ rtfEmbedImage opts x@(Image attr _ (src,_)) = catchError report $ CouldNotDetermineMimeType src return x) (\e -> do - case e of - PandocIOError _ e' -> - report $ CouldNotFetchResource src (show e') - e' -> report $ CouldNotFetchResource src (show e') + report $ CouldNotFetchResource src (show e) return x) rtfEmbedImage _ x = return x -- | Convert Pandoc to a string in rich text format. -writeRTF :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeRTF :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeRTF options doc = do -- handle images Pandoc meta@(Meta metamap) blocks <- walkM (rtfEmbedImage options) doc @@ -126,7 +125,8 @@ writeRTF options doc = do then defField "toc" toc else id) $ metadata - return $ case writerTemplate options of + return $ T.pack + $ case writerTemplate options of Just tpl -> renderTemplate' tpl context Nothing -> case reverse body of ('\n':_) -> body diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 615733a78..2047285eb 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Shared - Copyright : Copyright (C) 2013-2015 John MacFarlane + Copyright : Copyright (C) 2013-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -62,10 +62,10 @@ import Text.Pandoc.XML (escapeStringForXML) -- Variables overwrite metadata fields with the same names. -- If multiple variables are set with the same name, a list is -- assigned. Does nothing if 'writerTemplate' is Nothing. -metaToJSON :: (Functor m, Monad m) +metaToJSON :: (Functor m, Monad m, ToJSON a) => WriterOptions - -> ([Block] -> m String) - -> ([Inline] -> m String) + -> ([Block] -> m a) + -> ([Inline] -> m a) -> Meta -> m Value metaToJSON opts blockWriter inlineWriter meta @@ -75,9 +75,9 @@ metaToJSON opts blockWriter inlineWriter meta -- | Like 'metaToJSON', but does not include variables and is -- not sensitive to 'writerTemplate'. -metaToJSON' :: Monad m - => ([Block] -> m String) - -> ([Inline] -> m String) +metaToJSON' :: (Monad m, ToJSON a) + => ([Block] -> m a) + -> ([Inline] -> m a) -> Meta -> m Value metaToJSON' blockWriter inlineWriter (Meta metamap) = do @@ -98,9 +98,9 @@ addVariablesToJSON opts metadata = where combineMetadata (Object o1) (Object o2) = Object $ H.union o1 o2 combineMetadata x _ = x -metaValueToJSON :: Monad m - => ([Block] -> m String) - -> ([Inline] -> m String) +metaValueToJSON :: (Monad m, ToJSON a) + => ([Block] -> m a) + -> ([Inline] -> m a) -> MetaValue -> m Value metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = liftM toJSON $ diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index 0e1a0526d..27d26c7d9 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,6 +31,7 @@ Conversion of 'Pandoc' documents to Docbook XML. -} module Text.Pandoc.Writers.TEI (writeTEI) where import Data.Char (toLower) +import Data.Text (Text) import Data.List (isPrefixOf, stripPrefix) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad, report) @@ -56,12 +57,13 @@ authorToTEI opts name' = do inTagsSimple "author" (text $ escapeStringForXML name) -- | Convert Pandoc document to string in Docbook format. -writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeTEI opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + render' :: Doc -> Text render' = render colwidth startLvl = case writerTopLevelDivision opts of TopLevelPart -> -1 @@ -71,9 +73,9 @@ writeTEI opts (Pandoc meta blocks) = do auths' <- mapM (authorToTEI opts) $ docAuthors meta let meta' = B.setMeta "author" auths' meta metadata <- metaToJSON opts - (fmap (render colwidth . vcat) . - (mapM (elementToTEI opts startLvl)) . hierarchicalize) - (fmap (render colwidth) . inlinesToTEI opts) + (fmap (render' . vcat) . + mapM (elementToTEI opts startLvl) . hierarchicalize) + (fmap render' . inlinesToTEI opts) meta' main <- (render' . vcat) <$> mapM (elementToTEI opts startLvl) elements let context = defField "body" main diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index da4f43ee5..387e55290 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2008-2015 John MacFarlane and Peter Wang +Copyright (C) 2008-2017 John MacFarlane + 2012 Peter Wang This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008-2015 John MacFarlane and Peter Wang + Copyright : Copyright (C) 2008-2017 John MacFarlane + 2012 Peter Wang License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -35,7 +37,8 @@ import Data.Char (chr, ord) import Data.List (maximumBy, transpose) import Data.Ord (comparing) import qualified Data.Set as Set -import Network.URI (isURI, unEscapeString) +import Data.Text (Text) +import Network.URI (unEscapeString) import System.FilePath import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition @@ -66,7 +69,7 @@ data WriterState = type TI m = StateT WriterState m -- | Convert Pandoc to Texinfo. -writeTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeTexinfo options document = evalStateT (pandocToTexinfo options $ wrapTop document) $ WriterState { stStrikeout = False, stSuperscript = False, @@ -78,16 +81,18 @@ wrapTop :: Pandoc -> Pandoc wrapTop (Pandoc meta blocks) = Pandoc meta (Header 0 nullAttr (docTitle meta) : blocks) -pandocToTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> TI m String +pandocToTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> TI m Text pandocToTexinfo options (Pandoc meta blocks) = do let titlePage = not $ all null $ docTitle meta : docDate meta : docAuthors meta let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON options - (fmap (render colwidth) . blockListToTexinfo) - (fmap (render colwidth) . inlineListToTexinfo) + (fmap render' . blockListToTexinfo) + (fmap render' . inlineListToTexinfo) meta main <- blockListToTexinfo blocks st <- get diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 0ecb746c3..091a5baca 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Textile - Copyright : Copyright (C) 2010-2015 John MacFarlane + Copyright : Copyright (C) 2010-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -33,6 +33,7 @@ module Text.Pandoc.Writers.Textile ( writeTextile ) where import Control.Monad.State import Data.Char (isSpace) import Data.List (intercalate) +import Data.Text (Text, pack) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition @@ -54,7 +55,7 @@ data WriterState = WriterState { type TW = StateT WriterState -- | Convert Pandoc to Textile. -writeTextile :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeTextile :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeTextile opts document = evalStateT (pandocToTextile opts document) WriterState { stNotes = [], @@ -64,17 +65,17 @@ writeTextile opts document = -- | Return Textile representation of document. pandocToTextile :: PandocMonad m - => WriterOptions -> Pandoc -> TW m String + => WriterOptions -> Pandoc -> TW m Text pandocToTextile opts (Pandoc meta blocks) = do metadata <- metaToJSON opts (blockListToTextile opts) (inlineListToTextile opts) meta body <- blockListToTextile opts blocks notes <- gets $ unlines . reverse . stNotes - let main = body ++ if null notes then "" else ("\n\n" ++ notes) + let main = pack $ body ++ if null notes then "" else ("\n\n" ++ notes) let context = defField "body" main metadata case writerTemplate opts of - Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Nothing -> return main + Just tpl -> return $ renderTemplate' tpl context withUseTags :: PandocMonad m => TW m a -> TW m a withUseTags action = do diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index da8b08de1..5ee239e59 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -1,5 +1,6 @@ {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2017 John MacFarlane <jgm@berkeley.edu> + 2017 Alex Ivkin This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ZimWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane, 2017 Alex Ivkin + Copyright : Copyright (C) 2008-2017 John MacFarlane, 2017 Alex Ivkin License : GNU GPL, version 2 or above Maintainer : Alex Ivkin <alex@ivkin.net> @@ -36,15 +37,14 @@ import Control.Monad.State (StateT, evalStateT, gets, modify) import Data.Default (Default (..)) import Data.List (intercalate, isInfixOf, isPrefixOf, transpose) import qualified Data.Map as Map -import Data.Text (breakOnAll, pack) -import Network.URI (isURI) +import Data.Text (breakOnAll, pack, Text) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition import Text.Pandoc.ImageSize import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) -import Text.Pandoc.Shared (escapeURI, linesToPara, removeFormatting, substitute, - trimr) +import Text.Pandoc.Shared (isURI, escapeURI, linesToPara, removeFormatting, + substitute, trimr) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Shared (defField, metaToJSON) @@ -61,17 +61,17 @@ instance Default WriterState where type ZW = StateT WriterState -- | Convert Pandoc to ZimWiki. -writeZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeZimWiki opts document = evalStateT (pandocToZimWiki opts document) def -- | Return ZimWiki representation of document. -pandocToZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> ZW m String +pandocToZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> ZW m Text pandocToZimWiki opts (Pandoc meta blocks) = do metadata <- metaToJSON opts (fmap trimr . blockListToZimWiki opts) (inlineListToZimWiki opts) meta - body <- blockListToZimWiki opts blocks + body <- pack <$> blockListToZimWiki opts blocks --let header = "Content-Type: text/x-zim-wiki\nWiki-Format: zim 0.4\n" let main = body let context = defField "body" main diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index d7fdc4278..67608fb43 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.XML - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -37,6 +37,8 @@ module Text.Pandoc.XML ( escapeCharForXML, fromEntities ) where import Data.Char (isAscii, isSpace, ord) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Pretty @@ -91,11 +93,10 @@ inTagsIndented :: String -> Doc -> Doc inTagsIndented tagType = inTags True tagType [] -- | Escape all non-ascii characters using numerical entities. -toEntities :: String -> String -toEntities [] = "" -toEntities (c:cs) - | isAscii c = c : toEntities cs - | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs +toEntities :: Text -> Text +toEntities = T.concatMap go + where go c | isAscii c = T.singleton c + | otherwise = T.pack ("&#" ++ show (ord c) ++ ";") -- Unescapes XML entities fromEntities :: String -> String |